// Java tutorial
/* ==================================================================== Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ package org.apache.poi.ss.examples; import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.FilenameFilter; import java.io.IOException; import java.net.URLClassLoader; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.ss.usermodel.FormulaEvaluator; import org.apache.poi.ss.usermodel.Row; import 
org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; /** * Demonstrates <em>one</em> way to convert an Excel spreadsheet into a CSV * file. This class makes the following assumptions; * <list> * <li>1. Where the Excel workbook contains more that one worksheet, then a single * CSV file will contain the data from all of the worksheets.</li> * <li>2. The data matrix contained in the CSV file will be square. This means that * the number of fields in each record of the CSV file will match the number * of cells in the longest row found in the Excel workbook. Any short records * will be 'padded' with empty fields - an empty field is represented in the * the CSV file in this way - ,,.</li> * <li>3. Empty fields will represent missing cells.</li> * <li>4. A record consisting of empty fields will be used to represent an empty row * in the Excel workbook.</li> * </list> * Therefore, if the worksheet looked like this; * * <pre> * ___________________________________________ * | | | | | | * | A | B | C | D | E | * ___|_______|_______|_______|_______|_______| * | | | | | | * 1 | 1 | 2 | 3 | 4 | 5 | * ___|_______|_______|_______|_______|_______| * | | | | | | * 2 | | | | | | * ___|_______|_______|_______|_______|_______| * | | | | | | * 3 | | A | | B | | * ___|_______|_______|_______|_______|_______| * | | | | | | * 4 | | | | | Z | * ___|_______|_______|_______|_______|_______| * | | | | | | * 5 | 1,400 | | 250 | | | * ___|_______|_______|_______|_______|_______| * * </pre> * * Then, the resulting CSV file will contain the following lines (records); * <pre> * 1,2,3,4,5 * ,,,, * ,A,,B, * ,,,,Z * "1,400",,250,, * </pre><p> * Typically, the comma is used to separate each of the fields that, together, * constitute a single record or line within the CSV file. 
This is not however * a hard and fast rule and so this class allows the user to determine which * character is used as the field separator and assumes the comma if none other * is specified. * </p><p> * If a field contains the separator then it will be escaped. If the file should * obey Excel's CSV formatting rules, then the field will be surrounded with * speech marks whilst if it should obey UNIX conventions, each occurrence of * the separator will be preceded by the backslash character. * </p><p> * If a field contains an end of line (EOL) character then it too will be * escaped. If the file should obey Excel's CSV formatting rules then the field * will again be surrounded by speech marks. On the other hand, if the file * should follow UNIX conventions then a single backslash will precede the * EOL character. There is no single applicable standard for UNIX and some * appications replace the CR with \r and the LF with \n but this class will * not do so. * </p><p> * If the field contains double quotes then that character will be escaped. It * seems as though UNIX does not define a standard for this whilst Excel does. * Should the CSV file have to obey Excel's formmating rules then the speech * mark character will be escaped with a second set of speech marks. Finally, an * enclosing set of speah marks will also surround the entire field. Thus, if * the following line of text appeared in a cell - "Hello" he said - it would * look like this when converted into a field within a CSV file - """Hello"" he * said". * </p><p> * Finally, it is worth noting that talk of CSV 'standards' is really slightly * missleading as there is no such thing. It may well be that the code in this * class has to be modified to produce files to suit a specific application * or requirement. * </p> * @author Mark B * @version 1.00 9th April 2010 * 1.10 13th April 2010 - Added support for processing all Excel * workbooks in a folder along with the ability * to specify a field separator character. 
* 2.00 14th April 2010 - Added support for embedded characters; the * field separator, EOL and double quotes or * speech marks. In addition, gave the client * the ability to select how these are handled, * either obeying Excel's or UNIX formatting * conventions. */ public class ToCSV { private Workbook workbook = null; private int sheetIndex = 0; private List<SheetInfo> csvData = null; private int formattingConvention = 0; private DataFormatter formatter = null; private FormulaEvaluator evaluator = null; private String separator = null; private int rowLimit = Integer.MAX_VALUE; private String strSource; private String strDestination; private static final String CSV_FILE_EXTENSION = ".csv"; private static final String DEFAULT_SEPARATOR = ","; /** * Identifies that the CSV file should obey Excel's formatting conventions * with regard to escaping certain embedded characters - the field separator, * speech mark and end of line (EOL) character */ public static final int EXCEL_STYLE_ESCAPING = 0; /** * Identifies that the CSV file should obey UNIX formatting conventions * with regard to escaping certain embedded characters - the field separator * and end of line (EOL) character */ public static final int UNIX_STYLE_ESCAPING = 1; public ToCSV(Params params) { this.rowLimit = params.rowLimit; this.strSource = params.filename; this.strDestination = params.destination; this.separator = params.separator; this.formattingConvention = params.formattingConvention; } /** * Process the contents of a folder, convert the contents of each Excel * workbook into CSV format and save the resulting file to the specified * folder using the same name as the original workbook with the .xls or * .xlsx extension replaced by .csv * * @param strSource An instance of the String class that encapsulates the * name of and path to either a folder containing those Excel * workbook(s) or the name of and path to an individual Excel workbook * that is/are to be converted. 
* @param strDestination An instance of the String class encapsulating the name * of and path to a folder that will contain the resulting CSV files. * @param formattingConvention A primitive int whose value will determine * whether certain embedded characters should be escaped in accordance * with Excel's or UNIX formatting conventions. Two constants are * defined to support this option; ToCSV.EXCEL_STYLE_ESCAPING and * ToCSV.UNIX_STYLE_ESCAPING * @param separator An instance of the String class encapsulating the * characters or characters that should be used to separate items * on a line within the CSV file. * @throws java.io.FileNotFoundException Thrown if any file cannot be located * on the filesystem during processing. * @throws java.io.IOException Thrown if the filesystem encounters any * problems during processing. * @throws java.lang.IllegalArgumentException Thrown if the values passed * to the strSource parameter refers to a file or folder that does not * exist, if the value passed to the strDestination paramater refers * to a folder that does not exist, if the value passed to the * strDestination parameter does not refer to a folder or if the * value passed to the formattingConvention parameter is other than * one of the values defined by the constants ToCSV.EXCEL_STYLE_ESCAPING * and ToCSV.UNIX_STYLE_ESCAPING. * @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown * if the xml markup encounetered whilst parsing a SpreadsheetML * file (.xlsx) is invalid. * @throws JSONException */ public void process() throws FileNotFoundException, IOException, IllegalArgumentException, InvalidFormatException, JSONException { File source = new File(strSource); File[] filesList = null; String destinationFilename = null; // Ensure the value passed to the formattingConvention parameter is // within range. 
if (formattingConvention != ToCSV.EXCEL_STYLE_ESCAPING && formattingConvention != ToCSV.UNIX_STYLE_ESCAPING) { throw new IllegalArgumentException( "The value passed to the " + "formattingConvention parameter is out of range."); } // Check to see if the sourceFolder variable holds a reference to // a file or a folder full of files. if (source.isDirectory()) { // Get a list of all of the Excel spreadsheet files (workbooks) in // the source folder/directory filesList = source.listFiles(new ExcelFilenameFilter()); } else { // Assume that it must be a file handle - although there are other // options the code should perhaps check - and store the reference // into the filesList variable. filesList = new File[] { source }; } // Step through each of the files in the source folder and for each // open the workbook, convert it's contents to CSV format and then // save the resulting file away into the folder specified by the // contents of the destination variable. Note that the name of the // csv file will be created by taking the name of the Excel file, // removing the extension and replacing it with .csv. Note that there // is one drawback with this approach; if the folder holding the files // contains two workbooks whose names match but one is a binary file // (.xls) and the other a SpreadsheetML file (.xlsx), then the names // for both CSV files will be identical and one CSV file will, // therefore, over-write the other. for (File excelFile : filesList) { // Open the workbook this.openWorkbook(excelFile); long end = System.currentTimeMillis(); System.out.println(end - start); start = end; // Convert it's contents into a CSV file this.convertToCSV(); removeLastEmptySheets(); end = System.currentTimeMillis(); System.out.println(end - start); start = end; // Build the name of the csv folder from that of the Excel workbook. 
// Simply replace the .xls or .xlsx file extension with .csv destinationFilename = excelFile.getName(); destinationFilename = destinationFilename.substring(0, destinationFilename.lastIndexOf(".")) + ToCSV.CSV_FILE_EXTENSION; // Save the CSV file away using the newly constructed file name // and to the specified directory. this.saveCSVFile(); } } private void removeLastEmptySheets() { for (int i = csvData.size() - 1; i >= 0; i--) { if (csvData.get(i).getRowCount() == 0) { csvData.remove(i); } else { break; } } } /** * Open an Excel workbook ready for conversion. * * @param file An instance of the File class that encapsulates a handle * to a valid Excel workbook. Note that the workbook can be in * either binary (.xls) or SpreadsheetML (.xlsx) format. * @throws java.io.FileNotFoundException Thrown if the file cannot be located. * @throws java.io.IOException Thrown if a problem occurs in the file system. * @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown * if invalid xml is found whilst parsing an input SpreadsheetML * file. */ private void openWorkbook(File file) throws FileNotFoundException, IOException, InvalidFormatException { FileInputStream fis = null; try { System.out.println("Opening workbook [" + file.getName() + "]"); fis = new FileInputStream(file); // Open the workbook and then create the FormulaEvaluator and // DataFormatter instances that will be needed to, respectively, // force evaluation of forumlae found in cells and create a // formatted String encapsulating the cells contents. this.workbook = WorkbookFactory.create(fis); this.evaluator = this.workbook.getCreationHelper().createFormulaEvaluator(); this.formatter = new DataFormatter(true); } finally { if (fis != null) { fis.close(); } } } /** * Called to convert the contents of the currently opened workbook into * a CSV file. 
*/ private void convertToCSV() { Sheet sheet = null; Row row = null; int lastRowNum = 0; System.out.println("Converting files contents to CSV format."); // Discover how many sheets there are in the workbook.... int numSheets = this.workbook.getNumberOfSheets(); this.csvData = new ArrayList<SheetInfo>(numSheets); // and then iterate through them. for (int i = 0; i < numSheets; i++) { this.sheetIndex = i; this.csvData.add(new SheetInfo()); // Get a reference to a sheet and check to see if it contains // any rows. sheet = this.workbook.getSheetAt(i); if (sheet.getPhysicalNumberOfRows() > 0) { // Note down the index number of the bottom-most row and // then iterate through all of the rows on the sheet starting // from the very first row - number 1 - even if it is missing. // Recover a reference to the row and then call another method // which will strip the data from the cells and build lines // for inclusion in the resylting CSV file. lastRowNum = sheet.getLastRowNum(); for (int j = 0; j <= lastRowNum; j++) { row = sheet.getRow(j); this.rowToCSV(row); } } } } private void saveCSVFile() throws FileNotFoundException, IOException, JSONException { System.out.println("Saving the CSV files [" + strDestination + "]"); JSONObject csvInfo = new JSONObject(); csvInfo.put("sourceFile", strSource); csvInfo.put("targetDir", strDestination); csvInfo.put("generatePreviews", rowLimit != Integer.MAX_VALUE); csvInfo.put("perSheetRowLimitForPreviews", rowLimit); JSONArray sheets = new JSONArray(); csvInfo.put("sheets", sheets); for (int s = 0; s < this.csvData.size(); s++) { SheetInfo sheetInfo = this.csvData.get(s); JSONObject sheet = new JSONObject(); File file = saveToFile(s, sheetInfo, false); sheet.put("title", this.workbook.getSheetAt(s).getSheetName()); sheet.put("rowCount", sheetInfo.getRowCount()); sheet.put("fullOutput", file.getName()); if (rowLimit != Integer.MAX_VALUE) { file = saveToFile(s, sheetInfo, true); sheet.put("previewOutput", file.getName()); } sheets.put(sheet); 
} BufferedWriter writer = new BufferedWriter(new FileWriter(new File(strDestination, "info.json"))); writer.write(csvInfo.toString(2)); writer.close(); } private File saveToFile(int s, SheetInfo sheetInfo, boolean preview) throws IOException { FileWriter fw = null; BufferedWriter bw = null; List<String> line; StringBuffer buffer; String csvLineElement; try { File file = preview ? new File(strDestination, (s + 1) + "-" + this.rowLimit + "-of-" + sheetInfo.getRowCount() + "-preview.csv") : new File(strDestination, (s + 1) + "-" + sheetInfo.getRowCount() + ".csv"); if (file.exists()) { file.delete(); } // Open a writer onto the CSV file. fw = new FileWriter(file); bw = new BufferedWriter(fw); // Step through the elements of the ArrayList that was used to hold // all of the data recovered from the Excel workbooks' sheets, rows // and cells. for (int i = 0; i < (preview ? Math.min(sheetInfo.getRowCount(), rowLimit) : sheetInfo.getRowCount()); i++) { buffer = new StringBuffer(); // Get an element from the ArrayList that contains the data for // the workbook. This element will itself be an ArrayList // containing Strings and each String will hold the data recovered // from a single cell. The for() loop is used to recover elements // from this 'row' ArrayList one at a time and to write the Strings // away to a StringBuffer thus assembling a single line for inclusion // in the CSV file. If a row was empty or if it was short, then // the ArrayList that contains it's data will also be shorter than // some of the others. Therefore, it is necessary to check within // the for loop to ensure that the ArrayList contains data to be // processed. If it does, then an element will be recovered and // appended to the StringBuffer. 
line = sheetInfo.getRow(i); for (int j = 0; j < sheetInfo.getMaxRowWidth(); j++) { if (line.size() > j) { csvLineElement = line.get(j); if (csvLineElement != null) { buffer.append(this.escapeEmbeddedCharacters(csvLineElement)); } } if (j < (sheetInfo.getMaxRowWidth() - 1)) { buffer.append(this.separator); } } // Once the line is built, write it away to the CSV file. bw.write(buffer.toString().trim()); // Condition the inclusion of new line characters so as to // avoid an additional, superfluous, new line at the end of // the file. if (i < (sheetInfo.getRowCount() - 1)) { bw.newLine(); } } return file; } finally { if (bw != null) { bw.flush(); bw.close(); } } } /** * Called to convert a row of cells into a line of data that can later be * output to the CSV file. * * @param row An instance of either the HSSFRow or XSSFRow classes that * encapsulates information about a row of cells recovered from * an Excel workbook. */ private void rowToCSV(Row row) { Cell cell = null; int lastCellNum = 0; ArrayList<String> csvLine = new ArrayList<String>(); // Check to ensure that a row was recovered from the sheet as it is // possible that one or more rows between other populated rows could be // missing - blank. If the row does contain cells then... if (row != null) { // Get the index for the right most cell on the row and then // step along the row from left to right recovering the contents // of each cell, converting that into a formatted String and // then storing the String into the csvLine ArrayList. 
lastCellNum = row.getLastCellNum(); for (int i = 0; i <= lastCellNum; i++) { cell = row.getCell(i); if (cell == null) { csvLine.add(""); } else { try { if (cell.getCellType() != Cell.CELL_TYPE_FORMULA) { if (cell.getCellType() == Cell.CELL_TYPE_NUMERIC && DateUtil.isCellDateFormatted(cell)) { // Output local time DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); // df.setTimeZone(TimeZone.getTimeZone("UTC")); csvLine.add(df.format(cell.getDateCellValue())); } else { csvLine.add(this.formatter.formatCellValue(cell)); } } else { String value = cell.getCellFormula(); try { value = this.formatter.formatCellValue(cell, this.evaluator); } catch (Exception e) { } csvLine.add(value); } } catch (RuntimeException e) { if (e.getMessage() != null && e.getMessage().startsWith("Could not resolve external workbook")) { // Issue #1 csvLine.add(""); } else { throw e; } } } } } this.csvData.get(sheetIndex).addRow(csvLine); } public static boolean isNullOrEmpty(String string) { return string == null || string.length() == 0; } /** * Checks to see whether the field - which consists of the formatted * contents of an Excel worksheet cell encapsulated within a String - contains * any embedded characters that must be escaped. The method is able to * comply with either Excel's or UNIX formatting conventions in the * following manner; * * With regard to UNIX conventions, if the field contains any embedded * field separator or EOL characters they will each be escaped by prefixing * a leading backspace character. These are the only changes that have yet * emerged following some research as being required. * * Excel has other embedded character escaping requirements, some that emerged * from empirical testing, other through research. Firstly, with regards to * any embedded speech marks ("), each occurrence should be escaped with * another speech mark and the whole field then surrounded with speech marks. 
* Thus if a field holds <em>"Hello" he said</em> then it should be modified * to appear as <em>"""Hello"" he said"</em>. Furthermore, if the field * contains either embedded separator or EOL characters, it should also * be surrounded with speech marks. As a result <em>1,400</em> would become * <em>"1,400"</em> assuming that the comma is the required field separator. * This has one consequence in, if a field contains embedded speech marks * and embedded separator characters, checks for both are not required as the * additional set of speech marks that should be placed around ay field * containing embedded speech marks will also account for the embedded * separator. * * It is worth making one further note with regard to embedded EOL * characters. If the data in a worksheet is exported as a CSV file using * Excel itself, then the field will be surounded with speech marks. If the * resulting CSV file is then re-imports into another worksheet, the EOL * character will result in the original simgle field occupying more than * one cell. This same 'feature' is replicated in this classes behaviour. * * @param field An instance of the String class encapsulating the formatted * contents of a cell on an Excel worksheet. * @return A String that encapsulates the formatted contents of that * Excel worksheet cell but with any embedded separator, EOL or * speech mark characters correctly escaped. */ private String escapeEmbeddedCharacters(String field) { StringBuffer buffer = null; // If the fields contents should be formatted to confrom with Excel's // convention.... if (this.formattingConvention == ToCSV.EXCEL_STYLE_ESCAPING) { // Firstly, check if there are any speech marks (") in the field; // each occurrence must be escaped with another set of spech marks // and then the entire field should be enclosed within another // set of speech marks. 
Thus, "Yes" he said would become // """Yes"" he said" if (field.contains("\"")) { buffer = new StringBuffer(field.replaceAll("\"", "\\\"\\\"")); buffer.insert(0, "\""); buffer.append("\""); } else { // If the field contains either embedded separator or EOL // characters, then escape the whole field by surrounding it // with speech marks. buffer = new StringBuffer(field); if ((buffer.indexOf(this.separator)) > -1 || (buffer.indexOf("\n")) > -1) { buffer.insert(0, "\""); buffer.append("\""); } } return (buffer.toString().trim()); } // The only other formatting convention this class obeys is the UNIX one // where any occurrence of the field separator or EOL character will // be escaped by preceding it with a backslash. else { if (field.contains(this.separator)) { field = field.replaceAll(this.separator, ("\\\\" + this.separator)); } if (field.contains("\n")) { field = field.replaceAll("\n", "\\\\\n"); } return (field); } } private static class Params { public String destination; public int rowLimit = Integer.MAX_VALUE; public int formattingConvention = EXCEL_STYLE_ESCAPING; public String separator = DEFAULT_SEPARATOR; public String filename; } public static long start = System.currentTimeMillis(); public static void main(String[] args) throws Exception { printStartupTime(System.currentTimeMillis()); Params params = processArguments(args); ToCSV converter = new ToCSV(params); converter.process(); long end = System.currentTimeMillis(); System.out.println(end - start); } private static Params processArguments(String[] args) { Params params = new Params(); Options options = getOptions(); CommandLineParser parser = new PosixParser(); CommandLine cmd; try { cmd = parser.parse(options, args); } catch (ParseException e) { printHelp(options); throw new RuntimeException(e); } if (cmd.hasOption("h")) { printHelp(options); System.exit(0); } if (cmd.getArgs().length <= 1) { printHelp(options); System.exit(1); } String filename = cmd.getArgs()[0]; File file = new File(filename); if 
(!file.exists()) { throw new RuntimeException(new FileNotFoundException(file.getAbsolutePath())); } params.filename = filename; if (cmd.getArgs().length > 1) { params.destination = cmd.getArgs()[1]; } else { params.destination = file.getPath(); } // Ensure that the folder the user has chosen to save the CSV files // away into firstly exists and secondly is a folder rather than, for // instance, a data file. File dest = new File(params.destination); if (!dest.exists()) { throw new IllegalArgumentException( "The folder/directory for the " + "converted CSV file(s) does not exist."); } if (!dest.isDirectory()) { throw new IllegalArgumentException( "The destination for the CSV " + "file(s) is not a directory/folder."); } if (cmd.getOptionValue("r") != null) { params.rowLimit = Integer.parseInt(cmd.getOptionValue("r")); } if (cmd.getOptionValue("f") != null) { params.formattingConvention = Integer.parseInt(cmd.getOptionValue("f")); } if (cmd.getOptionValue("s") != null) { params.separator = cmd.getOptionValue("s"); } return params; } private static void printHelp(Options options) { HelpFormatter formatter = new HelpFormatter(); String jarName = ((URLClassLoader) ToCSV.class.getClassLoader()).getURLs()[0].getFile(); jarName = new File(jarName).getName(); formatter.printHelp(100, "java -jar " + jarName + " [OPTIONS] <source> [<destination-folder>]" + "\nHere <source> is XLS or XSLX file " + "<destination-folder> folder for CSV files. 
" + "If not specified <source> folder will be used.", "", options, "\nExample:\njava -Dfile.encoding=utf8 -Xms512m -Xmx512m -XX:MaxPermSize=256m -jar " + jarName + " file.xls"); } private static void printStartupTime(long mainStart) { String startTime = System.getProperty("startTime"); if (startTime != null) { System.out.println("startTime = " + startTime); Calendar calendar = Calendar.getInstance(); calendar.setTimeInMillis(mainStart); System.out.println("curreTime = " + calendar.getTime()); Pattern pattern = Pattern.compile("(\\d{2}):(\\d{2}):(\\d{2})"); Matcher matcher = pattern.matcher(startTime); matcher.find(); int hours = Integer.parseInt(matcher.group(1)); int mins = Integer.parseInt(matcher.group(2)); int secs = Integer.parseInt(matcher.group(3)); calendar = Calendar.getInstance(); calendar.set(Calendar.HOUR_OF_DAY, hours); calendar.set(Calendar.MINUTE, mins); calendar.set(Calendar.SECOND, secs); long systemStart = calendar.getTimeInMillis(); System.out.println(mainStart - systemStart); } } /** * An instance of this class can be used to control the files returned * be a call to the listFiles() method when made on an instance of the * File class and that object refers to a folder/directory */ class ExcelFilenameFilter implements FilenameFilter { /** * Determine those files that will be returned by a call to the * listFiles() method. In this case, the name of the file must end with * either of the following two extension; '.xls' or '.xlsx'. For the * future, it is very possible to parameterise this and allow the * containing class to pass, for example, an array of Strings to this * class on instantiation. Each element in that array could encapsulate * a valid file extension - '.xls', '.xlsx', '.xlt', '.xlst', etc. These * could then be used to control which files were returned by the call * to the listFiles() method. * * @param file An instance of the File class that encapsulates a handle * referring to the folder/directory that contains the file. 
* @param name An instance of the String class that encapsulates the * name of the file. * @return A boolean value that indicates whether the file should be * included in the array retirned by the call to the listFiles() * method. In this case true will be returned if the name of the * file ends with either '.xls' or '.xlsx' and false will be * returned in all other instances. */ public boolean accept(File file, String name) { return (name.endsWith(".xls") || name.endsWith(".xlsx")); } } @SuppressWarnings("static-access") private static Options getOptions() { Options options = new Options(); options.addOption( OptionBuilder.withLongOpt("help").withDescription("Prints this help message.").create("h")); options.addOption(OptionBuilder.withLongOpt("row-limit") .withDescription( "Rows limit per sheet. Default unlimited (-1). " + "Will produce preview file per sheet.") .hasArg().create("r")); options.addOption(OptionBuilder.withLongOpt("separator") .withDescription("Optional. The character or characters that " + "should be used to separate fields in the CSV " + "record. If no value is passed then the comma " + "will be assumed.") .hasArg().create("s")); options.addOption(OptionBuilder.withLongOpt("format") .withDescription("Optional. This argument can take one of two " + "values. Passing 0 (zero) will result in a CSV " + "file that obeys Excel's formatting conventions " + "whilst passing 1 (one) will result in a file " + "that obeys UNIX formatting conventions. If no " + "value is passed, then the CSV file produced " + "will obey Excel's formatting conventions.") .hasArg().create("f")); return options; } }