Java tutorial
/* * This file is part of CSV package. * * CSV is free software: you can redistribute it * and/or modify it under the terms of version 3 of the GNU * Lesser General Public License as published by the Free Software * Foundation. * * CSV is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with CSV. If not, see * <http://www.gnu.org/licenses/lgpl-3.0.html>. */ package csv.impl; import java.io.File; import java.io.FileNotFoundException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.CellValue; import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.ss.usermodel.FormulaEvaluator; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; import csv.util.CSVUtils; /** * Implements Excel reading. * This class reads Excel sheets like a stream, meaning * delivering rows one by one from the current sheet. * * Use this reader if you want to load an Excel file by creating a {@link java.io.File} * and passing it to the constructor. * <p> * Example: * </p> * <p> * <pre> java.io.File f = new java.io.File("excel-test.xls"); ExcelReader in = new ExcelReader(f); while (in.hasNext()) { Object columns[] = in.next(); // Do something here } in.close(); </pre> * </p> * @author RalphSchuster * @see #selectSheet(int) * @see #selectSheet(String) */ public class ExcelReader extends AbstractStreamTableReader { /** The workbook */ private Workbook workbook; /** The evaluator for cell formulas */ private FormulaEvaluator formulaEvaluator = null; /** Whether formulas shall be evaluated or not (default is <code>true</code> = yes) */ private boolean evaluateFormulas = true; /** The sheet we are dealing with */ private Sheet sheet; /** The current row we are reading */ private Row currentRow; /** The row that was delivered by {@link #next()}. */ private Row lastDeliveredRow; /** Index of first row */ private int firstRow; /** Index of last row */ private int lastRow; /** The row currently to read next */ private int rowNum; /** Whether to skip blank rows (not deliver them) */ private boolean skipBlankRows = true; /** * Default constructor. */ public ExcelReader() { } /** * Constructor for reading from a file. * @param file file to read from * @throws FileNotFoundException when file does not exist */ public ExcelReader(File file) throws FileNotFoundException { super(file); } /** * Constructor to read from an existing stream. * @param in input stream to be used */ public ExcelReader(InputStream in) { super(in); } /** * Constructor to read from an existing workbook. * @param workbook the workbook be used */ public ExcelReader(Workbook workbook) { this.workbook = workbook; } /** * Constructor for reading from a file. * @param file file to read from * @throws FileNotFoundException when file does not exist */ public ExcelReader(String file) throws FileNotFoundException { super(file); } /** * Opens the stream by retrieving the workbook and selecting the first sheet. * @see csv.impl.AbstractStreamTableReader#open() */ @Override public void open() { super.open(); try { workbook = WorkbookFactory.create(getInputStream()); selectSheet(0); } catch (Exception e) { throw new IllegalStateException("Cannot create Excel workbook", e); } } /** * Returns the workbook. * @return workbook */ public Workbook getWorkbook() { if (workbook == null) open(); return workbook; } /** * Returns whether blank rows will be skipped or not while reading. * @return <code>true</code> when blank rows are skipped (default), <code>false</code> otherwise */ public boolean isSkipBlankRows() { return skipBlankRows; } /** * Sets whether blank rows will be skipped or not while reading. * @param skipBlankRows <code>true</code> when blank rows are skipped (default), <code>false</code> otherwise */ public void setSkipBlankRows(boolean skipBlankRows) { this.skipBlankRows = skipBlankRows; } /** * Computes the max row length of any rows in this sheet. * @return int length */ public int computeMaxColumnCount() { int maxColumnCount = 0; for (java.util.Iterator<Row> i = sheet.rowIterator(); i.hasNext();) { int length = i.next().getLastCellNum(); if (length > maxColumnCount) maxColumnCount = length; } return maxColumnCount; } /** * Select the given sheet to be read from. * @param name name of sheet * @return sheet selected */ public Sheet selectSheet(String name) { return selectSheet(workbook.getSheet(name)); } /** * Select the given sheet to be read from. * @param sheet sheet to be selected * @return sheet selected */ public Sheet selectSheet(Sheet sheet) { if (this.sheet != sheet) { this.sheet = sheet; firstRow = sheet.getFirstRowNum(); rowNum = firstRow; lastRow = sheet.getLastRowNum(); currentRow = null; } return this.sheet; } /** * Select the given sheet to be read from. * @param index index of sheet * @return sheet selected */ public Sheet selectSheet(int index) { return selectSheet(workbook.getSheetAt(index)); } /** * Returns the current sheet. * @return the current sheet. */ public Sheet getSheet() { return sheet; } /** * Returns the last delivered row. * This is the row delivered by last call to {@link #next()}. * @return the last row delivered by {@link #next()} */ public Row getLastExcelRow() { return lastDeliveredRow; } /** * Resets the reader by resetting the current row index * @see csv.impl.AbstractStreamTableReader#reset() * @see #getRowCount() */ @Override public void reset() { super.reset(); rowNum = firstRow; currentRow = null; } /** * Returns whether there is a row to be read in the current sheet. * This implementation stops reading when last row from a sheet was read. * You might need to manually select the next sheet if you want to read more * rows from other sheets. * @return true if a row is available in current sheet. * @see java.util.Iterator#hasNext() * @see #selectSheet(int) */ @Override public boolean hasNext() { if (currentRow == null) retrieveNextRow(); return currentRow != null; } /** * Returns the next row. * This method increases the internal row index and delivers the next row in the sheet. * Values in the array are Java objects depending on the cell type. If the cell contained * a formula, the formula is evaluated before returning the row. * @return values in row * @see java.util.Iterator#next() * @see #getRowCount() */ @Override public Object[] next() { if (hasNext()) { Object row[] = getValues(currentRow); lastDeliveredRow = currentRow; currentRow = null; incrementLineCount(); incrementRowCount(); return row; } throw new IllegalStateException("No more rows"); } /** * Returns the row at the given index. * Values in the array are Java objects depending on the cell type. If the cell contained * a formula, the formula is evaluated before returning the row. * @param rowNum row index to read * @return values of row */ public Object[] getValues(int rowNum) { Row row = getSheet().getRow(rowNum); return getValues(row); } /** * Returns the row as Java objects. * Values in the array are Java objects depending on the cell type. If the cell contained * a formula, the formula is evaluated before returning the row. * @return values in row * @param row row to read */ public Object[] getValues(Row row) { if (row == null) return null; List<Object> columns = new ArrayList<Object>(); int colCount = row.getLastCellNum(); for (int col = 0; col < colCount; col++) { Cell cell = row.getCell(col); columns.add(getValue(cell)); } return CSVUtils.convertList(columns, getMinimumColumnCount()); } /** * Returns the value of the specified cell. * If the cell contained * a formula, the formula is evaluated before returning the row. * @param rownum row index * @param cellNum column index * @return value of cell */ public Object getValue(int rownum, int cellNum) { Row row = getSheet().getRow(rowNum); return getValue(row, cellNum); } /** * Returns the value of the specified cell. * If the cell contained * a formula, the formula is evaluated before returning the row. * @param row row object * @param cellNum column index * @return value of cell */ public Object getValue(Row row, int cellNum) { if (row == null) return null; Cell cell = row.getCell(cellNum); return getValue(cell); } /** * Returns the value of the specified cell. * If the cell contained * a formula, the formula is evaluated before returning the row. * @param cell cell object * @return value of cell */ public Object getValue(Cell cell) { if (cell == null) return null; int cellType = cell.getCellType(); if (cellType == Cell.CELL_TYPE_FORMULA && !isEvaluateFormulas()) { cellType = cell.getCachedFormulaResultType(); } switch (cellType) { case Cell.CELL_TYPE_STRING: return cell.getStringCellValue(); case Cell.CELL_TYPE_NUMERIC: if (DateUtil.isCellDateFormatted(cell)) { return cell.getDateCellValue(); } else { return cell.getNumericCellValue(); } case Cell.CELL_TYPE_BLANK: return null; case Cell.CELL_TYPE_BOOLEAN: return cell.getBooleanCellValue(); case Cell.CELL_TYPE_FORMULA: return evaluateCellValue(cell); case Cell.CELL_TYPE_ERROR: return cell.getErrorCellValue(); } return null; } /** * Returns the evaluated cell content. * This assumes the cell contains a formula. * @param cell cell to evaluate * @return cell value */ public Object evaluateCellValue(Cell cell) { FormulaEvaluator evaluator = getFormulaEvaluator(); CellValue value = evaluator.evaluate(cell); switch (value.getCellType()) { case Cell.CELL_TYPE_STRING: return value.getStringValue(); case Cell.CELL_TYPE_NUMERIC: if (DateUtil.isCellDateFormatted(cell)) { return DateUtil.getJavaDate(value.getNumberValue()); } else { return value.getNumberValue(); } case Cell.CELL_TYPE_BLANK: return null; case Cell.CELL_TYPE_BOOLEAN: return value.getBooleanValue(); case Cell.CELL_TYPE_ERROR: return value.getErrorValue(); default: System.out.println("type=" + cell.getCellType()); } return cell.getCellFormula(); } /** * Returns a formula evaluator for the current workbook. * This is for convinience. * @return the formula evaluator */ public FormulaEvaluator getFormulaEvaluator() { if (formulaEvaluator == null) { formulaEvaluator = getWorkbook().getCreationHelper().createFormulaEvaluator(); } return formulaEvaluator; } /** * Returns whether formulas shall be evaluated or not (default is <code>true</code>). * @return <code>true</code> when formulas are evaluated */ public boolean isEvaluateFormulas() { return evaluateFormulas; } /** * Sets whether formulas shall be evaluated or not (default is <code>true</code>). * @param evaluateFormulas <code>true</code> or <code>false</code> */ public void setEvaluateFormulas(boolean evaluateFormulas) { this.evaluateFormulas = evaluateFormulas; } /** * Reads the header row from next line. * @see csv.impl.AbstractTableReader#readHeaderRow() */ @Override protected void readHeaderRow() { if (hasNext()) { setHeaderRow(CSVUtils.convertArray(next(), getMinimumColumnCount())); } } /** * Retrieves the next row from the current sheet. * The row is then internally stored for evaluation of {@link #hasNext()} * and {@link #next()}. Blank rows are skipped when {@linkplain #isSkipBlankRows()} * return <code>true</code>. */ protected void retrieveNextRow() { while (rowNum <= lastRow) { currentRow = getOrCreateRow(rowNum++); if (currentRow == null) continue; if (isSkipBlankRows() && rowHasOnlyBlankCells(currentRow)) { currentRow = null; } else { break; } } } /** * Checks whether row has only blank cells. * The method is called from * {@link #retrieveNextRow()}. * @return boolean when the row has only blank cells */ protected boolean rowHasOnlyBlankCells(Row row) { boolean blank = true; for (Cell cell : row) { if (cell.getCellType() != Cell.CELL_TYPE_BLANK) { blank = false; break; } } return blank; } /** * Ensures that the sheet contains a row at the given index. * @param rowNum index of row * @return the row from the sheet or a new blank row */ protected Row getOrCreateRow(int rowNum) { Row row = sheet.getRow(rowNum); if (!isSkipBlankRows() && (row == null)) { row = sheet.createRow(rowNum); } return row; } }