Java tutorial
/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /** * ExcelSpreadSheetReader.java * Copyright (C) 2010-2016 University of Waikato, Hamilton, New Zealand */ package adams.data.io.input; import adams.core.DateFormat; import adams.core.DateTime; import adams.core.DateUtils; import adams.core.Utils; import adams.data.io.output.ExcelSpreadSheetWriter; import adams.data.io.output.SpreadSheetWriter; import adams.data.spreadsheet.SpreadSheet; import adams.data.spreadsheet.SpreadSheetUtils; import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; /** <!-- globalinfo-start --> * Reads MS Excel files (using DOM). * <br><br> <!-- globalinfo-end --> * <!-- options-start --> * <pre>-logging-level <OFF|SEVERE|WARNING|INFO|CONFIG|FINE|FINER|FINEST> (property: loggingLevel) * The logging level for outputting errors and debugging output. * default: WARNING * </pre> * * <pre>-data-row-type <adams.data.spreadsheet.DataRow> (property: dataRowType) * The type of row to use for the data. * default: adams.data.spreadsheet.DenseDataRow * </pre> * * <pre>-spreadsheet-type <adams.data.spreadsheet.SpreadSheet> (property: spreadSheetType) * The type of spreadsheet to use for the data. * default: adams.data.spreadsheet.DefaultSpreadSheet * </pre> * * <pre>-sheets <adams.core.Range> (property: sheetRange) * The range of sheets to load. * default: first * example: A range is a comma-separated list of single 1-based indices or sub-ranges of indices ('start-end'); 'inv(...)' inverts the range '...'; the following placeholders can be used as well: first, second, third, last_2, last_1, last * </pre> * * <pre>-missing <java.lang.String> (property: missingValue) * The placeholder for missing values. * default: * </pre> * * <pre>-no-auto-extend-header <boolean> (property: autoExtendHeader) * If enabled, the header gets automatically extended if rows have more cells * than the header. * default: true * </pre> * * <pre>-text-columns <adams.core.Range> (property: textColumns) * The range of columns to treat as text. * default: * example: A range is a comma-separated list of single 1-based indices or sub-ranges of indices ('start-end'); 'inv(...)' inverts the range '...'; the following placeholders can be used as well: first, second, third, last_2, last_1, last * </pre> * * <pre>-no-header <boolean> (property: noHeader) * If enabled, all rows get added as data rows and a dummy header will get * inserted. * default: false * </pre> * * <pre>-custom-column-headers <java.lang.String> (property: customColumnHeaders) * The custom headers to use for the columns instead (comma-separated list); * ignored if empty. * default: * </pre> * * <pre>-first-row <int> (property: firstRow) * The index of the first row to retrieve (1-based). * default: 1 * minimum: 1 * </pre> * * <pre>-num-rows <int> (property: numRows) * The number of data rows to retrieve; use -1 for unlimited. * default: -1 * minimum: -1 * </pre> * <!-- options-end --> * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision$ */ public class ExcelSpreadSheetReader extends AbstractExcelSpreadSheetReader { /** for serialization. */ private static final long serialVersionUID = 4755872204697328246L; /** * Returns a string describing the object. * * @return a description suitable for displaying in the gui */ @Override public String globalInfo() { return "Reads MS Excel files (using DOM)."; } /** * Returns a string describing the format (used in the file chooser). * * @return a description suitable for displaying in the * file chooser */ @Override public String getFormatDescription() { return "MS Excel spreadsheets"; } /** * Returns the extension(s) of the format. * * @return the extension (without the dot!) */ @Override public String[] getFormatExtensions() { return new String[] { "xls", "xlsx" }; } /** * Returns, if available, the corresponding writer. * * @return the writer, null if none available */ public SpreadSheetWriter getCorrespondingWriter() { return new ExcelSpreadSheetWriter(); } /** * Returns how to read the data, from a file, stream or reader. * * @return how to read the data */ @Override protected InputType getInputType() { return InputType.STREAM; } /** * Turns a numeric cell into a string. Tries to use "long" representation * if possible. * * @param cell the cell to process * @return the string representation */ protected String numericToString(Cell cell) { Double dbl; long lng; dbl = cell.getNumericCellValue(); lng = dbl.longValue(); if (dbl == lng) return "" + lng; else return "" + dbl; } /** * Reads the spreadsheet content from the specified file. * * @param in the input stream to read from * @return the spreadsheets or null in case of an error */ @Override protected List<SpreadSheet> doReadRange(InputStream in) { List<SpreadSheet> result; int[] indices; Workbook workbook; Sheet sheet; SpreadSheet spsheet; Row exRow; Cell exCell; adams.data.spreadsheet.Row spRow; int i; int n; int cellType; DateFormat dformat; boolean numeric; int dataRowStart; int firstRow; int lastRow; List<String> header; result = new ArrayList<>(); workbook = null; dformat = DateUtils.getTimestampFormatter(); try { workbook = WorkbookFactory.create(in); m_SheetRange.setMax(workbook.getNumberOfSheets()); indices = m_SheetRange.getIntIndices(); firstRow = m_FirstRow - 1; dataRowStart = getNoHeader() ? firstRow : firstRow + 1; for (int index : indices) { if (m_Stopped) break; spsheet = m_SpreadSheetType.newInstance(); spsheet.setDataRowClass(m_DataRowType.getClass()); result.add(spsheet); if (isLoggingEnabled()) getLogger().info("sheet: " + (index + 1)); sheet = workbook.getSheetAt(index); if (sheet.getLastRowNum() == 0) { getLogger().severe("No rows in sheet #" + index); return null; } spsheet.setName(sheet.getSheetName()); // header if (isLoggingEnabled()) getLogger().info("header row"); exRow = sheet.getRow(firstRow); if (exRow == null) { getLogger().warning("No data in sheet #" + (index + 1) + "?"); } else if (exRow != null) { spRow = spsheet.getHeaderRow(); m_TextColumns.setMax(exRow.getLastCellNum()); if (getNoHeader()) { header = SpreadSheetUtils.createHeader(exRow.getLastCellNum(), m_CustomColumnHeaders); for (i = 0; i < header.size(); i++) spRow.addCell("" + (i + 1)).setContent(header.get(i)); } else { if (!m_CustomColumnHeaders.trim().isEmpty()) { header = SpreadSheetUtils.createHeader(exRow.getLastCellNum(), m_CustomColumnHeaders); for (i = 0; i < header.size(); i++) spRow.addCell("" + (i + 1)).setContent(header.get(i)); } else { for (i = 0; i < exRow.getLastCellNum(); i++) { if (m_Stopped) break; exCell = exRow.getCell(i); if (exCell == null) { spRow.addCell("" + (i + 1)).setMissing(); continue; } numeric = !m_TextColumns.isInRange(i); switch (exCell.getCellType()) { case Cell.CELL_TYPE_BLANK: case Cell.CELL_TYPE_ERROR: spRow.addCell("" + (i + 1)).setContent("column-" + (i + 1)); break; case Cell.CELL_TYPE_NUMERIC: if (HSSFDateUtil.isCellDateFormatted(exCell)) spRow.addCell("" + (i + 1)).setContent(new DateTime( HSSFDateUtil.getJavaDate(exCell.getNumericCellValue()))); else if (numeric) spRow.addCell("" + (i + 1)).setContent(exCell.getNumericCellValue()); else spRow.addCell("" + (i + 1)).setContentAsString(numericToString(exCell)); break; default: spRow.addCell("" + (i + 1)).setContentAsString(exCell.getStringCellValue()); } } } } } // data if (spsheet.getColumnCount() > 0) { if (m_NumRows < 1) lastRow = sheet.getLastRowNum(); else lastRow = Math.min(firstRow + m_NumRows - 1, sheet.getLastRowNum()); for (i = dataRowStart; i <= lastRow; i++) { if (m_Stopped) break; if (isLoggingEnabled()) getLogger().info("data row: " + (i + 1)); spRow = spsheet.addRow("" + spsheet.getRowCount()); exRow = sheet.getRow(i); if (exRow == null) continue; for (n = 0; n < exRow.getLastCellNum(); n++) { // too few columns in header? if ((n >= spsheet.getHeaderRow().getCellCount()) && m_AutoExtendHeader) spsheet.insertColumn(spsheet.getColumnCount(), ""); m_TextColumns.setMax(spsheet.getHeaderRow().getCellCount()); exCell = exRow.getCell(n); if (exCell == null) { spRow.addCell(n).setMissing(); continue; } cellType = exCell.getCellType(); if (cellType == Cell.CELL_TYPE_FORMULA) cellType = exCell.getCachedFormulaResultType(); numeric = !m_TextColumns.isInRange(n); switch (cellType) { case Cell.CELL_TYPE_BLANK: case Cell.CELL_TYPE_ERROR: if (m_MissingValue.isEmpty()) spRow.addCell(n).setMissing(); else spRow.addCell(n).setContent(""); break; case Cell.CELL_TYPE_NUMERIC: if (HSSFDateUtil.isCellDateFormatted(exCell)) spRow.addCell(n).setContent( dformat.format(HSSFDateUtil.getJavaDate(exCell.getNumericCellValue()))); else if (numeric) spRow.addCell(n).setContent(exCell.getNumericCellValue()); else spRow.addCell(n).setContentAsString(numericToString(exCell)); break; default: if (m_MissingValue.isMatch(exCell.getStringCellValue())) spRow.addCell(n).setMissing(); else spRow.addCell(n).setContentAsString(exCell.getStringCellValue()); } } } } } } catch (Exception ioe) { getLogger().log(Level.SEVERE, "Failed to read range '" + m_SheetRange + "':", ioe); result = null; m_LastError = "Failed to read range '" + m_SheetRange + "' from stream!\n" + Utils.throwableToString(ioe); } return result; } }