Java tutorial
/* * logic2j - "Bring Logic to your Java" - Copyright (C) 2011 Laurent.Tettoni@gmail.com * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ package org.logic2j.contrib.excel; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Sheet; import org.logic2j.core.api.model.exception.PrologNonSpecificError; import org.logic2j.core.impl.util.TypeUtils; import java.io.*; import java.util.ArrayList; import java.util.List; /** * Read Excel files and expose whole content as {@link TabularData}. */ public class ExcelReader { private final File file; private final boolean firstRowIsHeaders; private final int primaryKeyColumn; public ExcelReader(File theFile, boolean theFirstRowIsHeaders) { this(theFile, theFirstRowIsHeaders, -1); } /** * @param theFile * @param theFirstRowIsHeaders True when first row contains column headers. * @param thePrimaryKeyColumn The column (0-based) which should be considered as a unique (primary) key, or -1 for none. */ public ExcelReader(File theFile, boolean theFirstRowIsHeaders, int thePrimaryKeyColumn) { this.file = theFile; this.firstRowIsHeaders = theFirstRowIsHeaders; this.primaryKeyColumn = thePrimaryKeyColumn; } /** * @return Data read from cache and cached. * @throws java.io.IOException */ public TabularData readCached() throws IOException { final File cached = cachedFile(); if (cached.exists() && cached.isFile() && cached.canRead() && cached.lastModified() > this.file.lastModified()) { // We can use the cached version try { return new TabularDataSerializer(cached).read(); } catch (final ClassNotFoundException e) { throw new IOException("Recent cached version of " + this.file + " located at " + cached + " was not loadable: " + e); } } // Read the file final TabularData data = read(); // Cache it cached.getParentFile().mkdirs(); new TabularDataSerializer(cached).write(data); return data; } public TabularData read() throws IOException { if (this.file.getName().endsWith(".xls")) { final InputStream myxls = new FileInputStream(this.file); final HSSFWorkbook workBook = new HSSFWorkbook(myxls); final Sheet sheet = workBook.getSheetAt(0); final int excelPhysicalRows = sheet.getPhysicalNumberOfRows(); List<String> columnNames; if (this.firstRowIsHeaders) { columnNames = readRow(sheet, 0, String.class); } else { final int nbColunms = ((HSSFSheet) sheet).getRow(0).getPhysicalNumberOfCells(); final List<String> colNames = new ArrayList<String>(); for (int i = 0; i < nbColunms; i++) { colNames.add(createSequenceElement(i)); } columnNames = colNames; } final List<List<Serializable>> listData = new ArrayList<List<Serializable>>(); for (int r = this.firstRowIsHeaders ? 1 : 0; r < excelPhysicalRows; r++) { final List<Serializable> listRow = readRow(sheet, r, Serializable.class); if (listRow != null) { // Sometimes listData.add(listRow); } } String dataSetName = this.file.getName(); if (dataSetName.lastIndexOf('.') >= 0) { dataSetName = dataSetName.substring(0, dataSetName.lastIndexOf('.')); } final TabularData tbl = new TabularData(dataSetName, columnNames, listData); tbl.setPrimaryKeyColumn(this.primaryKeyColumn); return tbl; } throw new IOException("According to extension file may not be of Excel format: " + this.file); /* OOXML else if (this.fileName.endsWith(".xlsx")) { if (this.sheet == null) { final XSSFWorkbook workBook = new XSSFWorkbook(this.fileName); this.sheet = workBook.getSheetAt(0); } final int totalRows = this.sheet.getPhysicalNumberOfRows(); for (int i = this.firstRowIsHeaders ? 0 : 1; i < totalRows; i++) { final XSSFRow row = ((XSSFSheet) this.sheet).getRow(i); if (row != null) { final int cells = row.getPhysicalNumberOfCells(); final Term[] args = new Term[cells]; for (int c = 0; c < cells; c++) { final XSSFCell cell = row.getCell(c); String value = ""; switch (cell.getCellType()) { case Cell.CELL_TYPE_FORMULA: value = cell.getCellFormula(); // If it is a formula, then it must be a numeric value. args[c] = this.termAdapter.term(value, FactoryMode.ANY_TERM); break; case Cell.CELL_TYPE_NUMERIC: value = Double.toString(cell.getNumericCellValue()); args[c] = this.termAdapter.term(value, FactoryMode.ANY_TERM); break; case Cell.CELL_TYPE_STRING: value = cell.getStringCellValue(); args[c] = this.termAdapter.term("\"" + value.replace("\"", "").replaceAll("\\r|\\n", "") + "\"", FactoryMode.LITERAL); break; default: args[c] = this.termAdapter.term("\"" + value + "\"", FactoryMode.LITERAL); } // Note: There is a problem if the content of the cell is too long // args[c] = prolog.getTermFactory().create("\""+value.replace("\"", "").replaceAll("\\r|\\n", "")+"\"", // FactoryMode.ANY_TERM); } final Clause cl = new Clause(this.prolog, new Struct(dataSetName, args)); clauses.add(cl); } } } */ } private File cachedFile() { final File tempDir = new File(System.getProperty("java.io.tmpdir")); final String relativePath = this.file.getPath(); final File pathWithinTempDir = new File(tempDir, relativePath); return pathWithinTempDir; } /** * @param sheet * @param rowNumber Row index * @param theTargetClass * @return Null if row is empty or only containing nulls. */ private <T> List<T> readRow(Sheet sheet, final int rowNumber, Class<T> theTargetClass) { final HSSFRow row = ((HSSFSheet) sheet).getRow(rowNumber); if (row == null) { return null; } final int nbCols = row.getPhysicalNumberOfCells(); final ArrayList<T> values = new ArrayList<T>(); boolean hasSomeData = false; for (int c = 0; c < nbCols; c++) { final HSSFCell cell = row.getCell(c); Object value = null; if (cell != null) { switch (cell.getCellType()) { case Cell.CELL_TYPE_FORMULA: value = cell.getCellFormula(); break; case Cell.CELL_TYPE_NUMERIC: value = cell.getNumericCellValue(); break; case Cell.CELL_TYPE_STRING: value = cell.getStringCellValue(); break; case Cell.CELL_TYPE_BLANK: break; default: throw new PrologNonSpecificError("Excel cell at row=" + rowNumber + ", column=" + c + " of type " + cell.getCellType() + " not handled, value is " + value); } } value = mapCellValue(value); if (value != null) { hasSomeData = true; } final T cast = TypeUtils.safeCastOrNull("casting Excel cell", value, theTargetClass); values.add(cast); } if (!hasSomeData) { return null; } return values; } private Object mapCellValue(Object value) { if (value instanceof CharSequence) { return value.toString().trim(); } return value; } // From : http://stackoverflow.com/questions/8710719/generating-an-alphabetic-sequence-in-java private String createSequenceElement(int index) { final int first = index / 26; final int second = index % 26; if (first < 1) { return String.valueOf((char) ('A' + second)); } return createSequenceElement(first) + (char) ('A' + second); } }