// Java tutorial
/* * Copyright (C) 2009 eXo Platform SAS. * * This is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this software; if not, write to the Free * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA, or see the FSF site: http://www.fsf.org. */ package org.exoplatform.services.document.impl; import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException; import org.apache.poi.xssf.usermodel.XSSFCell; import org.apache.poi.xssf.usermodel.XSSFCellStyle; import org.apache.poi.xssf.usermodel.XSSFRow; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.exoplatform.commons.utils.SecurityHelper; import org.exoplatform.services.document.DocumentReadException; import org.exoplatform.services.log.ExoLogger; import org.exoplatform.services.log.Log; import java.io.IOException; import java.io.InputStream; import java.security.PrivilegedExceptionAction; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Properties; /** * Created by The eXo Platform SAS A parser of Microsoft Excel 2007 files (xlsx). 
* * @author <a href="mailto:phunghainam@gmail.com">Phung Hai Nam</a> * @author Gennady Azarenkov * @author <a href="mailto:nikolazius@gmail.com">Nikolay Zamosenchuk</a> * @version $Id: MSXExcelDocumentReader.java 34360 2009-07-22 23:58:59Z nzamosenchuk $ * */ public class MSXExcelDocumentReader extends BaseDocumentReader { private static final Log LOG = ExoLogger.getLogger("exo.core.component.document.MSXExcelDocumentReader"); private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSSZ"; /** * @see org.exoplatform.services.document.DocumentReader#getMimeTypes() */ public String[] getMimeTypes() { //Supported mimetypes: // "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - "x.xlsx" // //Unsupported mimetypes: // "application/vnd.ms-excel.sheet.binary.macroenabled.12" - "*.xlsb"; There is exceptions at parsing // "application/vnd.openxmlformats-officedocument.spreadsheetml.template" - "x.xltx"; Not tested // "application/vnd.ms-excel.sheet.macroenabled.12" - "x.xlsm"; Not tested // "application/vnd.ms-excel.template.macroenabled.12" - "x.xltm"; Not tested // "application/vnd.ms-excel.addin.macroenabled.12" - "x.xlam"; Not tested return new String[] { "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" }; } /** * Returns only a text from .xlsx file content. * * @param is an input stream with .xls file content. * @return The string only with text from file content. 
*/ public String getContentAsText(final InputStream is) throws IOException, DocumentReadException { if (is == null) { throw new IllegalArgumentException("InputStream is null."); } StringBuilder builder = new StringBuilder(""); SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT); try { if (is.available() == 0) { return ""; } XSSFWorkbook wb; try { wb = SecurityHelper.doPrivilegedIOExceptionAction(new PrivilegedExceptionAction<XSSFWorkbook>() { public XSSFWorkbook run() throws Exception { return new XSSFWorkbook(is); } }); } catch (IOException e) { throw new DocumentReadException("Can't open spreadsheet.", e); } catch (OpenXML4JRuntimeException e) { return builder.toString(); } for (int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++) { XSSFSheet sheet = wb.getSheetAt(sheetNum); if (sheet != null) { for (int rowNum = sheet.getFirstRowNum(); rowNum <= sheet.getLastRowNum(); rowNum++) { XSSFRow row = sheet.getRow(rowNum); if (row != null) { int lastcell = row.getLastCellNum(); for (int k = 0; k < lastcell; k++) { XSSFCell cell = row.getCell(k); if (cell != null) { switch (cell.getCellType()) { case XSSFCell.CELL_TYPE_NUMERIC: { double d = cell.getNumericCellValue(); if (isCellDateFormatted(cell)) { Date date = HSSFDateUtil.getJavaDate(d); String cellText = dateFormat.format(date); builder.append(cellText).append(" "); } else { builder.append(d).append(" "); } break; } case XSSFCell.CELL_TYPE_FORMULA: builder.append(cell.getCellFormula().toString()).append(" "); break; case XSSFCell.CELL_TYPE_BOOLEAN: builder.append(cell.getBooleanCellValue()).append(" "); break; case XSSFCell.CELL_TYPE_ERROR: builder.append(cell.getErrorCellValue()).append(" "); break; case XSSFCell.CELL_TYPE_STRING: builder.append(cell.getStringCellValue().toString()).append(" "); break; default: break; } } } } } } } } finally { if (is != null) { try { is.close(); } catch (IOException e) { if (LOG.isTraceEnabled()) { LOG.trace("An exception occurred: " + e.getMessage()); } } } 
} return builder.toString(); } public String getContentAsText(InputStream is, String encoding) throws IOException, DocumentReadException { // Ignore encoding return getContentAsText(is); } /* * (non-Javadoc) * * @see org.exoplatform.services.document.DocumentReader#getProperties(java.io. * InputStream) */ public Properties getProperties(final InputStream is) throws IOException, DocumentReadException { POIPropertiesReader reader = new POIPropertiesReader(); reader.readDCProperties( SecurityHelper.doPrivilegedIOExceptionAction(new PrivilegedExceptionAction<XSSFWorkbook>() { public XSSFWorkbook run() throws Exception { return new XSSFWorkbook(is); } })); return reader.getProperties(); } public static boolean isCellDateFormatted(XSSFCell cell) { boolean bDate = false; double d = cell.getNumericCellValue(); if (HSSFDateUtil.isValidExcelDate(d)) { XSSFCellStyle style = cell.getCellStyle(); int i = style.getDataFormat(); switch (i) { case 0xe: // m/d/yy case 0xf: // d-mmm-yy case 0x10: // d-mmm case 0x11: // mmm-yy case 0x12: // h:mm AM/PM case 0x13: // h:mm:ss AM/PM case 0x14: // h:mm case 0x15: // h:mm:ss case 0x16: // m/d/yy h:mm case 0x2d: // mm:ss case 0x2e: // [h]:mm:ss case 0x2f: // mm:ss.0 case 0xa5: // ?? case 0xa7: // ?? case 0xa9: // ?? case 0xac: // mm:dd:yy not specified in javadoc case 0xad: // yyyy-mm-dd not specified in javadoc case 0xae: // mm:dd:yyyy not specified in javadoc case 0xaf: // m:d:yy not specified in javadoc bDate = true; break; default: bDate = false; break; } } return bDate; } }