Java tutorial
/* * Copyright (C) 2009 eXo Platform SAS. * * This is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this software; if not, write to the Free * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA, or see the FSF site: http://www.fsf.org. */ package org.exoplatform.services.document.impl; import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.exoplatform.commons.utils.SecurityHelper; import org.exoplatform.services.document.DocumentReadException; import org.exoplatform.services.log.ExoLogger; import org.exoplatform.services.log.Log; import java.io.IOException; import java.io.InputStream; import java.security.PrivilegedAction; import java.security.PrivilegedExceptionAction; import java.util.Properties; /** * Created by The eXo Platform SAS A parser of Microsoft Word 2007 files (docx). * * @author <a href="mailto:phunghainam@gmail.com">Phung Hai Nam</a> * @author Gennady Azarenkov * @author <a href="mailto:nikolazius@gmail.com">Nikolay Zamosenchuk</a> * @version $Id: MSXWordDocumentReader.java 34360 2009-07-22 23:58:59Z nzamosenchuk $ * */ public class MSXWordDocumentReader extends BaseDocumentReader { private static final Log LOG = ExoLogger.getLogger("exo.core.component.document.MSXWordDocumentReader"); /** * @see org.exoplatform.services.document.DocumentReader#getMimeTypes() */ public String[] getMimeTypes() { //Supported document types: // "application/vnd.openxmlformats-officedocument.wordprocessingml.document" - "x.docx" // "application/vnd.openxmlformats-officedocument.wordprocessingml.template" - "x.dotx" // "application/vnd.ms-word.document.macroenabled.12" - "x.docm" // "application/vnd.ms-word.template.macroenabled.12" - "x.dotm" return new String[] { "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "application/vnd.ms-word.document.macroenabled.12", "application/vnd.ms-word.template.macroenabled.12" }; } /** * Returns only a text from .docx file content. * * @param is an input stream with .docx file content. * @return The string only with text from file content. */ public String getContentAsText(final InputStream is) throws IOException, DocumentReadException { if (is == null) { throw new IllegalArgumentException("InputStream is null."); } String text = ""; try { if (is.available() == 0) { return ""; } XWPFDocument doc; try { doc = SecurityHelper.doPrivilegedIOExceptionAction(new PrivilegedExceptionAction<XWPFDocument>() { public XWPFDocument run() throws Exception { return new XWPFDocument(is); } }); } catch (IOException e) { throw new DocumentReadException("Can't open message.", e); } catch (OpenXML4JRuntimeException e) { throw new DocumentReadException("Can't open message.", e); } final XWPFWordExtractor extractor = new XWPFWordExtractor(doc); text = SecurityHelper.doPrivilegedAction(new PrivilegedAction<String>() { public String run() { return extractor.getText(); } }); } finally { if (is != null) { try { is.close(); } catch (IOException e) { if (LOG.isTraceEnabled()) { LOG.trace("An exception occurred: " + e.getMessage()); } } } } return text.trim(); } /** * @see org.exoplatform.services.document.DocumentReader#getContentAsText(java.io.InputStream, java.lang.String) */ public String getContentAsText(InputStream is, String encoding) throws IOException, DocumentReadException { // Ignore encoding return getContentAsText(is); } /** * @see org.exoplatform.services.document.DocumentReader#getProperties(java.io.InputStream) */ public Properties getProperties(final InputStream is) throws IOException, DocumentReadException { POIPropertiesReader reader = new POIPropertiesReader(); reader.readDCProperties( SecurityHelper.doPrivilegedIOExceptionAction(new PrivilegedExceptionAction<XWPFDocument>() { public XWPFDocument run() throws Exception { return new XWPFDocument(is); } })); return reader.getProperties(); } }