org.olat.search.service.document.file.ExcelOOXMLDocument.java Source code

Java tutorial

Introduction

Here is the source code for org.olat.search.service.document.file.ExcelOOXMLDocument.java

Source

/**
 * OLAT - Online Learning and Training<br>
 * http://www.olat.org
 * <p>
 * Licensed under the Apache License, Version 2.0 (the "License"); <br>
 * you may not use this file except in compliance with the License.<br>
 * You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing,<br>
 * software distributed under the License is distributed on an "AS IS" BASIS, <br>
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
 * See the License for the specific language governing permissions and <br>
 * limitations under the License.
 * <p>
 * Copyright (c) since 2004 at Multimedia- & E-Learning Services (MELS),<br>
 * University of Zurich, Switzerland.
 * <p>
 */

package org.olat.search.service.document.file;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.util.Iterator;

import org.apache.lucene.document.Document;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Comment;
import org.apache.poi.ss.usermodel.HeaderFooter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing;
import org.olat.core.util.vfs.VFSLeaf;
import org.olat.search.service.SearchResourceContext;

/**
 * Description:<br>
 * Parse the Excel XML document (.xlsx) with Apache POI
 * <P>
 * Initial Date: 14 dec. 2009 <br>
 * 
 * @author srosse, stephane.rosse@frentix.com
 */
public class ExcelOOXMLDocument extends FileDocument {
    private static final OLog log = Tracing.createLoggerFor(ExcelOOXMLDocument.class);

    /** File type value set on every document created by {@link #createDocument}. */
    public final static String FILE_TYPE = "type.file.excel";

    public ExcelOOXMLDocument() {
        super();
    }

    /**
     * Creates an {@code ExcelOOXMLDocument}, initialises it from the given context and leaf, sets its file
     * type and CSS icon, and returns the resulting Lucene document.
     *
     * @param leafResourceContext search resource context passed to {@code init}
     * @param leaf the file to index
     * @return the Lucene document of the newly initialised Excel document
     * @throws IOException declared by this method's signature
     * @throws DocumentException declared by this method's signature
     * @throws DocumentAccessException declared by this method's signature
     */
    public static Document createDocument(final SearchResourceContext leafResourceContext, final VFSLeaf leaf)
            throws IOException, DocumentException, DocumentAccessException {
        final ExcelOOXMLDocument excelDocument = new ExcelOOXMLDocument();
        excelDocument.init(leafResourceContext, leaf);
        excelDocument.setFileType(FILE_TYPE);
        excelDocument.setCssIcon("b_filetype_xls");
        if (log.isDebug()) {
            log.debug(excelDocument.toString());
        }
        return excelDocument.getLuceneDocument();
    }

    /**
     * Reads the leaf through Apache POI and returns the text of the workbook: sheet names, headers, cell
     * values, cell comments and footers, separated by single spaces. If the parsed document is not an
     * {@link XSSFWorkbook}, an empty string is returned.
     *
     * @param leaf the file whose content is extracted
     * @return the extracted text, possibly empty
     * @throws IOException if closing the input stream fails
     * @throws DocumentException if anything goes wrong while opening or parsing the file; the original
     *             exception is attached as the cause
     */
    @Override
    protected String readContent(final VFSLeaf leaf) throws IOException, DocumentException {
        BufferedInputStream bis = null;
        final StringBuilder buffy = new StringBuilder();
        try {
            bis = new BufferedInputStream(leaf.getInputStream());
            final POIXMLTextExtractor extractor = (POIXMLTextExtractor) ExtractorFactory.createExtractor(bis);
            final POIXMLDocument document = extractor.getDocument();

            if (document instanceof XSSFWorkbook) {
                extractContent(buffy, (XSSFWorkbook) document);
            }

            return buffy.toString();
        } catch (final Exception e) {
            // Keep the same exception type and message as before, but attach the original exception so
            // that its stack trace is not lost.
            // NOTE(review): assumes DocumentException(String) leaves the cause uninitialised - confirm.
            final DocumentException wrapped = new DocumentException(e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        } finally {
            if (bis != null) {
                bis.close();
            }
        }
    }

    /**
     * Appends the text of every sheet of the workbook to the buffer: sheet name, headers, all cells
     * (with their comments) and finally the footers.
     *
     * @param buffy buffer receiving the extracted text
     * @param document the workbook to walk through
     */
    private void extractContent(final StringBuilder buffy, final XSSFWorkbook document) {
        for (int i = 0; i < document.getNumberOfSheets(); i++) {
            final XSSFSheet sheet = document.getSheetAt(i);
            buffy.append(document.getSheetName(i)).append(' ');

            // Header(s), if present
            extractHeaderFooter(buffy, sheet.getFirstHeader());
            extractHeaderFooter(buffy, sheet.getOddHeader());
            extractHeaderFooter(buffy, sheet.getEvenHeader());

            // Rows and cells
            for (final Row row : sheet) {
                for (final Iterator<Cell> cells = row.cellIterator(); cells.hasNext();) {
                    extractCell(buffy, cells.next());
                }
            }

            // Finally footer(s), if present
            extractHeaderFooter(buffy, sheet.getFirstFooter());
            extractHeaderFooter(buffy, sheet.getOddFooter());
            extractHeaderFooter(buffy, sheet.getEvenFooter());
        }
    }

    /**
     * Appends the value of a single cell, followed by its comment if it has one.
     *
     * @param buffy buffer receiving the extracted text
     * @param cell the cell to read
     */
    private void extractCell(final StringBuilder buffy, final Cell cell) {
        if (cell.getCellType() == Cell.CELL_TYPE_STRING) {
            buffy.append(cell.getRichStringCellValue().getString()).append(' ');
        } else {
            // Formula cells are read through their raw value as well: asking a formula cell for a rich
            // string fails when its cached result is not a string, which would abort the extraction of
            // the whole workbook.
            final String rawValue = ((XSSFCell) cell).getRawValue();
            if (rawValue != null) {
                buffy.append(rawValue).append(' ');
            }
        }

        // Output the comment in the same cell as the content
        final Comment comment = cell.getCellComment();
        if (comment != null && comment.getString() != null) {
            buffy.append(comment.getString().getString()).append(' ');
        }
    }

    /**
     * Appends the text of a header or footer to the buffer, if there is any.
     *
     * @param buffy buffer receiving the extracted text
     * @param hf the header or footer to read; ignored when {@code null}
     */
    private void extractHeaderFooter(final StringBuilder buffy, final HeaderFooter hf) {
        if (hf == null) {
            return;
        }
        final String content = ExcelExtractor._extractHeaderFooter(hf);
        if (content != null && content.length() > 0) {
            buffy.append(content).append(' ');
        }
    }
}