com.googlecode.sqlsheet.stream.XlsxSheetIterator.java Source code

Java tutorial

Introduction

Here is the source code for com.googlecode.sqlsheet.stream.XlsxSheetIterator.java

Source

/*
 * Copyright 2012 sqlsheet.googlecode.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.googlecode.sqlsheet.stream;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;

import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.*;
import java.io.InputStream;
import java.net.URL;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Streaming iterator over XLSX files
 * Derived from:
 * http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java
 */
public class XlsxSheetIterator extends AbstractXlsSheetIterator {

    /**
     * The type of the data value is indicated by an attribute on the cell.
     * The value is usually in a "v" element within the cell.
     */
    enum xssfDataType {
        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
    }

    OPCPackage xlsxPackage;
    XMLEventReader reader;
    StylesTable styles;
    ReadOnlySharedStringsTable strings;
    XSSFSheetEventHandler handler;

    public XlsxSheetIterator(URL filename, String sheetName) throws SQLException {
        super(filename, sheetName);
    }

    @Override
    protected void postConstruct() throws SQLException {
        try {
            //Open and pre process XLSX file
            xlsxPackage = OPCPackage.open(getFileName().getPath(), PackageAccess.READ);
            strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
            XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
            styles = xssfReader.getStylesTable();
            //Find appropriate sheet
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
            while (iter.hasNext()) {
                InputStream stream = iter.next();
                String currentSheetName = iter.getSheetName();
                if (currentSheetName.equalsIgnoreCase(getSheetName())
                        || ("\"" + currentSheetName + "\"").equalsIgnoreCase(getSheetName())) {
                    handler = new XSSFSheetEventHandler(styles, strings);
                    XMLInputFactory factory = XMLInputFactory.newInstance();
                    reader = factory.createXMLEventReader(stream);
                    //Start sheet processing
                    while (reader.hasNext() && getCurrentSheetRowIndex() == 0) {
                        processNextEvent();
                    }
                    processNextRecords();
                }
            }
        } catch (Exception e) {
            throw new SQLException(e.getMessage(), e);
        }
    }

    @Override
    protected void processNextRecords() throws SQLException {
        Long nextRowIndex = getCurrentSheetRowIndex() + 2L;
        while (reader.hasNext() && (!getCurrentSheetRowIndex().equals(nextRowIndex))) {
            try {
                processNextEvent();
            } catch (XMLStreamException e) {
                throw new SQLException(e.getMessage(), e);
            }
        }
    }

    @Override
    protected void onClose() throws SQLException {
        try {
            if (xlsxPackage != null) {
                xlsxPackage.close();
            }
            if (reader != null) {
                reader.close();
            }
        } catch (Exception e) {
            throw new SQLException(e.getMessage(), e);
        }
    }

    /**
     * Parses and shows the content of one sheet
     * using the specified styles and shared-strings tables.
     * @throws XMLStreamException if any
     */
    public void processNextEvent() throws XMLStreamException {
        if (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            XMLEvent nextEvent = reader.peek();
            switch (event.getEventType()) {
            case XMLEvent.START_ELEMENT:
                handler.startElement(event.asStartElement());
                if (nextEvent.isCharacters()) {
                    Characters c = reader.nextEvent().asCharacters();
                    if (!c.isWhiteSpace())
                        handler.characters(c.getData().toCharArray());
                }
                break;
            case XMLEvent.END_ELEMENT:
                handler.endElement(event.asEndElement());
                break;
            }
        }
    }

    /**
     * Derived from http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
     * <p/>
     * Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at
     * http://www.ecma-international.org/publications/standards/Ecma-376.htm
     * <p/>
     */
    class XSSFSheetEventHandler {

        /**
         * Table with styles
         */
        private StylesTable stylesTable;

        /**
         * Table with unique strings
         */
        private ReadOnlySharedStringsTable sharedStringsTable;

        // Set when V start element is seen
        private boolean vIsOpen;

        // Set when cell start element is seen;
        // used when cell close element is seen.
        private XlsxSheetIterator.xssfDataType nextDataType;

        // Used to format numeric cell values.
        private short formatIndex;
        private String formatString;
        private final DataFormatter formatter;

        private int thisColumn;
        // The last column printed to the output stream
        private int lastColumnNumber;

        // Gathers characters as they are seen.
        private StringBuffer value;

        /**
         * Accepts objects needed while parsing.
         *
         * @param styles  Table of styles
         * @param strings Table of shared strings
         */
        public XSSFSheetEventHandler(StylesTable styles, ReadOnlySharedStringsTable strings) {
            thisColumn = -1;
            lastColumnNumber = -1;
            this.stylesTable = styles;
            this.sharedStringsTable = strings;
            this.value = new StringBuffer();
            this.nextDataType = XlsxSheetIterator.xssfDataType.NUMBER;
            this.formatter = new DataFormatter();
        }

        public void startElement(StartElement startElement) {
            Map<String, String> attributes = new HashMap<String, String>();
            Iterator attributesIterator = startElement.getAttributes();
            while (attributesIterator.hasNext()) {
                Attribute attr = (Attribute) attributesIterator.next();
                attributes.put(attr.getName().getLocalPart(), attr.getValue());
            }

            if ("inlineStr".equals(startElement.getName().getLocalPart())
                    || "v".equals(startElement.getName().getLocalPart())
                    || "is".equals(startElement.getName().getLocalPart())) {
                vIsOpen = true;
                // Clear contents cache
                value.setLength(0);
            }
            // c => cell
            else if ("c".equals(startElement.getName().getLocalPart())) {
                // Get the cell reference
                String r = attributes.get("r");
                int firstDigit = -1;
                for (int c = 0; c < r.length(); ++c) {
                    if (Character.isDigit(r.charAt(c))) {
                        firstDigit = c;
                        break;
                    }
                }
                thisColumn = nameToColumn(r.substring(0, firstDigit));

                // Set up defaults.
                this.nextDataType = XlsxSheetIterator.xssfDataType.NUMBER;
                this.formatIndex = -1;
                this.formatString = null;
                String cellType = attributes.get("t");
                String cellStyleStr = attributes.get("s");
                if ("b".equals(cellType))
                    nextDataType = XlsxSheetIterator.xssfDataType.BOOL;
                else if ("e".equals(cellType))
                    nextDataType = XlsxSheetIterator.xssfDataType.ERROR;
                else if ("inlineStr".equals(cellType))
                    nextDataType = XlsxSheetIterator.xssfDataType.INLINESTR;
                else if ("s".equals(cellType))
                    nextDataType = XlsxSheetIterator.xssfDataType.SSTINDEX;
                else if ("str".equals(cellType))
                    nextDataType = XlsxSheetIterator.xssfDataType.FORMULA;
                else if (cellStyleStr != null) {
                    // It's a number, but almost certainly one
                    //  with a special style or format
                    int styleIndex = Integer.parseInt(cellStyleStr);
                    XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
                    this.formatIndex = style.getDataFormat();
                    this.formatString = style.getDataFormatString();
                    if (this.formatString == null)
                        this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
                }
            }

        }

        public void endElement(EndElement endElement) {
            CellValueHolder thisCellValue = new CellValueHolder();
            //String thisStr = null;
            // v => contents of a cell
            if ("v".equals(endElement.getName().getLocalPart()) || ("c".equals(endElement.getName().getLocalPart())
                    && xssfDataType.INLINESTR.equals(nextDataType))) {
                // Process the value contents as required.
                // Do now, as characters() may be called more than once
                switch (nextDataType) {
                case BOOL:
                    char first = value.charAt(0);
                    thisCellValue.stringValue = first == '0' ? "FALSE" : "TRUE";
                    break;
                case ERROR:
                    thisCellValue.stringValue = "\"ERROR:" + value.toString() + '"';
                    break;
                case FORMULA:
                    // A formula could result in a string value,
                    // so always add double-quote characters.
                    thisCellValue.stringValue = value.toString();
                    break;
                case INLINESTR:
                    // TODO: have seen an example of this, so it's untested.
                    XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
                    thisCellValue.stringValue = rtsi.toString();
                    break;
                case SSTINDEX:
                    String sstIndex = value.toString();
                    try {
                        int idx = Integer.parseInt(sstIndex);
                        XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
                        thisCellValue.stringValue = rtss.toString();
                    } catch (NumberFormatException ex) {
                        thisCellValue.stringValue = "Failed to parse SST index '" + sstIndex + "': "
                                + ex.toString();
                    }
                    break;
                case NUMBER:
                    String n = value.toString();
                    if (this.formatString != null) {
                        thisCellValue.stringValue = formatter.formatRawCellContents(Double.parseDouble(n),
                                this.formatIndex, this.formatString);
                    } else {
                        thisCellValue.stringValue = n;
                    }
                    thisCellValue.doubleValue = Double.parseDouble(n);
                    thisCellValue.dateValue = convertDateValue(thisCellValue.doubleValue, this.formatIndex,
                            this.formatString);
                    break;
                default:
                    thisCellValue.stringValue = "(TODO: Unexpected type: " + nextDataType + ")";
                    break;
                }
                // Output after we've seen the string contents
                // Emit commas for any fields that were missing on this row
                //Fill empty columns if required
                for (int i = lastColumnNumber + 1; i < thisColumn; ++i) {
                    //  output.print(',');
                    if (getCurrentSheetRowIndex() == 0) {
                        getColumns().add(new CellValueHolder());
                    } else {
                        CellValueHolder empty = new CellValueHolder();
                        addCurrentRowValue(empty);
                    }

                }
                if (lastColumnNumber == -1) {
                    lastColumnNumber = 0;
                }
                // Might be the empty string.
                if (getCurrentSheetRowIndex() == 0) {
                    getColumns().add(thisCellValue);
                } else {
                    addCurrentRowValue(thisCellValue);
                }
                // Update column
                if (thisColumn > -1) {
                    lastColumnNumber = thisColumn;
                }
            } else if ("row".equals(endElement.getName().getLocalPart())) {
                // We're onto a new row
                lastColumnNumber = -1;
                setCurrentSheetRowIndex(getCurrentSheetRowIndex() + 1);
            }

        }

        /**
         * Captures characters only if a suitable element is open.
         * Originally was just "v"; extended for inlineStr also.
         */
        public void characters(char[] ch) {
            if (vIsOpen)
                value.append(ch);
        }

        /**
         * Converts an Excel column name like "C" to a zero-based index.
         *
         * @param name  column name
         * @return Index corresponding to the specified name
         */
        private int nameToColumn(String name) {
            int column = -1;
            for (int i = 0; i < name.length(); ++i) {
                int c = name.charAt(i);
                column = (column + 1) * 26 + c - 'A';
            }
            return column;
        }

    }

}