net.geoprism.data.etl.excel.XSSFSheetXMLHandler.java Source code

Java tutorial

Introduction

Here is the source code for net.geoprism.data.etl.excel.XSSFSheetXMLHandler.java

Source

/**
 * Copyright (c) 2015 TerraFrame, Inc. All rights reserved.
 *
 * This file is part of Runway SDK(tm).
 *
 * Runway SDK(tm) is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * Runway SDK(tm) is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Runway SDK(tm).  If not, see <http://www.gnu.org/licenses/>.
 */
package net.geoprism.data.etl.excel;

import net.geoprism.data.etl.ColumnType;

import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * This class handles the processing of a sheet#.xml sheet part of a XSSF .xlsx file, and generates row and cell events
 * for it.
 */
public class XSSFSheetXMLHandler extends DefaultHandler {
    /**
     * Table with the styles used for formatting
     */
    private StylesTable stylesTable;

    private ReadOnlySharedStringsTable sharedStringsTable;

    /**
     * Where our text is going
     */
    private final SheetHandler output;

    // Set when V start element is seen
    private boolean vIsOpen;

    // Set when F start element is seen
    private boolean fIsOpen;

    // Set when an Inline String "is" is seen
    private boolean isIsOpen;

    // Set when a header/footer element is seen
    private boolean hfIsOpen;

    // Set when cell start element is seen;
    // used when cell close element is seen.
    private ColumnType nextDataType;

    // Used to format numeric cell values.
    private short formatIndex;

    private String formatString;

    /**
     * Formatter for getting the formatted cell values
     */
    private final DataFormatter cellFormatter;

    /**
     * Formatter for getting DATE and NUMBER values in a specific format for further parsing
     */
    private final DataFormatter contentFormatter;

    private String cellRef;

    private boolean formulasNotResults;

    // Gathers characters as they are seen.
    private StringBuffer value = new StringBuffer();

    private StringBuffer formula = new StringBuffer();

    private StringBuffer headerFooter = new StringBuffer();

    /**
     * Accepts objects needed while parsing.
     *
     * @param styles
     *          Table of styles
     * @param strings
     *          Table of shared strings
     */
    public XSSFSheetXMLHandler(StylesTable styles, ReadOnlySharedStringsTable strings,
            SheetHandler sheetContentsHandler, DataFormatter dataFormatter, boolean formulasNotResults) {
        this.stylesTable = styles;
        this.sharedStringsTable = strings;
        this.output = sheetContentsHandler;
        this.formulasNotResults = formulasNotResults;
        this.nextDataType = ColumnType.NUMBER;

        this.contentFormatter = dataFormatter;
        this.cellFormatter = new DataFormatter();
    }

    /**
     * Accepts objects needed while parsing.
     *
     * @param styles
     *          Table of styles
     * @param strings
     *          Table of shared strings
     */
    public XSSFSheetXMLHandler(StylesTable styles, ReadOnlySharedStringsTable strings,
            SheetHandler sheetContentsHandler, boolean formulasNotResults) {
        this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults);
    }

    private boolean isTextTag(String name) {
        if ("v".equals(name)) {
            // Easy, normal v text tag
            return true;
        }
        if ("inlineStr".equals(name)) {
            // Easy inline string
            return true;
        }
        if ("t".equals(name) && isIsOpen) {
            // Inline string <is><t>...</t></is> pair
            return true;
        }
        // It isn't a text tag
        return false;
    }

    public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {

        if (isTextTag(name)) {
            vIsOpen = true;
            // Clear contents cache
            value.setLength(0);
        } else if ("is".equals(name)) {
            // Inline string outer tag
            isIsOpen = true;
        } else if ("f".equals(name)) {
            // Clear contents cache
            formula.setLength(0);

            // Mark us as being a formula if not already
            if (nextDataType == ColumnType.NUMBER) {
                nextDataType = ColumnType.FORMULA;
            }

            // Decide where to get the formula string from
            String type = attributes.getValue("t");
            if (type != null && type.equals("shared")) {
                // Is it the one that defines the shared, or uses it?
                String ref = attributes.getValue("ref");
                String si = attributes.getValue("si");

                if (ref != null) {
                    // This one defines it
                    // TODO Save it somewhere
                    fIsOpen = true;
                } else {
                    // This one uses a shared formula
                    // TODO Retrieve the shared formula and tweak it to
                    // match the current cell
                    if (formulasNotResults) {
                        System.err.println("Warning - shared formulas not yet supported!");
                    } else {
                        // It's a shared formula, so we can't get at the formula string yet
                        // However, they don't care about the formula string, so that's ok!
                    }
                }
            } else {
                fIsOpen = true;
            }
        } else if ("oddHeader".equals(name) || "evenHeader".equals(name) || "firstHeader".equals(name)
                || "firstFooter".equals(name) || "oddFooter".equals(name) || "evenFooter".equals(name)) {
            hfIsOpen = true;
            // Clear contents cache
            headerFooter.setLength(0);
        } else if ("row".equals(name)) {
            int rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
            output.startRow(rowNum);
        }
        // c => cell
        else if ("c".equals(name)) {
            // Set up defaults.
            this.nextDataType = ColumnType.NUMBER;
            this.formatIndex = -1;
            this.formatString = null;
            cellRef = attributes.getValue("r");
            String cellType = attributes.getValue("t");
            String cellStyleStr = attributes.getValue("s");
            if ("b".equals(cellType))
                nextDataType = ColumnType.BOOLEAN;
            else if ("e".equals(cellType))
                nextDataType = ColumnType.ERROR;
            else if ("inlineStr".equals(cellType))
                nextDataType = ColumnType.INLINE_STRING;
            else if ("s".equals(cellType))
                nextDataType = ColumnType.TEXT;
            else if ("str".equals(cellType))
                nextDataType = ColumnType.FORMULA;
            else if (cellStyleStr != null) {
                // Number, but almost certainly with a special style or format
                int styleIndex = Integer.parseInt(cellStyleStr);
                XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
                this.formatIndex = style.getDataFormat();
                this.formatString = style.getDataFormatString();
                if (this.formatString == null)
                    this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
            }
        }
    }

    public void endElement(String uri, String localName, String name) throws SAXException {
        String thisStr = null;
        String thisData = null;

        // v => contents of a cell
        if (isTextTag(name)) {
            vIsOpen = false;

            // Process the value contents as required, now we have it all
            switch (nextDataType) {
            case BOOLEAN:
                char first = value.charAt(0);
                thisStr = first == '0' ? "FALSE" : "TRUE";
                thisData = new String(thisStr);
                break;

            case ERROR:
                thisStr = "ERROR:" + value.toString();
                thisData = new String(thisStr);
                break;

            case FORMULA:
                if (formulasNotResults) {
                    thisStr = formula.toString();
                    thisData = new String(thisStr);
                } else {
                    String fv = value.toString();

                    if (this.formatString != null) {
                        try {
                            // Try to use the value as a formattable number
                            double d = Double.parseDouble(fv);
                            thisStr = this.cellFormatter.formatRawCellContents(d, this.formatIndex,
                                    this.formatString);
                            thisData = this.contentFormatter.formatRawCellContents(d, this.formatIndex,
                                    this.formatString);
                        } catch (NumberFormatException e) {
                            // Formula is a String result not a Numeric one
                            thisStr = fv;
                            thisData = new String(thisStr);
                        }
                    } else {
                        // No formating applied, just do raw value in all cases
                        thisStr = fv;
                        thisData = new String(thisStr);
                    }
                }
                break;

            case INLINE_STRING:
                // TODO: Can these ever have formatting on them?
                XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
                thisStr = rtsi.toString();
                thisData = new String(thisStr);
                break;

            case TEXT:
                String sstIndex = value.toString();
                try {
                    int idx = Integer.parseInt(sstIndex);
                    XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
                    thisStr = rtss.toString();
                    thisData = new String(thisStr);
                } catch (NumberFormatException ex) {
                    System.err.println("Failed to parse SST index '" + sstIndex + "': " + ex.toString());
                }
                break;

            case NUMBER:
                String n = value.toString();
                if (this.formatString != null) {
                    thisStr = cellFormatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex,
                            this.formatString);
                    thisData = this.contentFormatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex,
                            this.formatString);
                } else {
                    thisStr = n;
                    thisData = new String(thisStr);
                }
                break;

            default:
                thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
                thisData = new String(thisStr);
                break;
            }

            if (DateUtil.isADateFormat(this.formatIndex, this.formatString)) {
                output.cell(cellRef, thisData, thisStr, ColumnType.DATE);
            } else {
                output.cell(cellRef, thisData, thisStr, nextDataType);
            }
        } else if ("f".equals(name)) {
            fIsOpen = false;
        } else if ("is".equals(name)) {
            isIsOpen = false;
        } else if ("row".equals(name)) {
            output.endRow();
        } else if ("oddHeader".equals(name) || "evenHeader".equals(name) || "firstHeader".equals(name)) {
            hfIsOpen = false;
            output.headerFooter(headerFooter.toString(), true, name);
        } else if ("oddFooter".equals(name) || "evenFooter".equals(name) || "firstFooter".equals(name)) {
            hfIsOpen = false;
            output.headerFooter(headerFooter.toString(), false, name);
        }
    }

    /**
     * Captures characters only if a suitable element is open. Originally was just "v"; extended for inlineStr also.
     */
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (vIsOpen) {
            value.append(ch, start, length);
        }
        if (fIsOpen) {
            formula.append(ch, start, length);
        }
        if (hfIsOpen) {
            headerFooter.append(ch, start, length);
        }
    }
}