ec.util.spreadsheet.poi.FastPoiBook.java Source code

Java tutorial

Introduction

Here is the source code for ec.util.spreadsheet.poi.FastPoiBook.java

Source

/*
 * Copyright 2013 National Bank of Belgium
 *
 * Licensed under the EUPL, Version 1.1 or  as soon they will be approved 
 * by the European Commission - subsequent versions of the EUPL (the "Licence");
 * You may not use this work except in compliance with the Licence.
 * You may obtain a copy of the Licence at:
 *
 * http://ec.europa.eu/idabc/eupl
 *
 * Unless required by applicable law or agreed to in writing, software 
 * distributed under the Licence is distributed on an "AS IS" basis,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the Licence for the specific language governing permissions and 
 * limitations under the Licence.
 */
package ec.util.spreadsheet.poi;

import ec.util.spreadsheet.Book;
import ec.util.spreadsheet.Sheet;
import ec.util.spreadsheet.helpers.ArraySheet;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.concurrent.Immutable;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 *
 * @author Philippe Charles
 */
final class FastPoiBook extends Book {

    /**
     * Opens a book from a file.
     *
     * @param file
     * @return
     * @throws IOException
     * @throws OpenXML4JException
     */
    @Nonnull
    public static FastPoiBook create(@Nonnull File file) throws IOException, OpenXML4JException {
        return create(OPCPackage.open(file.getPath(), PackageAccess.READ));
    }

    /**
     * Opens a book from a stream.
     *
     * Note from OPCPackage: uses quite a bit more memory than
     * {@link #create(File)}, which doesn't need to hold the whole zip file in
     * memory, and can take advantage of native methods
     *
     * @param stream
     * @return
     * @throws IOException
     * @throws OpenXML4JException
     */
    @Nonnull
    public static FastPoiBook create(@Nonnull InputStream stream) throws IOException, OpenXML4JException {
        return create(OPCPackage.open(stream));
    }

    @Nonnull
    private static FastPoiBook create(@Nonnull OPCPackage pkg) throws IOException, OpenXML4JException {
        XSSFReader reader = new XSSFReader(pkg);
        WorkbookData workbookData = new WorkbookDataSax2EventHandler().parse(newWorkBookDataSupplier(reader));
        FastPoiContext sheetContext = new FastPoiContext(
                new SharedStringsDataSax2EventHandler().parse(newSharedStringsDataSupplier(reader)),
                new StylesDataSax2EventHandler().parse(newStylesDataSupplier(reader)), workbookData.date1904);
        return new FastPoiBook(pkg, reader, workbookData.sheets, sheetContext);
    }

    private final OPCPackage pkg;
    private final XSSFReader reader;
    private final List<SheetMeta> sheets;
    private final FastPoiContext sheetContext;

    private FastPoiBook(OPCPackage pkg, XSSFReader reader, List<SheetMeta> sheets, FastPoiContext sheetContext) {
        this.pkg = pkg;
        this.reader = reader;
        this.sheets = sheets;
        this.sheetContext = sheetContext;
    }

    @Override
    public void close() throws IOException {
        pkg.close();
    }

    @Override
    public int getSheetCount() {
        return sheets.size();
    }

    @Override
    public Sheet getSheet(int index) throws IOException {
        SheetMeta sheetMeta = sheets.get(index);
        return new SheetSax2EventHandler(sheetMeta.name, sheetContext)
                .parse(newSheetSupplier(reader, sheetMeta.relationId));
    }

    //<editor-fold defaultstate="collapsed" desc="Local structures">
    //    @VisibleForTesting
    @Immutable
    static final class WorkbookData {

        public final List<SheetMeta> sheets;
        public final boolean date1904;

        public WorkbookData(List<SheetMeta> sheets, boolean date1904) {
            this.sheets = sheets;
            this.date1904 = date1904;
        }
    }

    //    @VisibleForTesting
    @Immutable
    static final class SheetMeta {

        public final String relationId;
        public final String name;

        public SheetMeta(String relationId, String name) {
            this.relationId = relationId;
            this.name = name;
        }
    }
    //</editor-fold>

    //<editor-fold defaultstate="collapsed" desc="ByteSource suppliers">
    private static ByteSource newWorkBookDataSupplier(final XSSFReader reader) {
        return new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
                try {
                    return reader.getWorkbookData();
                } catch (InvalidFormatException ex) {
                    throw new IOException(ex);
                }
            }
        };
    }

    private static ByteSource newSharedStringsDataSupplier(final XSSFReader reader) {
        return new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
                try {
                    return reader.getSharedStringsData();
                } catch (InvalidFormatException ex) {
                    throw new IOException(ex);
                }
            }
        };
    }

    private static ByteSource newStylesDataSupplier(final XSSFReader reader) {
        return new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
                try {
                    return reader.getStylesData();
                } catch (InvalidFormatException ex) {
                    throw new IOException(ex);
                }
            }
        };
    }

    private static ByteSource newSheetSupplier(final XSSFReader reader, final String relationId) {
        return new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
                try {
                    return reader.getSheet(relationId);
                } catch (InvalidFormatException ex) {
                    throw new IOException(ex);
                }
            }
        };
    }
    //</editor-fold>

    //<editor-fold defaultstate="collapsed" desc="Sax2 event handlers">
    //    @VisibleForTesting
    static abstract class FluentHandler<T> extends DefaultHandler /*implements IBuilder<T>*/ {

        public final T parse(ByteSource byteSource) throws IOException {
            try (InputStream stream = byteSource.openStream()) {
                if (stream != null) {
                    try {
                        XMLReader reader = XMLReaderFactory.createXMLReader();
                        reader.setContentHandler(this);
                        reader.parse(new InputSource(stream));
                    } catch (SAXException ex) {
                        throw new RuntimeException("While parsing xml", ex);
                    }
                }
            }
            return build();
        }

        abstract public T build();
    }

    private static final class SaxStringBuilder {

        private boolean enabled = false;
        private StringBuilder content = new StringBuilder();

        public SaxStringBuilder clear() {
            content = new StringBuilder();
            return this;
        }

        public boolean isEnabled() {
            return enabled;
        }

        public SaxStringBuilder enable() {
            this.enabled = true;
            return this;
        }

        public SaxStringBuilder disable() {
            this.enabled = false;
            return this;
        }

        public CharSequence build() {
            // we defer CharSequence@toString()
            return content;
        }

        public SaxStringBuilder appendIfNeeded(char[] ch, int start, int length) {
            if (isEnabled()) {
                content.append(ch, start, length);
            }
            return this;
        }
    }

    /**
     * FIXME: missing support of inline string <is><t>hello</t></is>
     */
    //    @VisibleForTesting
    static final class SheetSax2EventHandler extends FluentHandler<Sheet> {

        private static final String CELL_TAG = "c";
        private static final String REFERENCE_ATTRIBUTE = "r";
        private static final String STYLE_INDEX_ATTRIBUTE = "s";
        private static final String CELL_DATA_TYPE_ATTRIBUTE = "t";
        private static final String CELL_VALUE_TAG = "v";
        private static final String SHEET_DIMENSIONS_TAG = "dimension";
        private static final String SHEET_BOUNDS_ATTRIBUTE = "ref";
        private static final String SHEET_DATA_TAG = "sheetData";
        //private static final String INLINE_STRING_TAG = "is";
        //
        private final String sheetName;
        private final FastPoiContext sheetContext;
        private final SaxStringBuilder stringBuilder;
        //
        private String sheetBounds;
        private FastPoiSheetBuilder sheetBuilder;
        private String ref;
        private String rawDataType;
        private String rawStyleIndex;

        public SheetSax2EventHandler(String sheetName, FastPoiContext sheetContext) {
            this.sheetName = sheetName;
            this.sheetContext = sheetContext;
            this.stringBuilder = new SaxStringBuilder();
            //
            this.sheetBounds = null;
            this.sheetBuilder = null;
            this.ref = null;
            this.rawDataType = null;
            this.rawStyleIndex = null;
        }

        @Override
        public Sheet build() {
            return sheetBuilder != null ? sheetBuilder.build() : ArraySheet.copyOf(sheetName, new Object[][] {});
        }

        @Override
        public void startElement(String uri, String localName, String name, Attributes attributes)
                throws SAXException {
            switch (name) {
            case CELL_TAG:
                ref = attributes.getValue(REFERENCE_ATTRIBUTE);
                rawDataType = attributes.getValue(CELL_DATA_TYPE_ATTRIBUTE);
                rawStyleIndex = attributes.getValue(STYLE_INDEX_ATTRIBUTE);
                break;
            case CELL_VALUE_TAG:
                stringBuilder.enable().clear();
                break;
            case SHEET_DIMENSIONS_TAG:
                sheetBounds = attributes.getValue(SHEET_BOUNDS_ATTRIBUTE);
                break;
            case SHEET_DATA_TAG:
                sheetBuilder = FastPoiSheetBuilder.create(sheetName, sheetContext, sheetBounds);
                break;
            }
        }

        @Override
        public void endElement(String uri, String localName, String name) throws SAXException {
            if (stringBuilder.isEnabled() && (name.equals(CELL_VALUE_TAG) /*|| name.equals(INLINE_STRING_TAG)*/)) {
                sheetBuilder.put(ref, stringBuilder.disable().build(), rawDataType, rawStyleIndex);
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            stringBuilder.appendIfNeeded(ch, start, length);
        }
    }

    /**
     * http://msdn.microsoft.com/en-us/library/office/documentformat.openxml.spreadsheet.aspx
     */
    //    @VisibleForTesting
    static final class WorkbookDataSax2EventHandler extends FluentHandler<WorkbookData> {

        private static final String SHEET_TAG = "sheet";
        private static final String WORKBOOK_PROPERTIES_TAG = "workbookPr";
        private static final String DATE1904_ATTRIBUTE = "date1904";
        private static final String SHEET_TAB_ID_ATTRIBUTE = "r:id";
        private static final String SHEET_NAME_ATTRIBUTE = "name";
        //
        private final List<SheetMeta> sheets;
        private boolean date1904;

        public WorkbookDataSax2EventHandler() {
            this.sheets = new ArrayList<>();
            this.date1904 = false;
        }

        @Override
        public WorkbookData build() {
            return new WorkbookData(new ArrayList<>(sheets), date1904);
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            switch (qName) {
            case SHEET_TAG:
                sheets.add(new SheetMeta(attributes.getValue(SHEET_TAB_ID_ATTRIBUTE),
                        attributes.getValue(SHEET_NAME_ATTRIBUTE)));
                break;
            case WORKBOOK_PROPERTIES_TAG:
                date1904 = Boolean.parseBoolean(attributes.getValue(DATE1904_ATTRIBUTE));
                break;
            }
        }
    }

    /**
     * http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
     */
    //    @VisibleForTesting
    static final class SharedStringsDataSax2EventHandler extends FluentHandler<String[]> {

        private static final String SHARED_STRING_ITEM_TAG = "si";
        private static final String TEXT_TAG = "t";
        //
        private final List<String> sharedStrings;
        private final SaxStringBuilder stringBuilder;

        public SharedStringsDataSax2EventHandler() {
            this.sharedStrings = new ArrayList<>();
            this.stringBuilder = new SaxStringBuilder();
        }

        @Override
        public String[] build() {
            return sharedStrings.toArray(new String[sharedStrings.size()]);
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            switch (qName) {
            case SHARED_STRING_ITEM_TAG:
                stringBuilder.clear();
                break;
            case TEXT_TAG:
                stringBuilder.enable();
                break;
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            switch (qName) {
            case TEXT_TAG:
                stringBuilder.disable();
                break;
            case SHARED_STRING_ITEM_TAG:
                sharedStrings.add(stringBuilder.build().toString());
                break;
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            stringBuilder.appendIfNeeded(ch, start, length);
        }
    }

    //    @VisibleForTesting
    static final class StylesDataSax2EventHandler extends FluentHandler<boolean[]> {

        private static final String CELL_FORMAT_TAG = "xf";
        private static final String CELL_FORMATS_TAG = "cellXfs";
        private static final String NUMBER_FORMAT_TAG = "numFmt";
        private static final String NUMBER_FORMAT_ID_ATTRIBUTE = "numFmtId";
        private static final String NUMBER_FORMAT_CODE_ATTRIBUTE = "formatCode";
        //
        private final List<String> orderedListOfIds;
        private final Map<String, String> numberFormats;
        private boolean insideGroupTag;

        public StylesDataSax2EventHandler() {
            this.orderedListOfIds = new ArrayList<>();
            this.numberFormats = new HashMap<>();
            this.insideGroupTag = false;
        }

        @Override
        public boolean[] build() {
            boolean[] result = new boolean[orderedListOfIds.size()];
            // Style order matters! -> accessed by index in sheets
            for (int i = 0; i < result.length; i++) {
                String formatId = orderedListOfIds.get(i);
                result[i] = DateUtil.isADateFormat(Integer.parseInt(formatId), numberFormats.get(formatId));
            }
            return result;
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            switch (qName) {
            case CELL_FORMATS_TAG:
                insideGroupTag = true;
                break;
            case CELL_FORMAT_TAG:
                if (insideGroupTag) {
                    orderedListOfIds.add(attributes.getValue(NUMBER_FORMAT_ID_ATTRIBUTE));
                }
                break;
            case NUMBER_FORMAT_TAG:
                numberFormats.put(attributes.getValue(NUMBER_FORMAT_ID_ATTRIBUTE),
                        attributes.getValue(NUMBER_FORMAT_CODE_ATTRIBUTE));
                break;
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (qName.equals(CELL_FORMATS_TAG)) {
                insideGroupTag = false;
            }
        }
    }
    //</editor-fold>
}