org.dhatim.fastexcel.reader.ReadableWorkbook.java Source code

Java tutorial

Introduction

Here is the source code for org.dhatim.fastexcel.reader.ReadableWorkbook.java

Source

/*
 * Copyright 2016 Dhatim.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.dhatim.fastexcel.reader;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;

import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.openxml4j.util.ZipFileZipEntrySource;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.storage.HeaderBlockConstants;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;

/**
 * Read-only access to the sheets of an xlsx workbook.
 *
 * <p>A workbook is opened from a {@link File} or an {@link InputStream}; the list of
 * sheets and the {@code date1904} flag are read eagerly in the constructor. The
 * underlying package stays open until {@link #close()} is called.
 */
public class ReadableWorkbook implements Closeable {

    private final OPCPackage pkg;
    private final XSSFReader reader;
    private final SharedStringsTable sst;
    private final XMLInputFactory factory;

    private boolean date1904;
    private final List<Sheet> sheets = new ArrayList<>();

    /**
     * Opens the given file in read-only mode.
     *
     * @param inputFile the workbook file
     * @throws IOException if reading the package fails
     * @throws ExcelReaderException if the file is not a valid package or its workbook
     *         part cannot be parsed
     */
    public ReadableWorkbook(File inputFile) throws IOException {
        this(open(inputFile));
    }

    /**
     * Opens a workbook from a stream. The stream is read to its end but is not closed
     * by this constructor.
     *
     * <p>Note: will load the whole xlsx file into memory,
     * (but will not uncompress it in memory)
     *
     * @param inputStream stream providing the compressed workbook bytes
     * @throws IOException if reading the stream or the package fails
     * @throws ExcelReaderException if the content is not a valid package or its
     *         workbook part cannot be parsed
     */
    public ReadableWorkbook(InputStream inputStream) throws IOException {
        this(open(inputStream));
    }

    /**
     * Initializes the reader, the shared strings table and the XML factory, then reads
     * the workbook part. If any step fails, {@code pkg} is closed before the exception
     * propagates, because no caller ever obtains a reference it could close itself.
     */
    private ReadableWorkbook(OPCPackage pkg) throws IOException {
        this.pkg = pkg;
        try {
            try {
                reader = new XSSFReader(pkg);
                sst = reader.getSharedStringsTable();
            } catch (NotOfficeXmlFileException | OpenXML4JException e) {
                throw new ExcelReaderException(e);
            }
            factory = XMLInputFactory.newInstance();
            // To prevent XML External Entity (XXE) attacks: no external entities, no DTDs
            factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
            factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);

            try (SimpleXmlReader workbookReader = new SimpleXmlReader(factory, reader.getWorkbookData())) {
                readWorkbook(workbookReader);
            } catch (InvalidFormatException | XMLStreamException e) {
                throw new ExcelReaderException(e);
            }
        } catch (IOException | RuntimeException e) {
            // The half-built workbook is unreachable for callers: release the package here.
            closeAfterFailure(pkg, e);
            throw e;
        }
    }

    /**
     * Closes the underlying package.
     *
     * @throws IOException if closing the package fails
     */
    @Override
    public void close() throws IOException {
        pkg.close();
    }

    /**
     * @return {@code true} if the workbook properties declare the 1904 date system
     */
    public boolean isDate1904() {
        return date1904;
    }

    /**
     * @return a stream over the sheets, in the order they are declared in the workbook
     */
    public Stream<Sheet> getSheets() {
        return sheets.stream();
    }

    /**
     * @param index zero-based sheet position
     * @return the sheet at {@code index}, or empty if the index is out of range
     */
    public Optional<Sheet> getSheet(int index) {
        if (index < 0 || index >= sheets.size()) {
            return Optional.empty();
        }
        return Optional.of(sheets.get(index));
    }

    /**
     * @return the first declared sheet
     * @throws IndexOutOfBoundsException if the workbook declares no sheet
     */
    public Sheet getFirstSheet() {
        return sheets.get(0);
    }

    /**
     * Finds the first sheet whose name equals {@code name}.
     *
     * @param name sheet name to look for; must not be {@code null}
     * @return the matching sheet, or empty if there is none
     */
    public Optional<Sheet> findSheet(String name) {
        return sheets.stream().filter(sheet -> name.equals(sheet.getName())).findFirst();
    }

    /**
     * Scans the workbook part, registering every {@code sheet} element and reading the
     * {@code date1904} attribute of {@code workbookPr}; stops at the end of
     * {@code workbook}.
     */
    private void readWorkbook(SimpleXmlReader r) throws XMLStreamException {
        while (r.goTo(
                () -> r.isStartElement("sheets") || r.isStartElement("workbookPr") || r.isEndElement("workbook"))) {
            if ("sheets".equals(r.getLocalName())) {
                r.forEach("sheet", "sheets", this::createSheet);
            } else if ("workbookPr".equals(r.getLocalName())) {
                date1904 = parseXmlBoolean(r.getAttribute("date1904"));
            } else {
                break;
            }
        }
    }

    /**
     * Parses an XML Schema boolean attribute value. Besides what
     * {@link Boolean#parseBoolean(String)} accepts, the literal {@code "1"} is treated
     * as true, since xsd:boolean allows both {@code true}/{@code false} and
     * {@code 1}/{@code 0}. A missing attribute ({@code null}) yields {@code false}.
     */
    private static boolean parseXmlBoolean(String value) {
        return "1".equals(value) || Boolean.parseBoolean(value);
    }

    /**
     * Registers the sheet described by the current {@code sheet} element; its index is
     * its position in declaration order.
     */
    private void createSheet(SimpleXmlReader r) {
        String name = r.getAttribute("name");
        String id = r.getAttribute("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id");
        int index = sheets.size();
        sheets.add(new Sheet(this, index, id, name));
    }

    /**
     * Opens a sequential stream over the rows of {@code sheet}. Closing the returned
     * stream closes the underlying sheet input stream. If building the stream fails,
     * the sheet input stream is closed before the exception propagates.
     *
     * @throws IOException if the sheet cannot be opened or its XML cannot be read
     */
    Stream<Row> openStream(Sheet sheet) throws IOException {
        InputStream inputStream = null;
        try {
            inputStream = reader.getSheet(sheet.getId());
            Stream<Row> stream = StreamSupport.stream(new RowSpliterator(this, inputStream), false);
            return stream.onClose(asUncheckedRunnable(inputStream));
        } catch (XMLStreamException | InvalidFormatException e) {
            closeAfterFailure(inputStream, e);
            throw new IOException(e);
        } catch (IOException | RuntimeException e) {
            closeAfterFailure(inputStream, e);
            throw e;
        }
    }

    XMLInputFactory getXmlFactory() {
        return factory;
    }

    SharedStringsTable getSharedStringsTable() {
        return sst;
    }

    /**
     * Tells whether {@code bytes} starts with {@link POIFSConstants#OOXML_FILE_HEADER}.
     *
     * @param bytes leading bytes of the content to test; must not be {@code null}
     * @return {@code true} if the header matches
     * @throws IllegalArgumentException if {@code bytes} is shorter than the header
     */
    public static boolean isOOXMLZipHeader(byte[] bytes) {
        requireLength(bytes, POIFSConstants.OOXML_FILE_HEADER.length);
        return arrayEquals(bytes, 0, POIFSConstants.OOXML_FILE_HEADER, 0, POIFSConstants.OOXML_FILE_HEADER.length);
    }

    /**
     * Tells whether {@code bytes} starts with the 8-byte little-endian encoding of
     * {@link HeaderBlockConstants#_signature}.
     *
     * @param bytes leading bytes of the content to test; must not be {@code null}
     * @return {@code true} if the header matches
     * @throws IllegalArgumentException if {@code bytes} holds fewer than 8 bytes
     */
    public static boolean isOLE2Header(byte[] bytes) {
        requireLength(bytes, 8);
        byte[] ole2Header = new byte[8];
        LittleEndian.putLong(ole2Header, 0, HeaderBlockConstants._signature);
        return arrayEquals(bytes, 0, ole2Header, 0, ole2Header.length);
    }

    private static void requireLength(byte[] bytes, int requiredLength) {
        if (bytes.length < requiredLength) {
            throw new IllegalArgumentException("Insufficient header bytes");
        }
    }

    /**
     * Compares {@code length} bytes of {@code a} starting at {@code offsetA} with
     * {@code length} bytes of {@code b} starting at {@code offsetB}. Returns
     * {@code false} if either region does not fit inside its array.
     */
    private static boolean arrayEquals(byte[] a, int offsetA, byte[] b, int offsetB, int length) {
        // Identity is only a valid shortcut when both regions start at the same offset.
        if (a == b && offsetA == offsetB) {
            return true;
        }
        if (a == null || b == null) {
            return false;
        }
        if ((offsetA + length > a.length) || (offsetB + length > b.length)) {
            return false;
        }
        for (int i = 0; i < length; i++) {
            if (a[offsetA + i] != b[offsetB + i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Adapts {@code c.close()} to a {@link Runnable}, rethrowing any
     * {@link IOException} as {@link UncheckedIOException}.
     */
    private static Runnable asUncheckedRunnable(Closeable c) {
        return () -> {
            try {
                c.close();
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        };
    }

    /**
     * Closes {@code resource} while {@code failure} is being propagated. A failure to
     * close is attached to {@code failure} as suppressed so it never masks the original
     * error. Does nothing when {@code resource} is {@code null}.
     */
    private static void closeAfterFailure(Closeable resource, Throwable failure) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException | RuntimeException closeFailure) {
            if (closeFailure != failure) {
                failure.addSuppressed(closeFailure);
            }
        }
    }

    /** Opens {@code file} as a read-only package. */
    private static OPCPackage open(File file) {
        try {
            return OPCPackage.open(file, PackageAccess.READ);
        } catch (InvalidFormatException e) {
            throw new ExcelReaderException(e);
        }
    }

    /**
     * Reads all of {@code in} into memory and opens the bytes as a package. The zip
     * file is closed again if the package cannot be opened.
     */
    private static OPCPackage open(InputStream in) throws IOException {
        ZipFile zipFile = null;
        try {
            byte[] compressedBytes = IOUtils.toByteArray(in);
            zipFile = new ZipFile(new SeekableInMemoryByteChannel(compressedBytes));
            return OPCPackage.open(new ZipFileZipEntrySource(zipFile));
        } catch (InvalidFormatException e) {
            closeAfterFailure(zipFile, e);
            throw new ExcelReaderException(e);
        } catch (IOException | RuntimeException e) {
            closeAfterFailure(zipFile, e);
            throw e;
        }
    }

}