Java tutorial
// ============================================================================ // // Copyright (C) 2006-2018 Talend Inc. - www.talend.com // // This source code is available under agreement available at // https://github.com/Talend/data-prep/blob/master/LICENSE // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataprep.schema.xls.streaming; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; import java.security.GeneralSecurityException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import javax.xml.namespace.QName; import javax.xml.stream.XMLEventReader; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; import javax.xml.stream.events.Attribute; import javax.xml.stream.events.StartElement; import javax.xml.stream.events.XMLEvent; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.poifs.crypt.Decryptor; import org.apache.poi.poifs.crypt.EncryptionInfo; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.model.StylesTable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.talend.dataprep.util.FilesHelper; import com.monitorjbl.xlsx.exceptions.OpenException; import com.monitorjbl.xlsx.exceptions.ReadException; public class StreamingWorkbookReader implements Iterable<Sheet>, AutoCloseable { private static final Logger LOGGER = LoggerFactory.getLogger(StreamingWorkbookReader.class); private static final QName SHEET_NAME_ATT_QNAME = new QName("name"); private final List<StreamingSheet> sheets; private final List<String> sheetNames = new ArrayList<>(); private final StreamingReader.Builder builder; private File tmp; private OPCPackage pkg; public StreamingWorkbookReader(StreamingReader.Builder builder) { this.sheets = new ArrayList<>(); this.builder = builder; } private static File writeInputStreamToFile(InputStream is, int bufferSize) throws IOException { File f = Files.createTempFile("tmp-", ".xlsx").toFile(); try (FileOutputStream fos = new FileOutputStream(f)) { int read; byte[] bytes = new byte[bufferSize]; while ((read = is.read(bytes)) != -1) { fos.write(bytes, 0, read); } is.close(); fos.close(); return f; } } public StreamingSheetReader first() { return sheets.get(0).getReader(); } public void init(InputStream is) { File f = null; try { f = writeInputStreamToFile(is, builder.getBufferSize()); LOGGER.debug("Created temp file [{}", f.getAbsolutePath()); init(f); tmp = f; } catch (IOException e) { throw new ReadException("Unable to read input stream", e); } catch (RuntimeException e) { FilesHelper.deleteQuietly(f); throw e; } } // to override https://bz.apache.org/bugzilla/show_bug.cgi?id=57699 public void init(File f) { try { if (builder.getPassword() != null) { // Based on: https://poi.apache.org/encryption.html POIFSFileSystem poifs = new POIFSFileSystem(f); EncryptionInfo info = new EncryptionInfo(poifs); Decryptor d = Decryptor.getInstance(info); d.verifyPassword(builder.getPassword()); pkg = OPCPackage.open(d.getDataStream(poifs)); } else { pkg = OPCPackage.open(f); } XSSFReader reader = new XSSFReader(pkg); SharedStringsTable sst = reader.getSharedStringsTable(); StylesTable styles = reader.getStylesTable(); loadSheets(reader, sst, styles, builder.getRowCacheSize()); } catch (IOException e) { throw new OpenException("Failed to open file", e); } catch (OpenXML4JException | XMLStreamException e) { throw new ReadException("Unable to read workbook", e); } catch (GeneralSecurityException e) { throw new ReadException("Unable to read workbook - Decryption failed", e); } } void loadSheets(XSSFReader reader, SharedStringsTable sst, StylesTable stylesTable, int rowCacheSize) throws IOException, InvalidFormatException, XMLStreamException { lookupSheetNames(reader.getWorkbookData()); Iterator<InputStream> iter = reader.getSheetsData(); int i = 0; while (iter.hasNext()) { XMLEventReader parser = XMLInputFactory.newInstance().createXMLEventReader(iter.next()); sheets.add(new StreamingSheet(sheetNames.get(i++), new StreamingSheetReader(sst, stylesTable, parser, rowCacheSize))); } } void lookupSheetNames(InputStream workBookData) throws IOException, InvalidFormatException, XMLStreamException { sheetNames.clear(); XMLEventReader parser = XMLInputFactory.newInstance().createXMLEventReader(workBookData); boolean parsingsSheets = false; while (parser.hasNext()) { XMLEvent event = parser.nextEvent(); switch (event.getEventType()) { case XMLStreamConstants.START_ELEMENT: StartElement startElement = event.asStartElement(); String tagLocalName = startElement.getName().getLocalPart(); if ("sheets".equals(tagLocalName)) { parsingsSheets = true; continue; } if (parsingsSheets && "sheet".equals(tagLocalName)) { Attribute attribute = startElement.getAttributeByName(SHEET_NAME_ATT_QNAME); if (attribute != null) { sheetNames.add(attribute.getValue()); } } break; case XMLStreamConstants.END_ELEMENT: if ("sheets".equals(event.asEndElement().getName().getLocalPart())) { return; } } } } int findSheetByName(String name) { return sheetNames.indexOf(name); } String findSheetNameByIndex(int index) { return sheetNames.get(index); } List<? extends Sheet> getSheets() { return sheets; } @Override public Iterator<Sheet> iterator() { return new StreamingSheetIterator(sheets.iterator()); } @Override public void close() { try { for (StreamingSheet sheet : sheets) { sheet.getReader().close(); } pkg.revert(); } finally { if (tmp != null) { LOGGER.debug("Deleting tmp file [{}]", tmp.getAbsolutePath()); FilesHelper.deleteQuietly(tmp); } } } static class StreamingSheetIterator implements Iterator<Sheet> { private final Iterator<StreamingSheet> iterator; public StreamingSheetIterator(Iterator<StreamingSheet> iterator) { this.iterator = iterator; } @Override public boolean hasNext() { return iterator.hasNext(); } @Override public Sheet next() { return iterator.next(); } @Override public void remove() { throw new UnsupportedOperationException("NotSupported"); } } }