// Java tutorial
/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ /** * Author = Shailesh Ahuja */ package org.pentaho.di.trans.steps.excelinput.staxpoi; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.logging.KettleLogStore; import org.pentaho.di.core.logging.LogChannelInterface; import org.pentaho.di.core.spreadsheet.KSheet; import org.pentaho.di.core.spreadsheet.KWorkbook; /** * Streaming reader for XLSX files.<br> * Does not open XLS. 
*/ public class StaxPoiWorkbook implements KWorkbook { private static final String RELATION_NS_URI = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"; private LogChannelInterface log; private XSSFReader reader; // maintain the mapping of the sheet name to its ID private Map<String, String> sheetNameIDMap; // sheet names in order private String[] sheetNames; // mapping of the sheet object with its ID/Name private Map<String, StaxPoiSheet> openSheetsMap; private OPCPackage opcpkg; protected StaxPoiWorkbook() { openSheetsMap = new HashMap<String, StaxPoiSheet>(); this.log = KettleLogStore.getLogChannelInterfaceFactory().create(this); } public StaxPoiWorkbook(String filename, String encoding) throws KettleException { this(); try { opcpkg = OPCPackage.open(filename); openFile(opcpkg, encoding); } catch (Exception e) { throw new KettleException(e); } } public StaxPoiWorkbook(InputStream inputStream, String encoding) throws KettleException { this(); try { opcpkg = OPCPackage.open(inputStream); openFile(opcpkg, encoding); } catch (Exception e) { throw new KettleException(e); } } private void openFile(OPCPackage pkg, String encoding) throws KettleException { InputStream workbookData = null; XMLStreamReader workbookReader = null; try { reader = new XSSFReader(pkg); sheetNameIDMap = new LinkedHashMap<String, String>(); workbookData = reader.getWorkbookData(); XMLInputFactory factory = XMLInputFactory.newInstance(); workbookReader = factory.createXMLStreamReader(workbookData); while (workbookReader.hasNext()) { if (workbookReader.next() == XMLStreamConstants.START_ELEMENT && workbookReader.getLocalName().equals("sheet")) { String sheetName = workbookReader.getAttributeValue(null, "name"); String sheetID = workbookReader.getAttributeValue(RELATION_NS_URI, "id"); sheetNameIDMap.put(sheetName, sheetID); } } sheetNames = new String[sheetNameIDMap.size()]; int i = 0; for (String sheetName : sheetNameIDMap.keySet()) { sheetNames[i++] = sheetName; } } catch 
(Exception e) { throw new KettleException(e); } finally { if (workbookReader != null) { try { workbookReader.close(); } catch (XMLStreamException e) { throw new KettleException(e); } } if (workbookData != null) { try { workbookData.close(); } catch (IOException e) { throw new KettleException(e); } } } } @Override /** * return the same sheet if it already is created otherwise instantiate a new one */ public KSheet getSheet(String sheetName) { String sheetID = sheetNameIDMap.get(sheetName); if (sheetID == null) { return null; } StaxPoiSheet sheet = openSheetsMap.get(sheetID); if (sheet == null) { try { sheet = new StaxPoiSheet(reader, sheetName, sheetID); openSheetsMap.put(sheetID, sheet); } catch (Exception e) { log.logError(sheetName, e); } } return sheet; } @Override public String[] getSheetNames() { String[] sheets = new String[sheetNameIDMap.size()]; return sheetNameIDMap.keySet().toArray(sheets); } @Override public void close() { // close all the sheets for (StaxPoiSheet sheet : openSheetsMap.values()) { try { sheet.close(); } catch (IOException e) { log.logError("Could not close workbook", e); } catch (XMLStreamException e) { log.logError("Could not close xmlstream", e); } } if (opcpkg != null) { //We should not save change in xlsx because it is input step. opcpkg.revert(); } } @Override public int getNumberOfSheets() { return sheetNameIDMap.size(); } @Override public KSheet getSheet(int sheetNr) { if (sheetNr >= 0 && sheetNr < sheetNames.length) { return getSheet(sheetNames[sheetNr]); } return null; } @Override public String getSheetName(int sheetNr) { if (sheetNr >= 0 && sheetNr < sheetNames.length) { return sheetNames[sheetNr]; } return null; } }