List of usage examples for org.apache.poi.xssf.eventusermodel XSSFReader getSharedStringsTable
public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException
From source file:de.ks.idnadrev.expimp.xls.XlsxImporter.java
License:Apache License
public XlsxImportResultCollector importFromFile(File file) { resultCollector = new XlsxImportResultCollector(); checkFile(file);/*from www. j ava2 s . c o m*/ OPCPackage pkg = openPackage(file); try { XSSFReader reader = new XSSFReader(pkg); SharedStringsTable sharedStringsTable = reader.getSharedStringsTable();//used by ms office to store all string values log.info("Importing from {}", file); Map<Integer, Collection<SingleSheetImport>> importStages = new HashMap<>(); XSSFReader.SheetIterator iterator = (XSSFReader.SheetIterator) reader.getSheetsData(); while (iterator.hasNext()) { InputStream sheetStream = iterator.next(); String sheetName = iterator.getSheetName(); final XlsxImportSheetResult result = resultCollector.getSheetResult(sheetName); Class<?> class2Import; try { class2Import = getClass().getClassLoader().loadClass(sheetName); } catch (ClassNotFoundException e) { log.info("Could not load class to import {} will skip sheet.", sheetName); result.generalError("Could not load class to import " + sheetName + " will skip sheet.", e); continue; } if (class2Import != null) { if (importCfg.getIgnored().contains(class2Import)) { continue; } int stage = dependencyGraph.getStage(class2Import); importStages.putIfAbsent(stage, new LinkedList<>()); SingleSheetImport singleSheetImport = new SingleSheetImport(class2Import, sheetStream, dependencyGraph, reader, result, importCfg); importStages.get(stage).add(singleSheetImport); } } importStages.entrySet().forEach(stage -> { try { executorService.invokeAll(stage.getValue()); List<List<Future<?>>> collect = stage.getValue().stream()// .map(sheet -> sheet.getRunAfterImport().stream() .map((Runnable r) -> executorService.submit(r)) .collect(Collectors.<Future<?>>toList()))// .collect(Collectors.<List<Future<?>>>toList()); for (List<Future<?>> futureList : collect) { futureList.forEach(future -> { try { future.get(); } catch (ExecutionException e) { if (throwOnError) { log.error("Could not run after sheet ", e); throw new RuntimeException(e); } } catch (InterruptedException e) { // } }); } } catch (InterruptedException e1) { // } }); } catch (OpenXML4JException | IOException e) { resultCollector.generalError("Could not read " + file, e); if (throwOnError) { log.error("Could not read {}", file, e); throw new RuntimeException(e); } } finally { try { pkg.close(); } catch (IOException e) { resultCollector.generalError("Could not close package " + pkg, e); if (throwOnError) { log.error("Could not close package {}", pkg, e); } } } return resultCollector; }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.xlsx.XLSXFileReader.java
License:Apache License
public void processSheet(InputStream inputStream, DataTable dataTable, PrintWriter tempOut) throws Exception { OPCPackage pkg = OPCPackage.open(inputStream); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst, dataTable, tempOut); // rId2 found by processing the Workbook // Seems to either be rId# or rSheet# InputStream sheet1 = r.getSheet("rId1"); InputSource sheetSource = new InputSource(sheet1); parser.parse(sheetSource);/*from w ww. j av a 2s. c om*/ sheet1.close(); }
From source file:excel.FromHowTo.java
License:Apache License
public void processFirstSheet(String filename) throws Exception { OPCPackage pkg = OPCPackage.open(filename, PackageAccess.READ); try {/*from w ww .j a v a 2s . c o m*/ XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); // process the first sheet InputStream sheet2 = r.getSheetsData().next(); InputSource sheetSource = new InputSource(sheet2); parser.parse(sheetSource); sheet2.close(); } finally { pkg.close(); } }
From source file:excel.FromHowTo.java
License:Apache License
public void processAllSheets(String filename) throws Exception { OPCPackage pkg = OPCPackage.open(filename, PackageAccess.READ); try {//from www. ja va 2 s. co m XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iterator<InputStream> sheets = r.getSheetsData(); while (sheets.hasNext()) { System.out.println("Processing new sheet:\n"); InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); System.out.println(""); } } finally { pkg.close(); } }
From source file:org.apache.metamodel.excel.XlsxSheetToRowsHandler.java
License:Apache License
public XlsxSheetToRowsHandler(XlsxRowCallback callback, XSSFReader xssfReader, ExcelConfiguration configuration) throws Exception { _callback = callback;/*from w w w. jav a 2s. c o m*/ _configuration = configuration; _sharedStringTable = xssfReader.getSharedStringsTable(); _stylesTable = xssfReader.getStylesTable(); _value = new StringBuilder(); _style = new StyleBuilder(); _rowValues = new ArrayList<String>(); _styles = new ArrayList<Style>(); _rowNumber = -1; _inCell = false; _inFormula = false; }
From source file:org.jberet.support.io.ExcelStreamingItemReader.java
License:Open Source License
@Override protected void initWorkbookAndSheet(final int startRowNumber) throws Exception { InputStream workbookDataInputStream = null; XMLStreamReader workbookStreamReader = null; try {//from w w w . java 2 s . c o m final OPCPackage opcPackage = OPCPackage.open(inputStream); final XSSFReader xssfReader = new XSSFReader(opcPackage); workbookDataInputStream = xssfReader.getWorkbookData(); final XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance(); workbookStreamReader = xmlInputFactory.createXMLStreamReader(workbookDataInputStream); sharedStringsTable = xssfReader.getSharedStringsTable(); /* sample sheet element: <sheets> <sheet name="Movies" sheetId="1" state="visible" r:id="rId2"/> <sheet name="Person" sheetId="2" state="visible" r:id="rId3"/> </sheets> */ while (workbookStreamReader.hasNext()) { if (workbookStreamReader.next() == XMLStreamConstants.START_ELEMENT && "sheet".equals(workbookStreamReader.getLocalName())) { final String shn = workbookStreamReader.getAttributeValue(null, "name"); final String shId = workbookStreamReader.getAttributeValue(null, "sheetId"); if ((sheetName != null && sheetName.equals(shn)) || (sheetName == null && String.valueOf(this.sheetIndex + 1).equals(shId))) { //this is the target sheet final String relationshipId = workbookStreamReader.getAttributeValue(schemaRelationships, "id"); sheetInputStream = xssfReader.getSheet(relationshipId); sheetStreamReader = xmlInputFactory.createXMLStreamReader(sheetInputStream); break; } } } } finally { if (workbookDataInputStream != null) { try { workbookDataInputStream.close(); } catch (final Exception e) { //ignore } } if (workbookStreamReader != null) { try { workbookStreamReader.close(); } catch (final Exception e) { //ignore } } } /* sample row element: <row r="1" customFormat="false" ht="15" hidden="false" customHeight="false" outlineLevel="0" collapsed="false"> <c r="A1" s="0" t="s"> <v>0</v> </c> <c r="B1" s="0" t="s"> <v>1</v> </c> <c r="C1" s="0" t="s"> <v>2</v> </c> <c r="D1" s="0" t="s"> <v>3</v> </c> </row> For inlineStr: <c r="A1" t="inlineStr"> <is> <t>Date</t> </is> </c> Note: a blank cell does not show up in xml at all. So for list type beanType, need to detect blank cell and add null; for map or custom beanType, need to link to the correct header column by r attribute. */ if (header == null) { headerMapping = new HashMap<String, String>(); outerLoop: while (sheetStreamReader.hasNext()) { if (sheetStreamReader.next() == XMLStreamConstants.START_ELEMENT && "row".equals(sheetStreamReader.getLocalName())) { final int rowNum = Integer.parseInt(sheetStreamReader.getAttributeValue(null, "r")); if (headerRow + 1 == rowNum) { // got the header row, next loop through header row cells final List<String> headerVals = new ArrayList<String>(); while (sheetStreamReader.hasNext()) { final int event = sheetStreamReader.next(); if (event == XMLStreamConstants.START_ELEMENT && "c".equals(sheetStreamReader.getLocalName())) { final String label = getColumnLabel(sheetStreamReader.getAttributeValue(null, "r")); final String value = getCellStringValue(); headerVals.add(value); headerMapping.put(label, value); } else if (event == XMLStreamConstants.END_ELEMENT && "row".equals(sheetStreamReader.getLocalName())) { header = headerVals.toArray(new String[headerVals.size()]); currentRowNum = rowNum - 1; break outerLoop; } } } } } } //fast forward to the start row, which may not immediately follow header row while (currentRowNum < startRowNumber - 1 && sheetStreamReader.hasNext()) { if (sheetStreamReader.next() == XMLStreamConstants.START_ELEMENT && "row".equals(sheetStreamReader.getLocalName())) { currentRowNum = Integer.parseInt(sheetStreamReader.getAttributeValue(null, "r")) - 1; } else if (sheetStreamReader.next() == XMLStreamConstants.END_ELEMENT && "row".equals(sheetStreamReader.getLocalName())) { if (currentRowNum >= startRowNumber - 1) { break; } } } }
From source file:org.pentaho.di.trans.steps.excelinput.staxpoi.StaxPoiSheet.java
License:Apache License
public StaxPoiSheet(XSSFReader reader, String sheetName, String sheetID) throws InvalidFormatException, IOException, XMLStreamException { this.sheetName = sheetName; xssfReader = reader;/*from w w w. java 2 s . c om*/ sheetId = sheetID; sst = reader.getSharedStringsTable(); styles = reader.getStylesTable(); sheetStream = reader.getSheet(sheetID); XMLInputFactory factory = XMLInputFactory.newInstance(); sheetReader = factory.createXMLStreamReader(sheetStream); headerRow = new ArrayList<String>(); while (sheetReader.hasNext()) { int event = sheetReader.next(); if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("dimension")) { String dim = sheetReader.getAttributeValue(null, "ref"); // empty sheets have dimension with no range if (StringUtils.contains(dim, ':')) { dim = dim.split(":")[1]; numRows = StaxUtil.extractRowNumber(dim); numCols = StaxUtil.extractColumnNumber(dim); } else { maxColsNumberDefined = false; numCols = StaxUtil.MAX_COLUMNS; numRows = StaxUtil.MAX_ROWS; } } if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("row")) { currentRow = Integer.parseInt(sheetReader.getAttributeValue(null, "r")); firstRow = currentRow; // calculate the number of columns in the header row while (sheetReader.hasNext()) { event = sheetReader.next(); if (event == XMLStreamConstants.END_ELEMENT && sheetReader.getLocalName().equals("row")) { // if the row has ended, break the inner while loop break; } if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("c")) { String attributeValue = sheetReader.getAttributeValue(null, "t"); if (attributeValue != null) { if (attributeValue.equals("s")) { // if the type of the cell is string, we continue while (sheetReader.hasNext()) { event = sheetReader.next(); if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("v")) { int idx = Integer.parseInt(sheetReader.getElementText()); String content = new XSSFRichTextString(sst.getEntryAt(idx)).toString(); headerRow.add(content); break; } } } else if (attributeValue.equals("inlineStr")) { // if the type of the cell is string, we continue while (sheetReader.hasNext()) { event = sheetReader.next(); if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("is")) { while (sheetReader.hasNext()) { event = sheetReader.next(); if (event == XMLStreamConstants.CHARACTERS) { String content = new XSSFRichTextString(sheetReader.getText()) .toString(); headerRow.add(content); break; } } break; } } } } else { break; } } } // we have parsed the header row break; } } }
From source file:org.pentaho.di.trans.steps.excelinput.staxpoi.StaxPoiSheetTest.java
License:Apache License
private XSSFReader mockXSSFReader(final String sheetId, final String sheetContent, final SharedStringsTable sst, final StylesTable styles) throws Exception { XSSFReader reader = mock(XSSFReader.class); when(reader.getSharedStringsTable()).thenReturn(sst); when(reader.getStylesTable()).thenReturn(styles); when(reader.getSheet(sheetId)).thenAnswer(new Answer<InputStream>() { public InputStream answer(InvocationOnMock invocation) throws Throwable { return IOUtils.toInputStream(sheetContent, "UTF-8"); }/*from ww w .j a v a2s.c o m*/ }); return reader; }
From source file:org.talend.dataprep.schema.xls.streaming.StreamingSheetTest.java
License:Open Source License
@Before public void setUp() throws Exception { OPCPackage pkg = OPCPackage.open(StreamingSheetTest.class.getResourceAsStream("../dates.xlsx")); XSSFReader reader = new XSSFReader(pkg); SharedStringsTable sst = reader.getSharedStringsTable(); StylesTable styles = reader.getStylesTable(); Iterator<InputStream> iter = reader.getSheetsData(); XMLEventReader parser = XMLInputFactory.newInstance().createXMLEventReader(iter.next()); final StreamingSheetReader streamingSheetReader = new StreamingSheetReader(sst, styles, parser, 10); streamingSheet = new StreamingSheet("name", streamingSheetReader); }
From source file:org.talend.dataprep.schema.xls.streaming.StreamingWorkbookReader.java
License:Open Source License
public void init(File f) { try {/*from w w w .j a va 2 s . co m*/ if (builder.getPassword() != null) { // Based on: https://poi.apache.org/encryption.html POIFSFileSystem poifs = new POIFSFileSystem(f); EncryptionInfo info = new EncryptionInfo(poifs); Decryptor d = Decryptor.getInstance(info); d.verifyPassword(builder.getPassword()); pkg = OPCPackage.open(d.getDataStream(poifs)); } else { pkg = OPCPackage.open(f); } XSSFReader reader = new XSSFReader(pkg); SharedStringsTable sst = reader.getSharedStringsTable(); StylesTable styles = reader.getStylesTable(); loadSheets(reader, sst, styles, builder.getRowCacheSize()); } catch (IOException e) { throw new OpenException("Failed to open file", e); } catch (OpenXML4JException | XMLStreamException e) { throw new ReadException("Unable to read workbook", e); } catch (GeneralSecurityException e) { throw new ReadException("Unable to read workbook - Decryption failed", e); } }