Usage examples for org.apache.poi.xssf.eventusermodel.XSSFReader#getStylesTable()
public StylesTable getStylesTable() throws IOException, InvalidFormatException
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java
License:Apache License
/** * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText() *///from w w w. j a v a 2 s . co m @Override protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException { OPCPackage container = extractor.getPackage(); ReadOnlySharedStringsTable strings; XSSFReader.SheetIterator iter; XSSFReader xssfReader; StylesTable styles; try { xssfReader = new XSSFReader(container); styles = xssfReader.getStylesTable(); iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); strings = new ReadOnlySharedStringsTable(container); } catch (InvalidFormatException e) { throw new XmlException(e); } catch (OpenXML4JException oe) { throw new XmlException(oe); } while (iter.hasNext()) { InputStream stream = iter.next(); sheetParts.add(iter.getSheetPart()); SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml, iter.getSheetComments()); // Start, and output the sheet name xhtml.startElement("div"); xhtml.element("h1", iter.getSheetName()); // Extract the main sheet contents xhtml.startElement("table"); xhtml.startElement("tbody"); processSheet(sheetExtractor, styles, strings, stream); xhtml.endElement("tbody"); xhtml.endElement("table"); // Output any headers and footers // (Need to process the sheet to get them, so we can't // do the headers before the contents) for (String header : sheetExtractor.headers) { extractHeaderFooter(header, xhtml); } for (String footer : sheetExtractor.footers) { extractHeaderFooter(footer, xhtml); } processShapes(iter.getShapes(), xhtml); // All done with this sheet xhtml.endElement("div"); } if (Config.inst().getProp(ConfigBool.ENABLE_IMAGE_OCR)) { TikaImageHelper helper = new TikaImageHelper(metadata); try { XSSFWorkbook workbook = new XSSFWorkbook(container); List<XSSFPictureData> pictures = workbook.getAllPictures(); for (XSSFPictureData picture : pictures) { ByteArrayInputStream imageData = new ByteArrayInputStream(picture.getData()); BufferedImage image = ImageIO.read(imageData); 
helper.addImage(image); } helper.addTextToHandler(xhtml); } catch (Exception e) { // TODO: e.printStackTrace(); } finally { if (extractor != null) { extractor.close(); } if (helper != null) { helper.close(); } } } }
From source file:net.geoprism.data.etl.excel.ExcelSheetReader.java
License:Open Source License
public void process(InputStream stream) throws Exception { try {//from w ww . jav a2 s . c o m OPCPackage pkg = OPCPackage.open(stream); ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg); XSSFReader xssfReader = new XSSFReader(pkg); StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); while (iter.hasNext()) { InputStream sheet = iter.next(); try { String sheetName = iter.getSheetName(); this.handler.startSheet(sheetName); InputSource sheetSource = new InputSource(sheet); ContentHandler handler = new XSSFSheetXMLHandler(styles, strings, this.handler, this.formatter, false); XMLReader reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); reader.setContentHandler(handler); reader.parse(sheetSource); this.handler.endSheet(); } finally { sheet.close(); } } } finally { stream.close(); } }
From source file:oracle.cloud.sampleapps.Excel2Csv.XLSX2CSV.java
License:Apache License
/**
 * Initiates the processing of the XLSX workbook file to CSV.
 *
 * <p>Every sheet of {@code xlsxPackage} is passed to {@code processSheet} with a new
 * {@code SheetToCSV} handler; each sheet stream is closed afterwards, including when
 * processing of that sheet fails.
 *
 * @return the contents of {@code csvOutput} after all sheets have been processed
 * @throws IOException
 * @throws OpenXML4JException
 * @throws ParserConfigurationException
 * @throws SAXException
 */
public String process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
    ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
    XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
    StylesTable styles = xssfReader.getStylesTable();
    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

    while (iter.hasNext()) {
        InputStream stream = iter.next();
        try {
            processSheet(styles, strings, new SheetToCSV(), stream);
        } finally {
            // Previously the stream stayed open whenever processSheet threw.
            stream.close();
        }
    }
    return csvOutput.toString();
}
From source file:org.apache.metamodel.excel.XlsxSheetToRowsHandler.java
License:Apache License
/**
 * Builds a handler that keeps the given callback and configuration, takes the shared
 * strings table and the styles table from {@code xssfReader}, and starts with empty
 * value/style buffers, a row number of {@code -1} and both "in cell" and "in formula"
 * flags cleared.
 *
 * @param callback      stored in {@code _callback}
 * @param xssfReader    source of the shared strings table and the styles table
 * @param configuration stored in {@code _configuration}
 * @throws Exception if either table cannot be obtained from the reader
 */
public XlsxSheetToRowsHandler(XlsxRowCallback callback, XSSFReader xssfReader,
        ExcelConfiguration configuration) throws Exception {
    // Parser position: no row seen yet, not inside a cell or a formula.
    this._rowNumber = -1;
    this._inCell = false;
    this._inFormula = false;

    // Scratch buffers for the current cell and the current row.
    this._value = new StringBuilder();
    this._style = new StyleBuilder();
    this._rowValues = new ArrayList<String>();
    this._styles = new ArrayList<Style>();

    // Collaborators handed in by the caller.
    this._callback = callback;
    this._configuration = configuration;

    // Workbook-wide tables, fetched once from the reader.
    this._sharedStringTable = xssfReader.getSharedStringsTable();
    this._stylesTable = xssfReader.getStylesTable();
}
From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java
License:Apache License
@Override public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { final FlowFile flowFile = session.get(); if (flowFile == null) { return;/*www.ja v a 2 s . co m*/ } final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions() .getValue(); final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean(); final CSVFormat csvFormat = CSVUtils.createCSVFormat(context); //Switch to 0 based index final int firstRow = context.getProperty(ROWS_TO_SKIP).asInteger() - 1; final String[] sColumnsToSkip = StringUtils.split(context.getProperty(COLUMNS_TO_SKIP).getValue(), ","); final List<Integer> columnsToSkip = new ArrayList<>(); if (sColumnsToSkip != null && sColumnsToSkip.length > 0) { for (String c : sColumnsToSkip) { try { //Switch to 0 based index columnsToSkip.add(Integer.parseInt(c) - 1); } catch (NumberFormatException e) { throw new ProcessException("Invalid column in Columns to Skip list.", e); } } } try { session.read(flowFile, new InputStreamCallback() { @Override public void process(InputStream inputStream) throws IOException { try { OPCPackage pkg = OPCPackage.open(inputStream); XSSFReader r = new XSSFReader(pkg); ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(pkg); StylesTable styles = r.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) r.getSheetsData(); if (desiredSheetsDelimited != null) { String[] desiredSheets = StringUtils.split(desiredSheetsDelimited, DESIRED_SHEETS_DELIMITER); if (desiredSheets != null) { while (iter.hasNext()) { InputStream sheet = iter.next(); String sheetName = iter.getSheetName(); for (int i = 0; i < desiredSheets.length; i++) { //If the sheetName is a desired one parse it if (sheetName.equalsIgnoreCase(desiredSheets[i])) { ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig( columnsToSkip, firstRow, sheetName, formatValues, sst, styles); handleExcelSheet(session, flowFile, 
sheet, readConfig, csvFormat); break; } } } } else { getLogger().debug( "Excel document was parsed but no sheets with the specified desired names were found."); } } else { //Get all of the sheets in the document. while (iter.hasNext()) { InputStream sheet = iter.next(); String sheetName = iter.getSheetName(); ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheetName, formatValues, sst, styles); handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat); } } } catch (InvalidFormatException ife) { getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", ife); throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported", ife); } catch (OpenXML4JException | SAXException e) { getLogger().error("Error occurred while processing Excel document metadata", e); } } }); session.transfer(flowFile, ORIGINAL); } catch (RuntimeException ex) { getLogger().error("Failed to process incoming Excel document. " + ex.getMessage(), ex); FlowFile failedFlowFile = session.putAttribute(flowFile, ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage()); session.transfer(failedFlowFile, FAILURE); } }
From source file:org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java
License:Apache License
/** * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText() */// ww w .j ava2s .com @Override protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException { OPCPackage container = extractor.getPackage(); ReadOnlySharedStringsTable strings; XSSFReader.SheetIterator iter; XSSFReader xssfReader; StylesTable styles; try { xssfReader = new XSSFReader(container); styles = xssfReader.getStylesTable(); iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); strings = new ReadOnlySharedStringsTable(container); } catch (InvalidFormatException e) { throw new XmlException(e); } catch (OpenXML4JException oe) { throw new XmlException(oe); } while (iter.hasNext()) { InputStream stream = iter.next(); sheetParts.add(iter.getSheetPart()); SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml); CommentsTable comments = iter.getSheetComments(); // Start, and output the sheet name xhtml.startElement("div"); xhtml.element("h1", iter.getSheetName()); // Extract the main sheet contents xhtml.startElement("table"); xhtml.startElement("tbody"); processSheet(sheetExtractor, comments, styles, strings, stream); xhtml.endElement("tbody"); xhtml.endElement("table"); // Output any headers and footers // (Need to process the sheet to get them, so we can't // do the headers before the contents) for (String header : sheetExtractor.headers) { extractHeaderFooter(header, xhtml); } for (String footer : sheetExtractor.footers) { extractHeaderFooter(footer, xhtml); } processShapes(iter.getShapes(), xhtml); // All done with this sheet xhtml.endElement("div"); } }
From source file:org.dhatim.fastexcel.reader.BenchmarksTest.java
License:Apache License
/**
 * Benchmark body for POI's streaming reader: opens {@code FILE}, walks all sheets,
 * runs {@code processSheet} on the first one only and checks its result against
 * {@code RESULT}. Every sheet stream is opened and closed, including those skipped.
 *
 * @return the number of sheets iterated
 */
@Benchmark
public int streamingApachePoi() throws IOException, OpenXML4JException, SAXException {
    try (OPCPackage opcPackage = OPCPackage.open(openResource(FILE))) {
        ReadOnlySharedStringsTable sharedStrings = new ReadOnlySharedStringsTable(opcPackage);
        XSSFReader xssfReader = new XSSFReader(opcPackage);
        StylesTable stylesTable = xssfReader.getStylesTable();
        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

        int sheetsSeen = 0;
        for (; sheets.hasNext(); sheetsSeen++) {
            try (InputStream sheetStream = sheets.next()) {
                if (sheetsSeen != 0) {
                    // Only the first sheet is parsed; later ones are just opened and closed.
                    continue;
                }
                SheetContentHandler firstSheetHandler = new SheetContentHandler();
                processSheet(stylesTable, sharedStrings, firstSheetHandler, sheetStream);
                assertEquals(RESULT, firstSheetHandler.result);
            }
        }
        return sheetsSeen;
    }
}
From source file:org.pentaho.di.trans.steps.excelinput.staxpoi.StaxPoiSheet.java
License:Apache License
public StaxPoiSheet(XSSFReader reader, String sheetName, String sheetID) throws InvalidFormatException, IOException, XMLStreamException { this.sheetName = sheetName; xssfReader = reader;//from w ww . j a v a2s . c o m sheetId = sheetID; sst = reader.getSharedStringsTable(); styles = reader.getStylesTable(); sheetStream = reader.getSheet(sheetID); XMLInputFactory factory = XMLInputFactory.newInstance(); sheetReader = factory.createXMLStreamReader(sheetStream); headerRow = new ArrayList<String>(); while (sheetReader.hasNext()) { int event = sheetReader.next(); if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("dimension")) { String dim = sheetReader.getAttributeValue(null, "ref"); // empty sheets have dimension with no range if (StringUtils.contains(dim, ':')) { dim = dim.split(":")[1]; numRows = StaxUtil.extractRowNumber(dim); numCols = StaxUtil.extractColumnNumber(dim); } else { maxColsNumberDefined = false; numCols = StaxUtil.MAX_COLUMNS; numRows = StaxUtil.MAX_ROWS; } } if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("row")) { currentRow = Integer.parseInt(sheetReader.getAttributeValue(null, "r")); firstRow = currentRow; // calculate the number of columns in the header row while (sheetReader.hasNext()) { event = sheetReader.next(); if (event == XMLStreamConstants.END_ELEMENT && sheetReader.getLocalName().equals("row")) { // if the row has ended, break the inner while loop break; } if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("c")) { String attributeValue = sheetReader.getAttributeValue(null, "t"); if (attributeValue != null) { if (attributeValue.equals("s")) { // if the type of the cell is string, we continue while (sheetReader.hasNext()) { event = sheetReader.next(); if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("v")) { int idx = Integer.parseInt(sheetReader.getElementText()); String content = new 
XSSFRichTextString(sst.getEntryAt(idx)).toString(); headerRow.add(content); break; } } } else if (attributeValue.equals("inlineStr")) { // if the type of the cell is string, we continue while (sheetReader.hasNext()) { event = sheetReader.next(); if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("is")) { while (sheetReader.hasNext()) { event = sheetReader.next(); if (event == XMLStreamConstants.CHARACTERS) { String content = new XSSFRichTextString(sheetReader.getText()) .toString(); headerRow.add(content); break; } } break; } } } } else { break; } } } // we have parsed the header row break; } } }
From source file:org.pentaho.di.trans.steps.excelinput.staxpoi.StaxPoiSheetTest.java
License:Apache License
private XSSFReader mockXSSFReader(final String sheetId, final String sheetContent, final SharedStringsTable sst, final StylesTable styles) throws Exception { XSSFReader reader = mock(XSSFReader.class); when(reader.getSharedStringsTable()).thenReturn(sst); when(reader.getStylesTable()).thenReturn(styles); when(reader.getSheet(sheetId)).thenAnswer(new Answer<InputStream>() { public InputStream answer(InvocationOnMock invocation) throws Throwable { return IOUtils.toInputStream(sheetContent, "UTF-8"); }//from w w w . j a va2 s . c om }); return reader; }
From source file:org.talend.dataprep.schema.xls.streaming.StreamingSheetTest.java
License:Open Source License
/**
 * Prepares {@code streamingSheet} for each test: opens the {@code ../dates.xlsx}
 * resource, takes its shared strings and styles tables, and wraps the first sheet's XML
 * events in a {@code StreamingSheetReader} constructed with the value {@code 10}.
 */
@Before
public void setUp() throws Exception {
    OPCPackage opcPackage = OPCPackage.open(StreamingSheetTest.class.getResourceAsStream("../dates.xlsx"));
    XSSFReader xssfReader = new XSSFReader(opcPackage);

    SharedStringsTable sharedStrings = xssfReader.getSharedStringsTable();
    StylesTable stylesTable = xssfReader.getStylesTable();

    // Only the first sheet of the workbook is read.
    Iterator<InputStream> sheetStreams = xssfReader.getSheetsData();
    XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
    XMLEventReader sheetEvents = xmlFactory.createXMLEventReader(sheetStreams.next());

    streamingSheet = new StreamingSheet("name",
            new StreamingSheetReader(sharedStrings, stylesTable, sheetEvents, 10));
}