List of usage examples for org.apache.poi.xssf.eventusermodel.XSSFReader#getSheetsData()
public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException
From source file:excel.XSSF.XLSX2CSV.java
License:Apache License
/** * Initiates the processing of the XLS workbook file to CSV. * * @throws IOException If reading the data from the package fails. * @throws SAXException if parsing the XML data fails. *//*from w w w. j ava2 s. c o m*/ public void process() throws IOException, OpenXML4JException, SAXException { ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage); XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); int index = 0; while (iter.hasNext()) { InputStream stream = iter.next(); String sheetName = iter.getSheetName(); this.output.println(); // this.output.println(sheetName + " [index=" + index + "]:"); this.output.println(String.format("\"%s\":[", sheetName)); processSheet(styles, strings, new SheetToCSV(), stream); stream.close(); this.output.println("]"); ++index; } }
From source file:extract.XLSX2CSV.java
License:Apache License
/** * Initiates the processing of the XLS workbook file to CSV. * * @throws IOException/*from w w w . j a v a2 s . com*/ * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ public void process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage); XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); int index = 0; while (iter.hasNext()) { InputStream stream = iter.next(); String sheetName = iter.getSheetName(); this.output.println(); this.output.println(sheetName + " [index=" + index + "]:"); processSheet(styles, strings, new SheetToCSV(), stream); stream.close(); ++index; } }
From source file:kiklos.tv.timetable.XLSX2CSV.java
License:Apache License
/** * Initiates the processing of the XLS workbook file to CSV. * /*w w w. j a v a 2 s . c om*/ * @throws IOException * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ public void process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage); XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); int index = 0; while (iter.hasNext()) { InputStream stream = iter.next(); //String sheetName = iter.getSheetName(); //this.output.println(); //this.output.println(sheetName + " [index=" + index + "]:"); processSheet(styles, strings, stream); stream.close(); ++index; return; // todo: need only first sheet !!!!! } }
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java
License:Apache License
/** * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText() *///from ww w. jav a 2 s .co m @Override protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException { OPCPackage container = extractor.getPackage(); ReadOnlySharedStringsTable strings; XSSFReader.SheetIterator iter; XSSFReader xssfReader; StylesTable styles; try { xssfReader = new XSSFReader(container); styles = xssfReader.getStylesTable(); iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); strings = new ReadOnlySharedStringsTable(container); } catch (InvalidFormatException e) { throw new XmlException(e); } catch (OpenXML4JException oe) { throw new XmlException(oe); } while (iter.hasNext()) { InputStream stream = iter.next(); sheetParts.add(iter.getSheetPart()); SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml, iter.getSheetComments()); // Start, and output the sheet name xhtml.startElement("div"); xhtml.element("h1", iter.getSheetName()); // Extract the main sheet contents xhtml.startElement("table"); xhtml.startElement("tbody"); processSheet(sheetExtractor, styles, strings, stream); xhtml.endElement("tbody"); xhtml.endElement("table"); // Output any headers and footers // (Need to process the sheet to get them, so we can't // do the headers before the contents) for (String header : sheetExtractor.headers) { extractHeaderFooter(header, xhtml); } for (String footer : sheetExtractor.footers) { extractHeaderFooter(footer, xhtml); } processShapes(iter.getShapes(), xhtml); // All done with this sheet xhtml.endElement("div"); } if (Config.inst().getProp(ConfigBool.ENABLE_IMAGE_OCR)) { TikaImageHelper helper = new TikaImageHelper(metadata); try { XSSFWorkbook workbook = new XSSFWorkbook(container); List<XSSFPictureData> pictures = workbook.getAllPictures(); for (XSSFPictureData picture : pictures) { ByteArrayInputStream imageData = new ByteArrayInputStream(picture.getData()); BufferedImage image = ImageIO.read(imageData); 
helper.addImage(image); } helper.addTextToHandler(xhtml); } catch (Exception e) { // TODO: e.printStackTrace(); } finally { if (extractor != null) { extractor.close(); } if (helper != null) { helper.close(); } } } }
From source file:net.geoprism.data.etl.excel.ExcelSheetReader.java
License:Open Source License
/**
 * Reads an XLSX workbook from the given stream and feeds every sheet to {@code this.handler}.
 *
 * <p>For each sheet, {@code startSheet(sheetName)} is called on the handler, the sheet XML
 * is parsed through an {@code XSSFSheetXMLHandler} wired to {@code this.handler} and
 * {@code this.formatter}, and {@code endSheet()} is called. The sheet stream, the opened
 * package and the supplied stream are all released, also when processing fails.
 *
 * @param stream workbook content; closed by this method
 * @throws Exception if opening the package, reading a part, or parsing the sheet XML fails
 */
public void process(InputStream stream) throws Exception {
    try {
        OPCPackage pkg = OPCPackage.open(stream);
        try {
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
            XSSFReader xssfReader = new XSSFReader(pkg);
            StylesTable styles = xssfReader.getStylesTable();
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

            while (iter.hasNext()) {
                InputStream sheet = iter.next();
                try {
                    String sheetName = iter.getSheetName();
                    this.handler.startSheet(sheetName);

                    InputSource sheetSource = new InputSource(sheet);
                    // Named sheetHandler so it no longer shadows the field this.handler.
                    ContentHandler sheetHandler = new XSSFSheetXMLHandler(styles, strings, this.handler,
                            this.formatter, false);

                    XMLReader reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
                    reader.setContentHandler(sheetHandler);
                    reader.parse(sheetSource);

                    this.handler.endSheet();
                } finally {
                    sheet.close();
                }
            }
        } finally {
            // The package is only read here: revert() closes it without saving anything.
            pkg.revert();
        }
    } finally {
        stream.close();
    }
}
From source file:oracle.cloud.sampleapps.Excel2Csv.XLSX2CSV.java
License:Apache License
/** * Initiates the processing of the XLS workbook file to CSV. * * @throws IOException/*from w ww.jav a2s.c om*/ * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ public String process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage); XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); int index = 0; while (iter.hasNext()) { InputStream stream = iter.next(); String sheetName = iter.getSheetName(); processSheet(styles, strings, new SheetToCSV(), stream); stream.close(); ++index; } return csvOutput.toString(); }
From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java
License:Apache License
@Override public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { final FlowFile flowFile = session.get(); if (flowFile == null) { return;// w w w . j a va 2s . c o m } final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions() .getValue(); final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean(); final CSVFormat csvFormat = CSVUtils.createCSVFormat(context); //Switch to 0 based index final int firstRow = context.getProperty(ROWS_TO_SKIP).asInteger() - 1; final String[] sColumnsToSkip = StringUtils.split(context.getProperty(COLUMNS_TO_SKIP).getValue(), ","); final List<Integer> columnsToSkip = new ArrayList<>(); if (sColumnsToSkip != null && sColumnsToSkip.length > 0) { for (String c : sColumnsToSkip) { try { //Switch to 0 based index columnsToSkip.add(Integer.parseInt(c) - 1); } catch (NumberFormatException e) { throw new ProcessException("Invalid column in Columns to Skip list.", e); } } } try { session.read(flowFile, new InputStreamCallback() { @Override public void process(InputStream inputStream) throws IOException { try { OPCPackage pkg = OPCPackage.open(inputStream); XSSFReader r = new XSSFReader(pkg); ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(pkg); StylesTable styles = r.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) r.getSheetsData(); if (desiredSheetsDelimited != null) { String[] desiredSheets = StringUtils.split(desiredSheetsDelimited, DESIRED_SHEETS_DELIMITER); if (desiredSheets != null) { while (iter.hasNext()) { InputStream sheet = iter.next(); String sheetName = iter.getSheetName(); for (int i = 0; i < desiredSheets.length; i++) { //If the sheetName is a desired one parse it if (sheetName.equalsIgnoreCase(desiredSheets[i])) { ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig( columnsToSkip, firstRow, sheetName, formatValues, sst, styles); handleExcelSheet(session, flowFile, 
sheet, readConfig, csvFormat); break; } } } } else { getLogger().debug( "Excel document was parsed but no sheets with the specified desired names were found."); } } else { //Get all of the sheets in the document. while (iter.hasNext()) { InputStream sheet = iter.next(); String sheetName = iter.getSheetName(); ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheetName, formatValues, sst, styles); handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat); } } } catch (InvalidFormatException ife) { getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", ife); throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported", ife); } catch (OpenXML4JException | SAXException e) { getLogger().error("Error occurred while processing Excel document metadata", e); } } }); session.transfer(flowFile, ORIGINAL); } catch (RuntimeException ex) { getLogger().error("Failed to process incoming Excel document. " + ex.getMessage(), ex); FlowFile failedFlowFile = session.putAttribute(flowFile, ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage()); session.transfer(failedFlowFile, FAILURE); } }
From source file:org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java
License:Apache License
/** * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText() *///w w w .j ava2 s .com @Override protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException { OPCPackage container = extractor.getPackage(); ReadOnlySharedStringsTable strings; XSSFReader.SheetIterator iter; XSSFReader xssfReader; StylesTable styles; try { xssfReader = new XSSFReader(container); styles = xssfReader.getStylesTable(); iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); strings = new ReadOnlySharedStringsTable(container); } catch (InvalidFormatException e) { throw new XmlException(e); } catch (OpenXML4JException oe) { throw new XmlException(oe); } while (iter.hasNext()) { InputStream stream = iter.next(); sheetParts.add(iter.getSheetPart()); SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml); CommentsTable comments = iter.getSheetComments(); // Start, and output the sheet name xhtml.startElement("div"); xhtml.element("h1", iter.getSheetName()); // Extract the main sheet contents xhtml.startElement("table"); xhtml.startElement("tbody"); processSheet(sheetExtractor, comments, styles, strings, stream); xhtml.endElement("tbody"); xhtml.endElement("table"); // Output any headers and footers // (Need to process the sheet to get them, so we can't // do the headers before the contents) for (String header : sheetExtractor.headers) { extractHeaderFooter(header, xhtml); } for (String footer : sheetExtractor.footers) { extractHeaderFooter(footer, xhtml); } processShapes(iter.getShapes(), xhtml); // All done with this sheet xhtml.endElement("div"); } }
From source file:org.dhatim.fastexcel.reader.BenchmarksTest.java
License:Apache License
@Benchmark public int streamingApachePoi() throws IOException, OpenXML4JException, SAXException { try (OPCPackage pkg = OPCPackage.open(openResource(FILE))) { ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg); XSSFReader reader = new XSSFReader(pkg); StylesTable styles = reader.getStylesTable(); XSSFReader.SheetIterator iterator = (XSSFReader.SheetIterator) reader.getSheetsData(); int sheetIndex = 0; while (iterator.hasNext()) { try (InputStream sheetStream = iterator.next()) { if (sheetIndex == 0) { SheetContentHandler sheetHandler = new SheetContentHandler(); processSheet(styles, strings, sheetHandler, sheetStream); assertEquals(RESULT, sheetHandler.result); }// w w w .ja va2s. com } sheetIndex++; } return sheetIndex; } }
From source file:org.talend.dataprep.schema.xls.streaming.StreamingSheetTest.java
License:Open Source License
/**
 * Builds the {@code streamingSheet} fixture: opens the {@code ../dates.xlsx} resource and
 * wraps the first sheet stream of the workbook in a {@code StreamingSheetReader}.
 */
@Before
public void setUp() throws Exception {
    InputStream workbookStream = StreamingSheetTest.class.getResourceAsStream("../dates.xlsx");
    XSSFReader xssfReader = new XSSFReader(OPCPackage.open(workbookStream));

    SharedStringsTable sharedStrings = xssfReader.getSharedStringsTable();
    StylesTable stylesTable = xssfReader.getStylesTable();
    Iterator<InputStream> sheets = xssfReader.getSheetsData();

    XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
    XMLEventReader sheetEvents = xmlFactory.createXMLEventReader(sheets.next());

    streamingSheet = new StreamingSheet("name",
            new StreamingSheetReader(sharedStrings, stylesTable, sheetEvents, 10));
}