Example usage for org.apache.poi.xssf.eventusermodel XSSFReader getSheetsData

List of usage examples for org.apache.poi.xssf.eventusermodel XSSFReader getSheetsData

Introduction

On this page you can find usage examples for the getSheetsData method of org.apache.poi.xssf.eventusermodel.XSSFReader.

Prototype

public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException 

Source Link

Document

Returns an Iterator which will let you get at all the different Sheets in turn.

Usage

From source file:excel.XSSF.XLSX2CSV.java

License:Apache License

/**
 * Initiates the processing of the XLSX workbook file to CSV.
 *
 * <p>For every sheet a blank line and a {@code "sheetName":[} header are printed to
 * {@code output}, the sheet is handed to {@code processSheet}, and a closing {@code ]}
 * line is printed.
 *
 * @throws IOException        if reading the data from the package fails.
 * @throws OpenXML4JException if POI reports a problem with the OOXML package.
 * @throws SAXException       if parsing the XML data fails.
 */
public void process() throws IOException, OpenXML4JException, SAXException {
    ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
    XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
    StylesTable styles = xssfReader.getStylesTable();
    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
    while (iter.hasNext()) {
        // try-with-resources closes the sheet stream even when processSheet throws.
        try (InputStream stream = iter.next()) {
            String sheetName = iter.getSheetName();
            this.output.println();
            this.output.println(String.format("\"%s\":[", sheetName));
            processSheet(styles, strings, new SheetToCSV(), stream);
        }
        this.output.println("]");
    }
}

From source file:extract.XLSX2CSV.java

License:Apache License

/**
 * Initiates the processing of the XLS workbook file to CSV.
 *
 * @throws IOException/*from  w  w  w  . j a v  a2  s .  com*/
 * @throws OpenXML4JException
 * @throws ParserConfigurationException
 * @throws SAXException
 */
public void process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
    ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
    XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
    StylesTable styles = xssfReader.getStylesTable();
    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
    int index = 0;
    while (iter.hasNext()) {
        InputStream stream = iter.next();
        String sheetName = iter.getSheetName();
        this.output.println();
        this.output.println(sheetName + " [index=" + index + "]:");
        processSheet(styles, strings, new SheetToCSV(), stream);
        stream.close();
        ++index;
    }
}

From source file:kiklos.tv.timetable.XLSX2CSV.java

License:Apache License

/**
 * Initiates the processing of the XLS workbook file to CSV.
 * /*w w  w. j a  v  a 2 s . c om*/
 * @throws IOException
 * @throws OpenXML4JException
 * @throws ParserConfigurationException
 * @throws SAXException
 */
public void process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {

    ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
    XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
    StylesTable styles = xssfReader.getStylesTable();
    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
    int index = 0;
    while (iter.hasNext()) {
        InputStream stream = iter.next();
        //String sheetName = iter.getSheetName();
        //this.output.println();
        //this.output.println(sheetName + " [index=" + index + "]:");
        processSheet(styles, strings, stream);
        stream.close();
        ++index;
        return; // todo: need only first sheet !!!!!
    }
}

From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java

License:Apache License

/**
 * Writes every sheet of the workbook to the XHTML handler as a {@code div} holding the
 * sheet name, a table with the sheet contents, its headers/footers and its shapes.
 * When {@code ConfigBool.ENABLE_IMAGE_OCR} is set, the workbook pictures are afterwards
 * passed to a {@code TikaImageHelper}, which then writes to the same handler.
 *
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();

    XSSFReader xssfReader;
    StylesTable styles;
    XSSFReader.SheetIterator sheets;
    ReadOnlySharedStringsTable strings;
    try {
        xssfReader = new XSSFReader(container);
        styles = xssfReader.getStylesTable();
        sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        strings = new ReadOnlySharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // InvalidFormatException is an OpenXML4JException, so a single handler covers both.
        throw new XmlException(e);
    }

    while (sheets.hasNext()) {
        InputStream sheetStream = sheets.next();
        sheetParts.add(sheets.getSheetPart());

        SheetTextAsHTML sheetHandler = new SheetTextAsHTML(xhtml, sheets.getSheetComments());

        // Open the per-sheet container and emit the sheet name as its heading
        xhtml.startElement("div");
        xhtml.element("h1", sheets.getSheetName());

        // Main sheet contents go into a table body
        xhtml.startElement("table");
        xhtml.startElement("tbody");

        processSheet(sheetHandler, styles, strings, sheetStream);

        xhtml.endElement("tbody");
        xhtml.endElement("table");

        // Headers and footers are only known once the sheet has been processed,
        // which is why they follow the contents instead of preceding them.
        for (String headerText : sheetHandler.headers) {
            extractHeaderFooter(headerText, xhtml);
        }
        for (String footerText : sheetHandler.footers) {
            extractHeaderFooter(footerText, xhtml);
        }
        processShapes(sheets.getShapes(), xhtml);

        // Close the per-sheet container
        xhtml.endElement("div");
    }

    if (!Config.inst().getProp(ConfigBool.ENABLE_IMAGE_OCR)) {
        return;
    }

    TikaImageHelper helper = new TikaImageHelper(metadata);
    try {
        XSSFWorkbook workbook = new XSSFWorkbook(container);
        for (XSSFPictureData picture : workbook.getAllPictures()) {
            BufferedImage image = ImageIO.read(new ByteArrayInputStream(picture.getData()));
            helper.addImage(image);
        }
        helper.addTextToHandler(xhtml);
    } catch (Exception e) {
        // TODO:
        e.printStackTrace();
    } finally {
        if (extractor != null) {
            extractor.close();
        }
        helper.close();
    }
}

From source file:net.geoprism.data.etl.excel.ExcelSheetReader.java

License:Open Source License

/**
 * Reads the workbook supplied on {@code stream} and pushes every sheet through
 * {@code this.handler}: {@code startSheet(name)} is called, the sheet XML is parsed with
 * an {@code XSSFSheetXMLHandler}, then {@code endSheet()} is called.
 *
 * <p>Each sheet stream, the package and {@code stream} itself are released before the
 * method returns, whether or not processing succeeded.
 *
 * @param stream the workbook content; closed by this method.
 * @throws Exception if opening the package or parsing any sheet fails.
 */
public void process(InputStream stream) throws Exception {
    try {
        OPCPackage pkg = OPCPackage.open(stream);

        try {
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
            XSSFReader xssfReader = new XSSFReader(pkg);
            StylesTable styles = xssfReader.getStylesTable();
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

            while (iter.hasNext()) {
                InputStream sheet = iter.next();

                try {
                    String sheetName = iter.getSheetName();

                    this.handler.startSheet(sheetName);

                    InputSource sheetSource = new InputSource(sheet);
                    // Named differently from the field so it no longer shadows this.handler.
                    ContentHandler sheetXmlHandler = new XSSFSheetXMLHandler(styles, strings, this.handler,
                            this.formatter, false);

                    XMLReader reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
                    reader.setContentHandler(sheetXmlHandler);
                    reader.parse(sheetSource);

                    this.handler.endSheet();
                } finally {
                    sheet.close();
                }
            }
        } finally {
            // The package is only read, so release it without attempting to save.
            pkg.revert();
        }
    } finally {
        stream.close();
    }
}

From source file:oracle.cloud.sampleapps.Excel2Csv.XLSX2CSV.java

License:Apache License

/**
 * Initiates the processing of the XLS workbook file to CSV.
 *
 * @throws IOException/*from   w ww.jav a2s.c om*/
 * @throws OpenXML4JException
 * @throws ParserConfigurationException
 * @throws SAXException
 */
public String process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
    ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
    XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
    StylesTable styles = xssfReader.getStylesTable();
    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
    int index = 0;
    while (iter.hasNext()) {
        InputStream stream = iter.next();
        String sheetName = iter.getSheetName();
        processSheet(styles, strings, new SheetToCSV(), stream);
        stream.close();
        ++index;
    }

    return csvOutput.toString();
}

From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java

License:Apache License

/**
 * Takes one FlowFile from the session, reads it as an OOXML workbook and passes the
 * selected sheets to {@code handleExcelSheet}. When the DESIRED_SHEETS property has a
 * value, only sheets whose name matches one of its entries (ignoring case) are handled;
 * otherwise every sheet is handled. The incoming FlowFile is then transferred to
 * ORIGINAL, or to FAILURE with an error attribute when a RuntimeException is raised.
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions()
            .getValue();
    final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean();

    final CSVFormat csvFormat = CSVUtils.createCSVFormat(context);

    // Switch to 0 based index
    final int firstRow = context.getProperty(ROWS_TO_SKIP).asInteger() - 1;
    final String[] rawColumnsToSkip = StringUtils.split(context.getProperty(COLUMNS_TO_SKIP).getValue(), ",");

    final List<Integer> columnsToSkip = new ArrayList<>();

    if (rawColumnsToSkip != null) {
        for (String rawColumn : rawColumnsToSkip) {
            try {
                // Switch to 0 based index
                columnsToSkip.add(Integer.parseInt(rawColumn) - 1);
            } catch (NumberFormatException e) {
                throw new ProcessException("Invalid column in Columns to Skip list.", e);
            }
        }
    }

    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream inputStream) throws IOException {

                try {
                    OPCPackage pkg = OPCPackage.open(inputStream);
                    XSSFReader xssfReader = new XSSFReader(pkg);
                    ReadOnlySharedStringsTable sharedStrings = new ReadOnlySharedStringsTable(pkg);
                    StylesTable styles = xssfReader.getStylesTable();
                    XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

                    if (desiredSheetsDelimited == null) {
                        // No filter configured: handle every sheet in the document.
                        while (sheets.hasNext()) {
                            InputStream sheet = sheets.next();
                            String sheetName = sheets.getSheetName();

                            ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow,
                                    sheetName, formatValues, sharedStrings, styles);
                            handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
                        }
                    } else {
                        String[] desiredSheets = StringUtils.split(desiredSheetsDelimited,
                                DESIRED_SHEETS_DELIMITER);

                        if (desiredSheets == null) {
                            getLogger().debug(
                                    "Excel document was parsed but no sheets with the specified desired names were found.");
                        } else {
                            while (sheets.hasNext()) {
                                InputStream sheet = sheets.next();
                                String sheetName = sheets.getSheetName();

                                for (String desiredSheet : desiredSheets) {
                                    // Handle the sheet once, on the first desired name it matches.
                                    if (sheetName.equalsIgnoreCase(desiredSheet)) {
                                        ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(
                                                columnsToSkip, firstRow, sheetName, formatValues, sharedStrings,
                                                styles);
                                        handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
                                        break;
                                    }
                                }
                            }
                        }
                    }
                } catch (InvalidFormatException ife) {
                    getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", ife);
                    throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported",
                            ife);
                } catch (OpenXML4JException | SAXException e) {
                    // Logged only: the FlowFile still goes to ORIGINAL in this case.
                    getLogger().error("Error occurred while processing Excel document metadata", e);
                }
            }
        });

        session.transfer(flowFile, ORIGINAL);

    } catch (RuntimeException ex) {
        getLogger().error("Failed to process incoming Excel document. " + ex.getMessage(), ex);
        FlowFile failedFlowFile = session.putAttribute(flowFile,
                ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage());
        session.transfer(failedFlowFile, FAILURE);
    }
}

From source file:org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java

License:Apache License

/**
 * Writes every sheet of the workbook to the XHTML handler as a {@code div} holding the
 * sheet name, a table with the sheet contents, its headers/footers and its shapes.
 *
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();

    XSSFReader xssfReader;
    StylesTable styles;
    XSSFReader.SheetIterator sheets;
    ReadOnlySharedStringsTable strings;
    try {
        xssfReader = new XSSFReader(container);
        styles = xssfReader.getStylesTable();
        sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        strings = new ReadOnlySharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // InvalidFormatException is an OpenXML4JException, so a single handler covers both.
        throw new XmlException(e);
    }

    while (sheets.hasNext()) {
        InputStream sheetStream = sheets.next();
        sheetParts.add(sheets.getSheetPart());

        SheetTextAsHTML sheetHandler = new SheetTextAsHTML(xhtml);
        CommentsTable sheetComments = sheets.getSheetComments();

        // Open the per-sheet container and emit the sheet name as its heading
        xhtml.startElement("div");
        xhtml.element("h1", sheets.getSheetName());

        // Main sheet contents go into a table body
        xhtml.startElement("table");
        xhtml.startElement("tbody");

        processSheet(sheetHandler, sheetComments, styles, strings, sheetStream);

        xhtml.endElement("tbody");
        xhtml.endElement("table");

        // Headers and footers are only known once the sheet has been processed,
        // which is why they follow the contents instead of preceding them.
        for (String headerText : sheetHandler.headers) {
            extractHeaderFooter(headerText, xhtml);
        }
        for (String footerText : sheetHandler.footers) {
            extractHeaderFooter(footerText, xhtml);
        }
        processShapes(sheets.getShapes(), xhtml);

        // Close the per-sheet container
        xhtml.endElement("div");
    }
}

From source file:org.dhatim.fastexcel.reader.BenchmarksTest.java

License:Apache License

/**
 * Benchmark that opens {@code FILE} with the POI event API, processes the first sheet
 * with a {@code SheetContentHandler}, checks its result against {@code RESULT}, and
 * opens and closes the stream of every remaining sheet without processing it.
 *
 * @return the number of sheets iterated.
 */
@Benchmark
public int streamingApachePoi() throws IOException, OpenXML4JException, SAXException {
    try (OPCPackage opcPackage = OPCPackage.open(openResource(FILE))) {
        ReadOnlySharedStringsTable sharedStrings = new ReadOnlySharedStringsTable(opcPackage);
        XSSFReader xssfReader = new XSSFReader(opcPackage);
        StylesTable stylesTable = xssfReader.getStylesTable();
        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

        int sheetCount = 0;
        for (; sheets.hasNext(); sheetCount++) {
            try (InputStream in = sheets.next()) {
                if (sheetCount != 0) {
                    // Only the first sheet is parsed; later ones are just opened and closed.
                    continue;
                }
                SheetContentHandler contentHandler = new SheetContentHandler();
                processSheet(stylesTable, sharedStrings, contentHandler, in);
                assertEquals(RESULT, contentHandler.result);
            }
        }
        return sheetCount;
    }
}

From source file:org.talend.dataprep.schema.xls.streaming.StreamingSheetTest.java

License:Open Source License

/**
 * Builds the {@code streamingSheet} fixture from the first sheet of {@code ../dates.xlsx},
 * wrapping an StAX event reader over that sheet in a {@code StreamingSheetReader}.
 */
@Before
public void setUp() throws Exception {
    OPCPackage opcPackage = OPCPackage.open(StreamingSheetTest.class.getResourceAsStream("../dates.xlsx"));
    XSSFReader xssfReader = new XSSFReader(opcPackage);

    SharedStringsTable sharedStrings = xssfReader.getSharedStringsTable();
    StylesTable stylesTable = xssfReader.getStylesTable();

    Iterator<InputStream> sheets = xssfReader.getSheetsData();
    XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
    XMLEventReader firstSheetEvents = xmlFactory.createXMLEventReader(sheets.next());

    streamingSheet = new StreamingSheet("name",
            new StreamingSheetReader(sharedStrings, stylesTable, firstSheetEvents, 10));
}