Example usage for org.apache.poi.xssf.eventusermodel XSSFReader getStylesTable

List of usage examples for org.apache.poi.xssf.eventusermodel XSSFReader getStylesTable

Introduction

On this page you can find example usages of org.apache.poi.xssf.eventusermodel XSSFReader getStylesTable.

Prototype

public StylesTable getStylesTable() throws IOException, InvalidFormatException 

Source Link

Document

Opens up the Styles Table, parses it, and returns a handy object for working with cell styles.

Usage

From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java

License:Apache License

/**
 * Writes every sheet of the workbook to the given handler as XHTML (one
 * {@code div} per sheet containing a heading, a table, headers/footers and
 * shapes) and, when {@code ConfigBool.ENABLE_IMAGE_OCR} is set, feeds the
 * workbook's embedded pictures to a {@code TikaImageHelper}.
 *
 * @param xhtml handler receiving the generated markup
 * @throws SAXException if writing to the handler or parsing a sheet fails
 * @throws XmlException if the package cannot be read as an OOXML workbook
 * @throws IOException  if reading the package or closing a sheet stream fails
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();

    ReadOnlySharedStringsTable strings;
    XSSFReader.SheetIterator iter;
    XSSFReader xssfReader;
    StylesTable styles;
    try {
        xssfReader = new XSSFReader(container);
        styles = xssfReader.getStylesTable();
        iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        strings = new ReadOnlySharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // InvalidFormatException is a subclass of OpenXML4JException, so a
        // single handler covers both.
        throw new XmlException(e);
    }

    while (iter.hasNext()) {
        InputStream stream = iter.next();
        try {
            sheetParts.add(iter.getSheetPart());

            SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml, iter.getSheetComments());

            // Start, and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());

            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");

            processSheet(sheetExtractor, styles, strings, stream);

            xhtml.endElement("tbody");
            xhtml.endElement("table");

            // Output any headers and footers
            // (Need to process the sheet to get them, so we can't
            // do the headers before the contents)
            for (String header : sheetExtractor.headers) {
                extractHeaderFooter(header, xhtml);
            }
            for (String footer : sheetExtractor.footers) {
                extractHeaderFooter(footer, xhtml);
            }
            processShapes(iter.getShapes(), xhtml);
            // All done with this sheet
            xhtml.endElement("div");
        } finally {
            // Release the sheet part's stream even when processing fails.
            stream.close();
        }
    }

    if (Config.inst().getProp(ConfigBool.ENABLE_IMAGE_OCR)) {
        TikaImageHelper helper = new TikaImageHelper(metadata);
        try {
            XSSFWorkbook workbook = new XSSFWorkbook(container);
            List<XSSFPictureData> pictures = workbook.getAllPictures();
            for (XSSFPictureData picture : pictures) {
                ByteArrayInputStream imageData = new ByteArrayInputStream(picture.getData());
                BufferedImage image = ImageIO.read(imageData);
                // ImageIO.read returns null when no registered reader
                // understands the data; skip such pictures.
                if (image != null) {
                    helper.addImage(image);
                }
            }
            helper.addTextToHandler(xhtml);
        } catch (Exception e) {
            // Best effort: a failure here does not abort the extraction.
            // NOTE(review): replace printStackTrace with the project's logger.
            e.printStackTrace();
        } finally {
            // Close the helper even if closing the extractor throws.
            try {
                if (extractor != null) {
                    extractor.close();
                }
            } finally {
                helper.close();
            }
        }
    }
}

From source file:net.geoprism.data.etl.excel.ExcelSheetReader.java

License:Open Source License

/**
 * Opens the stream as an OOXML package and parses each sheet with a SAX
 * reader, surrounding every sheet with {@code startSheet}/{@code endSheet}
 * calls on {@code this.handler}.
 *
 * @param stream workbook data; always closed before this method returns
 * @throws Exception if the package cannot be opened or a sheet cannot be parsed
 */
public void process(InputStream stream) throws Exception {
    try {
        OPCPackage pkg = OPCPackage.open(stream);

        try {
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
            XSSFReader xssfReader = new XSSFReader(pkg);
            StylesTable styles = xssfReader.getStylesTable();
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

            while (iter.hasNext()) {
                InputStream sheet = iter.next();

                try {
                    String sheetName = iter.getSheetName();

                    this.handler.startSheet(sheetName);

                    InputSource sheetSource = new InputSource(sheet);
                    // Named so it does not shadow the this.handler field.
                    ContentHandler sheetHandler = new XSSFSheetXMLHandler(styles, strings, this.handler,
                            this.formatter, false);

                    XMLReader reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
                    reader.setContentHandler(sheetHandler);
                    reader.parse(sheetSource);

                    this.handler.endSheet();
                } finally {
                    sheet.close();
                }
            }
        } finally {
            // The package is only read, so discard it without saving.
            pkg.revert();
        }
    } finally {
        stream.close();
    }
}

From source file:oracle.cloud.sampleapps.Excel2Csv.XLSX2CSV.java

License:Apache License

/**
 * Initiates the processing of the XLSX workbook file to CSV: every sheet of
 * {@code xlsxPackage} is passed to {@code processSheet} with a fresh
 * {@code SheetToCSV} handler.
 *
 * @return the content of {@code csvOutput} once all sheets have been processed
 * @throws IOException if a sheet cannot be read or its stream cannot be closed
 * @throws OpenXML4JException if the package cannot be read
 * @throws ParserConfigurationException
 * @throws SAXException
 */
public String process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
    ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
    XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
    StylesTable styles = xssfReader.getStylesTable();
    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
    while (iter.hasNext()) {
        InputStream stream = iter.next();
        try {
            processSheet(styles, strings, new SheetToCSV(), stream);
        } finally {
            // Close the sheet stream even when parsing the sheet fails.
            stream.close();
        }
    }

    return csvOutput.toString();
}

From source file:org.apache.metamodel.excel.XlsxSheetToRowsHandler.java

License:Apache License

/**
 * Creates a handler that keeps the given callback and configuration, takes
 * the shared-strings and styles tables from the reader, and starts with
 * empty row buffers.
 *
 * @param callback      stored in {@code _callback}
 * @param xssfReader    source of the shared-strings and styles tables
 * @param configuration stored in {@code _configuration}
 * @throws Exception if either table cannot be obtained from the reader
 */
public XlsxSheetToRowsHandler(XlsxRowCallback callback, XSSFReader xssfReader, ExcelConfiguration configuration)
        throws Exception {
    // Collaborators supplied by the caller.
    this._callback = callback;
    this._configuration = configuration;

    // Workbook-level tables, fetched once from the reader.
    this._sharedStringTable = xssfReader.getSharedStringsTable();
    this._stylesTable = xssfReader.getStylesTable();

    // Initial parsing state: not inside a cell or formula, no row seen yet.
    this._inCell = false;
    this._inFormula = false;
    this._rowNumber = -1;

    // Empty buffers for values and styles.
    this._value = new StringBuilder();
    this._style = new StyleBuilder();
    this._rowValues = new ArrayList<String>();
    this._styles = new ArrayList<Style>();
}

From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java

License:Apache License

/**
 * Reads the incoming FlowFile as an .xlsx package and passes each selected
 * sheet to {@code handleExcelSheet}. When {@code DESIRED_SHEETS} has a value,
 * only sheets whose name matches one of its entries (case-insensitively) are
 * handled; otherwise every sheet is handled.
 *
 * <p>The incoming FlowFile is transferred to {@code ORIGINAL} when reading
 * returns normally. A {@link RuntimeException} is logged, recorded in the
 * {@code <class name>.error} attribute, and the FlowFile is transferred to
 * {@code FAILURE}. {@code OpenXML4JException} and {@code SAXException} are
 * only logged inside the callback.
 *
 * @throws ProcessException if an entry of {@code COLUMNS_TO_SKIP} is not an integer
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions()
            .getValue();
    final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean();

    final CSVFormat csvFormat = CSVUtils.createCSVFormat(context);

    //Switch to 0 based index
    final int firstRow = context.getProperty(ROWS_TO_SKIP).asInteger() - 1;
    final String[] sColumnsToSkip = StringUtils.split(context.getProperty(COLUMNS_TO_SKIP).getValue(), ",");

    final List<Integer> columnsToSkip = new ArrayList<>();

    if (sColumnsToSkip != null && sColumnsToSkip.length > 0) {
        for (String c : sColumnsToSkip) {
            try {
                //Switch to 0 based index
                columnsToSkip.add(Integer.parseInt(c) - 1);
            } catch (NumberFormatException e) {
                throw new ProcessException("Invalid column in Columns to Skip list.", e);
            }
        }
    }

    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream inputStream) throws IOException {

                // Declared outside the try so the finally block can release it.
                OPCPackage pkg = null;
                try {
                    pkg = OPCPackage.open(inputStream);
                    XSSFReader r = new XSSFReader(pkg);
                    ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(pkg);
                    StylesTable styles = r.getStylesTable();
                    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) r.getSheetsData();

                    if (desiredSheetsDelimited != null) {
                        String[] desiredSheets = StringUtils.split(desiredSheetsDelimited,
                                DESIRED_SHEETS_DELIMITER);

                        if (desiredSheets != null) {
                            while (iter.hasNext()) {
                                InputStream sheet = iter.next();
                                String sheetName = iter.getSheetName();

                                for (String desiredSheet : desiredSheets) {
                                    //If the sheetName is a desired one parse it
                                    if (sheetName.equalsIgnoreCase(desiredSheet)) {
                                        ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(
                                                columnsToSkip, firstRow, sheetName, formatValues, sst, styles);
                                        handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
                                        break;
                                    }
                                }
                            }
                        } else {
                            // NOTE(review): reached only if split() returns null for a
                            // non-null input -- confirm whether this branch is reachable.
                            getLogger().debug(
                                    "Excel document was parsed but no sheets with the specified desired names were found.");
                        }

                    } else {
                        //Get all of the sheets in the document.
                        while (iter.hasNext()) {
                            InputStream sheet = iter.next();
                            String sheetName = iter.getSheetName();

                            ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow,
                                    sheetName, formatValues, sst, styles);
                            handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
                        }
                    }
                } catch (InvalidFormatException ife) {
                    getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", ife);
                    throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported",
                            ife);
                } catch (OpenXML4JException | SAXException e) {
                    getLogger().error("Error occurred while processing Excel document metadata", e);
                } finally {
                    if (pkg != null) {
                        // The package is only read, so discard it without saving.
                        pkg.revert();
                    }
                }
            }
        });

        session.transfer(flowFile, ORIGINAL);

    } catch (RuntimeException ex) {
        getLogger().error("Failed to process incoming Excel document. " + ex.getMessage(), ex);
        FlowFile failedFlowFile = session.putAttribute(flowFile,
                ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage());
        session.transfer(failedFlowFile, FAILURE);
    }
}

From source file:org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java

License:Apache License

/**
 * Writes every sheet of the workbook to the given handler as XHTML: one
 * {@code div} per sheet containing the sheet name, a table with the sheet
 * contents, then headers, footers and shapes.
 *
 * @param xhtml handler receiving the generated markup
 * @throws SAXException if writing to the handler or parsing a sheet fails
 * @throws XmlException if the package cannot be read as an OOXML workbook
 * @throws IOException  if reading the package or closing a sheet stream fails
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();

    ReadOnlySharedStringsTable strings;
    XSSFReader.SheetIterator iter;
    XSSFReader xssfReader;
    StylesTable styles;
    try {
        xssfReader = new XSSFReader(container);
        styles = xssfReader.getStylesTable();
        iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        strings = new ReadOnlySharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // InvalidFormatException is a subclass of OpenXML4JException, so a
        // single handler covers both.
        throw new XmlException(e);
    }

    while (iter.hasNext()) {
        InputStream stream = iter.next();
        try {
            sheetParts.add(iter.getSheetPart());

            SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml);
            CommentsTable comments = iter.getSheetComments();

            // Start, and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());

            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");

            processSheet(sheetExtractor, comments, styles, strings, stream);

            xhtml.endElement("tbody");
            xhtml.endElement("table");

            // Output any headers and footers
            // (Need to process the sheet to get them, so we can't
            //  do the headers before the contents)
            for (String header : sheetExtractor.headers) {
                extractHeaderFooter(header, xhtml);
            }
            for (String footer : sheetExtractor.footers) {
                extractHeaderFooter(footer, xhtml);
            }
            processShapes(iter.getShapes(), xhtml);
            // All done with this sheet
            xhtml.endElement("div");
        } finally {
            // Release the sheet part's stream even when processing fails.
            stream.close();
        }
    }
}

From source file:org.dhatim.fastexcel.reader.BenchmarksTest.java

License:Apache License

/**
 * Benchmark: opens {@code FILE} with POI's event API, runs the first sheet
 * through a {@code SheetContentHandler}, checks its result against
 * {@code RESULT}, and opens and closes the remaining sheets without
 * parsing them.
 *
 * @return the number of sheets iterated
 */
@Benchmark
public int streamingApachePoi() throws IOException, OpenXML4JException, SAXException {
    try (OPCPackage workbook = OPCPackage.open(openResource(FILE))) {
        ReadOnlySharedStringsTable sharedStrings = new ReadOnlySharedStringsTable(workbook);
        XSSFReader xssfReader = new XSSFReader(workbook);
        StylesTable stylesTable = xssfReader.getStylesTable();
        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

        int sheetsVisited = 0;
        for (; sheets.hasNext(); sheetsVisited++) {
            try (InputStream sheetXml = sheets.next()) {
                // Only the first sheet is parsed and verified.
                if (sheetsVisited != 0) {
                    continue;
                }
                SheetContentHandler contentHandler = new SheetContentHandler();
                processSheet(stylesTable, sharedStrings, contentHandler, sheetXml);
                assertEquals(RESULT, contentHandler.result);
            }
        }
        return sheetsVisited;
    }
}

From source file:org.pentaho.di.trans.steps.excelinput.staxpoi.StaxPoiSheet.java

License:Apache License

/**
 * Opens the sheet identified by {@code sheetID} with a StAX reader and scans
 * forward until the first {@code row} element has been consumed, recording
 * the sheet dimensions and the string cells of that first row in
 * {@code headerRow}.
 *
 * @param reader    source of the shared strings table, the styles table and the sheet stream
 * @param sheetName stored as the sheet's name
 * @param sheetID   id passed to {@code reader.getSheet} to open the sheet
 * @throws InvalidFormatException declared by the POI reader accessors used here
 * @throws IOException            declared by the POI reader accessors used here
 * @throws XMLStreamException     if the sheet XML cannot be read
 */
public StaxPoiSheet(XSSFReader reader, String sheetName, String sheetID)
        throws InvalidFormatException, IOException, XMLStreamException {
    this.sheetName = sheetName;
    xssfReader = reader;
    sheetId = sheetID;
    sst = reader.getSharedStringsTable();
    styles = reader.getStylesTable();
    sheetStream = reader.getSheet(sheetID);
    XMLInputFactory factory = XMLInputFactory.newInstance();
    sheetReader = factory.createXMLStreamReader(sheetStream);
    headerRow = new ArrayList<String>();
    while (sheetReader.hasNext()) {
        int event = sheetReader.next();
        if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("dimension")) {
            String dim = sheetReader.getAttributeValue(null, "ref");
            // empty sheets have dimension with no range
            if (StringUtils.contains(dim, ':')) {
                // keep the part after the colon and derive row/column counts from it
                dim = dim.split(":")[1];
                numRows = StaxUtil.extractRowNumber(dim);
                numCols = StaxUtil.extractColumnNumber(dim);
            } else {
                // no range given: fall back to the StaxUtil maximums
                maxColsNumberDefined = false;
                numCols = StaxUtil.MAX_COLUMNS;
                numRows = StaxUtil.MAX_ROWS;
            }
        }
        if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("row")) {
            // the first row element encountered is treated as the header row
            currentRow = Integer.parseInt(sheetReader.getAttributeValue(null, "r"));
            firstRow = currentRow;

            // collect the string cells of the header row
            while (sheetReader.hasNext()) {
                event = sheetReader.next();
                if (event == XMLStreamConstants.END_ELEMENT && sheetReader.getLocalName().equals("row")) {
                    // if the row has ended, break the inner while loop
                    break;
                }
                if (event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals("c")) {
                    String attributeValue = sheetReader.getAttributeValue(null, "t");
                    if (attributeValue != null) {
                        if (attributeValue.equals("s")) {
                            // shared-string cell: advance to its <v> element, whose text is
                            // an index into the shared strings table
                            while (sheetReader.hasNext()) {
                                event = sheetReader.next();
                                if (event == XMLStreamConstants.START_ELEMENT
                                        && sheetReader.getLocalName().equals("v")) {
                                    int idx = Integer.parseInt(sheetReader.getElementText());
                                    String content = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                                    headerRow.add(content);
                                    break;
                                }
                            }
                        } else if (attributeValue.equals("inlineStr")) {
                            // inline-string cell: advance to <is>, then take the first
                            // CHARACTERS event after it as the cell text
                            while (sheetReader.hasNext()) {
                                event = sheetReader.next();
                                if (event == XMLStreamConstants.START_ELEMENT
                                        && sheetReader.getLocalName().equals("is")) {
                                    while (sheetReader.hasNext()) {
                                        event = sheetReader.next();
                                        if (event == XMLStreamConstants.CHARACTERS) {
                                            String content = new XSSFRichTextString(sheetReader.getText())
                                                    .toString();
                                            headerRow.add(content);
                                            break;
                                        }
                                    }
                                    break;
                                }
                            }
                        }
                        // cells with any other "t" value add nothing to headerRow
                    } else {
                        // a cell without a "t" attribute ends the header scan
                        break;
                    }
                }
            }
            // we have parsed the header row
            break;
        }
    }
}

From source file:org.pentaho.di.trans.steps.excelinput.staxpoi.StaxPoiSheetTest.java

License:Apache License

/**
 * Builds a Mockito mock of {@code XSSFReader} that returns the given shared
 * strings and styles tables and, for {@code getSheet(sheetId)}, a new UTF-8
 * stream over {@code sheetContent} on each call.
 */
private XSSFReader mockXSSFReader(final String sheetId, final String sheetContent, final SharedStringsTable sst,
        final StylesTable styles) throws Exception {
    // An Answer (rather than thenReturn) so each invocation gets its own stream.
    final Answer<InputStream> freshSheetStream = new Answer<InputStream>() {
        public InputStream answer(InvocationOnMock invocation) throws Throwable {
            return IOUtils.toInputStream(sheetContent, "UTF-8");
        }
    };

    final XSSFReader mockedReader = mock(XSSFReader.class);
    when(mockedReader.getSharedStringsTable()).thenReturn(sst);
    when(mockedReader.getStylesTable()).thenReturn(styles);
    when(mockedReader.getSheet(sheetId)).thenAnswer(freshSheetStream);
    return mockedReader;
}

From source file:org.talend.dataprep.schema.xls.streaming.StreamingSheetTest.java

License:Open Source License

/**
 * Test fixture: opens {@code ../dates.xlsx} from the classpath and wraps the
 * first sheet returned by the reader in a {@code StreamingSheet} named
 * {@code "name"}.
 */
@Before
public void setUp() throws Exception {
    final InputStream workbookStream = StreamingSheetTest.class.getResourceAsStream("../dates.xlsx");
    final XSSFReader xssfReader = new XSSFReader(OPCPackage.open(workbookStream));

    final SharedStringsTable sharedStrings = xssfReader.getSharedStringsTable();
    final StylesTable stylesTable = xssfReader.getStylesTable();

    // Only the first sheet stream from the reader is used.
    final Iterator<InputStream> sheets = xssfReader.getSheetsData();
    final XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
    final XMLEventReader sheetEvents = xmlFactory.createXMLEventReader(sheets.next());

    streamingSheet = new StreamingSheet("name",
            new StreamingSheetReader(sharedStrings, stylesTable, sheetEvents, 10));
}