List of usage examples for the org.apache.poi.ss.usermodel.DataFormatter constructor:
public DataFormatter()
From source file:net.sf.dvstar.swirl.desktopdbf.data.ExcelTableModel.java
License:Open Source License
/**
 * Builds a table model around an already-open POI workbook and immediately
 * converts its contents to CSV.
 *
 * @param workbook the workbook whose cells this model exposes
 */
public ExcelTableModel(Workbook workbook) {
    this.workbook = workbook;
    // Evaluator forces formula cells to yield values; formatter renders
    // those values as display strings.
    this.evaluator = this.workbook.getCreationHelper().createFormulaEvaluator();
    this.formatter = new DataFormatter();
    convertToCSV();
}
From source file:net.sf.dvstar.swirl.desktopdbf.data.XLSPanelLoader.java
License:Open Source License
/** * Open an Excel workbook ready for conversion. * * @param file An instance of the File class that encapsulates a handle * to a valid Excel workbook. Note that the workbook can be in * either binary (.xls) or SpreadsheetML (.xlsx) format. * @throws java.io.FileNotFoundException Thrown if the file cannot be located. * @throws java.io.IOException Thrown if a problem occurs in the file system. * @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown * if invalid xml is found whilst parsing an input SpreadsheetML * file.// ww w . j a v a 2 s . c om */ private void openWorkbook(File file) throws FileNotFoundException, IOException, InvalidFormatException { FileInputStream fis = null; try { System.out.println("Opening workbook [" + file.getName() + "]"); fis = new FileInputStream(file); // Open the workbook and then create the FormulaEvaluator and // DataFormatter instances that will be needed to, respectively, // force evaluation of forumlae found in cells and create a // formatted String encapsulating the cells contents. this.workbook = WorkbookFactory.create(fis); this.evaluator = this.workbook.getCreationHelper().createFormulaEvaluator(); this.formatter = new DataFormatter(); } finally { if (fis != null) { fis.close(); } } }
From source file:net.sf.taverna.t2.activities.spreadsheet.ExcelSpreadsheetReader.java
License:Open Source License
/**
 * Streams the first sheet of the spreadsheet to the given row processor,
 * restricted to the supplied row/column ranges.
 *
 * @param inputStream     spreadsheet data (.xls or .xlsx)
 * @param rowRange        rows to read; an end &lt; 0 means "to the last row"
 * @param columnRange     columns to read
 * @param ignoreBlankRows when true, rows whose cells are all null are skipped
 * @param rowProcessor    receives each materialised row
 * @throws SpreadsheetReadException if the stream is unreadable or not a spreadsheet
 */
public void read(InputStream inputStream, Range rowRange, Range columnRange, boolean ignoreBlankRows,
        SpreadsheetRowProcessor rowProcessor) throws SpreadsheetReadException {
    final Workbook workbook;
    try {
        workbook = WorkbookFactory.create(inputStream);
    } catch (InvalidFormatException e) {
        throw new SpreadsheetReadException("The file does not have a compatible spreadsheet format", e);
    } catch (IOException | IllegalArgumentException e) {
        // Both failure modes carried the same message in the original; merged via multi-catch.
        throw new SpreadsheetReadException("The spreadsheet stream could not be read", e);
    }

    DataFormatter formatter = new DataFormatter();
    workbook.setMissingCellPolicy(Row.CREATE_NULL_AS_BLANK);
    Sheet sheet = workbook.getSheetAt(0);

    // An open-ended row range means "read to the sheet's last row".
    if (rowRange.getEnd() < 0) {
        rowRange.setEnd(sheet.getLastRowNum());
        logger.debug("No end of row range specified, setting to " + rowRange.getEnd());
    }

    SortedMap<Integer, String> rowData = new TreeMap<Integer, String>();
    for (int rowIndex = rowRange.getStart(); rowIndex <= rowRange.getEnd(); rowIndex++) {
        if (!rowRange.contains(rowIndex)) {
            continue;
        }
        boolean blankRow = true;
        Row row = sheet.getRow(rowIndex);
        for (int columnIndex = columnRange.getStart(); columnIndex <= columnRange.getEnd(); columnIndex++) {
            if (!columnRange.contains(columnIndex)) {
                continue;
            }
            String value = null;
            if (row != null) {
                Cell cell = row.getCell(columnIndex);
                if (cell != null) {
                    value = getCellValue(cell, formatter);
                }
            }
            if (value != null) {
                blankRow = false;
            }
            rowData.put(columnIndex, value);
            // Flush the accumulated row once the final in-range column is reached.
            if (columnIndex == columnRange.getEnd()) {
                if (!ignoreBlankRows || !blankRow) {
                    rowProcessor.processRow(rowIndex, rowData);
                }
                rowData = new TreeMap<Integer, String>();
            }
        }
    }
}
From source file:nl.architolk.ldt.processors.ExcelConverter.java
License:Open Source License
/**
 * Decodes a base64 Excel (.xlsx) input, then emits its contents as SAX events
 * in a simple {@code workbook > sheet > row > column} XML structure.
 *
 * @param contentHandler receives the generated SAX events
 * @throws SAXException propagated from the content handler
 * @throws OXFException wrapping any I/O failure while reading the workbook
 */
public void generateData(PipelineContext context, ContentHandler contentHandler) throws SAXException {
    try {
        // Read binary content of Excel file (arrives base64-encoded).
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        Base64XMLReceiver base64ContentHandler = new Base64XMLReceiver(os);
        readInputAsSAX(context, INPUT_DATA, base64ContentHandler);
        final byte[] fileContent = os.toByteArray();
        final java.io.ByteArrayInputStream bais = new ByteArrayInputStream(fileContent);

        // Create workbook plus the formatter/evaluator used to render cell values.
        XSSFWorkbook workbook = new XSSFWorkbook(bais);
        DataFormatter formatter = new DataFormatter();
        XSSFFormulaEvaluator evaluator = new XSSFFormulaEvaluator(workbook);

        contentHandler.startDocument();
        contentHandler.startElement("", "workbook", "workbook", new AttributesImpl());
        for (int s = 0; s < workbook.getNumberOfSheets(); s++) {
            XSSFSheet sheet = workbook.getSheetAt(s);
            AttributesImpl sheetAttr = new AttributesImpl();
            sheetAttr.addAttribute("", "name", "name", "CDATA", sheet.getSheetName());
            contentHandler.startElement("", "sheet", "sheet", sheetAttr);
            for (int r = 0; r <= sheet.getLastRowNum(); r++) {
                XSSFRow row = sheet.getRow(r);
                if (row != null) {
                    AttributesImpl rowAttr = new AttributesImpl();
                    rowAttr.addAttribute("", "id", "id", "CDATA", Integer.toString(r));
                    contentHandler.startElement("", "row", "row", rowAttr);
                    for (int c = 0; c < row.getLastCellNum(); c++) {
                        XSSFCell cell = row.getCell(c);
                        if (cell != null) {
                            try {
                                String cellvalue = formatter.formatCellValue(cell, evaluator);
                                // BUG FIX: the original used `cellvalue != ""`, which
                                // compares references, not contents, so empty cell values
                                // were still emitted. Compare contents instead.
                                if (!cellvalue.isEmpty()) {
                                    AttributesImpl columnAttr = new AttributesImpl();
                                    columnAttr.addAttribute("", "id", "id", "CDATA",
                                            Integer.toString(cell.getColumnIndex()));
                                    contentHandler.startElement("", "column", "column", columnAttr);
                                    contentHandler.characters(cellvalue.toCharArray(), 0, cellvalue.length());
                                    contentHandler.endElement("", "column", "column");
                                }
                            } catch (Exception ignored) {
                                // Deliberate best-effort: a cell that cannot be evaluated
                                // or formatted is skipped rather than aborting the export.
                            }
                        }
                    }
                    contentHandler.endElement("", "row", "row");
                }
            }
            contentHandler.endElement("", "sheet", "sheet");
        }
        contentHandler.endElement("", "workbook", "workbook");
        contentHandler.endDocument();
    } catch (IOException e) {
        throw new OXFException(e);
    }
}
From source file:org.alfresco.repo.web.scripts.person.UserCSVUploadPost.java
License:Open Source License
private void processSpreadsheetUpload(Workbook wb, List<Map<QName, String>> users) throws IOException { if (wb.getNumberOfSheets() > 1) { logger.info("Uploaded Excel file has " + wb.getNumberOfSheets() + " sheets, ignoring all except the first one"); }/*from w w w. j a v a2s . c o m*/ int firstRow = 0; Sheet s = wb.getSheetAt(0); DataFormatter df = new DataFormatter(); String[][] data = new String[s.getLastRowNum() + 1][]; // If there is a heading freezepane row, skip it PaneInformation pane = s.getPaneInformation(); if (pane != null && pane.isFreezePane() && pane.getHorizontalSplitTopRow() > 0) { firstRow = pane.getHorizontalSplitTopRow(); logger.debug("Skipping excel freeze header of " + firstRow + " rows"); } // Process each row in turn, getting columns up to our limit for (int row = firstRow; row <= s.getLastRowNum(); row++) { Row r = s.getRow(row); if (r != null) { String[] d = new String[COLUMNS.length]; for (int cn = 0; cn < COLUMNS.length; cn++) { Cell cell = r.getCell(cn); if (cell != null && cell.getCellType() != Cell.CELL_TYPE_BLANK) { d[cn] = df.formatCellValue(cell); } } data[row] = d; } } // Handle the contents processSpreadsheetUpload(data, users); }
From source file:org.apache.metamodel.excel.XlsxSheetToRowsHandler.java
License:Apache License
/**
 * Factory hook for the formatter used to render cell values as text.
 *
 * @return a freshly constructed {@link DataFormatter} on every call
 */
private DataFormatter getDataFormatter() {
    return new DataFormatter();
}
From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java
License:Apache License
/** * Handles an individual Excel sheet from the entire Excel document. Each sheet will result in an individual flowfile. * * @param session//from www.j a v a 2 s . co m * The NiFi ProcessSession instance for the current invocation. */ private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF, final InputStream sheetInputStream, ExcelSheetReadConfig readConfig, CSVFormat csvFormat) throws IOException { FlowFile ff = session.create(); try { final DataFormatter formatter = new DataFormatter(); final InputSource sheetSource = new InputSource(sheetInputStream); final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat); final XMLReader parser = SAXHelper.newXMLReader(); //If Value Formatting is set to false then don't pass in the styles table. // This will cause the XSSF Handler to return the raw value instead of the formatted one. final StylesTable sst = readConfig.getFormatValues() ? readConfig.getStyles() : null; final XSSFSheetXMLHandler handler = new XSSFSheetXMLHandler(sst, null, readConfig.getSharedStringsTable(), sheetHandler, formatter, false); parser.setContentHandler(handler); ff = session.write(ff, new OutputStreamCallback() { @Override public void process(OutputStream out) throws IOException { PrintStream outPrint = new PrintStream(out); sheetHandler.setOutput(outPrint); try { parser.parse(sheetSource); sheetInputStream.close(); sheetHandler.close(); outPrint.close(); } catch (SAXException se) { getLogger().error("Error occurred while processing Excel sheet {}", new Object[] { readConfig.getSheetName() }, se); } } }); ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName()); ff = session.putAttribute(ff, ROW_NUM, new Long(sheetHandler.getRowCount()).toString()); if (StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key()))) { ff = session.putAttribute(ff, SOURCE_FILE_NAME, originalParentFF.getAttribute(CoreAttributes.FILENAME.key())); } else { ff = session.putAttribute(ff, 
SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME); } //Update the CoreAttributes.FILENAME to have the .csv extension now. Also update MIME.TYPE ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()), ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName())); ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); session.transfer(ff, SUCCESS); } catch (SAXException | ParserConfigurationException saxE) { getLogger().error("Failed to create instance of Parser.", saxE); ff = session.putAttribute(ff, ConvertExcelToCSVProcessor.class.getName() + ".error", saxE.getMessage()); session.transfer(ff, FAILURE); } finally { sheetInputStream.close(); } }
From source file:org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java
License:Apache License
/**
 * Decorates an event-based Excel extractor so cell values are rendered
 * with a locale-aware {@link DataFormatter}.
 *
 * @param context   parse context forwarded to the superclass
 * @param extractor extractor to decorate; switched to emit results, not formulas
 * @param locale    formatting locale; null selects the default-locale formatter
 */
public XSSFExcelExtractorDecorator(ParseContext context, XSSFEventBasedExcelExtractor extractor,
        Locale locale) {
    super(context, extractor);
    this.extractor = extractor;
    // Extract evaluated results rather than the formula strings themselves.
    extractor.setFormulasNotResults(false);
    extractor.setLocale(locale);
    formatter = (locale == null) ? new DataFormatter() : new DataFormatter(locale);
}
From source file:org.corpus_tools.peppermodules.spreadsheet.Spreadsheet2SaltMapper.java
License:Apache License
/**
 * Add annotations to the salt graph.
 *
 * For every annotation tier (column) mapped to a primary-text tier in
 * {@code annoPrimRelations}, walks the sheet row by row, builds a span over
 * the tokens covered by each non-empty annotation cell (honouring merged
 * cells), and attaches the cell's formatted text as an annotation.
 *
 * @param annoPrimRelations maps annotation column index -> primary-text column index
 * @param corpusSheet sheet holding the corpus data; row 0 is the header
 * @param mergedCells merged-cell lookup used to find a cell's true end row
 * @param layerTierCouples maps tier name -> SLayer the resulting span is added to
 * @param progressProcessedNumberOfColumns running count used for progress reporting
 */
private void setAnnotations(HashMap<Integer, Integer> annoPrimRelations, Sheet corpusSheet,
        Table<Integer, Integer, CellRangeAddress> mergedCells, Map<String, SLayer> layerTierCouples,
        int progressProcessedNumberOfColumns) {
    if (!annoPrimRelations.isEmpty()) {
        Row headerRow = corpusSheet.getRow(0);
        // Formatter keeps the literal cell text (e.g. integers are not
        // rendered as decimals).
        DataFormatter formatter = new DataFormatter();
        int progressTotalNumberOfColumns = annoPrimRelations.keySet().size();
        for (int annoTier : annoPrimRelations.keySet()) {
            // NOTE(review): annoSpan is carried across row iterations, so the
            // layer-assignment block below can see the span from an earlier,
            // non-empty row — verify this is intended.
            SSpan annoSpan = null;
            // Row 0 is the header, so data rows start at 1.
            int currAnno = 1;
            while (currAnno < corpusSheet.getPhysicalNumberOfRows()) {
                String annoName = headerRow.getCell(annoTier).toString();
                Row row = corpusSheet.getRow(currAnno);
                Cell annoCell = row.getCell(annoTier);
                if (annoCell != null && !annoCell.toString().isEmpty()) {
                    String annoText = "";
                    annoText = formatter.formatCellValue(annoCell);
                    // Spans cover timeline positions: start is the 0-based row,
                    // end extends to the last row of a merged cell (if any).
                    int annoStart = currAnno - 1;
                    int annoEnd = getLastCell(annoCell, mergedCells);
                    DataSourceSequence<Integer> sequence = new DataSourceSequence<Integer>();
                    sequence.setStart(annoStart);
                    sequence.setEnd(annoEnd);
                    sequence.setDataSource(getDocument().getDocumentGraph().getTimeline());
                    List<SToken> sTokens = getDocument().getDocumentGraph().getTokensBySequence(sequence);
                    List<SToken> tokenOfSpan = new ArrayList<>();
                    if (sTokens == null) {
                        // No tokens under this annotation: the annotation tier is
                        // not aligned with its primary text.
                        SpreadsheetImporter.logger.error("Segmentation error: The segmentation of the tier \""
                                + headerRow.getCell(annoTier).toString() + "\" in the document: \""
                                + getResourceURI().lastSegment() + "\" in line: " + currAnno
                                + " does not match to its primary text: \""
                                + headerRow.getCell(annoPrimRelations.get(annoTier)).toString() + "\".");
                    } else {
                        // Keep only tokens that belong to this tier's primary text.
                        for (SToken tok : sTokens) {
                            STextualDS textualDS = getTextualDSForNode(tok,
                                    getDocument().getDocumentGraph());
                            if (textualDS.getName()
                                    .equals(headerRow.getCell(annoPrimRelations.get(annoTier)).toString())) {
                                tokenOfSpan.add(tok);
                            }
                        }
                    }
                    annoSpan = getDocument().getDocumentGraph().createSpan(tokenOfSpan);
                    if (annoSpan != null && annoName != null && !annoName.isEmpty()) {
                        // remove primary text info of annotation if given,
                        // e.g. "pos[text1]" -> "pos"
                        if (annoName.matches(".+\\[.+\\]")) {
                            annoName = annoName.split("\\[")[0];
                        }
                        annoSpan.createAnnotation(null, annoName, annoText);
                        annoSpan.setName(annoName);
                    }
                }
                // Attach the span to its configured layer, if layers are in use.
                if (getProps().getLayer() != null && annoSpan != null) {
                    if (layerTierCouples.size() > 0) {
                        if (layerTierCouples.get(annoName) != null) {
                            SLayer sLayer = layerTierCouples.get(annoName);
                            getDocument().getDocumentGraph().addLayer(sLayer);
                            sLayer.addNode(annoSpan);
                        }
                    }
                }
                currAnno++;
            } // end for each row of annotation
            progressProcessedNumberOfColumns++;
            setProgress((double) progressProcessedNumberOfColumns
                    / (double) progressTotalNumberOfColumns);
        } // end for each annotation layer
    } else {
        SpreadsheetImporter.logger.warn("No annotations except for primary texts found in document \""
                + getResourceURI().lastSegment() + "\".");
    }
}
From source file:org.corpus_tools.peppermodules.spreadsheet.Spreadsheet2SaltMapper.java
License:Apache License
/**
 * Set the primary text of each document.
 *
 * For every primary-text column in {@code primTextPos}, concatenates the
 * formatted cell values into an STextualDS (one token per non-empty cell,
 * space-separated), creates timeline and optional order relations for each
 * token, and finally attaches the tokens to their configured layer.
 *
 * @param corpusSheet sheet holding the corpus data; row 0 is the header
 * @param primTextPos column indices of the primary-text tiers
 * @param annoPrimRelations maps annotation column -> primary-text column (unused here)
 * @param headerRow header row providing tier names
 * @param mergedCells merged-cell lookup used to find a token's end row
 * @param layerTierCouples maps tier name -> SLayer the tokens are added to
 * @return number of processed primary-text columns
 */
private int setPrimText(Sheet corpusSheet, List<Integer> primTextPos,
        HashMap<Integer, Integer> annoPrimRelations, Row headerRow,
        Table<Integer, Integer, CellRangeAddress> mergedCells, Map<String, SLayer> layerTierCouples) {
    // initialize with number of token we have to create
    int progressTotalNumberOfColumns = primTextPos.size();
    // add each annotation to this number
    int progressProcessedNumberOfColumns = 0;
    // use formater to ensure that e.g. integers will not be converted into
    // decimals
    DataFormatter formatter = new DataFormatter();
    // save all tokens of the current primary text
    // NOTE(review): this list is never cleared between primary texts, so the
    // layer-assignment loop below adds tokens of ALL columns processed so
    // far — verify this is intended.
    List<SToken> currentTokList = new ArrayList<>();
    for (int primText : primTextPos) {
        // initialize primaryText
        STextualDS primaryText = SaltFactory.createSTextualDS();
        StringBuilder currentText = new StringBuilder();
        if (headerRow.getCell(primText) != null) {
            primaryText.setName(headerRow.getCell(primText).toString());
        }
        getDocument().getDocumentGraph().addNode(primaryText);
        // Character offset into the text being built.
        int offset = currentText.length();
        SToken lastTok = null;
        // start with the second row of the table, since the first row holds
        // the name of each tier
        int currRow = 1;
        while (currRow < corpusSheet.getPhysicalNumberOfRows()) {
            // iterate through all rows of the given corpus sheet
            Row row = corpusSheet.getRow(currRow);
            Cell primCell = row.getCell(primText);
            SToken currTok = null;
            int endCell = currRow;
            String text = null;
            if (primCell != null && !primCell.toString().isEmpty()) {
                text = formatter.formatCellValue(primCell);
            } else if (getProps().getIncludeEmptyPrimCells()) {
                // Empty cells may still produce (empty) tokens if configured.
                text = "";
            }
            if (text != null) {
                int start = offset;
                int end = start + text.length();
                offset += text.length();
                currentText.append(text);
                currTok = getDocument().getDocumentGraph().createToken(primaryText, start, end);
                if (primCell != null) {
                    // A merged cell extends the token to its last covered row.
                    endCell = getLastCell(primCell, mergedCells);
                }
            }
            if (currTok != null) {
                if (lastTok != null && getProps().getAddOrderRelation()) {
                    addOrderRelation(lastTok, currTok, headerRow.getCell(primText).toString());
                }
                // add timeline relation
                addTimelineRelation(currTok, currRow, endCell, corpusSheet);
                // remember all SToken
                currentTokList.add(currTok);
                // insert space between tokens
                if (text != null && (currRow != corpusSheet.getLastRowNum())) {
                    currentText.append(" ");
                    offset++;
                }
            }
            if (currTok != null) {
                lastTok = currTok;
            }
            currRow++;
        } // end for each token row
        primaryText.setText(currentText.toString());
        progressProcessedNumberOfColumns++;
        setProgress((double) progressProcessedNumberOfColumns
                / (double) progressTotalNumberOfColumns);
        // Attach the collected tokens to the layer configured for this tier.
        if (getProps().getLayer() != null) {
            if (currentTokList != null && layerTierCouples.size() > 0) {
                if (layerTierCouples.get(primaryText.getName()) != null) {
                    SLayer sLayer = layerTierCouples.get(primaryText.getName());
                    getDocument().getDocumentGraph().addLayer(sLayer);
                    for (SToken t : currentTokList) {
                        sLayer.addNode(t);
                    }
                }
            }
        }
    } // end for each primTextPos
    return progressProcessedNumberOfColumns;
}