Example usage for org.apache.poi.ss.usermodel DataFormatter DataFormatter

List of usage examples for org.apache.poi.ss.usermodel DataFormatter DataFormatter

Introduction

In this page you can find the example usage for org.apache.poi.ss.usermodel DataFormatter DataFormatter.

Prototype

public DataFormatter() 

Source Link

Document

Creates a formatter using the default locale (see Locale#getDefault()).

Usage

From source file:net.sf.dvstar.swirl.desktopdbf.data.ExcelTableModel.java

License:Open Source License

/**
 * Builds a table model around an already-opened POI workbook.
 * Creates the formula evaluator and text formatter for the workbook,
 * then immediately converts its contents to CSV.
 *
 * @param workbook the spreadsheet to expose through this model
 */
public ExcelTableModel(Workbook workbook) {
    this.workbook = workbook;
    this.evaluator = this.workbook.getCreationHelper().createFormulaEvaluator();
    // DataFormatter renders cells as the displayed text (respects cell formats)
    this.formatter = new DataFormatter();

    convertToCSV();
}

From source file:net.sf.dvstar.swirl.desktopdbf.data.XLSPanelLoader.java

License:Open Source License

/**
 * Open an Excel workbook ready for conversion.
 *
 * Creates the FormulaEvaluator and DataFormatter instances that will be
 * needed to, respectively, force evaluation of formulae found in cells and
 * create a formatted String encapsulating the cells contents.
 *
 * @param file An instance of the File class that encapsulates a handle
 *        to a valid Excel workbook. Note that the workbook can be in
 *        either binary (.xls) or SpreadsheetML (.xlsx) format.
 * @throws java.io.FileNotFoundException Thrown if the file cannot be located.
 * @throws java.io.IOException Thrown if a problem occurs in the file system.
 * @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown
 *         if invalid xml is found whilst parsing an input SpreadsheetML
 *         file.
 */
private void openWorkbook(File file) throws FileNotFoundException, IOException, InvalidFormatException {
    System.out.println("Opening workbook [" + file.getName() + "]");

    // try-with-resources guarantees the stream is closed on every path,
    // replacing the original manual null-check/finally close.
    try (FileInputStream fis = new FileInputStream(file)) {
        this.workbook = WorkbookFactory.create(fis);
        this.evaluator = this.workbook.getCreationHelper().createFormulaEvaluator();
        this.formatter = new DataFormatter();
    }
}

From source file:net.sf.taverna.t2.activities.spreadsheet.ExcelSpreadsheetReader.java

License:Open Source License

/**
 * Streams the first sheet of a spreadsheet to the given row processor.
 *
 * @param inputStream source of the spreadsheet data
 * @param rowRange rows to read; a negative end is clamped to the sheet's last row
 * @param columnRange columns to read within each row
 * @param ignoreBlankRows when true, rows whose cells are all null are not emitted
 * @param rowProcessor callback receiving each row as a column-index -> text map
 * @throws SpreadsheetReadException if the stream is not a readable spreadsheet
 */
public void read(InputStream inputStream, Range rowRange, Range columnRange, boolean ignoreBlankRows,
        SpreadsheetRowProcessor rowProcessor) throws SpreadsheetReadException {
    Workbook workbook;
    try {
        workbook = WorkbookFactory.create(inputStream);
    } catch (InvalidFormatException e) {
        throw new SpreadsheetReadException("The file does not have a compatible spreadsheet format", e);
    } catch (IOException | IllegalArgumentException e) {
        // Both failure modes map to the same caller-facing error, so use
        // a single multi-catch instead of two duplicated catch bodies.
        throw new SpreadsheetReadException("The spreadsheet stream could not be read", e);
    }

    // Renders cells as their displayed text (respects cell formats).
    DataFormatter dataFormatter = new DataFormatter();

    workbook.setMissingCellPolicy(Row.CREATE_NULL_AS_BLANK);
    Sheet sheet = workbook.getSheetAt(0);

    if (rowRange.getEnd() < 0) {
        rowRange.setEnd(sheet.getLastRowNum());
        logger.debug("No end of row range specified, setting to " + rowRange.getEnd());
    }

    SortedMap<Integer, String> currentDataRow = new TreeMap<Integer, String>();

    for (int rowIndex = rowRange.getStart(); rowIndex <= rowRange.getEnd(); rowIndex++) {
        boolean blankRow = true;
        if (rowRange.contains(rowIndex)) {
            Row row = sheet.getRow(rowIndex);
            for (int columnIndex = columnRange.getStart(); columnIndex <= columnRange.getEnd(); columnIndex++) {
                if (columnRange.contains(columnIndex)) {
                    String value = null;
                    if (row != null) {
                        Cell cell = row.getCell(columnIndex);
                        if (cell != null) {
                            value = getCellValue(cell, dataFormatter);
                        }
                    }
                    if (value != null) {
                        blankRow = false;
                    }
                    currentDataRow.put(columnIndex, value);
                    // Flush the accumulated row once the last column is reached.
                    if (columnIndex == columnRange.getEnd()) {
                        if (!ignoreBlankRows || !blankRow) {
                            rowProcessor.processRow(rowIndex, currentDataRow);
                        }
                        currentDataRow = new TreeMap<Integer, String>();
                    }
                }
            }
        }
    }

}

From source file:nl.architolk.ldt.processors.ExcelConverter.java

License:Open Source License

/**
 * Decodes a Base64-encoded Excel (.xlsx) document from the pipeline input and
 * re-emits it as SAX events: workbook &gt; sheet &gt; row &gt; column, where
 * each column element carries the formatted (and formula-evaluated) cell text.
 * Empty cells and cells that fail to format are silently skipped.
 *
 * @param context pipeline context supplying the INPUT_DATA document
 * @param contentHandler receiver of the generated SAX stream
 * @throws SAXException if the content handler rejects an event
 */
public void generateData(PipelineContext context, ContentHandler contentHandler) throws SAXException {

    try {
        // Read binary content of Excel file
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        Base64XMLReceiver base64ContentHandler = new Base64XMLReceiver(os);
        readInputAsSAX(context, INPUT_DATA, base64ContentHandler);
        final byte[] fileContent = os.toByteArray();
        final java.io.ByteArrayInputStream bais = new ByteArrayInputStream(fileContent);

        // try-with-resources: the workbook was previously never closed (leak).
        try (XSSFWorkbook workbook = new XSSFWorkbook(bais)) {
            DataFormatter formatter = new DataFormatter();
            XSSFFormulaEvaluator evaluator = new XSSFFormulaEvaluator(workbook);

            contentHandler.startDocument();
            contentHandler.startElement("", "workbook", "workbook", new AttributesImpl());

            for (int s = 0; s < workbook.getNumberOfSheets(); s++) {
                XSSFSheet sheet = workbook.getSheetAt(s);
                AttributesImpl sheetAttr = new AttributesImpl();
                sheetAttr.addAttribute("", "name", "name", "CDATA", sheet.getSheetName());
                contentHandler.startElement("", "sheet", "sheet", sheetAttr);
                for (int r = 0; r <= sheet.getLastRowNum(); r++) {
                    XSSFRow row = sheet.getRow(r);
                    if (row != null) {
                        AttributesImpl rowAttr = new AttributesImpl();
                        rowAttr.addAttribute("", "id", "id", "CDATA", Integer.toString(r));
                        contentHandler.startElement("", "row", "row", rowAttr);
                        for (int c = 0; c < row.getLastCellNum(); c++) {
                            XSSFCell cell = row.getCell(c);
                            if (cell != null) {
                                try {
                                    String cellvalue = formatter.formatCellValue(cell, evaluator);
                                    // BUG FIX: original compared with != "",
                                    // a reference comparison on String.
                                    if (!cellvalue.isEmpty()) {
                                        AttributesImpl columnAttr = new AttributesImpl();
                                        columnAttr.addAttribute("", "id", "id", "CDATA",
                                                Integer.toString(cell.getColumnIndex()));
                                        contentHandler.startElement("", "column", "column", columnAttr);
                                        contentHandler.characters(cellvalue.toCharArray(), 0, cellvalue.length());
                                        contentHandler.endElement("", "column", "column");
                                    }
                                } catch (Exception ignored) {
                                    // Deliberate best-effort: a cell that cannot
                                    // be formatted/evaluated is skipped.
                                }
                            }
                        }
                        contentHandler.endElement("", "row", "row");
                    }
                }
                contentHandler.endElement("", "sheet", "sheet");
            }

            contentHandler.endElement("", "workbook", "workbook");
            contentHandler.endDocument();
        }

    } catch (IOException e) {
        throw new OXFException(e);
    }
}

From source file:org.alfresco.repo.web.scripts.person.UserCSVUploadPost.java

License:Open Source License

/**
 * Extracts user rows from the first sheet of an uploaded workbook into a
 * string matrix (one entry per spreadsheet row, up to COLUMNS.length cells)
 * and hands the matrix on for user creation.
 *
 * @param wb the uploaded workbook; only the first sheet is read
 * @param users accumulator receiving one property map per parsed user
 * @throws IOException propagated from downstream processing
 */
private void processSpreadsheetUpload(Workbook wb, List<Map<QName, String>> users) throws IOException {
    if (wb.getNumberOfSheets() > 1) {
        logger.info("Uploaded Excel file has " + wb.getNumberOfSheets()
                + " sheets, ignoring  all except the first one");
    }

    Sheet sheet = wb.getSheetAt(0);
    DataFormatter formatter = new DataFormatter();
    String[][] data = new String[sheet.getLastRowNum() + 1][];

    // If there is a heading freezepane row, skip it
    int firstRow = 0;
    PaneInformation pane = sheet.getPaneInformation();
    if (pane != null && pane.isFreezePane() && pane.getHorizontalSplitTopRow() > 0) {
        firstRow = pane.getHorizontalSplitTopRow();
        logger.debug("Skipping excel freeze header of " + firstRow + " rows");
    }

    // Process each row in turn, getting columns up to our limit
    for (int rowIdx = firstRow; rowIdx <= sheet.getLastRowNum(); rowIdx++) {
        Row currentRow = sheet.getRow(rowIdx);
        if (currentRow == null) {
            continue;
        }
        String[] rowValues = new String[COLUMNS.length];
        for (int col = 0; col < COLUMNS.length; col++) {
            Cell cell = currentRow.getCell(col);
            if (cell != null && cell.getCellType() != Cell.CELL_TYPE_BLANK) {
                rowValues[col] = formatter.formatCellValue(cell);
            }
        }
        data[rowIdx] = rowValues;
    }

    // Handle the contents
    processSpreadsheetUpload(data, users);
}

From source file:org.apache.metamodel.excel.XlsxSheetToRowsHandler.java

License:Apache License

/** Supplies a fresh POI formatter (default locale) for rendering cell text. */
private DataFormatter getDataFormatter() {
    DataFormatter formatter = new DataFormatter();
    return formatter;
}

From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java

License:Apache License

/**
 * Handles an individual Excel sheet from the entire Excel document. Each sheet will result in an individual flowfile.
 *
 * The sheet XML is streamed through a SAX parser whose handler writes CSV
 * into the new flowfile; sheet name, row count, source filename, the .csv
 * filename and the CSV MIME type are recorded as flowfile attributes.
 *
 * @param session
 *  The NiFi ProcessSession instance for the current invocation.
 * @param originalParentFF the incoming flowfile the sheet was extracted from
 * @param sheetInputStream raw XML of the sheet; always closed before returning
 * @param readConfig per-sheet read configuration (styles, shared strings, name)
 * @param csvFormat the CSV dialect to emit
 * @throws IOException if reading or writing the flowfile content fails
 */
private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF,
        final InputStream sheetInputStream, ExcelSheetReadConfig readConfig, CSVFormat csvFormat)
        throws IOException {

    FlowFile ff = session.create();
    try {
        final DataFormatter formatter = new DataFormatter();
        final InputSource sheetSource = new InputSource(sheetInputStream);

        final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat);

        final XMLReader parser = SAXHelper.newXMLReader();

        //If Value Formatting is set to false then don't pass in the styles table.
        // This will cause the XSSF Handler to return the raw value instead of the formatted one.
        final StylesTable sst = readConfig.getFormatValues() ? readConfig.getStyles() : null;

        final XSSFSheetXMLHandler handler = new XSSFSheetXMLHandler(sst, null,
                readConfig.getSharedStringsTable(), sheetHandler, formatter, false);

        parser.setContentHandler(handler);

        ff = session.write(ff, new OutputStreamCallback() {
            @Override
            public void process(OutputStream out) throws IOException {
                PrintStream outPrint = new PrintStream(out);
                sheetHandler.setOutput(outPrint);

                try {
                    parser.parse(sheetSource);

                    sheetInputStream.close();

                    sheetHandler.close();
                    outPrint.close();
                } catch (SAXException se) {
                    getLogger().error("Error occurred while processing Excel sheet {}",
                            new Object[] { readConfig.getSheetName() }, se);
                }
            }
        });

        ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName());
        // String.valueOf replaces the deprecated new Long(...).toString() boxing.
        ff = session.putAttribute(ff, ROW_NUM, String.valueOf(sheetHandler.getRowCount()));

        if (StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key()))) {
            ff = session.putAttribute(ff, SOURCE_FILE_NAME,
                    originalParentFF.getAttribute(CoreAttributes.FILENAME.key()));
        } else {
            ff = session.putAttribute(ff, SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME);
        }

        //Update the CoreAttributes.FILENAME to have the .csv extension now. Also update MIME.TYPE
        ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(),
                updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()),
                        ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName()));
        ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);

        session.transfer(ff, SUCCESS);

    } catch (SAXException | ParserConfigurationException saxE) {
        getLogger().error("Failed to create instance of Parser.", saxE);
        ff = session.putAttribute(ff, ConvertExcelToCSVProcessor.class.getName() + ".error", saxE.getMessage());
        session.transfer(ff, FAILURE);
    } finally {
        // Safe even if the write callback already closed it (close is idempotent).
        sheetInputStream.close();
    }
}

From source file:org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java

License:Apache License

/**
 * Decorates an event-based Excel extractor so that formula results (not the
 * formulas themselves) are extracted, using the supplied locale for number
 * and date formatting.
 *
 * @param context the parse context passed to the superclass
 * @param extractor the underlying event-based extractor to configure and wrap
 * @param locale formatting locale; may be null to use the default locale
 */
public XSSFExcelExtractorDecorator(ParseContext context, XSSFEventBasedExcelExtractor extractor,
        Locale locale) {
    super(context, extractor);

    this.extractor = extractor;
    extractor.setFormulasNotResults(false);
    extractor.setLocale(locale);

    // Fall back to the default-locale formatter when no locale was supplied.
    formatter = (locale == null) ? new DataFormatter() : new DataFormatter(locale);
}

From source file:org.corpus_tools.peppermodules.spreadsheet.Spreadsheet2SaltMapper.java

License:Apache License

/**
 * Add annotations to the salt graph.
 *
 * For every annotation tier, walks the sheet row by row, creates a span over
 * the tokens of the associated primary text that fall inside the (possibly
 * merged) annotation cell, and attaches the formatted cell text as an
 * annotation on that span.
 *
 * @param annoPrimRelations maps each annotation column index to the column
 *        index of the primary text tier it annotates
 * @param corpusSheet sheet holding the document data; row 0 holds tier names
 * @param mergedCells lookup of merged cell regions, used to find the last row
 *        covered by an annotation cell
 * @param layerTierCouples maps tier names to the SLayer they belong to
 * @param progressProcessedNumberOfColumns running count of already-processed
 *        columns, used for progress reporting
 */
private void setAnnotations(HashMap<Integer, Integer> annoPrimRelations, Sheet corpusSheet,
        Table<Integer, Integer, CellRangeAddress> mergedCells, Map<String, SLayer> layerTierCouples,
        int progressProcessedNumberOfColumns) {
    if (!annoPrimRelations.isEmpty()) {
        Row headerRow = corpusSheet.getRow(0);
        // Formatter preserves the displayed cell text (e.g. keeps integers
        // from being rendered as decimals).
        DataFormatter formatter = new DataFormatter();
        int progressTotalNumberOfColumns = annoPrimRelations.keySet().size();
        for (int annoTier : annoPrimRelations.keySet()) {

            // NOTE(review): annoSpan lives across row iterations, so on rows
            // without an annotation cell the layer block further down reuses
            // the span created for an earlier row — confirm this is intended.
            SSpan annoSpan = null;
            // Row 0 is the header, so data rows start at 1.
            int currAnno = 1;

            while (currAnno < corpusSheet.getPhysicalNumberOfRows()) {
                String annoName = headerRow.getCell(annoTier).toString();
                Row row = corpusSheet.getRow(currAnno);
                Cell annoCell = row.getCell(annoTier);

                if (annoCell != null && !annoCell.toString().isEmpty()) {
                    String annoText = "";
                    annoText = formatter.formatCellValue(annoCell);

                    // Annotation covers timeline positions [row-1, last merged row].
                    int annoStart = currAnno - 1;
                    int annoEnd = getLastCell(annoCell, mergedCells);
                    DataSourceSequence<Integer> sequence = new DataSourceSequence<Integer>();
                    sequence.setStart(annoStart);
                    sequence.setEnd(annoEnd);
                    sequence.setDataSource(getDocument().getDocumentGraph().getTimeline());

                    List<SToken> sTokens = getDocument().getDocumentGraph().getTokensBySequence(sequence);

                    List<SToken> tokenOfSpan = new ArrayList<>();

                    if (sTokens == null) {
                        // No tokens cover this timeline range: the annotation is
                        // not aligned with its primary text.
                        SpreadsheetImporter.logger.error("Segmentation error: The segmentation of the tier \""
                                + headerRow.getCell(annoTier).toString() + "\" in the document: \""
                                + getResourceURI().lastSegment() + "\" in line: " + currAnno
                                + " does not match to its primary text: \""
                                + headerRow.getCell(annoPrimRelations.get(annoTier)).toString() + "\".");
                    } else {
                        // Keep only tokens that belong to the primary text this
                        // annotation tier refers to.
                        for (SToken tok : sTokens) {
                            STextualDS textualDS = getTextualDSForNode(tok, getDocument().getDocumentGraph());
                            if (textualDS.getName()
                                    .equals(headerRow.getCell(annoPrimRelations.get(annoTier)).toString())) {
                                tokenOfSpan.add(tok);
                            }
                        }
                    }

                    annoSpan = getDocument().getDocumentGraph().createSpan(tokenOfSpan);

                    if (annoSpan != null && annoName != null && !annoName.isEmpty()) {
                        // remove primary text info of annotation if given,
                        // e.g. "pos[text1]" becomes "pos"
                        if (annoName.matches(".+\\[.+\\]")) {
                            annoName = annoName.split("\\[")[0];
                        }
                        annoSpan.createAnnotation(null, annoName, annoText);
                        annoSpan.setName(annoName);
                    }
                }

                // Optionally attach the span to its configured layer.
                if (getProps().getLayer() != null && annoSpan != null) {

                    if (layerTierCouples.size() > 0) {
                        if (layerTierCouples.get(annoName) != null) {
                            SLayer sLayer = layerTierCouples.get(annoName);
                            getDocument().getDocumentGraph().addLayer(sLayer);
                            sLayer.addNode(annoSpan);
                        }
                    }
                }
                currAnno++;
            } // end for each row of annotation

            progressProcessedNumberOfColumns++;
            setProgress((double) progressProcessedNumberOfColumns / (double) progressTotalNumberOfColumns);
        } // end for each annotation layer
    } else {
        SpreadsheetImporter.logger.warn("No annotations except for primary texts found in document \""
                + getResourceURI().lastSegment() + "\".");
    }
}

From source file:org.corpus_tools.peppermodules.spreadsheet.Spreadsheet2SaltMapper.java

License:Apache License

/**
 * Set the primary text of each document.
 *
 * For every primary-text column, concatenates the formatted cell values into
 * one STextualDS (space-separated), creating an SToken per non-empty cell,
 * plus optional order relations, timeline relations and layer assignments.
 *
 * @param corpusSheet sheet holding the document data; row 0 holds tier names
 * @param primTextPos column indexes of the primary text tiers
 * @param annoPrimRelations maps annotation columns to their primary-text
 *        column (currently unused in this method)
 * @param headerRow the first row of the sheet, holding the tier names
 * @param mergedCells lookup of merged cell regions (a token may span rows)
 * @param layerTierCouples maps tier names to the SLayer they belong to
 * @return the number of processed columns, for progress reporting
 */
private int setPrimText(Sheet corpusSheet, List<Integer> primTextPos,
        HashMap<Integer, Integer> annoPrimRelations, Row headerRow,
        Table<Integer, Integer, CellRangeAddress> mergedCells, Map<String, SLayer> layerTierCouples) {
    // initialize with number of token we have to create
    int progressTotalNumberOfColumns = primTextPos.size();
    // add each annotation to this number

    int progressProcessedNumberOfColumns = 0;

    // use formater to ensure that e.g. integers will not be converted into
    // decimals
    DataFormatter formatter = new DataFormatter();
    // save all tokens of the current primary text
    // NOTE(review): this list is created once and never cleared, so tokens
    // accumulate across primary-text columns; the layer assignment at the
    // bottom therefore also receives tokens from earlier columns — confirm
    // this is intended.
    List<SToken> currentTokList = new ArrayList<>();
    // save all tokens of the current primary text
    for (int primText : primTextPos) {

        // initialize primaryText
        STextualDS primaryText = SaltFactory.createSTextualDS();
        StringBuilder currentText = new StringBuilder();

        // The tier name from the header row becomes the text's name.
        if (headerRow.getCell(primText) != null) {
            primaryText.setName(headerRow.getCell(primText).toString());
        }
        getDocument().getDocumentGraph().addNode(primaryText);

        int offset = currentText.length();

        SToken lastTok = null;

        // start with the second row of the table, since the first row holds
        // the name of each tier
        int currRow = 1;
        while (currRow < corpusSheet.getPhysicalNumberOfRows()) {
            // iterate through all rows of the given corpus sheet

            Row row = corpusSheet.getRow(currRow);
            Cell primCell = row.getCell(primText);
            SToken currTok = null;
            int endCell = currRow;

            String text = null;
            if (primCell != null && !primCell.toString().isEmpty()) {
                text = formatter.formatCellValue(primCell);

            } else if (getProps().getIncludeEmptyPrimCells()) {
                // Empty cells may still produce (empty) tokens when configured.
                text = "";

            }
            if (text != null) {
                // Token covers [start, end) in the concatenated primary text.
                int start = offset;
                int end = start + text.length();
                offset += text.length();
                currentText.append(text);

                currTok = getDocument().getDocumentGraph().createToken(primaryText, start, end);

                if (primCell != null) {
                    // A merged cell extends the token to its last covered row.
                    endCell = getLastCell(primCell, mergedCells);
                }
            }

            if (currTok != null) {
                if (lastTok != null && getProps().getAddOrderRelation()) {
                    addOrderRelation(lastTok, currTok, headerRow.getCell(primText).toString());
                }
                // add timeline relation
                addTimelineRelation(currTok, currRow, endCell, corpusSheet);

                // remember all SToken
                currentTokList.add(currTok);

                // insert space between tokens
                if (text != null && (currRow != corpusSheet.getLastRowNum())) {
                    currentText.append(" ");
                    offset++;
                }
            }

            if (currTok != null) {
                lastTok = currTok;
            }
            currRow++;
        } // end for each token row
        primaryText.setText(currentText.toString());

        progressProcessedNumberOfColumns++;
        setProgress((double) progressProcessedNumberOfColumns / (double) progressTotalNumberOfColumns);

        // Optionally attach the collected tokens to the tier's layer.
        if (getProps().getLayer() != null) {
            if (currentTokList != null && layerTierCouples.size() > 0) {
                if (layerTierCouples.get(primaryText.getName()) != null) {
                    SLayer sLayer = layerTierCouples.get(primaryText.getName());
                    getDocument().getDocumentGraph().addLayer(sLayer);
                    for (SToken t : currentTokList) {
                        sLayer.addNode(t);
                    }
                }
            }
        }
    } // end for each primTextPos
    return progressProcessedNumberOfColumns;
}