List of usage examples for the org.apache.poi.ss.usermodel.DataFormatter constructor:
public DataFormatter()
From source file:net.sf.dvstar.swirl.desktopdbf.data.ExcelTableModel.java
License:Open Source License
/**
 * Builds a table model around an already-open POI workbook and immediately
 * converts its contents to CSV.
 *
 * @param workbook the workbook whose cells this model exposes
 */
public ExcelTableModel(Workbook workbook) {
    this.workbook = workbook;
    // Evaluator forces formula cells to yield values; formatter renders
    // those values as display strings.
    this.evaluator = this.workbook.getCreationHelper().createFormulaEvaluator();
    this.formatter = new DataFormatter();
    convertToCSV();
}
From source file:net.sf.dvstar.swirl.desktopdbf.data.XLSPanelLoader.java
License:Open Source License
/** * Open an Excel workbook ready for conversion. * * @param file An instance of the File class that encapsulates a handle * to a valid Excel workbook. Note that the workbook can be in * either binary (.xls) or SpreadsheetML (.xlsx) format. * @throws java.io.FileNotFoundException Thrown if the file cannot be located. * @throws java.io.IOException Thrown if a problem occurs in the file system. * @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown * if invalid xml is found whilst parsing an input SpreadsheetML * file.// ww w . j a v a 2 s . c om */ private void openWorkbook(File file) throws FileNotFoundException, IOException, InvalidFormatException { FileInputStream fis = null; try { System.out.println("Opening workbook [" + file.getName() + "]"); fis = new FileInputStream(file); // Open the workbook and then create the FormulaEvaluator and // DataFormatter instances that will be needed to, respectively, // force evaluation of forumlae found in cells and create a // formatted String encapsulating the cells contents. this.workbook = WorkbookFactory.create(fis); this.evaluator = this.workbook.getCreationHelper().createFormulaEvaluator(); this.formatter = new DataFormatter(); } finally { if (fis != null) { fis.close(); } } }
From source file:net.sf.taverna.t2.activities.spreadsheet.ExcelSpreadsheetReader.java
License:Open Source License
/**
 * Streams the first sheet of the spreadsheet to the given row processor,
 * restricted to the supplied row/column ranges.
 *
 * @param inputStream     spreadsheet data (.xls or .xlsx)
 * @param rowRange        rows to read; an end &lt; 0 means "to the last row"
 * @param columnRange     columns to read
 * @param ignoreBlankRows when true, rows whose cells are all null are skipped
 * @param rowProcessor    receives each materialised row
 * @throws SpreadsheetReadException if the stream is unreadable or not a spreadsheet
 */
public void read(InputStream inputStream, Range rowRange, Range columnRange, boolean ignoreBlankRows,
        SpreadsheetRowProcessor rowProcessor) throws SpreadsheetReadException {
    final Workbook workbook;
    try {
        workbook = WorkbookFactory.create(inputStream);
    } catch (InvalidFormatException e) {
        throw new SpreadsheetReadException("The file does not have a compatible spreadsheet format", e);
    } catch (IOException | IllegalArgumentException e) {
        // Both failure modes carried the same message in the original; merged via multi-catch.
        throw new SpreadsheetReadException("The spreadsheet stream could not be read", e);
    }

    DataFormatter formatter = new DataFormatter();
    workbook.setMissingCellPolicy(Row.CREATE_NULL_AS_BLANK);
    Sheet sheet = workbook.getSheetAt(0);

    // An open-ended row range means "read to the sheet's last row".
    if (rowRange.getEnd() < 0) {
        rowRange.setEnd(sheet.getLastRowNum());
        logger.debug("No end of row range specified, setting to " + rowRange.getEnd());
    }

    SortedMap<Integer, String> rowData = new TreeMap<Integer, String>();
    for (int rowIndex = rowRange.getStart(); rowIndex <= rowRange.getEnd(); rowIndex++) {
        if (!rowRange.contains(rowIndex)) {
            continue;
        }
        boolean blankRow = true;
        Row row = sheet.getRow(rowIndex);
        for (int columnIndex = columnRange.getStart(); columnIndex <= columnRange.getEnd(); columnIndex++) {
            if (!columnRange.contains(columnIndex)) {
                continue;
            }
            String value = null;
            if (row != null) {
                Cell cell = row.getCell(columnIndex);
                if (cell != null) {
                    value = getCellValue(cell, formatter);
                }
            }
            if (value != null) {
                blankRow = false;
            }
            rowData.put(columnIndex, value);
            // Flush the accumulated row once the final in-range column is reached.
            if (columnIndex == columnRange.getEnd()) {
                if (!ignoreBlankRows || !blankRow) {
                    rowProcessor.processRow(rowIndex, rowData);
                }
                rowData = new TreeMap<Integer, String>();
            }
        }
    }
}
From source file:nl.architolk.ldt.processors.ExcelConverter.java
License:Open Source License
/**
 * Decodes a base64 Excel (.xlsx) input, then emits its contents as SAX events
 * in a simple {@code workbook > sheet > row > column} XML structure.
 *
 * @param contentHandler receives the generated SAX events
 * @throws SAXException propagated from the content handler
 * @throws OXFException wrapping any I/O failure while reading the workbook
 */
public void generateData(PipelineContext context, ContentHandler contentHandler) throws SAXException {
    try {
        // Read binary content of Excel file (arrives base64-encoded).
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        Base64XMLReceiver base64ContentHandler = new Base64XMLReceiver(os);
        readInputAsSAX(context, INPUT_DATA, base64ContentHandler);
        final byte[] fileContent = os.toByteArray();
        final java.io.ByteArrayInputStream bais = new ByteArrayInputStream(fileContent);

        // Create workbook plus the formatter/evaluator used to render cell values.
        XSSFWorkbook workbook = new XSSFWorkbook(bais);
        DataFormatter formatter = new DataFormatter();
        XSSFFormulaEvaluator evaluator = new XSSFFormulaEvaluator(workbook);

        contentHandler.startDocument();
        contentHandler.startElement("", "workbook", "workbook", new AttributesImpl());
        for (int s = 0; s < workbook.getNumberOfSheets(); s++) {
            XSSFSheet sheet = workbook.getSheetAt(s);
            AttributesImpl sheetAttr = new AttributesImpl();
            sheetAttr.addAttribute("", "name", "name", "CDATA", sheet.getSheetName());
            contentHandler.startElement("", "sheet", "sheet", sheetAttr);
            for (int r = 0; r <= sheet.getLastRowNum(); r++) {
                XSSFRow row = sheet.getRow(r);
                if (row != null) {
                    AttributesImpl rowAttr = new AttributesImpl();
                    rowAttr.addAttribute("", "id", "id", "CDATA", Integer.toString(r));
                    contentHandler.startElement("", "row", "row", rowAttr);
                    for (int c = 0; c < row.getLastCellNum(); c++) {
                        XSSFCell cell = row.getCell(c);
                        if (cell != null) {
                            try {
                                String cellvalue = formatter.formatCellValue(cell, evaluator);
                                // BUG FIX: the original used `cellvalue != ""`, which
                                // compares references, not contents, so empty cell values
                                // were still emitted. Compare contents instead.
                                if (!cellvalue.isEmpty()) {
                                    AttributesImpl columnAttr = new AttributesImpl();
                                    columnAttr.addAttribute("", "id", "id", "CDATA",
                                            Integer.toString(cell.getColumnIndex()));
                                    contentHandler.startElement("", "column", "column", columnAttr);
                                    contentHandler.characters(cellvalue.toCharArray(), 0, cellvalue.length());
                                    contentHandler.endElement("", "column", "column");
                                }
                            } catch (Exception ignored) {
                                // Deliberate best-effort: a cell that cannot be evaluated
                                // or formatted is skipped rather than aborting the export.
                            }
                        }
                    }
                    contentHandler.endElement("", "row", "row");
                }
            }
            contentHandler.endElement("", "sheet", "sheet");
        }
        contentHandler.endElement("", "workbook", "workbook");
        contentHandler.endDocument();
    } catch (IOException e) {
        throw new OXFException(e);
    }
}
From source file:org.alfresco.repo.web.scripts.person.UserCSVUploadPost.java
License:Open Source License
private void processSpreadsheetUpload(Workbook wb, List<Map<QName, String>> users) throws IOException { if (wb.getNumberOfSheets() > 1) { logger.info("Uploaded Excel file has " + wb.getNumberOfSheets() + " sheets, ignoring all except the first one"); }/*from w w w. j a v a2s . c o m*/ int firstRow = 0; Sheet s = wb.getSheetAt(0); DataFormatter df = new DataFormatter(); String[][] data = new String[s.getLastRowNum() + 1][]; // If there is a heading freezepane row, skip it PaneInformation pane = s.getPaneInformation(); if (pane != null && pane.isFreezePane() && pane.getHorizontalSplitTopRow() > 0) { firstRow = pane.getHorizontalSplitTopRow(); logger.debug("Skipping excel freeze header of " + firstRow + " rows"); } // Process each row in turn, getting columns up to our limit for (int row = firstRow; row <= s.getLastRowNum(); row++) { Row r = s.getRow(row); if (r != null) { String[] d = new String[COLUMNS.length]; for (int cn = 0; cn < COLUMNS.length; cn++) { Cell cell = r.getCell(cn); if (cell != null && cell.getCellType() != Cell.CELL_TYPE_BLANK) { d[cn] = df.formatCellValue(cell); } } data[row] = d; } } // Handle the contents processSpreadsheetUpload(data, users); }
From source file:org.apache.metamodel.excel.XlsxSheetToRowsHandler.java
License:Apache License
/**
 * Factory hook for the formatter used to render cell values as text.
 *
 * @return a freshly constructed {@link DataFormatter} on every call
 */
private DataFormatter getDataFormatter() {
    return new DataFormatter();
}
From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java
License:Apache License
/** * Handles an individual Excel sheet from the entire Excel document. Each sheet will result in an individual flowfile. * * @param session//from www.j a v a 2 s . co m * The NiFi ProcessSession instance for the current invocation. */ private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF, final InputStream sheetInputStream, ExcelSheetReadConfig readConfig, CSVFormat csvFormat) throws IOException { FlowFile ff = session.create(); try { final DataFormatter formatter = new DataFormatter(); final InputSource sheetSource = new InputSource(sheetInputStream); final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat); final XMLReader parser = SAXHelper.newXMLReader(); //If Value Formatting is set to false then don't pass in the styles table. // This will cause the XSSF Handler to return the raw value instead of the formatted one. final StylesTable sst = readConfig.getFormatValues() ? readConfig.getStyles() : null; final XSSFSheetXMLHandler handler = new XSSFSheetXMLHandler(sst, null, readConfig.getSharedStringsTable(), sheetHandler, formatter, false); parser.setContentHandler(handler); ff = session.write(ff, new OutputStreamCallback() { @Override public void process(OutputStream out) throws IOException { PrintStream outPrint = new PrintStream(out); sheetHandler.setOutput(outPrint); try { parser.parse(sheetSource); sheetInputStream.close(); sheetHandler.close(); outPrint.close(); } catch (SAXException se) { getLogger().error("Error occurred while processing Excel sheet {}", new Object[] { readConfig.getSheetName() }, se); } } }); ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName()); ff = session.putAttribute(ff, ROW_NUM, new Long(sheetHandler.getRowCount()).toString()); if (StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key()))) { ff = session.putAttribute(ff, SOURCE_FILE_NAME, originalParentFF.getAttribute(CoreAttributes.FILENAME.key())); } else { ff = session.putAttribute(ff, 
SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME); } //Update the CoreAttributes.FILENAME to have the .csv extension now. Also update MIME.TYPE ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()), ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName())); ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); session.transfer(ff, SUCCESS); } catch (SAXException | ParserConfigurationException saxE) { getLogger().error("Failed to create instance of Parser.", saxE); ff = session.putAttribute(ff, ConvertExcelToCSVProcessor.class.getName() + ".error", saxE.getMessage()); session.transfer(ff, FAILURE); } finally { sheetInputStream.close(); } }
From source file:org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java
License:Apache License
/**
 * Decorates an event-based Excel extractor so cell values are rendered
 * with a locale-aware {@link DataFormatter}.
 *
 * @param context   parse context forwarded to the superclass
 * @param extractor extractor to decorate; switched to emit results, not formulas
 * @param locale    formatting locale; null selects the default-locale formatter
 */
public XSSFExcelExtractorDecorator(ParseContext context, XSSFEventBasedExcelExtractor extractor,
        Locale locale) {
    super(context, extractor);
    this.extractor = extractor;
    // Extract evaluated results rather than the formula strings themselves.
    extractor.setFormulasNotResults(false);
    extractor.setLocale(locale);
    formatter = (locale == null) ? new DataFormatter() : new DataFormatter(locale);
}
From source file:org.corpus_tools.peppermodules.spreadsheet.Spreadsheet2SaltMapper.java
License:Apache License
/**
 * Add annotations to the salt graph.
 *
 * For every annotation tier (column) mapped to a primary-text tier in
 * {@code annoPrimRelations}, walks the sheet row by row, builds a span over
 * the tokens covered by each non-empty annotation cell (honouring merged
 * cells), and attaches the cell's formatted text as an annotation.
 *
 * @param annoPrimRelations maps annotation column index -> primary-text column index
 * @param corpusSheet sheet holding the corpus data; row 0 is the header
 * @param mergedCells merged-cell lookup used to find a cell's true end row
 * @param layerTierCouples maps tier name -> SLayer the resulting span is added to
 * @param progressProcessedNumberOfColumns running count used for progress reporting
 */
private void setAnnotations(HashMap<Integer, Integer> annoPrimRelations, Sheet corpusSheet,
        Table<Integer, Integer, CellRangeAddress> mergedCells, Map<String, SLayer> layerTierCouples,
        int progressProcessedNumberOfColumns) {
    if (!annoPrimRelations.isEmpty()) {
        Row headerRow = corpusSheet.getRow(0);
        // Formatter keeps the literal cell text (e.g. integers are not
        // rendered as decimals).
        DataFormatter formatter = new DataFormatter();
        int progressTotalNumberOfColumns = annoPrimRelations.keySet().size();
        for (int annoTier : annoPrimRelations.keySet()) {
            // NOTE(review): annoSpan is carried across row iterations, so the
            // layer-assignment block below can see the span from an earlier,
            // non-empty row — verify this is intended.
            SSpan annoSpan = null;
            // Row 0 is the header, so data rows start at 1.
            int currAnno = 1;
            while (currAnno < corpusSheet.getPhysicalNumberOfRows()) {
                String annoName = headerRow.getCell(annoTier).toString();
                Row row = corpusSheet.getRow(currAnno);
                Cell annoCell = row.getCell(annoTier);
                if (annoCell != null && !annoCell.toString().isEmpty()) {
                    String annoText = "";
                    annoText = formatter.formatCellValue(annoCell);
                    // Spans cover timeline positions: start is the 0-based row,
                    // end extends to the last row of a merged cell (if any).
                    int annoStart = currAnno - 1;
                    int annoEnd = getLastCell(annoCell, mergedCells);
                    DataSourceSequence<Integer> sequence = new DataSourceSequence<Integer>();
                    sequence.setStart(annoStart);
                    sequence.setEnd(annoEnd);
                    sequence.setDataSource(getDocument().getDocumentGraph().getTimeline());
                    List<SToken> sTokens = getDocument().getDocumentGraph().getTokensBySequence(sequence);
                    List<SToken> tokenOfSpan = new ArrayList<>();
                    if (sTokens == null) {
                        // No tokens under this annotation: the annotation tier is
                        // not aligned with its primary text.
                        SpreadsheetImporter.logger.error("Segmentation error: The segmentation of the tier \""
                                + headerRow.getCell(annoTier).toString() + "\" in the document: \""
                                + getResourceURI().lastSegment() + "\" in line: " + currAnno
                                + " does not match to its primary text: \""
                                + headerRow.getCell(annoPrimRelations.get(annoTier)).toString() + "\".");
                    } else {
                        // Keep only tokens that belong to this tier's primary text.
                        for (SToken tok : sTokens) {
                            STextualDS textualDS = getTextualDSForNode(tok,
                                    getDocument().getDocumentGraph());
                            if (textualDS.getName()
                                    .equals(headerRow.getCell(annoPrimRelations.get(annoTier)).toString())) {
                                tokenOfSpan.add(tok);
                            }
                        }
                    }
                    annoSpan = getDocument().getDocumentGraph().createSpan(tokenOfSpan);
                    if (annoSpan != null && annoName != null && !annoName.isEmpty()) {
                        // remove primary text info of annotation if given,
                        // e.g. "pos[text1]" -> "pos"
                        if (annoName.matches(".+\\[.+\\]")) {
                            annoName = annoName.split("\\[")[0];
                        }
                        annoSpan.createAnnotation(null, annoName, annoText);
                        annoSpan.setName(annoName);
                    }
                }
                // Attach the span to its configured layer, if layers are in use.
                if (getProps().getLayer() != null && annoSpan != null) {
                    if (layerTierCouples.size() > 0) {
                        if (layerTierCouples.get(annoName) != null) {
                            SLayer sLayer = layerTierCouples.get(annoName);
                            getDocument().getDocumentGraph().addLayer(sLayer);
                            sLayer.addNode(annoSpan);
                        }
                    }
                }
                currAnno++;
            } // end for each row of annotation
            progressProcessedNumberOfColumns++;
            setProgress((double) progressProcessedNumberOfColumns
                    / (double) progressTotalNumberOfColumns);
        } // end for each annotation layer
    } else {
        SpreadsheetImporter.logger.warn("No annotations except for primary texts found in document \""
                + getResourceURI().lastSegment() + "\".");
    }
}
From source file:org.corpus_tools.peppermodules.spreadsheet.Spreadsheet2SaltMapper.java
License:Apache License
/**
 * Set the primary text of each document.
 *
 * For every primary-text column in {@code primTextPos}, concatenates the
 * formatted cell values into an STextualDS (one token per non-empty cell,
 * space-separated), creates timeline and optional order relations for each
 * token, and finally attaches the tokens to their configured layer.
 *
 * @param corpusSheet sheet holding the corpus data; row 0 is the header
 * @param primTextPos column indices of the primary-text tiers
 * @param annoPrimRelations maps annotation column -> primary-text column (unused here)
 * @param headerRow header row providing tier names
 * @param mergedCells merged-cell lookup used to find a token's end row
 * @param layerTierCouples maps tier name -> SLayer the tokens are added to
 * @return number of processed primary-text columns
 */
private int setPrimText(Sheet corpusSheet, List<Integer> primTextPos,
        HashMap<Integer, Integer> annoPrimRelations, Row headerRow,
        Table<Integer, Integer, CellRangeAddress> mergedCells, Map<String, SLayer> layerTierCouples) {
    // initialize with number of token we have to create
    int progressTotalNumberOfColumns = primTextPos.size();
    // add each annotation to this number
    int progressProcessedNumberOfColumns = 0;
    // use formater to ensure that e.g. integers will not be converted into
    // decimals
    DataFormatter formatter = new DataFormatter();
    // save all tokens of the current primary text
    // NOTE(review): this list is never cleared between primary texts, so the
    // layer-assignment loop below adds tokens of ALL columns processed so
    // far — verify this is intended.
    List<SToken> currentTokList = new ArrayList<>();
    for (int primText : primTextPos) {
        // initialize primaryText
        STextualDS primaryText = SaltFactory.createSTextualDS();
        StringBuilder currentText = new StringBuilder();
        if (headerRow.getCell(primText) != null) {
            primaryText.setName(headerRow.getCell(primText).toString());
        }
        getDocument().getDocumentGraph().addNode(primaryText);
        // Character offset into the text being built.
        int offset = currentText.length();
        SToken lastTok = null;
        // start with the second row of the table, since the first row holds
        // the name of each tier
        int currRow = 1;
        while (currRow < corpusSheet.getPhysicalNumberOfRows()) {
            // iterate through all rows of the given corpus sheet
            Row row = corpusSheet.getRow(currRow);
            Cell primCell = row.getCell(primText);
            SToken currTok = null;
            int endCell = currRow;
            String text = null;
            if (primCell != null && !primCell.toString().isEmpty()) {
                text = formatter.formatCellValue(primCell);
            } else if (getProps().getIncludeEmptyPrimCells()) {
                // Empty cells may still produce (empty) tokens if configured.
                text = "";
            }
            if (text != null) {
                int start = offset;
                int end = start + text.length();
                offset += text.length();
                currentText.append(text);
                currTok = getDocument().getDocumentGraph().createToken(primaryText, start, end);
                if (primCell != null) {
                    // A merged cell extends the token to its last covered row.
                    endCell = getLastCell(primCell, mergedCells);
                }
            }
            if (currTok != null) {
                if (lastTok != null && getProps().getAddOrderRelation()) {
                    addOrderRelation(lastTok, currTok, headerRow.getCell(primText).toString());
                }
                // add timeline relation
                addTimelineRelation(currTok, currRow, endCell, corpusSheet);
                // remember all SToken
                currentTokList.add(currTok);
                // insert space between tokens
                if (text != null && (currRow != corpusSheet.getLastRowNum())) {
                    currentText.append(" ");
                    offset++;
                }
            }
            if (currTok != null) {
                lastTok = currTok;
            }
            currRow++;
        } // end for each token row
        primaryText.setText(currentText.toString());
        progressProcessedNumberOfColumns++;
        setProgress((double) progressProcessedNumberOfColumns
                / (double) progressTotalNumberOfColumns);
        // Attach the collected tokens to the layer configured for this tier.
        if (getProps().getLayer() != null) {
            if (currentTokList != null && layerTierCouples.size() > 0) {
                if (layerTierCouples.get(primaryText.getName()) != null) {
                    SLayer sLayer = layerTierCouples.get(primaryText.getName());
                    getDocument().getDocumentGraph().addLayer(sLayer);
                    for (SToken t : currentTokList) {
                        sLayer.addNode(t);
                    }
                }
            }
        }
    } // end for each primTextPos
    return progressProcessedNumberOfColumns;
}