List of usage examples for org.apache.commons.csv.CSVFormat.TDF

CSVFormat.TDF is the predefined tab-delimited format in Apache Commons CSV: the DEFAULT format with a tab character as the delimiter and surrounding spaces ignored.
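Before the project-specific examples, a minimal self-contained sketch of parsing tab-separated input with CSVFormat.TDF (the class name and sample data are hypothetical, not taken from the projects below):

import java.io.Reader;
import java.io.StringReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class TdfExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical tab-separated input with a header row.
        Reader in = new StringReader("id\tname\n1\tAlice\n2\tBob\n");
        // TDF is the DEFAULT format with a tab delimiter; withHeader() reads
        // the column names from the first record.
        try (CSVParser parser = CSVFormat.TDF.withHeader().parse(in)) {
            for (CSVRecord record : parser) {
                System.out.println(record.get("id") + " -> " + record.get("name"));
            }
        }
    }
}

The project examples that follow combine the same format with comment markers, headers, and custom escapes.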
From source file:org.phenotips.vocabulary.internal.hpoannotations.NegativePhenotypeForDiseaseAnnotationsExtensionTest.java
@Test
public void tdfParserIsUsed() throws Exception {
    final CSVFormat result = this.extension.setupCSVParser(this.vocabulary);
    Assert.assertEquals(CSVFormat.TDF, result);
}
From source file:org.phenotips.vocabulary.internal.solr.MendelianInheritanceInMan.java
private void parseOmimData(URL sourceUrl) {
    try {
        Reader in = new InputStreamReader(sourceUrl.openConnection().getInputStream(),
            Charset.forName(ENCODING));
        for (CSVRecord row : CSVFormat.TDF.withCommentMarker('#').parse(in)) {
            // Ignore moved or removed entries
            if ("Caret".equals(row.get(0))) {
                continue;
            }
            SolrInputDocument crtTerm = new SolrInputDocument();
            // set id
            addFieldValue(ID_FIELD, row.get(1), crtTerm);
            // set symbol
            addFieldValue(SYMBOL_FIELD, SYMBOLS.get(row.get(0)), crtTerm);
            // set type (multivalued)
            for (String type : TYPES.get(row.get(0))) {
                addFieldValue(TYPE_FIELD, type, crtTerm);
            }
            // set name
            String name = StringUtils.substringBefore(row.get(2), TITLE_SEPARATOR).trim();
            addFieldValue(NAME_FIELD, name, crtTerm);
            // set short name
            String shortNameString = StringUtils.substringAfter(row.get(2), TITLE_SEPARATOR).trim();
            String[] shortNames = StringUtils.split(shortNameString, TITLE_SEPARATOR);
            for (String shortName : shortNames) {
                addFieldValue(SHORT_NAME_FIELD, shortName.trim(), crtTerm);
            }
            // set synonyms
            setListFieldValue(SYNONYM_FIELD, row.get(3), crtTerm);
            // set included name
            setListFieldValue(INCLUDED_NAME_FIELD, row.get(4), crtTerm);
            this.data.put(String.valueOf(crtTerm.get(ID_FIELD).getFirstValue()), crtTerm);
        }
    } catch (IOException ex) {
        this.logger.warn("Failed to read/parse the OMIM source: {}", ex.getMessage());
    }
}
From source file:org.phenotips.vocabulary.internal.solr.MendelianInheritanceInMan.java
private void loadGenes() {
    try (BufferedReader in = new BufferedReader(new InputStreamReader(
        new URL(GENE_ANNOTATIONS_URL).openConnection().getInputStream(), ENCODING))) {
        for (CSVRecord row : CSVFormat.TDF.withCommentMarker('#').parse(in)) {
            SolrInputDocument term = this.data.get(row.get(0).trim());
            if (term != null) {
                String gs = row.get(3).trim();
                if (StringUtils.isNotBlank(gs)) {
                    term.addField(GENE_FIELD, gs);
                }
                String eidLine = row.get(4).trim();
                if (StringUtils.isNotBlank(eidLine)) {
                    String[] eids = StringUtils.split(eidLine, ",");
                    for (String eid : eids) {
                        term.addField(GENE_FIELD, eid.trim());
                    }
                }
            }
        }
    } catch (IOException ex) {
        this.logger.error("Failed to load OMIM-Gene links: {}", ex.getMessage(), ex);
    }
}
From source file:org.phenotips.vocabulary.internal.solr.MendelianInheritanceInMan.java
private void loadGeneReviews() {
    try (BufferedReader in = new BufferedReader(new InputStreamReader(
        new URL(GENEREVIEWS_MAPPING_URL).openConnection().getInputStream(), ENCODING))) {
        for (CSVRecord row : CSVFormat.TDF.withHeader().parse(in)) {
            SolrInputDocument term = this.data.get(row.get(2));
            if (term != null) {
                term.setField("gene_reviews_link", "https://www.ncbi.nlm.nih.gov/books/" + row.get(0));
            }
        }
    } catch (IOException ex) {
        this.logger.error("Failed to load OMIM-GeneReviews links: {}", ex.getMessage(), ex);
    }
}
From source file:org.phenotips.vocabulary.internal.solr.OmimSourceParser.java
private void loadSymptoms(boolean positive) {
    String omimId = "";
    String previousOmimId = null;
    Set<String> ancestors = new HashSet<>();
    try (BufferedReader in = new BufferedReader(new InputStreamReader(
        new URL(positive ? POSITIVE_ANNOTATIONS_URL : NEGATIVE_ANNOTATIONS_URL)
            .openConnection().getInputStream(), ENCODING))) {
        for (CSVRecord row : CSVFormat.TDF.parse(in)) {
            if ("OMIM".equals(row.get(0))) {
                omimId = row.get(1);
                addAncestors(previousOmimId, omimId, ancestors, positive);
                previousOmimId = omimId;
                SolrInputDocument term = this.data.get(omimId);
                if (term != null) {
                    term.addField(positive ? "actual_symptom" : "actual_not_symptom", row.get(4));
                }
                VocabularyTerm vterm = this.hpo.getTerm(row.get(4));
                if (vterm != null) {
                    for (VocabularyTerm ancestor : vterm.getAncestorsAndSelf()) {
                        ancestors.add(ancestor.getId());
                    }
                }
            }
        }
        addAncestors(omimId, null, ancestors, positive);
    } catch (IOException ex) {
        this.logger.error("Failed to load OMIM-HPO links: {}", ex.getMessage(), ex);
    }
}
From source file:org.wheatinitiative.vitro.webapp.ontology.update.OntologyChangeParser.java
/**
 * @param diffPath path to the PromptDiff tab-delimited output file
 * @throws IOException
 */
public ArrayList<AtomicOntologyChange> parseFile(String diffPath) throws IOException {
    AtomicOntologyChange changeObj;
    ArrayList<AtomicOntologyChange> changeObjects = new ArrayList<AtomicOntologyChange>();
    InputStreamReader in = new InputStreamReader(this.getClass().getResourceAsStream(diffPath));
    Iterable<CSVRecord> records = CSVFormat.TDF.parse(in);
    Iterator<CSVRecord> rows = records.iterator();
    int rowNum = 0;
    while (rows.hasNext()) {
        rowNum++;
        CSVRecord row = rows.next();
        if (row.size() != 5) {
            log.error("Invalid PromptDiff data at row " + (rowNum + 1) + ". Expected 5 columns; found "
                + row.size());
        } else {
            changeObj = new AtomicOntologyChange();
            if (row.get(0) != null && row.get(0).length() > 0) {
                changeObj.setSourceURI(row.get(0));
            }
            if (row.get(1) != null && row.get(1).length() > 0) {
                changeObj.setDestinationURI(row.get(1));
            }
            if (row.get(4) != null && row.get(4).length() > 0) {
                changeObj.setNotes(row.get(4));
            }
            if ("Yes".equals(row.get(2))) {
                changeObj.setAtomicChangeType(AtomicChangeType.RENAME);
            } else if ("Delete".equals(row.get(3))) {
                changeObj.setAtomicChangeType(AtomicChangeType.DELETE);
            } else if ("Add".equals(row.get(3))) {
                changeObj.setAtomicChangeType(AtomicChangeType.ADD);
            } else {
                log.error("Invalid rename or change type data: '" + row.get(2) + " " + row.get(3) + "'");
            }
            log.debug(changeObj);
            changeObjects.add(changeObj);
        }
    }
    if (changeObjects.size() == 0) {
        log.debug("No ABox updates are required.");
    }
    return changeObjects;
}
From source file:org.wso2.carbon.event.simulator.core.internal.generator.csv.util.CSVReader.java
/**
 * parseFile() parses the CSV file using the delimiter specified in the CSV simulation configuration.
 *
 * @param delimiter delimiter to be used when parsing the CSV file
 * @throws IOException if an error occurs when creating the CSVParser
 */
private CSVParser parseFile(String delimiter) throws IOException {
    switch (delimiter) {
        case ",":
            return new CSVParser(fileReader, CSVFormat.DEFAULT);
        case ";":
            return new CSVParser(fileReader, CSVFormat.EXCEL);
        case "\\t":
            return new CSVParser(fileReader, CSVFormat.TDF);
        default:
            return new CSVParser(fileReader, CSVFormat.newFormat(delimiter.charAt(0)));
    }
}
From source file:org.wso2.carbon.event.simulator.csvFeedSimulation.core.CSVFeedEventSimulator.java
/**
 * This method must be called within a synchronized block to prevent multiple file simulators from running
 * simultaneously. Reads the values from the uploaded CSV file, converts them into events, and sends those
 * events to the input handler.
 * <p>
 * It uses the CSV parser library to read the CSV file.
 * {@link <a href="https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVParser.html">CSVParser</a>}
 * </p>
 * <p>
 * By default the CSV file can be separated by one of the following characters: ',', ';', or '\t'.
 * A user-defined delimiter is also supported.
 * Any field may be quoted (with double quotes).
 * Fields with embedded commas or delimiter characters must be double quoted.
 * </p>
 *
 * @param executionPlanDto ExecutionPlanDto
 * @param csvFileConfig    CSVFileSimulationDto
 */
private void sendEvent(ExecutionPlanDto executionPlanDto, CSVFileSimulationDto csvFileConfig) {
    // number of events read from the CSV file during every iteration
    long noOfEvents = 0;
    int delay = csvFileConfig.getDelay();
    // reader for reading character streams from the file
    Reader in = null;
    // CSVParser to read CSV values
    CSVParser csvParser = null;
    if (delay <= 0) {
        log.warn("Events will be sent continuously since the delay between events is set to " + delay
            + " milliseconds");
        delay = 0;
    }
    try {
        // initialize the reader
        in = new FileReader(String.valueOf(Paths.get(System.getProperty("java.io.tmpdir"),
            csvFileConfig.getFileDto().getFileInfo().getFileName())));
        // initialize the CSVParser with the appropriate CSVFormat according to the delimiter
        switch (csvFileConfig.getDelimiter()) {
            case ",":
                csvParser = CSVParser.parse(in, CSVFormat.DEFAULT);
                break;
            case ";":
                csvParser = CSVParser.parse(in, CSVFormat.EXCEL);
                break;
            case "\\t":
                csvParser = CSVParser.parse(in, CSVFormat.TDF);
                break;
            default:
                csvParser = CSVParser.parse(in, CSVFormat.newFormat(csvFileConfig.getDelimiter().charAt(0)));
        }
        int attributeSize = executionPlanDto.getInputStreamDtoMap().get(csvFileConfig.getStreamName())
            .getStreamAttributeDtos().size();
        // iterate through the CSV file line by line
        for (CSVRecord record : csvParser) {
            try {
                synchronized (this) {
                    if (isStopped) {
                        isStopped = false;
                        break;
                    }
                    if (isPaused) {
                        this.wait();
                    }
                }
                if (record.size() != attributeSize) {
                    log.warn("Number of attributes is not equal to the attribute size: " + attributeSize
                        + " attributes are needed in row no: " + (noOfEvents + 1));
                }
                String[] attributes = new String[attributeSize];
                noOfEvents = csvParser.getCurrentLineNumber();
                for (int i = 0; i < record.size(); i++) {
                    attributes[i] = record.get(i);
                }
                // convert attribute values into an event
                Event event = EventConverter.eventConverter(csvFileConfig.getStreamName(), attributes,
                    executionPlanDto);
                // TODO: 13/12/16 delete sout
                System.out.println("Input Event " + Arrays.deepToString(event.getEventData()));
                // send the event to the input handler
                send(csvFileConfig.getStreamName(), event);
                // delay between two events
                if (delay > 0) {
                    Thread.sleep(delay);
                }
            } catch (EventSimulationException e) {
                log.error("Event dropped due to an error that occurred while generating an event: "
                    + e.getMessage());
            } catch (InterruptedException e) {
                log.error("Error occurred while sending an event: " + e.getMessage());
            }
        }
    } catch (IllegalArgumentException e) {
        // TODO: 02/12/16 proper error message
        throw new EventSimulationException("File parameters are null: " + e.getMessage());
    } catch (FileNotFoundException e) {
        throw new EventSimulationException("File not found: "
            + csvFileConfig.getFileDto().getFileInfo().getFileName());
    } catch (IOException e) {
        throw new EventSimulationException("Error occurred while reading the file");
    } finally {
        try {
            if (in != null) {
                in.close();
            }
            if (csvParser != null) {
                csvParser.close();
            }
        } catch (IOException e) {
            throw new EventSimulationException("Error occurred while closing the file");
        }
    }
}
From source file:trainer.userinput.TrainingFileDB.java
public static CSVFormat getCSVFormat() {
    // Create the CSVFormat object based on TDF, with "\n" as the record delimiter
    CSVFormat csvFileFormat = CSVFormat.TDF.withRecordSeparator(NEW_LINE_SEPARATOR);
    // Escape special characters with '^' instead of quoting, since quoting is disabled
    csvFileFormat = csvFileFormat.withEscape('^');
    csvFileFormat = csvFileFormat.withQuoteMode(QuoteMode.NONE);
    return csvFileFormat;
}
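A usage sketch for the format above (the caller class, file name, and record values are hypothetical): with QuoteMode.NONE and an escape character set, Commons CSV escapes embedded delimiters with '^' rather than quoting the field.

import java.io.FileWriter;
import java.io.IOException;

import org.apache.commons.csv.CSVPrinter;

public class TrainingFileWriteExample {
    public static void main(String[] args) throws IOException {
        // Assumes TrainingFileDB.getCSVFormat() as defined above; "training.tsv" is a made-up file name.
        try (CSVPrinter printer = new CSVPrinter(new FileWriter("training.tsv"),
                TrainingFileDB.getCSVFormat())) {
            // The embedded tab is written as ^<tab> because quoting is disabled.
            printer.printRecord("user1", "answer with\ttab");
        }
    }
}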
From source file:us.parr.animl.data.DataTable.java
public static DataTable loadCSV(String fileName, String formatType, VariableType[] colTypesOverride,
                                String[] colNamesOverride, boolean hasHeaderRow) {
    try {
        // use Apache Commons IO + CSV to load, but convert to a list of String[];
        // byte-order markers are handled if present at the start of the file.
        FileInputStream fis = new FileInputStream(fileName);
        final Reader reader = new InputStreamReader(new BOMInputStream(fis), "UTF-8");
        CSVFormat format;
        if (formatType == null) {
            format = hasHeaderRow ? CSVFormat.RFC4180.withHeader() : CSVFormat.RFC4180;
        } else {
            switch (formatType.toLowerCase()) {
                case "tsv":
                    format = hasHeaderRow ? CSVFormat.TDF.withHeader() : CSVFormat.TDF;
                    break;
                case "mysql":
                    format = hasHeaderRow ? CSVFormat.MYSQL.withHeader() : CSVFormat.MYSQL;
                    break;
                case "excel":
                    format = hasHeaderRow ? CSVFormat.EXCEL.withHeader() : CSVFormat.EXCEL;
                    break;
                case "rfc4180":
                default:
                    format = hasHeaderRow ? CSVFormat.RFC4180.withHeader() : CSVFormat.RFC4180;
                    break;
            }
        }
        final CSVParser parser = new CSVParser(reader, format);
        List<String[]> rows = new ArrayList<>();
        // getHeaderMap() is null when the format has no header row
        int numHeaderNames = parser.getHeaderMap() != null ? parser.getHeaderMap().size() : 0;
        try {
            for (final CSVRecord record : parser) {
                String[] row = new String[record.size()];
                for (int j = 0; j < record.size(); j++) {
                    row[j] = record.get(j);
                }
                rows.add(row);
            }
        } finally {
            parser.close();
            reader.close();
        }
        VariableType[] actualTypes = computeColTypes(rows, numHeaderNames);
        Set<String> colNameSet = parser.getHeaderMap() != null ? parser.getHeaderMap().keySet()
            : Collections.<String>emptySet();
        String[] colNames = colNameSet.toArray(new String[colNameSet.size()]);
        if (colNamesOverride != null) {
            colNames = colNamesOverride;
        }
        if (colTypesOverride != null) {
            actualTypes = colTypesOverride;
        }
        return fromStrings(rows, actualTypes, colNames, false);
    } catch (Exception e) {
        throw new IllegalArgumentException("Can't open and/or read " + fileName, e);
    }
}
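A hypothetical call of the loader above for a tab-separated file with a header row (the wrapper class and "data.tsv" file name are made up):

public class DataTableLoadExample {
    public static void main(String[] args) {
        // "tsv" selects CSVFormat.TDF; true indicates the file's first row holds column names.
        DataTable table = DataTable.loadCSV("data.tsv", "tsv", null, null, true);
        System.out.println(table);
    }
}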