List of usage examples for org.apache.commons.csv CSVRecord get
public String get(final String name)
From source file:org.transitime.gtfs.readers.GtfsStopTimesReader.java
@Override public GtfsStopTime handleRecord(CSVRecord record, boolean supplemental) throws ParseException { if (GtfsData.tripNotFiltered(record.get("trip_id"))) return new GtfsStopTime(record, supplemental, getFileName()); else// www . j a v a2 s. c om return null; }
From source file:org.transitime.gtfs.readers.GtfsTripsReader.java
@Override public GtfsTrip handleRecord(CSVRecord record, boolean supplemental) throws ParseException { if (GtfsData.tripNotFiltered(record.get("trip_id")) && GtfsData.routeNotFiltered(record.get("route_id"))) return new GtfsTrip(record, supplemental, getFileName()); else//from ww w . j ava2s .co m return null; }
From source file:org.transitime.utils.csv.CsvBase.java
/** * For reading values from a CSVRecord. If the column was not defined then * CSVRecord.get() throws an exception. Therefore for optional CSV columns * better to use this function so don't get exception. This way can continue * processing and all errors for the data will be logged. Better than just * logging first error and then quitting. * <p>/*w w w . j a v a2s . c o m*/ * Also, if the value is empty string then it is converted to null for * consistency. Also trims the resulting string since some agencies leave in * spaces. * * @param record * The data for the row in the CSV file * @param name * The name of the column in the CSV file * @param required * Whether this value is required. If required and the value is * not set then an error is logged and null is returned. * @return The value, or null if it was not defined */ private String getValue(CSVRecord record, String name, boolean required) { // If the column is not defined in the file then return null. // After all, the item is optional so it is fine for it to // not be in the file. if (!record.isSet(name)) { if (required) { logger.error("Column {} not defined in file \"{}\" yet it is required", name, getFileName()); } return null; } // Get the value. First trim whitespace so that // value will be consistent. Sometimes agencies will mistakenly have // some whitespace in the columns. String value = record.get(name).trim(); // Return the value. But if the value is empty string // convert to null for consistency. if (value.isEmpty()) { if (required) { logger.error( "For file \"{}\" line number {} for column {} value was not set " + "yet it is required", getFileName(), lineNumber, name); } return null; } else { // Successfully got value so return intern() version of it. Using // intern() because many strings are duplicates such as headsign // or shape info. By sharing the strings we can save a huge amount // of memory. return value.intern(); } }
From source file:org.wheatinitiative.vitro.webapp.ontology.update.OntologyChangeParser.java
/** * @param args//w w w. ja va2 s . co m * @throws IOException */ @SuppressWarnings({ "unchecked", "null", "static-access" }) public ArrayList<AtomicOntologyChange> parseFile(String diffPath) throws IOException { AtomicOntologyChange changeObj; ArrayList<AtomicOntologyChange> changeObjects = new ArrayList<AtomicOntologyChange>(); int countColumns = 0; String URI = null; String rename = null; String sourceURI = null; String destinationURI = null; StringTokenizer stArr = null; InputStreamReader in = new InputStreamReader(this.getClass().getResourceAsStream(diffPath)); Iterable<CSVRecord> records = CSVFormat.TDF.parse(in); Iterator<CSVRecord> rows = records.iterator(); //CSVReader readFile = new SimpleReader(); //readFile.setSeperator('\t'); //List<String[]> rows = readFile.parse(in); int rowNum = 0; while (rows.hasNext()) { rowNum++; CSVRecord row = rows.next(); if (row.size() != 5) { log.error("Invalid PromptDiff data at row " + (rowNum + 1) + ". Expected 5 columns; found " + row.size()); } else { changeObj = new AtomicOntologyChange(); if (row.get(0) != null && row.get(0).length() > 0) { changeObj.setSourceURI(row.get(0)); } if (row.get(1) != null && row.get(1).length() > 0) { changeObj.setDestinationURI(row.get(1)); } if (row.get(4) != null && row.get(4).length() > 0) { changeObj.setNotes(row.get(4)); } if ("Yes".equals(row.get(2))) { changeObj.setAtomicChangeType(AtomicChangeType.RENAME); } else if ("Delete".equals(row.get(3))) { changeObj.setAtomicChangeType(AtomicChangeType.DELETE); } else if ("Add".equals(row.get(3))) { changeObj.setAtomicChangeType(AtomicChangeType.ADD); } else { log.error("Invalid rename or change type data: '" + row.get(2) + " " + row.get(3) + "'"); } log.debug(changeObj); changeObjects.add(changeObj); } } if (changeObjects.size() == 0) { log.debug("No ABox updates are required."); } return changeObjects; }
From source file:org.wildfly.swarm.proc.FailFastComparator.java
@Override public void compare(List<CSVRecord> previous, List<CSVRecord> current) throws ThresholdExceeded { List<ComparisonResult> comparisonResults = new ArrayList<>(); int maxChars = 0; for (CSVRecord prevRecord : previous) { String fileName = prevRecord.get(CSVCollector.SHORT_FILE_NAME_COLUMN); if (fileName.length() > maxChars) maxChars = fileName.length(); CSVRecord matching = findMatching(fileName, current); if (matching != null) { for (Measure measure : criteria) { double prevVal = Double.valueOf(prevRecord.get(measure.column75Percentile())); double currVal = Double.valueOf(matching.get(measure.column75Percentile())); if (currVal > prevVal) { double increasePercentage = currVal * 100 / prevVal; boolean failed = increasePercentage - threshold > 100; String message = StringUtils.rightPad(measure.getShortName(), 10) + " +" + Math.floor(increasePercentage - 100) + "% (" + prevVal + " -> " + currVal + ")"; comparisonResults.add(new ComparisonResult(measure, fileName, failed, message)); } else { double decreasePercentage = prevVal * 100 / currVal; String message = StringUtils.rightPad(measure.getShortName(), 10) + " -" + Math.floor(decreasePercentage - 100) + "% (" + prevVal + " -> " + currVal + ")"; comparisonResults.add(new ComparisonResult(measure, fileName, message)); }// ww w. ja v a 2 s . c o m } } else { System.err.println("No matching record for test " + fileName + ". Skipping ..."); } } // dump results final int pad = maxChars + 2; Collections.sort(comparisonResults, Comparator.comparing(ComparisonResult::getMeasure)); comparisonResults .forEach(r -> System.out.println(StringUtils.rightPad(r.getFile(), pad) + ": " + r.getMessage())); // decide if ThresholdExceeded List<ComparisonResult> failedTests = comparisonResults.stream().filter(r -> !r.isFailure()) .collect(Collectors.toList()); if (failedTests.size() > 0) { System.err.println("There have been test errors. 
See previous logs for details ..."); throw new ThresholdExceeded( failedTests.size() + " test(s) did exceed the " + this.threshold + "% tolerance."); } }
From source file:org.wso2.carbon.event.simulator.csvFeedSimulation.core.CSVFeedEventSimulator.java
/** * This method must be called within a synchronized block to avoid multiple file simulators from running simultaneously. * Read the values from uploaded CSV file and convert those values into event and send those events to * input handler/*from w w w . j ava 2 s . c o m*/ * <p> * <p> * To read the CSV file It uses CSV parser Library. * {@link <a href="https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVParser.html">CSVParser</a>} * </p> * <p> * <p> * CSV file can be separated by one of these fallowing character , , ; , \t by default * It has capability to have user defined delimiter * Any field may be quoted (with double quotes) * Fields with embedded commas or delimiter characters must be double quoted. * </p> * <p> * Initialize CSVParser * * @param executionPlanDto ExecutionPlanDto * @param csvFileConfig CSVFileSimulationDto */ private void sendEvent(ExecutionPlanDto executionPlanDto, CSVFileSimulationDto csvFileConfig) { /* return no of events read from CSV file during ever iteration */ long noOfEvents = 0; int delay = csvFileConfig.getDelay(); /* Reader for reading character streams from file */ Reader in = null; /* CSVParser to read CSV Values */ CSVParser csvParser = null; if (delay <= 0) { log.warn("Events will be sent continuously since the delay between events are set to " + delay + "milliseconds"); delay = 0; } try { /* Initialize Reader */ in = new FileReader(String.valueOf(Paths.get(System.getProperty("java.io.tmpdir"), csvFileConfig.getFileDto().getFileInfo().getFileName()))); /* Initialize CSVParser with appropriate CSVFormat according to delimiter */ switch (csvFileConfig.getDelimiter()) { case ",": csvParser = CSVParser.parse(in, CSVFormat.DEFAULT); break; case ";": csvParser = CSVParser.parse(in, CSVFormat.EXCEL); break; case "\\t": csvParser = CSVParser.parse(in, CSVFormat.TDF); break; default: csvParser = CSVParser.parse(in, CSVFormat.newFormat(csvFileConfig.getDelimiter().charAt(0))); } int attributeSize = 
executionPlanDto.getInputStreamDtoMap().get(csvFileConfig.getStreamName()) .getStreamAttributeDtos().size(); /* Iterate through the CSV file line by line */ for (CSVRecord record : csvParser) { try { synchronized (this) { if (isStopped) { isStopped = false; break; } if (isPaused) { this.wait(); } } if (record.size() != attributeSize) { log.warn("No of attribute is not equal to attribute size: " + attributeSize + " is needed" + "in Row no:" + noOfEvents + 1); } String[] attributes = new String[attributeSize]; noOfEvents = csvParser.getCurrentLineNumber(); for (int i = 0; i < record.size(); i++) { attributes[i] = record.get(i); } //convert Attribute values into event Event event = EventConverter.eventConverter(csvFileConfig.getStreamName(), attributes, executionPlanDto); // TODO: 13/12/16 delete sout System.out.println("Input Event " + Arrays.deepToString(event.getEventData())); // //send the event to input handler send(csvFileConfig.getStreamName(), event); //delay between two events if (delay > 0) { Thread.sleep(delay); } } catch (EventSimulationException e) { log.error("Event dropped due to Error occurred during generating an event" + e.getMessage()); } catch (InterruptedException e) { log.error("Error occurred during send event" + e.getMessage()); } } } catch (IllegalArgumentException e) { // TODO: 02/12/16 proper error message throw new EventSimulationException("File Parameters are null" + e.getMessage()); } catch (FileNotFoundException e) { throw new EventSimulationException( "File not found :" + csvFileConfig.getFileDto().getFileInfo().getFileName()); } catch (IOException e) { throw new EventSimulationException("Error occurred while reading the file"); } finally { try { if (in != null && csvParser != null) in.close(); csvParser.close(); } catch (IOException e) { throw new EventSimulationException("Error occurred during closing the file"); } } }
From source file:org.wso2.carbon.event.simulator.randomdatafeedsimulation.bean.CustomBasedAttribute.java
/**
 * Splits the configured data list into separate values and assigns them to
 * customDataList.
 *
 * @param customData String holding the data list values;
 *            initial string format is ""CEP,Siddhi",ESB,DAS"
 */
public void setCustomData(String customData) {
    List<String> dataList = new ArrayList<>();
    // NOTE(review): the quote character is '/', yet the documented input
    // uses double quotes ("CEP,Siddhi") — confirm '/' is really intended.
    // try-with-resources: the original never closed the parser (leak).
    try (CSVParser csvParser = CSVParser.parse(customData, CSVFormat.newFormat(',').withQuote('/'))) {
        for (CSVRecord record : csvParser) {
            for (int i = 0; i < record.size(); i++) {
                dataList.add(record.get(i));
            }
        }
    } catch (IOException e) {
        throw new EventSimulationException("I/O error occurs :" + e.getMessage());
    } catch (IllegalArgumentException e) {
        throw new EventSimulationException("Data set is null :" + e.getMessage());
    }
    customDataList = dataList.toArray(new String[dataList.size()]);
}
From source file:org.wso2.carbon.ml.dataset.internal.DatasetSummary.java
/** * Finds the columns with Categorical data and Numerical data. Stores the raw-data in a list. * * @param datasetIterator Iterator for the CSV parser. * @param sampleSize Size of the sample. * @throws DatasetSummaryException *//* ww w. ja v a 2 s . c o m*/ private void identifyColumnDataType(Iterator<CSVRecord> datasetIterator, int sampleSize) { int recordsCount = 0; CSVRecord row; int[] stringCellCount = new int[this.headerMap.size()]; // Count the number of cells contain strings in each column. while (datasetIterator.hasNext() && recordsCount != sampleSize) { row = datasetIterator.next(); for (int currentCol = 0; currentCol < this.headerMap.size(); currentCol++) { if (!row.get(currentCol).isEmpty()) { if (!NumberUtils.isNumber(row.get(currentCol))) { stringCellCount[currentCol]++; } } else { // If the cell is empty, increase the missing value count. this.missing[currentCol]++; } // Append the cell to the respective column. this.columnData.get(currentCol).add(row.get(currentCol)); } recordsCount++; } // If atleast one cell contains strings, then the column is considered to has string data. for (int col = 0; col < headerMap.size(); col++) { if (stringCellCount[col] > 0) { this.stringDataColumnPositions.add(col); this.type[col] = FeatureType.CATEGORICAL; } else { this.numericDataColumnPositions.add(col); this.type[col] = FeatureType.NUMERICAL; } } }
From source file:org.zanata.adapter.glossary.GlossaryCSVReader.java
public Map<LocaleId, List<GlossaryEntry>> extractGlossary(Reader reader, String qualifiedName) throws IOException { try {//from w ww . jav a2 s . co m Iterable<CSVRecord> rawRecords = CSVFormat.RFC4180.parse(reader); List<CSVRecord> records = Lists.newArrayList(rawRecords); validateCSVEntries(records); Map<String, Integer> descriptionMap = setupDescMap(records); Map<Integer, LocaleId> localeColMap = setupLocalesMap(records, descriptionMap); LocaleId srcLocale = localeColMap.get(0); if (!srcLang.equals(srcLocale)) { throw new RuntimeException("input source language '" + srcLang + "' does not match source language in file '" + srcLocale + "'"); } Map<LocaleId, List<GlossaryEntry>> results = Maps.newHashMap(); for (int i = 1; i < records.size(); i++) { CSVRecord row = records.get(i); for (int x = 1; x < row.size() && localeColMap.containsKey(x); x++) { GlossaryEntry entry = new GlossaryEntry(); entry.setSrcLang(srcLocale); entry.setPos(row.get(descriptionMap.get(POS))); entry.setDescription(row.get(descriptionMap.get(DESC))); entry.setQualifiedName(new QualifiedName(qualifiedName)); GlossaryTerm srcTerm = new GlossaryTerm(); srcTerm.setLocale(srcLocale); srcTerm.setContent(row.get(0)); entry.getGlossaryTerms().add(srcTerm); LocaleId transLocaleId = localeColMap.get(x); String transContent = row.get(x); GlossaryTerm transTerm = new GlossaryTerm(); transTerm.setLocale(transLocaleId); transTerm.setContent(transContent); entry.getGlossaryTerms().add(transTerm); List<GlossaryEntry> list = results.get(transLocaleId); if (list == null) { list = Lists.newArrayList(); } list.add(entry); results.put(transLocaleId, list); } } return results; } finally { reader.close(); } }
From source file:org.zanata.adapter.glossary.GlossaryCSVReader.java
/**
 * Reads locale columns from the header row, excluding any column claimed by
 * the description map. Format of the CSV header:
 * {source locale},{locale},{locale}...,pos,description
 *
 * @param records parsed CSV records; records.get(0) must be the header row
 * @param descriptionMap column indices of the pos/description columns
 * @return map of column index to LocaleId for each locale column
 */
private Map<Integer, LocaleId> setupLocalesMap(List<CSVRecord> records,
        Map<String, Integer> descriptionMap) {
    Map<Integer, LocaleId> localeColMap = new HashMap<Integer, LocaleId>();
    CSVRecord headerRow = records.get(0);
    // Fix: the original condition was "row <= headerRow.size()", which reads
    // one past the last column and throws ArrayIndexOutOfBoundsException
    // whenever the description-map guard does not stop the loop first.
    for (int col = 0; col < headerRow.size() && !descriptionMap.containsValue(col); col++) {
        localeColMap.put(col, new LocaleId(StringUtils.trim(headerRow.get(col))));
    }
    return localeColMap;
}