List of usage examples for org.apache.commons.csv CSVFormat RFC4180
CSVFormat RFC4180
To view the source code for org.apache.commons.csv CSVFormat RFC4180, click the Source Link.
From source file:com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaValidatorTest.java
@Test public void sniffBadHeader2() throws Exception { CSVParser p = CSVFormat.RFC4180.parse(new StringReader("a,b,c\n1,2,3\n4,5,6")); EasyMock.expect(this.vh.validateHeader(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s), EasyMock.isA(CSVRecord.class))).andReturn(-1); EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s), EasyMock.isA(CSVRecord.class), EasyMock.eq(1))).andReturn(0); EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s), EasyMock.isA(CSVRecord.class), EasyMock.eq(2))).andReturn(0); PowerMock.replayAll();//from w ww. j a v a 2 s . c o m CSDValidationResult<CSDFieldPattern> r = this.validator.validate(this.s, p); Assert.assertEquals(0, r.errorCount()); PowerMock.verifyAll(); }
From source file:com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaSnifferTest.java
@Test public void sniffGoodHeader() throws Exception { CSVParser p = CSVFormat.RFC4180.parse(new StringReader("a,b,c\n1,2,3\n4,5,6")); EasyMock.expect(this.vh.validateHeader(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.sp), EasyMock.isA(CSVRecord.class))).andReturn(0); EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.sp), EasyMock.isA(CSVRecord.class), EasyMock.eq(1))).andReturn(0); EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.sp), EasyMock.isA(CSVRecord.class), EasyMock.eq(2))).andReturn(0); EasyMock.expect(this.sp.newSchema(EasyMock.eq(this.fy), EasyMock.isA(CSVRecord.class))).andReturn(this.s); PowerMock.replayAll();// w w w . j a v a2s. c o m CSDSchema<CSDFieldPattern> s2 = this.sniffer.sniff(this.sp, p, 10); Assert.assertEquals(s, s2); PowerMock.verifyAll(); }
From source file:com.xceptance.xlt.common.tests.AbstractURLTestCase.java
/**
 * Loads the CSV test-data file for the current test user before each test.
 *
 * <p>The file is looked up under the configured data directory (property
 * {@code <xlt-package>.data.directory}, default {@code config/data}) using the
 * {@code filename} property or {@code <userName>.csv} as fallback. Lines starting
 * with {@code #} are comments, empty lines are ignored, the first row is the
 * header, and surrounding spaces are stripped.
 *
 * <p>Each consistent record with a URL becomes a {@link CSVBasedURLAction} in
 * {@code csvBasedActions}; inconsistent records are logged and cause the method
 * to fail at the end so half-broken test setups never run.
 *
 * @throws IOException if the data file cannot be read
 */
@Before
public void loadData() throws IOException {
    login = getProperty("login", getProperty("com.xceptance.xlt.auth.userName"));
    password = getProperty("password", getProperty("com.xceptance.xlt.auth.password"));

    // Locate the data file. Ideally we would offload the file searching to
    // XltProperties.getDataFile(String name) / getDataFile(String name, Locale locale).
    final String dataDirectory = XltProperties.getInstance().getProperty(
            XltConstants.XLT_PACKAGE_PATH + ".data.directory", "config" + File.separatorChar + "data");
    final File file = new File(dataDirectory,
            getProperty("filename", Session.getCurrent().getUserName() + ".csv"));

    BufferedReader br = null;
    boolean incorrectLines = false;

    try {
        br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));

        // Permit # as comment, ignore empty lines, use comma as separator, and read a header row.
        final CSVFormat csvFormat = CSVFormat.RFC4180.toBuilder().withIgnoreEmptyLines(true)
                .withCommentStart('#').withHeader().withIgnoreSurroundingSpaces(true).build();
        final CSVParser parser = new CSVParser(br, csvFormat);
        final Iterator<CSVRecord> csvRecords = parser.iterator();

        // Verify header fields up front to catch misspellings or stray spaces early.
        final Map<String, Integer> headerMap = parser.getHeaderMap();
        for (final String headerField : headerMap.keySet()) {
            if (!CSVBasedURLAction.isPermittedHeaderField(headerField)) {
                Assert.fail(MessageFormat.format("Unsupported or misspelled header field: {0}", headerField));
            }
        }

        // Iterate manually: hasNext() itself can throw on a malformed line, so it is
        // wrapped in its own try/catch instead of using a for-each loop.
        while (true) {
            try {
                final boolean hasNext = csvRecords.hasNext();
                if (!hasNext) {
                    break;
                }
            } catch (final Exception e) {
                // The +1 corrects for the line-number increment that is skipped
                // when the parser throws mid-line.
                throw new RuntimeException(
                        MessageFormat.format("Line at {0} is invalid, because of <{1}>. Line is ignored.",
                                parser.getLineNumber() + 1, e.getMessage()));
            }

            final CSVRecord csvRecord = csvRecords.next();

            // Only take records whose column count matches the header.
            if (csvRecord.isConsistent()) {
                // Guard against data exceptions while building the action.
                try {
                    // Do we have a URL?
                    if (csvRecord.get(CSVBasedURLAction.URL) != null) {
                        // Take it.
                        csvBasedActions.add(new CSVBasedURLAction(csvRecord, interpreter));
                    } else {
                        XltLogger.runTimeLogger.error(MessageFormat.format(
                                "Line at {0} does not contain any URL. Line is ignored: {1}",
                                parser.getLineNumber(), csvRecord));
                    }
                } catch (final Exception e) {
                    throw new RuntimeException(MessageFormat.format(
                            "Line at {0} is invalid, because of <{2}>. Line is ignored: {1}",
                            parser.getLineNumber(), csvRecord, e.getMessage()));
                }
            } else {
                XltLogger.runTimeLogger.error(MessageFormat.format(
                        "Line at {0} has not been correctly formatted. Line is ignored: {1}",
                        parser.getLineNumber(), csvRecord));
                incorrectLines = true;
            }
        }
    } finally {
        IOUtils.closeQuietly(br);
    }

    // Stop if anything was incorrect; avoid half-running test cases.
    if (incorrectLines) {
        throw new RuntimeException("Found incorrectly formatted lines. Stopping here.");
    }
}
From source file:com.mahisoft.elasticsearchprediction.engine.ElasticsearchGenericIndexEngine.java
private void loadData(File dataFile, Client client, String indexName, String mappingFilename) throws IOException { CSVParser parser = null;//ww w . ja v a2 s . co m PrintWriter mappingFileWriter = null; List<String> headers = new ArrayList<String>(); try { mappingFileWriter = new PrintWriter(mappingFilename, Constants.UTF8); parser = CSVParser.parse(dataFile, Charset.forName(Constants.UTF8), CSVFormat.RFC4180); for (CSVRecord csvRecord : parser) { if (csvRecord.getRecordNumber() == 1) { addHeaders(csvRecord, headers); continue; } if (csvRecord.getRecordNumber() == 2) { createIndex(client, indexName, mappingFileWriter, headers, csvRecord); } addValue(client, indexName, headers, csvRecord); } } finally { if (mappingFileWriter != null) mappingFileWriter.close(); if (parser != null) parser.close(); } LOGGER.info("Done!"); }
From source file:com.google.cloud.genomics.dockerflow.args.ArgsTableBuilder.java
/** * Load the workflow arguments from a CSV file. The header of the CSV contains the input or output * parameter names. Each row contains the workflow args for a single run. To run 100 instances of * a workflow concurrently, create a CSV with a header row plus 100 rows for each set of * parameters./*from w w w. j a v a 2 s . co m*/ * * <p>Columns by default are input parameters, passed as environment variables to the Docker * script. For file parameters, you can prefix the column header with "<" for input or ">" for * output. For clarity, you can also prefix the regular input parameters as "<", if you like. * * <p>The column header can also be "logging", which is a reserved name for the logging path. * * @param csvFile CSV file (RFC4180) that's local or in GCS * @return a map with the key being the clientId * @throws IOException */ static Map<String, WorkflowArgs> loadCsv(String csvFile) throws IOException { Map<String, WorkflowArgs> retval = new HashMap<String, WorkflowArgs>(); String csv = FileUtils.readAll(csvFile); CSVParser parser = CSVParser.parse(csv, CSVFormat.RFC4180); // Parse header List<String> header = null; int row = 0; // Parse by row for (CSVRecord csvRecord : parser) { ArgsBuilder args = ArgsBuilder.of(String.valueOf(row)); LOG.debug(StringUtils.toJson(csvRecord)); // Parse header the first time if (row == 0) { header = new ArrayList<String>(); for (String col : csvRecord) { header.add(col); } } else { // Set parameter defined in each column for (int col = 0; col < header.size(); ++col) { String name = header.get(col); String val = csvRecord.get(col); if (name.startsWith(PREFIX_INPUT)) { name = name.replace(PREFIX_INPUT, ""); args.input(name, val); } else if (name.startsWith(PREFIX_OUTPUT)) { name = name.replace(PREFIX_OUTPUT, ""); args.output(name, val); } else if (LOGGING.equals(name)) { args.logging(val); } else { args.input(name, val); } } WorkflowArgs a = args.build(); a.setRunIndex(row); retval.put(a.getClientId(), a); } ++row; } return 
retval; }
From source file:com.github.jferard.pgloaderutils.sniffer.csv.HeaderRowAnalyzer.java
/**
 * Infers a CSV format (delimiter, escape, quote) by aligning the first line of a
 * file against the expected beginning of its header.
 *
 * <p>Both the expected field names and the read line are normalized, then the
 * method walks the line field by field: for each expected field it locates the
 * field's first letter, the delimiter block that follows it, and the start of
 * the next field, feeding the internal delimiter/escape/quote counters along the
 * way (via {@code getFieldDelimiterIndex} / {@code advanceCurFieldStartIndex}).
 * The most frequent candidate wins, falling back to the current defaults.
 *
 * @param expectedHeaderStart at least the first two expected header field names
 * @param firstReadLine       the first physical line read from the CSV file
 * @return {@link CSVFormat#RFC4180} customized with the detected delimiter,
 *         escape, and quote characters
 * @throws IOException if an expected field's first letter cannot be found in the line
 */
public CSVFormat analyze(List<String> expectedHeaderStart, String firstReadLine) throws IOException {
    // Need at least two fields: format detection relies on at least one delimiter.
    if (expectedHeaderStart.size() < 2)
        throw new IllegalArgumentException();

    // Normalize expected fields and the read line so comparisons are consistent.
    List<String> expectedFields = new ArrayList<String>(expectedHeaderStart.size());
    for (String field : expectedHeaderStart)
        expectedFields.add(StringUtils.normalize(field));

    String line = StringUtils.normalize(firstReadLine);

    int curFieldStartIndex = 0;
    Iterator<String> iterator = expectedFields.iterator();
    assert iterator.hasNext();

    // Locate the first letter of the first expected field in the line.
    String curExpectedField = iterator.next();
    char firstCharOfCurExpectedField = curExpectedField.charAt(0);
    int curFieldFirstLetterIndex = line.indexOf(firstCharOfCurExpectedField, curFieldStartIndex);
    if (curFieldFirstLetterIndex == -1)
        throw new IOException("Can't find first letter:" + curExpectedField + " (" + line + ")");

    while (iterator.hasNext()) {
        // Get the index of the first char after the current field, i.e. the first
        // char of the delimiter block.
        int curFieldDelimiterBlockIndex = this.getFieldDelimiterIndex(curExpectedField, line,
                curFieldStartIndex, curFieldFirstLetterIndex);

        String nextExpectedField = iterator.next();
        char firstCharOfNextExpectedField = nextExpectedField.charAt(0);

        // Get the index of the first char of the next field.
        int nextFieldFirstLetterIndex = line.indexOf(firstCharOfNextExpectedField, curFieldDelimiterBlockIndex);
        if (nextFieldFirstLetterIndex == -1)
            throw new IOException("Can't find first letter:" + nextExpectedField + " (" + line + ")");

        // Advance past the delimiter block to the start of the next field.
        curFieldStartIndex = this.advanceCurFieldStartIndex(curExpectedField, line,
                curFieldDelimiterBlockIndex, nextFieldFirstLetterIndex);
        curExpectedField = nextExpectedField;
        curFieldFirstLetterIndex = nextFieldFirstLetterIndex;
    }

    // Pick the most frequently observed candidate for each role, keeping the
    // current value as the fallback when no candidate was counted.
    this.delimiter = this.delimiterCounter.maxElementOr(this.delimiter);
    this.escape = this.escapeCounter.maxElementOr(this.escape);
    this.quote = this.quoteCounter.maxElementOr(this.quote);
    return CSVFormat.RFC4180.withDelimiter(this.delimiter).withEscape(this.escape).withQuote(this.quote);
}
From source file:com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaValidatorTest.java
@Test public void sniffGoodHeader() throws Exception { CSVParser p = CSVFormat.RFC4180.parse(new StringReader("a,b,c\n1,2,3\n4,5,6")); EasyMock.expect(this.vh.validateHeader(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s), EasyMock.isA(CSVRecord.class))).andReturn(0); EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s), EasyMock.isA(CSVRecord.class), EasyMock.eq(1))).andReturn(0); EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s), EasyMock.isA(CSVRecord.class), EasyMock.eq(2))).andReturn(0); PowerMock.replayAll();//w ww. ja v a 2 s. co m CSDValidationResult<CSDFieldPattern> r = this.validator.validate(this.s, p); Assert.assertEquals(0, r.errorCount()); Assert.assertTrue(r.isOk()); PowerMock.verifyAll(); }
From source file:com.ibm.g11n.pipeline.example.MultiBundleCSVFilter.java
@Override public void merge(InputStream baseStream, OutputStream outStream, Map<String, LanguageBundle> languageBundles, FilterOptions options) throws IOException, ResourceFilterException { // create key-value map for each bundle Map<String, Map<String, String>> kvMaps = new HashMap<String, Map<String, String>>(); for (Entry<String, LanguageBundle> bundleEntry : languageBundles.entrySet()) { LanguageBundle languageBundle = bundleEntry.getValue(); Map<String, String> kvMap = new HashMap<String, String>(); for (ResourceString resString : languageBundle.getResourceStrings()) { kvMap.put(resString.getKey(), resString.getValue()); }//from w w w . j av a 2 s . co m kvMaps.put(bundleEntry.getKey(), kvMap); } CSVParser parser = CSVParser.parse(baseStream, StandardCharsets.UTF_8, CSVFormat.RFC4180.withHeader("module", "key", "value").withSkipHeaderRecord(true)); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outStream, StandardCharsets.UTF_8)); CSVPrinter printer = CSVFormat.RFC4180.withHeader("module", "key", "value").print(writer); for (CSVRecord record : parser) { String module = record.get(0); String key = record.get(1); String value = record.get(2); Map<String, String> moduleKVMap = kvMaps.get(module); if (moduleKVMap != null) { String trValue = moduleKVMap.get(key); if (trValue != null) { value = trValue; } } printer.printRecord(module, key, value); } printer.flush(); }
From source file:de.upb.wdqa.wdvd.processors.statistics.ActionStatisticsProcessor.java
/**
 * Logs the overall, reverted, and non-reverted action frequency distributions and
 * writes the monthly month/action/count distribution to {@code path} as a fully
 * quoted RFC4180 CSV. I/O failures are logged, not propagated.
 */
private void logResults() {
    logger.info("Action frequency distribution:\n" + FrequencyUtils.formatFrequency(actionDistribution));
    logger.info("Action frequency distribution of rollback-reverted revisions:\n"
            + FrequencyUtils.formatFrequency(rollbackRevertedActionDistribution));
    logger.info("Action frequency distribution of non-rollback-reverted revisions:\n"
            + FrequencyUtils.formatFrequency(nonRollbackRevertedActionDistribution));

    // try-with-resources: the original only closed the printer on the happy path,
    // leaking the file handle if printRecord threw.
    try (Writer writer = new PrintWriter(path, "UTF-8");
            CSVPrinter csvWriter = CSVFormat.RFC4180.withQuoteMode(QuoteMode.ALL)
                    .withHeader("month", "action", "count").print(writer)) {
        // Emit one row per (month, action) pair, in sorted order.
        for (Entry<String, HashMap<String, Integer>> entry : getSortedList(monthlyActionDistribution)) {
            String month = entry.getKey();
            for (Entry<String, Integer> entry2 : getSortedList2(entry.getValue())) {
                String action = entry2.getKey();
                Integer value = entry2.getValue();
                csvWriter.printRecord(month, action, value);
            }
        }
    } catch (IOException e) {
        logger.error("", e);
    }
}
From source file:com.linkedin.pinot.core.data.readers.CSVRecordReader.java
/**
 * Resolves the CSVFormat selected in the reader config.
 *
 * <p>Matching is case-insensitive; a missing config, missing format string, or an
 * unrecognized name all fall back to {@link CSVFormat#DEFAULT}.
 *
 * @return the configured format, or {@code CSVFormat.DEFAULT} as fallback
 */
private CSVFormat getFormatFromConfig() {
    final String configured = (_config != null) ? _config.getCsvFileFormat() : null;
    if (configured == null) {
        return CSVFormat.DEFAULT;
    }
    switch (configured.toUpperCase()) {
    case "EXCEL":
        return CSVFormat.EXCEL;
    case "MYSQL":
        return CSVFormat.MYSQL;
    case "RFC4180":
        return CSVFormat.RFC4180;
    case "TDF":
        return CSVFormat.TDF;
    case "DEFAULT":
    default:
        // Unknown names silently fall back to DEFAULT, matching the original chain.
        return CSVFormat.DEFAULT;
    }
}