Usage examples for org.apache.commons.csv CSVFormat#newFormat(char)
public static CSVFormat newFormat(final char delimiter)
From source file:com.datascience.cascading.CsvSchemeTest.java
/**
 * Verifies that a {@link RuntimeException} is raised when more source fields
 * are declared than the input file provides columns (per the test's intent).
 */
@Test(expected = RuntimeException.class)
public void fieldsCountGreaterThanColumnsTest() {
    String inputPath = "src/test/resources/input/with-headers.txt";
    String outputPath = "src/test/resources/output/sink-with-headers";

    CSVFormat readFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withEscape('\\')
            .withRecordSeparator('\n');
    CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    // Deliberately declares four fields; the failure is expected downstream.
    Fields declaredFields = new Fields("id", "last name", "first name", "phone");

    Tap input = new Hfs(new CsvScheme(declaredFields, readFormat), inputPath);
    Tap output = new Hfs(new CsvScheme(writeFormat), outputPath);

    FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
    flowConnector.connect(input, output, new Pipe("pipe")).complete();
}
From source file:com.datascience.cascading.CsvSchemeTest.java
/** * Tests if subset of input fields are provided, properly outputs only that subset. *///from w w w . j a va 2 s . c o m @Test public void fieldsIncludedButNotMatchLengthTest() throws Exception { String sourcePath = "src/test/resources/input/with-headers.txt"; String sinkPath = "src/test/resources/output/sink-with-headers"; String expectedPath = "src/test/resources/expected/sink-with-headers-id-only.txt"; FlowConnector connector = new Hadoop2MR1FlowConnector(); CSVFormat sourceFormat = CSVFormat.newFormat(',').withHeader("id", "first name", "last name").withQuote('"') .withEscape('\\').withRecordSeparator('\n'); CSVFormat sinkFormat = CSVFormat.newFormat('\t').withSkipHeaderRecord().withEscape('\\') .withRecordSeparator('\n'); Fields sourceFields = new Fields("id"); Tap source = new Hfs(new CsvScheme(sourceFields, sourceFormat), sourcePath); Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath, SinkMode.REPLACE); Pipe pipe = new Pipe("pipe"); connector.connect(source, sink, pipe).complete(); testPaths(sinkPath, expectedPath); }
From source file:com.datascience.cascading.CsvSchemeTest.java
@Test public void testWhenFieldsAndHeadersAreinDifferentOrder() throws Exception { String sourcePath = "src/test/resources/input/with-headers.txt"; String sinkPath = "src/test/resources/output/sink-with-headers"; String expectedPath = "src/test/resources/expected/with-headers-difforder.txt"; FlowConnector connector = new Hadoop2MR1FlowConnector(); CSVFormat sourceFormat = CSVFormat.newFormat(',').withQuote('"').withHeader("id", "first name", "last name") .withEscape('\\').withRecordSeparator('\n'); CSVFormat sinkFormat = CSVFormat.newFormat('\t').withSkipHeaderRecord().withEscape('\\') .withRecordSeparator('\n'); Fields sourceFields = new Fields("id", "last name", "first name"); Tap source = new Hfs(new CsvScheme(sourceFields, sourceFormat), sourcePath); Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath); Pipe pipe = new Pipe("pipe"); connector.connect(source, sink, pipe).complete(); testPaths(sinkPath, expectedPath);//from w w w .jav a 2 s. com }
From source file:com.datascience.cascading.CsvSchemeTest.java
/**
 * Verifies non-strict handling of rows with extra columns: well-formed rows
 * reach the sink while over-long rows are diverted to the trap tap.
 *
 * @throws Exception if the flow or either output comparison fails
 */
@Test
public void testWhenExtraColumnsNotStrict() throws Exception {
    String inputPath = "src/test/resources/input/with-extra-columns.txt";
    String outputPath = "src/test/resources/input/sink-with-headers";
    String expectedOutput = "src/test/resources/expected/with-extra-columns-no-strict.txt";
    String trapOutputPath = "src/test/resources/input/trap-sink-with-headers";
    String expectedTrapOutput = "src/test/resources/expected/trap-with-extra-columns-no-strict.txt";

    CSVFormat readFormat = CSVFormat.newFormat('\t')
            .withQuote('"')
            .withHeader("id", "first name", "last name", "city", "zip")
            .withEscape('\\')
            .withRecordSeparator('\n');
    CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    // strict=false: malformed records are trapped instead of failing the flow.
    Tap input = new Hfs(new CsvScheme(readFormat, false), inputPath);
    Tap output = new Hfs(new CsvScheme(writeFormat), outputPath, SinkMode.REPLACE);
    Tap trap = new Hfs(new TextDelimited(true, "\t"), trapOutputPath, SinkMode.REPLACE);

    FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
    flowConnector.connect("extra-columns-not-strict", input, output, trap, new Pipe("pipe")).complete();

    testPaths(outputPath, expectedOutput);
    testPaths(trapOutputPath, expectedTrapOutput);
}
From source file:com.webtide.jetty.load.generator.jenkins.LoadGeneratorBuilder.java
protected void parseTimeValues(FilePath workspace, Path responseTimeResultFilePath, List<Resource.NodeListener> nodeListeners) throws Exception { Path responseTimeResultFile = Files.createTempFile("loadgenerator_result_responsetime", ".csv"); workspace.child(responseTimeResultFilePath.toString()) .copyTo(Files.newOutputStream(responseTimeResultFile)); CSVParser csvParser = new CSVParser(Files.newBufferedReader(responseTimeResultFile), CSVFormat.newFormat('|')); csvParser.forEach(strings -> {/*from w w w. j a v a2s .c om*/ Values values = new Values() // .eventTimestamp(Long.parseLong(strings.get(0))) // .method(strings.get(1)) // .path(strings.get(2)) // .status(Integer.parseInt(strings.get(3))) // .size(Long.parseLong(strings.get(4))) // .responseTime(Long.parseLong(strings.get(5))) // .latencyTime(Long.parseLong(strings.get(6))); for (Resource.NodeListener listener : nodeListeners) { listener.onResourceNode(values.getInfo()); } }); Files.deleteIfExists(responseTimeResultFile); }
From source file:com.datascience.cascading.CsvSchemeTest.java
/**
 * Verifies that in strict mode a row with extra columns aborts the flow
 * with a {@link FlowException}.
 *
 * @throws Exception propagated from the flow (expected here)
 */
@Test(expected = FlowException.class)
public void testWhenExtraColumnsStrict() throws Exception {
    String inputPath = "src/test/resources/input/with-extra-columns.txt";
    String outputPath = "src/test/resources/input/sink-with-headers";

    CSVFormat readFormat = CSVFormat.newFormat('\t')
            .withHeader("id", "first name", "last name", "city", "zip")
            .withQuote('"')
            .withEscape('\\')
            .withRecordSeparator('\n');
    CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withEscape('\\')
            .withRecordSeparator('\n');

    // strict=true: malformed records must fail the flow rather than be skipped.
    Tap input = new Hfs(new CsvScheme(readFormat, true), inputPath);
    Tap output = new Hfs(new CsvScheme(writeFormat), outputPath, SinkMode.REPLACE);

    FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
    flowConnector.connect(input, output, new Pipe("pipe")).complete();
}
From source file:com.datascience.cascading.CsvSchemeTest.java
/**
 * Verifies non-strict handling of extra columns when the input carries no
 * header row: good rows reach the sink, over-long rows land in the trap.
 *
 * @throws Exception if the flow or either output comparison fails
 */
@Test
public void testWhenExtraColumnsNotStrictNoHeaders() throws Exception {
    String inputPath = "src/test/resources/input/with-extra-columns-no-header.txt";
    String outputPath = "src/test/resources/input/sink-no-headers";
    String trapOutputPath = "src/test/resources/input/trap-no-headers";
    String expectedOutput = "src/test/resources/expected/with-extra-columns-no-strict-no-header.txt";
    String expectedTrapOutput = "src/test/resources/expected/trap-with-extra-columns-no-strict-no-header.txt";

    CSVFormat readFormat = CSVFormat.newFormat('\t')
            .withQuote('"')
            .withEscape('\\')
            .withRecordSeparator('\n');
    CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withEscape('\\')
            .withRecordSeparator('\n');

    // strict=false: malformed records are trapped instead of failing the flow.
    Tap input = new Hfs(new CsvScheme(readFormat, false), inputPath);
    Tap output = new Hfs(new CsvScheme(writeFormat), outputPath, SinkMode.REPLACE);
    Tap trap = new Hfs(new TextDelimited(false, "\t"), trapOutputPath, SinkMode.REPLACE);

    FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
    flowConnector.connect("test-extra-columns-no-header", input, output, trap, new Pipe("pipe")).complete();

    testPaths(outputPath, expectedOutput);
    testPaths(trapOutputPath, expectedTrapOutput);
}
From source file:com.datascience.cascading.CsvSchemeTest.java
/**
 * Verifies that in strict mode, with no header row, a row carrying extra
 * columns aborts the flow with a {@link FlowException}.
 *
 * @throws Exception propagated from the flow (expected here)
 */
@Test(expected = FlowException.class)
public void testWhenExtraColumnsStrictNoHeaders() throws Exception {
    String inputPath = "src/test/resources/input/with-extra-columns-no-header.txt";
    String outputPath = "src/test/resources/input/sink-no-headers";

    CSVFormat readFormat = CSVFormat.newFormat('\t')
            .withQuote('"')
            .withEscape('\\')
            .withRecordSeparator('\n');
    CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withEscape('\\')
            .withRecordSeparator('\n');

    // strict=true: malformed records must fail the flow rather than be skipped.
    Tap input = new Hfs(new CsvScheme(readFormat, true), inputPath);
    Tap output = new Hfs(new CsvScheme(writeFormat), outputPath, SinkMode.REPLACE);

    FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
    flowConnector.connect(input, output, new Pipe("pipe")).complete();
}
From source file:org.apache.nifi.csv.CSVUtils.java
/**
 * Builds a custom {@link CSVFormat} from processor properties: value
 * separator, optional first-line header, quote/escape characters, trimming,
 * and optional comment marker, null string, quote mode, trailing delimiter,
 * and record separator.
 */
private static CSVFormat buildCustomFormat(final PropertyContext context) {
    final char valueSeparator = getUnescapedChar(context, VALUE_SEPARATOR);
    // Base format: custom delimiter, tolerant of missing column names and blank lines.
    CSVFormat format = CSVFormat.newFormat(valueSeparator).withAllowMissingColumnNames().withIgnoreEmptyLines();
    // Null-check before asBoolean() so an unset property does not NPE.
    final PropertyValue skipHeaderPropertyValue = context.getProperty(FIRST_LINE_IS_HEADER);
    if (skipHeaderPropertyValue.getValue() != null && skipHeaderPropertyValue.asBoolean()) {
        format = format.withFirstRecordAsHeader();
    }
    format = format.withQuote(getChar(context, QUOTE_CHAR));
    format = format.withEscape(getChar(context, ESCAPE_CHAR));
    format = format.withTrim(context.getProperty(TRIM_FIELDS).asBoolean());
    if (context.getProperty(COMMENT_MARKER).isSet()) {
        format = format.withCommentMarker(getChar(context, COMMENT_MARKER));
    }
    if (context.getProperty(NULL_STRING).isSet()) {
        // Unescape so e.g. "\\n" in the property becomes a real newline.
        format = format.withNullString(CSVUtils.unescape(context.getProperty(NULL_STRING).getValue()));
    }
    // NOTE(review): valueOf assumes the property value exactly matches a
    // QuoteMode enum name — presumably guaranteed by an allowable-values
    // constraint elsewhere; confirm.
    final PropertyValue quoteValue = context.getProperty(QUOTE_MODE);
    if (quoteValue != null) {
        final QuoteMode quoteMode = QuoteMode.valueOf(quoteValue.getValue());
        format = format.withQuoteMode(quoteMode);
    }
    final PropertyValue trailingDelimiterValue = context.getProperty(TRAILING_DELIMITER);
    if (trailingDelimiterValue != null) {
        final boolean trailingDelimiter = trailingDelimiterValue.asBoolean();
        format = format.withTrailingDelimiter(trailingDelimiter);
    }
    final PropertyValue recordSeparator = context.getProperty(RECORD_SEPARATOR);
    if (recordSeparator != null) {
        final String separator = unescape(recordSeparator.getValue());
        format = format.withRecordSeparator(separator);
    }
    return format;
}
From source file:org.apache.ranger.unixusersync.process.FileSourceUserGroupBuilder.java
/**
 * Reads a delimiter-separated user/groups file into a map of user name to
 * group list. The first column of each record is the user name; every
 * remaining non-empty column is a group. Surrounding double quotes are
 * stripped from both users and groups. The delimiter's first character comes
 * from the sync configuration.
 *
 * @param textFile the user/group source file to parse
 * @return map from user name to that user's (possibly empty) group list
 * @throws Exception if the file cannot be opened or parsed
 */
public Map<String, List<String>> readTextFile(File textFile) throws Exception {
    Map<String, List<String>> ret = new HashMap<String, List<String>>();

    String delimiter = config.getUserSyncFileSourceDelimiter();
    CSVFormat csvFormat = CSVFormat.newFormat(delimiter.charAt(0));

    // try-with-resources: the parser (and its underlying reader) was
    // previously only closed on the success path, leaking a file handle
    // whenever getRecords() threw.
    try (CSVParser csvParser = new CSVParser(new BufferedReader(new FileReader(textFile)), csvFormat)) {
        List<CSVRecord> csvRecordList = csvParser.getRecords();
        if (csvRecordList != null) {
            for (CSVRecord csvRecord : csvRecordList) {
                List<String> groups = new ArrayList<String>();
                // Strip one leading and/or trailing double quote.
                String user = csvRecord.get(0).replaceAll("^\"|\"$", "");
                for (int j = 1; j < csvRecord.size(); j++) {
                    String group = csvRecord.get(j);
                    if (group != null && !group.isEmpty()) {
                        groups.add(group.replaceAll("^\"|\"$", ""));
                    }
                }
                ret.put(user, groups);
            }
        }
    }
    return ret;
}