Usage examples for org.apache.commons.csv.CSVFormat.newFormat
public static CSVFormat newFormat(final char delimiter)
From source file:com.datascience.cascading.CsvSchemeTest.java
/** * Tests the CSV scheme reading and writing nulls. *///w w w . j av a 2 s . c o m @Test public void testCsvNulls() throws Exception { String sourcePath = "src/test/resources/input/with-nulls.txt"; String sinkPath = "src/test/resources/output/with-nulls"; String expectedPath = "src/test/resources/expected/with-nulls.txt"; CSVFormat sourceFormat = CSVFormat.newFormat(',').withQuote('"').withHeader("id", "first name", "last name") .withSkipHeaderRecord().withEscape('\\').withRecordSeparator('\n').withNullString("\\N"); CSVFormat sinkFormat = CSVFormat.newFormat('\t').withEscape('\\').withRecordSeparator('\n') .withNullString("null"); testScheme(sourcePath, sourceFormat, sinkPath, sinkFormat, expectedPath, true); }
From source file:com.datascience.cascading.CsvSchemeTest.java
/** * Tests the CSV scheme sink without headers. *///from w ww.j a v a 2 s .com @Test public void testCsvSinkWithHeaders() throws Exception { String sourcePath = "src/test/resources/input/with-headers.txt"; String sinkPath = "src/test/resources/output/sink-with-headers"; String expectedPath = "src/test/resources/expected/with-headers.txt"; CSVFormat sourceFormat = CSVFormat.newFormat(',').withQuote('"').withHeader("id", "first name", "last name") .withSkipHeaderRecord().withEscape('\\').withRecordSeparator('\n'); CSVFormat sinkFormat = CSVFormat.newFormat('\t').withEscape('\\').withRecordSeparator('\n'); testScheme(sourcePath, sourceFormat, sinkPath, sinkFormat, expectedPath, true); }
From source file:ca.uhn.fhir.jpa.term.TerminologyLoaderSvc.java
private void iterateOverZipFile(List<byte[]> theZipBytes, String fileNamePart, IRecordHandler handler, char theDelimiter, QuoteMode theQuoteMode) { boolean found = false; for (byte[] nextZipBytes : theZipBytes) { ZipInputStream zis = new ZipInputStream( new BufferedInputStream(new ByteArrayInputStream(nextZipBytes))); try {/* ww w .ja v a 2 s. c o m*/ for (ZipEntry nextEntry; (nextEntry = zis.getNextEntry()) != null;) { ZippedFileInputStream inputStream = new ZippedFileInputStream(zis); String nextFilename = nextEntry.getName(); if (nextFilename.contains(fileNamePart)) { ourLog.info("Processing file {}", nextFilename); found = true; Reader reader = null; CSVParser parsed = null; try { reader = new InputStreamReader(zis, Charsets.UTF_8); CSVFormat format = CSVFormat.newFormat(theDelimiter).withFirstRecordAsHeader(); if (theQuoteMode != null) { format = format.withQuote('"').withQuoteMode(theQuoteMode); } parsed = new CSVParser(reader, format); Iterator<CSVRecord> iter = parsed.iterator(); ourLog.debug("Header map: {}", parsed.getHeaderMap()); int count = 0; int logIncrement = LOG_INCREMENT; int nextLoggedCount = 0; while (iter.hasNext()) { CSVRecord nextRecord = iter.next(); handler.accept(nextRecord); count++; if (count >= nextLoggedCount) { ourLog.info(" * Processed {} records in {}", count, nextFilename); nextLoggedCount += logIncrement; } } } catch (IOException e) { throw new InternalErrorException(e); } } } } catch (IOException e) { throw new InternalErrorException(e); } finally { IOUtils.closeQuietly(zis); } } // This should always be true, but just in case we've introduced a bug... Validate.isTrue(found); }
From source file:com.datascience.cascading.CsvSchemeTest.java
/**
 * Tests the CSV scheme sink without headers.
 *
 * <p>Input is read with a comma-delimited, double-quoted, backslash-escaped format that
 * declares the header names {@code id}, {@code first name}, {@code last name} and skips the
 * header record. Output is written with a tab-delimited, backslash-escaped format that also
 * requests skipping of the header record. The check is delegated to {@code testScheme}.
 *
 * @throws Exception if {@code testScheme} fails
 */
@Test
public void testCsvSinkWithoutHeaders() throws Exception {
    final CSVFormat inputFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withHeader("id", "first name", "last name")
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');
    final CSVFormat outputFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    testScheme(
            "src/test/resources/input/with-headers.txt",
            inputFormat,
            "src/test/resources/output/sink-without-headers",
            outputFormat,
            "src/test/resources/expected/without-headers.txt",
            true);
}
From source file:com.datascience.cascading.CsvSchemeTest.java
/**
 * Tests the field names produced by the CSV scheme when the source format declares no
 * header names.
 *
 * <p>The source format only requests skipping of the header record; the expected field names
 * are {@code id}, {@code first name} and {@code last name} (presumably taken from the first
 * record of the input file). The check is delegated to {@code testSchemeFields}.
 */
@Test
public void schemeGenerateHeadersWhenNotProvided() {
    final String inputPath = "src/test/resources/input/with-headers.txt";
    final String outputPath = "src/test/resources/output/sink-with-headers";
    final Set<String> expectedFields = new HashSet<String>(Arrays.asList("id", "first name", "last name"));

    final CSVFormat inputFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');
    final CSVFormat outputFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    final CsvScheme inputScheme = new CsvScheme(inputFormat);
    final CsvScheme outputScheme = new CsvScheme(outputFormat);

    testSchemeFields(inputPath, inputScheme, outputPath, outputScheme, expectedFields);
}
From source file:com.datascience.cascading.CsvSchemeTest.java
/** * Test the CsvScheme generating positional headers names when not provided. */// ww w . ja va 2s . com @Test public void schemeGeneratePositionalFieldNames() { String sourcePath = "src/test/resources/input/without-headers.txt"; String sinkPath = "src/test/resources/output/sink-without-headers"; Set<String> expected = new HashSet<String>(); expected.addAll(Arrays.asList("col0", "col1", "col2")); CSVFormat sourceFormat = CSVFormat.newFormat(',').withQuote('"').withSkipHeaderRecord(false) .withEscape('\\').withRecordSeparator('\n'); CSVFormat sinkFormat = CSVFormat.newFormat('\t').withSkipHeaderRecord().withEscape('\\') .withRecordSeparator('\n'); CsvScheme sourceScheme = new CsvScheme(sourceFormat); CsvScheme sinkScheme = new CsvScheme(sinkFormat); testSchemeFields(sourcePath, sourceScheme, sinkPath, sinkScheme, expected); }
From source file:com.datascience.cascading.CsvSchemeTest.java
/**
 * Tests the field names produced by the CSV scheme when the header is defined in the source
 * format.
 *
 * <p>The source format declares the header names {@code id}, {@code first name} and
 * {@code last name}, the input file is the header-less fixture, and the same three names are
 * expected as fields. The check is delegated to {@code testSchemeFields}.
 */
@Test
public void schemeGenerateFieldsWhenSourceFormatHeaderGiven() {
    final String inputPath = "src/test/resources/input/without-headers.txt";
    final String outputPath = "src/test/resources/output/sink-without-headers";
    final Set<String> expectedFields = new HashSet<String>(Arrays.asList("id", "first name", "last name"));

    final CSVFormat inputFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withHeader("id", "first name", "last name")
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');
    final CSVFormat outputFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    final CsvScheme inputScheme = new CsvScheme(inputFormat);
    final CsvScheme outputScheme = new CsvScheme(outputFormat);

    testSchemeFields(inputPath, inputScheme, outputPath, outputScheme, expectedFields);
}
From source file:com.datascience.cascading.CsvSchemeTest.java
/**
 * Tests the field names produced by the CSV scheme when source fields are supplied to the
 * scheme directly.
 *
 * <p>The source format declares no header names; instead the source scheme is constructed
 * with the fields {@code id}, {@code first name} and {@code last name}, which are also the
 * expected field names. The check is delegated to {@code testSchemeFields}.
 */
@Test
public void schemeGenerateFieldsWhenSourceFieldsGiven() {
    final String inputPath = "src/test/resources/input/with-headers.txt";
    final String outputPath = "src/test/resources/output/sink-without-headers";
    final Set<String> expectedFields = new HashSet<String>(Arrays.asList("id", "first name", "last name"));

    final CSVFormat inputFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');
    final CSVFormat outputFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    final Fields declaredFields = new Fields("id", "first name", "last name");
    final CsvScheme inputScheme = new CsvScheme(declaredFields, inputFormat);
    final CsvScheme outputScheme = new CsvScheme(outputFormat);

    testSchemeFields(inputPath, inputScheme, outputPath, outputScheme, expectedFields);
}
From source file:com.datascience.cascading.CsvSchemeTest.java
/** * Test CsvScheme Generating headers when both Source Fields and Headers are provided. *///from w w w .j ava 2s . c o m @Test public void schemeGeneratingHeadersWhenSourceHeadersAndFieldsAreGiven() { String sourcePath = "src/test/resources/input/without-headers.txt"; String sinkPath = "src/test/resources/output/sink-without-headers"; Set<String> expected = new HashSet<String>(); expected.addAll(Arrays.asList("id", "first name", "last name")); CSVFormat sourceFormat = CSVFormat.newFormat(',').withQuote('"').withHeader("id", "first name", "last name") .withSkipHeaderRecord().withEscape('\\').withRecordSeparator('\n'); CSVFormat sinkFormat = CSVFormat.newFormat('\t').withSkipHeaderRecord().withEscape('\\') .withRecordSeparator('\n'); Fields sourceFields = new Fields("id", "first name", "last name"); CsvScheme sourceScheme = new CsvScheme(sourceFields, sourceFormat); CsvScheme sinkScheme = new CsvScheme(sinkFormat); testSchemeFields(sourcePath, sourceScheme, sinkPath, sinkScheme, expected); }
From source file:com.datascience.cascading.CsvSchemeTest.java
/**
 * Tests that a flow fails when the number of declared headers does not match the input.
 *
 * <p>The source format declares four header names ({@code id}, {@code first name},
 * {@code last name}, {@code phone}) for the {@code with-headers.txt} fixture. Connecting and
 * completing the flow is expected to raise a {@link RuntimeException}.
 */
@Test(expected = RuntimeException.class)
public void headerCountMismatchColumnsTest() {
    final String inputPath = "src/test/resources/input/with-headers.txt";
    final String outputPath = "src/test/resources/output/sink-with-headers";

    final FlowConnector flowConnector = new Hadoop2MR1FlowConnector();

    final CSVFormat inputFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withHeader("id", "first name", "last name", "phone")
            .withEscape('\\')
            .withRecordSeparator('\n');
    final CSVFormat outputFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    final Tap inputTap = new Hfs(new CsvScheme(inputFormat), inputPath);
    final Tap outputTap = new Hfs(new CsvScheme(outputFormat), outputPath);
    final Pipe copyPipe = new Pipe("pipe");

    flowConnector.connect(inputTap, outputTap, copyPipe).complete();
}