Example usage for org.apache.commons.csv CSVFormat newFormat

List of usage examples for org.apache.commons.csv CSVFormat newFormat

Introduction

On this page you can find example usages of org.apache.commons.csv.CSVFormat.newFormat.

Prototype

public static CSVFormat newFormat(final char delimiter) 

Source Link

Document

Creates a new CSV format with the specified delimiter.

Usage

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Runs the CSV scheme over an input containing nulls, with a null string configured
 * on both the source and the sink format, and checks the result via {@code testScheme}.
 */
@Test
public void testCsvNulls() throws Exception {

    final String input = "src/test/resources/input/with-nulls.txt";
    final String output = "src/test/resources/output/with-nulls";
    final String expectedOutput = "src/test/resources/expected/with-nulls.txt";

    // Comma-delimited, quoted source with explicit headers; the text \N stands for null.
    final CSVFormat readFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withHeader("id", "first name", "last name")
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n')
            .withNullString("\\N");

    // Tab-delimited sink; the text "null" stands for null.
    final CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withEscape('\\')
            .withRecordSeparator('\n')
            .withNullString("null");

    testScheme(input, readFormat, output, writeFormat, expectedOutput, true);
}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Tests the CSV scheme sink with headers: the sink format does not skip the header
 * record, and the output is compared against the expected "with-headers" file.
 */
@Test
public void testCsvSinkWithHeaders() throws Exception {

    final String input = "src/test/resources/input/with-headers.txt";
    final String output = "src/test/resources/output/sink-with-headers";
    final String expectedOutput = "src/test/resources/expected/with-headers.txt";

    // Comma-delimited, quoted source with explicit header names.
    final CSVFormat readFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withHeader("id", "first name", "last name")
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    // Tab-delimited sink; no withSkipHeaderRecord() here, unlike the "without headers" test.
    final CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withEscape('\\')
            .withRecordSeparator('\n');

    testScheme(input, readFormat, output, writeFormat, expectedOutput, true);
}

From source file:ca.uhn.fhir.jpa.term.TerminologyLoaderSvc.java

/**
 * Walks every zip archive in {@code theZipBytes} and, for each entry whose name contains
 * {@code fileNamePart}, parses the entry as delimited text and passes each record to
 * {@code handler}.
 *
 * <p>The first record of a matching entry is used as the header. Any {@link IOException}
 * raised while reading is rethrown wrapped in an {@code InternalErrorException}. After all
 * archives have been scanned, {@code Validate.isTrue} is applied to whether at least one
 * entry matched.
 *
 * @param theZipBytes  contents of the zip archives to scan, one byte array per archive
 * @param fileNamePart substring an entry name must contain for the entry to be processed
 * @param handler      callback invoked once per parsed record
 * @param theDelimiter field delimiter used to build the CSV format
 * @param theQuoteMode quote mode; when non-null, {@code '"'} is set as the quote character
 *                     together with this mode, otherwise no quoting is configured
 */
private void iterateOverZipFile(List<byte[]> theZipBytes, String fileNamePart, IRecordHandler handler,
        char theDelimiter, QuoteMode theQuoteMode) {
    boolean matchedAny = false;

    for (byte[] archiveBytes : theZipBytes) {
        ZipInputStream zipStream = new ZipInputStream(
                new BufferedInputStream(new ByteArrayInputStream(archiveBytes)));
        try {
            for (ZipEntry entry = zipStream.getNextEntry(); entry != null; entry = zipStream.getNextEntry()) {
                // NOTE(review): this wrapper is constructed for every entry but never read from;
                // the reader below is built directly on the zip stream. Confirm whether it is needed.
                ZippedFileInputStream inputStream = new ZippedFileInputStream(zipStream);

                String entryName = entry.getName();
                if (!entryName.contains(fileNamePart)) {
                    continue;
                }

                ourLog.info("Processing file {}", entryName);
                matchedAny = true;

                // The reader and parser are not closed per entry; the zip stream they sit on
                // is closed once in the finally block below.
                Reader entryReader = new InputStreamReader(zipStream, Charsets.UTF_8);
                CSVFormat csvFormat = CSVFormat.newFormat(theDelimiter).withFirstRecordAsHeader();
                if (theQuoteMode != null) {
                    csvFormat = csvFormat.withQuote('"').withQuoteMode(theQuoteMode);
                }
                CSVParser parser = new CSVParser(entryReader, csvFormat);
                Iterator<CSVRecord> records = parser.iterator();
                ourLog.debug("Header map: {}", parser.getHeaderMap());

                int processed = 0;
                final int step = LOG_INCREMENT;
                // Starts at zero, so the very first record triggers a progress line.
                int nextLogAt = 0;
                while (records.hasNext()) {
                    handler.accept(records.next());
                    processed++;
                    if (processed >= nextLogAt) {
                        ourLog.info(" * Processed {} records in {}", processed, entryName);
                        nextLogAt += step;
                    }
                }
            }
        } catch (IOException e) {
            // Covers both stepping through the archive and parsing an entry.
            throw new InternalErrorException(e);
        } finally {
            IOUtils.closeQuietly(zipStream);
        }
    }

    // This should always be true, but just in case we've introduced a bug...
    Validate.isTrue(matchedAny);
}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Tests the CSV scheme sink without headers: the sink format skips the header record,
 * and the output is compared against the expected "without-headers" file.
 */
@Test
public void testCsvSinkWithoutHeaders() throws Exception {

    final String input = "src/test/resources/input/with-headers.txt";
    final String output = "src/test/resources/output/sink-without-headers";
    final String expectedOutput = "src/test/resources/expected/without-headers.txt";

    // Comma-delimited, quoted source with explicit header names.
    final CSVFormat readFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withHeader("id", "first name", "last name")
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    // Tab-delimited sink configured to skip the header record.
    final CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    testScheme(input, readFormat, output, writeFormat, expectedOutput, true);
}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Checks the field names the CSV scheme produces when the source format declares no
 * header. The input is the "with-headers" file and the expected names are
 * {@code id}, {@code first name} and {@code last name}.
 */
@Test
public void schemeGenerateHeadersWhenNotProvided() {

    final String input = "src/test/resources/input/with-headers.txt";
    final String output = "src/test/resources/output/sink-with-headers";

    final Set<String> expectedNames = new HashSet<String>(Arrays.asList("id", "first name", "last name"));

    // No withHeader(...) on the source format: header names are not supplied up front.
    final CSVFormat readFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    final CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    testSchemeFields(input, new CsvScheme(readFormat), output, new CsvScheme(writeFormat), expectedNames);
}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Checks that the CSV scheme yields positional field names ({@code col0}, {@code col1},
 * {@code col2}) when the source format declares no header and does not skip the header record.
 */
@Test
public void schemeGeneratePositionalFieldNames() {

    final String input = "src/test/resources/input/without-headers.txt";
    final String output = "src/test/resources/output/sink-without-headers";

    final Set<String> expectedNames = new HashSet<String>(Arrays.asList("col0", "col1", "col2"));

    // Header skipping is explicitly disabled on the source side.
    final CSVFormat readFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withSkipHeaderRecord(false)
            .withEscape('\\')
            .withRecordSeparator('\n');

    final CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    testSchemeFields(input, new CsvScheme(readFormat), output, new CsvScheme(writeFormat), expectedNames);
}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Checks the field names the CSV scheme produces when the header is defined on the
 * source format itself; the expected names are the ones passed to {@code withHeader}.
 */
@Test
public void schemeGenerateFieldsWhenSourceFormatHeaderGiven() {

    final String input = "src/test/resources/input/without-headers.txt";
    final String output = "src/test/resources/output/sink-without-headers";

    final Set<String> expectedNames = new HashSet<String>(Arrays.asList("id", "first name", "last name"));

    // Header names come from the format.
    final CSVFormat readFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withHeader("id", "first name", "last name")
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    final CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    testSchemeFields(input, new CsvScheme(readFormat), output, new CsvScheme(writeFormat), expectedNames);
}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Checks the field names the CSV scheme produces when explicit source {@code Fields}
 * are passed to the scheme and the source format itself declares no header.
 */
@Test
public void schemeGenerateFieldsWhenSourceFieldsGiven() {

    final String input = "src/test/resources/input/with-headers.txt";
    final String output = "src/test/resources/output/sink-without-headers";

    final Set<String> expectedNames = new HashSet<String>(Arrays.asList("id", "first name", "last name"));

    // No withHeader(...) here: the names are supplied through Fields instead.
    final CSVFormat readFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    final CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    final Fields declaredFields = new Fields("id", "first name", "last name");

    final CsvScheme readScheme = new CsvScheme(declaredFields, readFormat);
    final CsvScheme writeScheme = new CsvScheme(writeFormat);

    testSchemeFields(input, readScheme, output, writeScheme, expectedNames);
}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Checks the field names the CSV scheme produces when both explicit source
 * {@code Fields} and a header on the source format are supplied, with identical names.
 */
@Test
public void schemeGeneratingHeadersWhenSourceHeadersAndFieldsAreGiven() {

    final String input = "src/test/resources/input/without-headers.txt";
    final String output = "src/test/resources/output/sink-without-headers";

    final Set<String> expectedNames = new HashSet<String>(Arrays.asList("id", "first name", "last name"));

    // The same three names are given twice: on the format here and via Fields below.
    final CSVFormat readFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withHeader("id", "first name", "last name")
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    final CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    final Fields declaredFields = new Fields("id", "first name", "last name");

    final CsvScheme readScheme = new CsvScheme(declaredFields, readFormat);
    final CsvScheme writeScheme = new CsvScheme(writeFormat);

    testSchemeFields(input, readScheme, output, writeScheme, expectedNames);
}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Expects a {@link RuntimeException} when a flow is built and run with a source format
 * that declares four header names ({@code id}, {@code first name}, {@code last name},
 * {@code phone}) against the "with-headers" input.
 * Presumably that input has fewer columns than declared headers; confirm against the fixture.
 */
@Test(expected = RuntimeException.class)
public void headerCountMismatchColumnsTest() {

    final String input = "src/test/resources/input/with-headers.txt";
    final String output = "src/test/resources/output/sink-with-headers";

    final FlowConnector flowConnector = new Hadoop2MR1FlowConnector();

    // Four header names are declared on the source format.
    final CSVFormat readFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withHeader("id", "first name", "last name", "phone")
            .withEscape('\\')
            .withRecordSeparator('\n');

    final CSVFormat writeFormat = CSVFormat.newFormat('\t')
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');

    final Tap sourceTap = new Hfs(new CsvScheme(readFormat), input);
    final Tap sinkTap = new Hfs(new CsvScheme(writeFormat), output);
    final Pipe copyPipe = new Pipe("pipe");

    flowConnector.connect(sourceTap, sinkTap, copyPipe).complete();
}