Example usage for org.apache.commons.csv CSVFormat newFormat

List of usage examples for org.apache.commons.csv CSVFormat newFormat

Introduction

On this page you can find example usage for org.apache.commons.csv CSVFormat newFormat.

Prototype

public static CSVFormat newFormat(final char delimiter) 

Source Link

Document

Creates a new CSV format with the specified delimiter.

Usage

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Verifies that a RuntimeException is raised when the declared source
 * fields outnumber the columns actually present in the input file.
 */
@Test(expected = RuntimeException.class)
public void fieldsCountGreaterThanColumnsTest() {

    String inputPath = "src/test/resources/input/with-headers.txt";
    String outputPath = "src/test/resources/output/sink-with-headers";

    CSVFormat readFormat = CSVFormat.newFormat(',').withQuote('"').withEscape('\\').withRecordSeparator('\n');
    CSVFormat writeFormat = CSVFormat.newFormat('\t').withSkipHeaderRecord().withEscape('\\')
            .withRecordSeparator('\n');

    // Four declared fields against a three-column input triggers the failure.
    Fields declaredFields = new Fields("id", "last name", "first name", "phone");
    Tap source = new Hfs(new CsvScheme(declaredFields, readFormat), inputPath);
    Tap sink = new Hfs(new CsvScheme(writeFormat), outputPath);

    FlowConnector connector = new Hadoop2MR1FlowConnector();
    connector.connect(source, sink, new Pipe("pipe")).complete();

}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Verifies that declaring only a subset of the input fields writes
 * exactly that subset to the sink.
 */
@Test
public void fieldsIncludedButNotMatchLengthTest() throws Exception {

    String inputPath = "src/test/resources/input/with-headers.txt";
    String outputPath = "src/test/resources/output/sink-with-headers";
    String expectedOutput = "src/test/resources/expected/sink-with-headers-id-only.txt";

    CSVFormat readFormat = CSVFormat.newFormat(',').withHeader("id", "first name", "last name").withQuote('"')
            .withEscape('\\').withRecordSeparator('\n');
    CSVFormat writeFormat = CSVFormat.newFormat('\t').withSkipHeaderRecord().withEscape('\\')
            .withRecordSeparator('\n');

    // Only "id" is requested out of the three header columns.
    Tap source = new Hfs(new CsvScheme(new Fields("id"), readFormat), inputPath);
    Tap sink = new Hfs(new CsvScheme(writeFormat), outputPath, SinkMode.REPLACE);

    FlowConnector connector = new Hadoop2MR1FlowConnector();
    connector.connect(source, sink, new Pipe("pipe")).complete();

    testPaths(outputPath, expectedOutput);

}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Verifies that parsing succeeds when the declared fields are listed in a
 * different order than the header columns of the input file.
 */
@Test
public void testWhenFieldsAndHeadersAreinDifferentOrder() throws Exception {

    String inputPath = "src/test/resources/input/with-headers.txt";
    String outputPath = "src/test/resources/output/sink-with-headers";
    String expectedOutput = "src/test/resources/expected/with-headers-difforder.txt";

    CSVFormat readFormat = CSVFormat.newFormat(',').withQuote('"').withHeader("id", "first name", "last name")
            .withEscape('\\').withRecordSeparator('\n');
    CSVFormat writeFormat = CSVFormat.newFormat('\t').withSkipHeaderRecord().withEscape('\\')
            .withRecordSeparator('\n');

    // Field order intentionally differs from the header order above.
    Fields reorderedFields = new Fields("id", "last name", "first name");
    Tap source = new Hfs(new CsvScheme(reorderedFields, readFormat), inputPath);
    Tap sink = new Hfs(new CsvScheme(writeFormat), outputPath);

    FlowConnector connector = new Hadoop2MR1FlowConnector();
    connector.connect(source, sink, new Pipe("pipe")).complete();

    testPaths(outputPath, expectedOutput);

}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Verifies non-strict handling of rows with extra columns: well-formed rows
 * reach the sink while over-long rows are diverted to the trap tap.
 */
@Test
public void testWhenExtraColumnsNotStrict() throws Exception {
    String inputPath = "src/test/resources/input/with-extra-columns.txt";
    String outputPath = "src/test/resources/input/sink-with-headers";
    String expectedOutput = "src/test/resources/expected/with-extra-columns-no-strict.txt";
    String trapOutputPath = "src/test/resources/input/trap-sink-with-headers";
    String expectedTrapOutput = "src/test/resources/expected/trap-with-extra-columns-no-strict.txt";

    CSVFormat readFormat = CSVFormat.newFormat('\t').withQuote('"')
            .withHeader("id", "first name", "last name", "city", "zip").withEscape('\\')
            .withRecordSeparator('\n');
    CSVFormat writeFormat = CSVFormat.newFormat('\t').withSkipHeaderRecord().withEscape('\\')
            .withRecordSeparator('\n');

    // strict = false: malformed rows go to the trap instead of failing the flow.
    Tap source = new Hfs(new CsvScheme(readFormat, false), inputPath);
    Tap sink = new Hfs(new CsvScheme(writeFormat), outputPath, SinkMode.REPLACE);
    Tap trap = new Hfs(new TextDelimited(true, "\t"), trapOutputPath, SinkMode.REPLACE);

    FlowConnector connector = new Hadoop2MR1FlowConnector();
    connector.connect("extra-columns-not-strict", source, sink, trap, new Pipe("pipe")).complete();

    testPaths(outputPath, expectedOutput);
    testPaths(trapOutputPath, expectedTrapOutput);
}

From source file:com.webtide.jetty.load.generator.jenkins.LoadGeneratorBuilder.java

/**
 * Copies the response-time CSV result out of the Jenkins workspace into a
 * temporary file, parses it as a '|'-delimited CSV, and forwards each row
 * to the given node listeners.
 *
 * Expected column layout per record:
 *   0: event timestamp, 1: method, 2: path, 3: status, 4: size,
 *   5: response time, 6: latency time.
 *
 * @param workspace                  Jenkins workspace holding the result file
 * @param responseTimeResultFilePath path of the result file inside the workspace
 * @param nodeListeners              listeners notified for every parsed record
 * @throws Exception if copying or parsing fails
 */
protected void parseTimeValues(FilePath workspace, Path responseTimeResultFilePath,
        List<Resource.NodeListener> nodeListeners) throws Exception {
    Path responseTimeResultFile = Files.createTempFile("loadgenerator_result_responsetime", ".csv");

    try {
        workspace.child(responseTimeResultFilePath.toString())
                .copyTo(Files.newOutputStream(responseTimeResultFile));

        // try-with-resources: the original never closed the CSVParser, leaking
        // the parser and its underlying reader.
        try (CSVParser csvParser = new CSVParser(Files.newBufferedReader(responseTimeResultFile),
                CSVFormat.newFormat('|'))) {
            csvParser.forEach(strings -> {
                Values values = new Values() //
                        .eventTimestamp(Long.parseLong(strings.get(0))) //
                        .method(strings.get(1)) //
                        .path(strings.get(2)) //
                        .status(Integer.parseInt(strings.get(3))) //
                        .size(Long.parseLong(strings.get(4))) //
                        .responseTime(Long.parseLong(strings.get(5))) //
                        .latencyTime(Long.parseLong(strings.get(6)));

                for (Resource.NodeListener listener : nodeListeners) {
                    listener.onResourceNode(values.getInfo());
                }
            });
        }
    } finally {
        // Clean up the temp file even when copying or parsing throws.
        Files.deleteIfExists(responseTimeResultFile);
    }
}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Verifies that strict parsing aborts the flow with a FlowException when a
 * row contains more columns than the declared header.
 */
@Test(expected = FlowException.class)
public void testWhenExtraColumnsStrict() throws Exception {
    String inputPath = "src/test/resources/input/with-extra-columns.txt";
    String outputPath = "src/test/resources/input/sink-with-headers";

    CSVFormat readFormat = CSVFormat.newFormat('\t')
            .withHeader("id", "first name", "last name", "city", "zip").withQuote('"').withEscape('\\')
            .withRecordSeparator('\n');
    CSVFormat writeFormat = CSVFormat.newFormat('\t').withEscape('\\').withRecordSeparator('\n');

    // strict = true: any over-long row fails the whole flow.
    Tap source = new Hfs(new CsvScheme(readFormat, true), inputPath);
    Tap sink = new Hfs(new CsvScheme(writeFormat), outputPath, SinkMode.REPLACE);

    FlowConnector connector = new Hadoop2MR1FlowConnector();
    connector.connect(source, sink, new Pipe("pipe")).complete();
}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Verifies non-strict handling of extra columns in a headerless input:
 * valid rows reach the sink, over-long rows land in the trap.
 */
@Test
public void testWhenExtraColumnsNotStrictNoHeaders() throws Exception {
    String inputPath = "src/test/resources/input/with-extra-columns-no-header.txt";
    String outputPath = "src/test/resources/input/sink-no-headers";
    String trapOutputPath = "src/test/resources/input/trap-no-headers";
    String expectedOutput = "src/test/resources/expected/with-extra-columns-no-strict-no-header.txt";
    String expectedTrapOutput = "src/test/resources/expected/trap-with-extra-columns-no-strict-no-header.txt";

    CSVFormat readFormat = CSVFormat.newFormat('\t').withQuote('"').withEscape('\\')
            .withRecordSeparator('\n');
    CSVFormat writeFormat = CSVFormat.newFormat('\t').withEscape('\\').withRecordSeparator('\n');

    // strict = false: malformed rows are trapped rather than failing the flow.
    Tap source = new Hfs(new CsvScheme(readFormat, false), inputPath);
    Tap sink = new Hfs(new CsvScheme(writeFormat), outputPath, SinkMode.REPLACE);
    Tap trap = new Hfs(new TextDelimited(false, "\t"), trapOutputPath, SinkMode.REPLACE);

    FlowConnector connector = new Hadoop2MR1FlowConnector();
    connector.connect("test-extra-columns-no-header", source, sink, trap, new Pipe("pipe")).complete();

    testPaths(outputPath, expectedOutput);
    testPaths(trapOutputPath, expectedTrapOutput);
}

From source file:com.datascience.cascading.CsvSchemeTest.java

/**
 * Verifies that strict parsing of a headerless input aborts the flow with a
 * FlowException when a row contains extra columns.
 */
@Test(expected = FlowException.class)
public void testWhenExtraColumnsStrictNoHeaders() throws Exception {
    String inputPath = "src/test/resources/input/with-extra-columns-no-header.txt";
    String outputPath = "src/test/resources/input/sink-no-headers";

    CSVFormat readFormat = CSVFormat.newFormat('\t').withQuote('"').withEscape('\\')
            .withRecordSeparator('\n');
    CSVFormat writeFormat = CSVFormat.newFormat('\t').withEscape('\\').withRecordSeparator('\n');

    // strict = true: any over-long row fails the whole flow.
    Tap source = new Hfs(new CsvScheme(readFormat, true), inputPath);
    Tap sink = new Hfs(new CsvScheme(writeFormat), outputPath, SinkMode.REPLACE);

    FlowConnector connector = new Hadoop2MR1FlowConnector();
    connector.connect(source, sink, new Pipe("pipe")).complete();
}

From source file:org.apache.nifi.csv.CSVUtils.java

/**
 * Assembles a fully custom {@link CSVFormat} from the component's property
 * context: delimiter, header handling, quote/escape characters, trimming,
 * and the optional comment-marker, null-string, quote-mode,
 * trailing-delimiter, and record-separator settings.
 *
 * @param context the property context supplying the configured values
 * @return the configured CSV format
 */
private static CSVFormat buildCustomFormat(final PropertyContext context) {
    final char delimiter = getUnescapedChar(context, VALUE_SEPARATOR);
    CSVFormat csvFormat = CSVFormat.newFormat(delimiter)
            .withAllowMissingColumnNames()
            .withIgnoreEmptyLines();

    // Treat the first record as the header only when the property is set truthy.
    final PropertyValue firstLineIsHeader = context.getProperty(FIRST_LINE_IS_HEADER);
    if (firstLineIsHeader.getValue() != null && firstLineIsHeader.asBoolean()) {
        csvFormat = csvFormat.withFirstRecordAsHeader();
    }

    csvFormat = csvFormat.withQuote(getChar(context, QUOTE_CHAR))
            .withEscape(getChar(context, ESCAPE_CHAR))
            .withTrim(context.getProperty(TRIM_FIELDS).asBoolean());

    if (context.getProperty(COMMENT_MARKER).isSet()) {
        csvFormat = csvFormat.withCommentMarker(getChar(context, COMMENT_MARKER));
    }
    if (context.getProperty(NULL_STRING).isSet()) {
        csvFormat = csvFormat.withNullString(CSVUtils.unescape(context.getProperty(NULL_STRING).getValue()));
    }

    final PropertyValue quoteModeValue = context.getProperty(QUOTE_MODE);
    if (quoteModeValue != null) {
        csvFormat = csvFormat.withQuoteMode(QuoteMode.valueOf(quoteModeValue.getValue()));
    }

    final PropertyValue trailingDelimiterValue = context.getProperty(TRAILING_DELIMITER);
    if (trailingDelimiterValue != null) {
        csvFormat = csvFormat.withTrailingDelimiter(trailingDelimiterValue.asBoolean());
    }

    final PropertyValue recordSeparatorValue = context.getProperty(RECORD_SEPARATOR);
    if (recordSeparatorValue != null) {
        csvFormat = csvFormat.withRecordSeparator(unescape(recordSeparatorValue.getValue()));
    }

    return csvFormat;
}

From source file:org.apache.ranger.unixusersync.process.FileSourceUserGroupBuilder.java

/**
 * Reads a delimiter-separated user/group file into a map of user name to
 * group names. Column 0 of each record is the user; every remaining
 * non-empty column is a group. A single leading and/or trailing double
 * quote is stripped from each value.
 *
 * The delimiter's first character comes from
 * {@code config.getUserSyncFileSourceDelimiter()}.
 *
 * @param textFile the delimited file to read
 * @return map of user name to its (possibly empty) list of group names
 * @throws Exception if the file cannot be read or parsed
 */
public Map<String, List<String>> readTextFile(File textFile) throws Exception {

    Map<String, List<String>> ret = new HashMap<String, List<String>>();

    String delimiter = config.getUserSyncFileSourceDelimiter();

    CSVFormat csvFormat = CSVFormat.newFormat(delimiter.charAt(0));

    // try-with-resources: the original closed the parser only on the success
    // path, leaking the file handle whenever getRecords() threw.
    try (CSVParser csvParser = new CSVParser(new BufferedReader(new FileReader(textFile)), csvFormat)) {
        List<CSVRecord> csvRecordList = csvParser.getRecords();

        if (csvRecordList != null) {
            for (CSVRecord csvRecord : csvRecordList) {
                List<String> groups = new ArrayList<String>();

                // Strip surrounding double quotes from the user name.
                String user = csvRecord.get(0).replaceAll("^\"|\"$", "");

                for (int j = 1; j < csvRecord.size(); j++) {
                    String group = csvRecord.get(j);
                    if (group != null && !group.isEmpty()) {
                        // Strip surrounding double quotes; skip empty cells.
                        groups.add(group.replaceAll("^\"|\"$", ""));
                    }
                }
                ret.put(user, groups);
            }
        }
    }

    return ret;
}