Example usage for org.apache.commons.csv CSVFormat RFC4180

List of usage examples for org.apache.commons.csv CSVFormat RFC4180

Introduction

In this page you can find the example usage for org.apache.commons.csv CSVFormat RFC4180.

Prototype

CSVFormat RFC4180

To view the source code for org.apache.commons.csv CSVFormat RFC4180, click the Source Link below.

Click Source Link

Document

Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.

Usage

From source file:com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaValidatorTest.java

@Test
public void sniffBadHeader2() throws Exception {
    final CSVParser parser = CSVFormat.RFC4180.parse(new StringReader("a,b,c\n1,2,3\n4,5,6"));

    // The header is rejected (-1), but both data rows validate cleanly (0).
    EasyMock.expect(this.vh.validateHeader(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s),
            EasyMock.isA(CSVRecord.class))).andReturn(-1);
    for (int row = 1; row <= 2; row++) {
        EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s),
                EasyMock.isA(CSVRecord.class), EasyMock.eq(row))).andReturn(0);
    }

    PowerMock.replayAll();
    final CSDValidationResult<CSDFieldPattern> result = this.validator.validate(this.s, parser);
    // A bad header alone must not produce validation errors.
    Assert.assertEquals(0, result.errorCount());
    PowerMock.verifyAll();
}

From source file:com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaSnifferTest.java

@Test
public void sniffGoodHeader() throws Exception {
    final CSVParser parser = CSVFormat.RFC4180.parse(new StringReader("a,b,c\n1,2,3\n4,5,6"));

    // Header and both data rows validate cleanly (0).
    EasyMock.expect(this.vh.validateHeader(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.sp),
            EasyMock.isA(CSVRecord.class))).andReturn(0);
    for (int row = 1; row <= 2; row++) {
        EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.sp),
                EasyMock.isA(CSVRecord.class), EasyMock.eq(row))).andReturn(0);
    }
    // A valid header triggers schema creation from the header record.
    EasyMock.expect(this.sp.newSchema(EasyMock.eq(this.fy), EasyMock.isA(CSVRecord.class))).andReturn(this.s);

    PowerMock.replayAll();
    final CSDSchema<CSDFieldPattern> sniffed = this.sniffer.sniff(this.sp, parser, 10);
    Assert.assertEquals(s, sniffed);
    PowerMock.verifyAll();
}

From source file:com.xceptance.xlt.common.tests.AbstractURLTestCase.java

/**
 * Loading of the data. There is a state variable used to indicate that we already did that.
 *
 * @throws IOException
 *             if the CSV data file cannot be opened or read
 */
@Before
public void loadData() throws IOException {
    login = getProperty("login", getProperty("com.xceptance.xlt.auth.userName"));
    password = getProperty("password", getProperty("com.xceptance.xlt.auth.password"));

    // load the data. Ideally we would offload the file searching to
    // XltProperties.getDataFile(String name)
    // or XltProperties.getDataFile(String name, String locale)
    // or XltProperties.getDataFile(String name, Locale locale)
    final String dataDirectory = XltProperties.getInstance().getProperty(
            XltConstants.XLT_PACKAGE_PATH + ".data.directory", "config" + File.separatorChar + "data");
    final File file = new File(dataDirectory,
            getProperty("filename", Session.getCurrent().getUserName() + ".csv"));

    boolean incorrectLines = false;

    // try-with-resources replaces the former manual finally/closeQuietly cleanup
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
        // permit # as comment, empty lines, set comma as separator, and activate the header
        final CSVFormat csvFormat = CSVFormat.RFC4180.toBuilder().withIgnoreEmptyLines(true)
                .withCommentStart('#').withHeader().withIgnoreSurroundingSpaces(true).build();
        final CSVParser parser = new CSVParser(br, csvFormat);
        final Iterator<CSVRecord> csvRecords = parser.iterator();

        // verify header fields to avoid problems with incorrect spelling or spaces
        final Map<String, Integer> headerMap = parser.getHeaderMap();

        for (final String headerField : headerMap.keySet()) {
            if (!CSVBasedURLAction.isPermittedHeaderField(headerField)) {
                Assert.fail(MessageFormat.format("Unsupported or misspelled header field: {0}", headerField));
            }
        }

        // go over all lines, this is a little odd, because we have to catch the iterator exception
        while (true) {
            try {
                final boolean hasNext = csvRecords.hasNext();
                if (!hasNext) {
                    break;
                }
            } catch (final Exception e) {
                // the plus 1 is meant to correct the increment missing because of the exception;
                // keep the original exception as the cause so the stack trace is not lost
                throw new RuntimeException(
                        MessageFormat.format("Line at {0} is invalid, because of <{1}>. Line is ignored.",
                                parser.getLineNumber() + 1, e.getMessage()),
                        e);
            }

            final CSVRecord csvRecord = csvRecords.next();

            // only take ok lines
            if (csvRecord.isConsistent()) {
                // guard against data exceptions
                try {
                    // do we have an url?
                    if (csvRecord.get(CSVBasedURLAction.URL) != null) {
                        // take it
                        csvBasedActions.add(new CSVBasedURLAction(csvRecord, interpreter));
                    } else {
                        XltLogger.runTimeLogger.error(MessageFormat.format(
                                "Line at {0} does not contain any URL. Line is ignored: {1}",
                                parser.getLineNumber(), csvRecord));
                    }
                } catch (final Exception e) {
                    // preserve the cause instead of flattening it to a message string
                    throw new RuntimeException(MessageFormat.format(
                            "Line at {0} is invalid, because of <{2}>. Line is ignored: {1}",
                            parser.getLineNumber(), csvRecord, e.getMessage()), e);
                }
            } else {
                XltLogger.runTimeLogger.error(MessageFormat.format(
                        "Line at {0} has not been correctly formatted. Line is ignored: {1}",
                        parser.getLineNumber(), csvRecord));
                incorrectLines = true;
            }
        }
    }

    // stop if we have anything that is incorrect, avoid half running test cases
    if (incorrectLines) {
        throw new RuntimeException("Found incorrectly formatted lines. Stopping here.");
    }
}

From source file:com.mahisoft.elasticsearchprediction.engine.ElasticsearchGenericIndexEngine.java

/**
 * Reads the CSV data file and indexes its records: the first record supplies the
 * header names, the index is created when the first data record (record 2) is seen,
 * and every data record is then added to the index.
 *
 * @param dataFile        CSV file (RFC 4180) to load
 * @param client          Elasticsearch client used for indexing
 * @param indexName       name of the index to create/populate
 * @param mappingFilename path of the mapping file to write
 * @throws IOException if the data file cannot be parsed or the mapping file cannot be written
 */
private void loadData(File dataFile, Client client, String indexName, String mappingFilename)
        throws IOException {
    List<String> headers = new ArrayList<String>();

    // try-with-resources closes both resources even when parsing/indexing fails,
    // replacing the manual null-checked finally block
    try (PrintWriter mappingFileWriter = new PrintWriter(mappingFilename, Constants.UTF8);
            CSVParser parser = CSVParser.parse(dataFile, Charset.forName(Constants.UTF8), CSVFormat.RFC4180)) {

        for (CSVRecord csvRecord : parser) {
            // record 1 is the header row; it carries no data
            if (csvRecord.getRecordNumber() == 1) {
                addHeaders(csvRecord, headers);
                continue;
            }

            // create the index lazily, once the first data record arrives
            if (csvRecord.getRecordNumber() == 2) {
                createIndex(client, indexName, mappingFileWriter, headers, csvRecord);
            }
            addValue(client, indexName, headers, csvRecord);
        }
    }

    LOGGER.info("Done!");
}

From source file:com.google.cloud.genomics.dockerflow.args.ArgsTableBuilder.java

/**
 * Load the workflow arguments from a CSV file. The header of the CSV contains the input or output
 * parameter names. Each row contains the workflow args for a single run. To run 100 instances of
 * a workflow concurrently, create a CSV with a header row plus 100 rows for each set of
 * parameters./*from w w w. j a  v a 2  s .  co  m*/
 *
 * <p>Columns by default are input parameters, passed as environment variables to the Docker
 * script. For file parameters, you can prefix the column header with "<" for input or ">" for
 * output. For clarity, you can also prefix the regular input parameters as "<", if you like.
 *
 * <p>The column header can also be "logging", which is a reserved name for the logging path.
 *
 * @param csvFile CSV file (RFC4180) that's local or in GCS
 * @return a map with the key being the clientId
 * @throws IOException
 */
static Map<String, WorkflowArgs> loadCsv(String csvFile) throws IOException {
    Map<String, WorkflowArgs> retval = new HashMap<String, WorkflowArgs>();

    String csv = FileUtils.readAll(csvFile);
    CSVParser parser = CSVParser.parse(csv, CSVFormat.RFC4180);

    // Parse header
    List<String> header = null;

    int row = 0;

    // Parse by row
    for (CSVRecord csvRecord : parser) {
        ArgsBuilder args = ArgsBuilder.of(String.valueOf(row));

        LOG.debug(StringUtils.toJson(csvRecord));

        // Parse header the first time
        if (row == 0) {
            header = new ArrayList<String>();
            for (String col : csvRecord) {
                header.add(col);
            }
        } else {
            // Set parameter defined in each column
            for (int col = 0; col < header.size(); ++col) {
                String name = header.get(col);
                String val = csvRecord.get(col);

                if (name.startsWith(PREFIX_INPUT)) {
                    name = name.replace(PREFIX_INPUT, "");
                    args.input(name, val);
                } else if (name.startsWith(PREFIX_OUTPUT)) {
                    name = name.replace(PREFIX_OUTPUT, "");
                    args.output(name, val);
                } else if (LOGGING.equals(name)) {
                    args.logging(val);
                } else {
                    args.input(name, val);
                }
            }
            WorkflowArgs a = args.build();
            a.setRunIndex(row);
            retval.put(a.getClientId(), a);
        }
        ++row;
    }
    return retval;
}

From source file:com.github.jferard.pgloaderutils.sniffer.csv.HeaderRowAnalyzer.java

/**
 * Infers a CSVFormat by aligning the expected header field names against the first
 * line actually read from the file. The characters found between consecutive expected
 * fields feed the delimiter/escape/quote counters, and the most frequent candidate of
 * each wins (falling back to the current value when no candidate was seen).
 *
 * @param expectedHeaderStart the first expected header field names (at least two)
 * @param firstReadLine       the raw first line of the CSV source
 * @return an RFC 4180-based format with the detected delimiter, escape and quote
 * @throws IOException if an expected field's first letter cannot be located in the line
 */
public CSVFormat analyze(List<String> expectedHeaderStart, String firstReadLine) throws IOException {
    // need at least two fields, otherwise there is no delimiter to detect
    if (expectedHeaderStart.size() < 2)
        throw new IllegalArgumentException();

    List<String> expectedFields = new ArrayList<String>(expectedHeaderStart.size());

    // normalize both sides so the character-by-character matching below is reliable
    for (String field : expectedHeaderStart)
        expectedFields.add(StringUtils.normalize(field));

    String line = StringUtils.normalize(firstReadLine);
    int curFieldStartIndex = 0;

    Iterator<String> iterator = expectedFields.iterator();
    assert iterator.hasNext();

    // locate the first expected field by its first character
    String curExpectedField = iterator.next();
    char firstCharOfCurExpectedField = curExpectedField.charAt(0);
    int curFieldFirstLetterIndex = line.indexOf(firstCharOfCurExpectedField, curFieldStartIndex);

    if (curFieldFirstLetterIndex == -1)
        throw new IOException("Can't find first letter:" + curExpectedField + " (" + line + ")");

    // walk the line field by field; the span between a field's end and the next
    // field's start is the delimiter block examined by the helpers
    while (iterator.hasNext()) {
        // get the index of the first char after cur field, ie the first
        // char of the delimiter block
        int curFieldDelimiterBlockIndex = this.getFieldDelimiterIndex(curExpectedField, line,
                curFieldStartIndex, curFieldFirstLetterIndex);

        String nextExpectedField = iterator.next();
        char firstCharOfNextExpectedField = nextExpectedField.charAt(0);

        // get the index of the first char of the next field
        int nextFieldFirstLetterIndex = line.indexOf(firstCharOfNextExpectedField, curFieldDelimiterBlockIndex);
        if (nextFieldFirstLetterIndex == -1)
            throw new IOException("Can't find first letter:" + nextExpectedField + " (" + line + ")");
        // get nextIndex
        // NOTE(review): presumably this helper also feeds the delimiter/quote/escape
        // counters used below — confirm in advanceCurFieldStartIndex
        curFieldStartIndex = this.advanceCurFieldStartIndex(curExpectedField, line, curFieldDelimiterBlockIndex,
                nextFieldFirstLetterIndex);

        curExpectedField = nextExpectedField;
        curFieldFirstLetterIndex = nextFieldFirstLetterIndex;
    }

    // majority vote: keep the current value when no candidate was counted
    this.delimiter = this.delimiterCounter.maxElementOr(this.delimiter);
    this.escape = this.escapeCounter.maxElementOr(this.escape);
    this.quote = this.quoteCounter.maxElementOr(this.quote);

    return CSVFormat.RFC4180.withDelimiter(this.delimiter).withEscape(this.escape).withQuote(this.quote);

}

From source file:com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaValidatorTest.java

@Test
public void sniffGoodHeader() throws Exception {
    final CSVParser parser = CSVFormat.RFC4180.parse(new StringReader("a,b,c\n1,2,3\n4,5,6"));

    // Header and both data rows validate cleanly (0).
    EasyMock.expect(this.vh.validateHeader(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s),
            EasyMock.isA(CSVRecord.class))).andReturn(0);
    for (int row = 1; row <= 2; row++) {
        EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s),
                EasyMock.isA(CSVRecord.class), EasyMock.eq(row))).andReturn(0);
    }

    PowerMock.replayAll();
    final CSDValidationResult<CSDFieldPattern> result = this.validator.validate(this.s, parser);
    // A fully valid input yields zero errors and an OK result.
    Assert.assertEquals(0, result.errorCount());
    Assert.assertTrue(result.isOk());
    PowerMock.verifyAll();
}

From source file:com.ibm.g11n.pipeline.example.MultiBundleCSVFilter.java

@Override
public void merge(InputStream baseStream, OutputStream outStream, Map<String, LanguageBundle> languageBundles,
        FilterOptions options) throws IOException, ResourceFilterException {
    // Build one key->value lookup per bundle so translated values can be found by module.
    Map<String, Map<String, String>> kvMaps = new HashMap<String, Map<String, String>>();
    for (Entry<String, LanguageBundle> bundleEntry : languageBundles.entrySet()) {
        Map<String, String> translations = new HashMap<String, String>();
        for (ResourceString resString : bundleEntry.getValue().getResourceStrings()) {
            translations.put(resString.getKey(), resString.getValue());
        }
        kvMaps.put(bundleEntry.getKey(), translations);
    }

    // Read the base CSV (skipping its header) and emit a merged CSV with the same columns.
    CSVParser csvParser = CSVParser.parse(baseStream, StandardCharsets.UTF_8,
            CSVFormat.RFC4180.withHeader("module", "key", "value").withSkipHeaderRecord(true));
    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(outStream, StandardCharsets.UTF_8));
    CSVPrinter csvPrinter = CSVFormat.RFC4180.withHeader("module", "key", "value").print(out);
    for (CSVRecord rec : csvParser) {
        String module = rec.get(0);
        String key = rec.get(1);
        String value = rec.get(2);
        // Prefer the translated value when this module/key pair has one.
        Map<String, String> moduleKVMap = kvMaps.get(module);
        String trValue = (moduleKVMap == null) ? null : moduleKVMap.get(key);
        if (trValue != null) {
            value = trValue;
        }
        csvPrinter.printRecord(module, key, value);
    }
    csvPrinter.flush();
}

From source file:de.upb.wdqa.wdvd.processors.statistics.ActionStatisticsProcessor.java

/**
 * Logs the collected action frequency distributions and writes the monthly
 * distribution to a CSV file at {@code path} (columns: month, action, count).
 */
private void logResults() {
    logger.info("Action frequency distribution:\n" + FrequencyUtils.formatFrequency(actionDistribution));
    logger.info("Action frequency distribution of rollback-reverted revisions:\n"
            + FrequencyUtils.formatFrequency(rollbackRevertedActionDistribution));
    logger.info("Action frequency distribution of non-rollback-reverted revisions:\n"
            + FrequencyUtils.formatFrequency(nonRollbackRevertedActionDistribution));

    // try-with-resources: previously the writer leaked if any printRecord threw,
    // because close() was only reached on the happy path
    try (Writer writer = new PrintWriter(path, "UTF-8");
            CSVPrinter csvWriter = CSVFormat.RFC4180.withQuoteMode(QuoteMode.ALL)
                    .withHeader("month", "action", "count").print(writer)) {

        for (Entry<String, HashMap<String, Integer>> entry : getSortedList(monthlyActionDistribution)) {
            String month = entry.getKey();

            for (Entry<String, Integer> entry2 : getSortedList2(entry.getValue())) {
                String action = entry2.getKey();
                Integer value = entry2.getValue();

                csvWriter.printRecord(month, action, value);
            }
        }
    } catch (IOException e) {
        logger.error("", e);
    }
}

From source file:com.linkedin.pinot.core.data.readers.CSVRecordReader.java

/**
 * Maps the configured CSV file format name (case-insensitive) to a CSVFormat,
 * falling back to {@link CSVFormat#DEFAULT} when unset or unrecognized.
 */
private CSVFormat getFormatFromConfig() {
    final String format = (_config != null) ? _config.getCsvFileFormat() : null;

    // no config or no explicit format -> library default
    if (format == null) {
        return CSVFormat.DEFAULT;
    }

    switch (format.toUpperCase()) {
    case "EXCEL":
        return CSVFormat.EXCEL;
    case "MYSQL":
        return CSVFormat.MYSQL;
    case "RFC4180":
        return CSVFormat.RFC4180;
    case "TDF":
        return CSVFormat.TDF;
    case "DEFAULT":
    default:
        // unknown names silently fall back to the default format
        return CSVFormat.DEFAULT;
    }
}