Example usage for org.apache.commons.csv CSVFormat RFC4180

List of usage examples for org.apache.commons.csv CSVFormat RFC4180

Introduction

On this page you can find example usages of org.apache.commons.csv CSVFormat RFC4180.

Prototype

CSVFormat RFC4180

To view the source code for org.apache.commons.csv CSVFormat RFC4180, click the Source Link.

Document

Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.

Usage

From source file:de.upb.wdqa.wdvd.revisiontags.TagDownloader.java

/**
 * Reads the bzip2-compressed, UTF-8 encoded CSV file of the TagDownloader and
 * passes every record to {@code parseRecord}.
 *
 * <p>Progress is logged every 1,000,000 records and the tag distribution is
 * logged once the whole file has been read. Any exception raised while reading
 * is logged and not rethrown.
 *
 * @param file the compressed CSV file to read
 */
public static void readFile(File file) {
    try {
        logger.info("Starting to read file of TagDownloader ...");

        // try-with-resources closes the parser, the reader and the file stream
        // (in that order) even when parsing or the data store fails. The file
        // stream is declared separately so that it is still closed if one of
        // the wrapping constructors throws.
        try (FileInputStream fileStream = new FileInputStream(file);
                BufferedReader reader = new BufferedReader(new InputStreamReader(
                        new BZip2CompressorInputStream(new BufferedInputStream(fileStream)), "UTF-8"));
                CSVParser parser = new CSVParser(reader, CSVFormat.RFC4180)) {

            dataStore.connect();
            try {
                for (CSVRecord csvRecord : parser) {
                    parseRecord(csvRecord);
                    if (csvRecord.getRecordNumber() % 1000000 == 0) {
                        logger.info("Current Record: " + csvRecord.getRecordNumber());
                    }
                }
            } finally {
                // Always release the data store once it has been connected.
                dataStore.disconnect();
            }
        }

        logger.info("Tag Distribution:\n" + FrequencyUtils.formatFrequency(tagDistribution));
        logger.info("Finished");
    } catch (Exception e) {
        logger.error("", e);
    }
}

From source file:com.ibm.g11n.pipeline.example.MultiBundleCSVFilter.java

/**
 * Parses UTF-8 CSV content with the columns {@code module}, {@code key} and
 * {@code value} into one language bundle per module.
 *
 * <p>The first record of the input is skipped as the header row. The input
 * stream is not closed by this method.
 *
 * @param inStream the CSV content to parse
 * @param options  filter options; not used by this implementation
 * @return the language bundles, keyed and sorted by module name
 */
@Override
public Map<String, LanguageBundle> parse(InputStream inStream, FilterOptions options)
        throws IOException, ResourceFilterException {
    CSVFormat format = CSVFormat.RFC4180.withHeader("module", "key", "value").withSkipHeaderRecord(true);
    CSVParser csvParser = CSVParser.parse(inStream, StandardCharsets.UTF_8, format);

    // Collect the resource strings of each module in its own builder.
    Map<String, LanguageBundleBuilder> buildersByModule = new HashMap<String, LanguageBundleBuilder>();
    for (CSVRecord row : csvParser) {
        String moduleName = row.get(0);
        String resourceKey = row.get(1);
        String resourceValue = row.get(2);

        LanguageBundleBuilder moduleBuilder = buildersByModule.get(moduleName);
        if (moduleBuilder == null) {
            moduleBuilder = new LanguageBundleBuilder(true);
            buildersByModule.put(moduleName, moduleBuilder);
        }
        moduleBuilder.addResourceString(resourceKey, resourceValue);
    }

    // Build the final bundles; the TreeMap orders them by module name.
    Map<String, LanguageBundle> bundlesByModule = new TreeMap<String, LanguageBundle>();
    for (Entry<String, LanguageBundleBuilder> entry : buildersByModule.entrySet()) {
        bundlesByModule.put(entry.getKey(), entry.getValue().build());
    }
    return bundlesByModule;
}

From source file:com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaSnifferTest.java

/**
 * Sniffing a parser over empty input must not produce a schema.
 */
@Test
public void sniffNoLine() throws Exception {
    CSVParser emptyInputParser = CSVFormat.RFC4180.parse(new StringReader(""));

    PowerMock.replayAll();
    CSDSchema<CSDFieldPattern> sniffedSchema = this.sniffer.sniff(this.sp, emptyInputParser, 10);
    Assert.assertNull(sniffedSchema);
    PowerMock.verifyAll();
}

From source file:com.github.jferard.pgloaderutils.loader.CSVCleanerFileReader.java

/**
 * Creates a reader over the given parser and record cleaner.
 *
 * <p>An RFC 4180 printer is attached to the write end of a pipe; the read end
 * of that pipe is kept in {@code modifiedStreamReader}.
 *
 * @param parser        the parser stored in this reader
 * @param recordCleaner the cleaner stored in this reader
 * @throws IOException if the pipe or the printer cannot be created
 */
public CSVCleanerFileReader(CSVParser parser, CSVRecordCleaner recordCleaner) throws IOException {
    this.parser = parser;
    this.recordCleaner = recordCleaner;

    // Whatever the printer writes becomes readable from modifiedStreamReader.
    PipedWriter pipeSink = new PipedWriter();
    this.modifiedStreamReader = new PipedReader(pipeSink, BUFFER_SIZE);
    this.printer = new CSVPrinter(pipeSink, CSVFormat.RFC4180);

    this.logger = Logger.getLogger("Cleaner");
}

From source file:api.startup.PDFIndexer.java

/**
 * Indexes all the documents listed in the CSV file.
 *
 * <p>The first record of the file is used as the header. One indexing task is
 * created per remaining record; the tasks are run on a fixed thread pool sized
 * to the number of available processors, and this method blocks until they
 * have all completed. The pool is shut down before the method returns.
 *
 * @param writer - index writer
 * @param indexConfiguration - the configuration for all the indexable documents
 * @throws IOException if the configuration file cannot be opened or read
 */
static void indexDocs(final IndexWriter writer, String indexConfiguration) throws IOException {
    int threadPoolSize = Runtime.getRuntime().availableProcessors();
    log.info("Indexing with " + threadPoolSize + " processors");

    List<Callable<Object>> tasks = new ArrayList<>();
    // Every record is turned into a DocumentMetadata before any task runs, so the
    // file and parser can be closed as soon as the loop ends.
    try (Reader in = new FileReader(indexConfiguration);
            CSVParser parser = CSVFormat.RFC4180.withHeader().parse(in)) {
        for (CSVRecord record : parser) {
            DocumentMetadata meta = new DocumentMetadata(record);
            tasks.add(() -> {
                indexDoc(writer, meta);
                return null;
            });
        }
    }

    // The pool is created only after the file was read successfully, so a parse
    // failure cannot leave idle worker threads behind.
    ExecutorService pool = Executors.newFixedThreadPool(threadPoolSize);
    try {
        // NOTE(review): exceptions thrown by individual tasks are captured in the
        // returned futures and are not inspected here.
        pool.invokeAll(tasks);
    } catch (InterruptedException e) {
        log.error("Indexing was interrupted " + e.getMessage(), e);
        // Restore the interrupt flag so callers can react to the interruption.
        Thread.currentThread().interrupt();
    } finally {
        // Without shutdown() the pool's non-daemon threads would outlive this call.
        pool.shutdown();
    }
}

From source file:net.javacrumbs.ccspring.common.CsvFileLogger.java

/**
 * Appends the message to the log file as one RFC 4180 formatted line made of
 * its severity, text and time. The file is created if it does not exist.
 *
 * @param message the message to log
 * @throws IllegalStateException if the file cannot be written
 */
@Override
public void addMessage(Message message) {
    try {
        final Object severity = message.getSeverity();
        final Object text = message.getMessage();
        final Object time = message.getTime();

        final String csvLine = CSVFormat.RFC4180.format(severity, text, time);
        Files.write(file.toPath(), singletonList(csvLine), APPEND, CREATE);
    } catch (IOException writeFailure) {
        throw new IllegalStateException(writeFailure);
    }
}

From source file:com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaValidatorTest.java

/**
 * Runs the validator on a three-line CSV input ("a,b,c", "1,2,3", "4,5,6")
 * with a mocked validation helper and expects the result to report an error
 * count of 0.
 *
 * <p>Recorded expectations: {@code validateHeader} returns 10,
 * {@code validateRecord} returns 100 when its last argument is 1 and 1000 when
 * it is 2, and the schema reports no optional header.
 */
@Test
public void sniffBadHeader1() throws Exception {
    CSVParser p = CSVFormat.RFC4180.parse(new StringReader("a,b,c\n1,2,3\n4,5,6"));

    // Record the expected calls on the mocks; the matchers must stay inline in
    // the mocked call so that they are registered in argument order.
    EasyMock.expect(this.vh.validateHeader(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s),
            EasyMock.isA(CSVRecord.class))).andReturn(10);
    EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s),
            EasyMock.isA(CSVRecord.class), EasyMock.eq(1))).andReturn(100);
    EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s),
            EasyMock.isA(CSVRecord.class), EasyMock.eq(2))).andReturn(1000);
    EasyMock.expect(this.s.hasOptionalHeader()).andReturn(false);

    // Switch the mocks from recording to replay before exercising the validator.
    PowerMock.replayAll();
    CSDValidationResult<CSDFieldPattern> r = this.validator.validate(this.s, p);
    Assert.assertEquals(0, r.errorCount());
    PowerMock.verifyAll();
}

From source file:de.upb.wdqa.wdvd.labels.CorpusLabelReader.java

/**
 *   Initializes the label reader./*  w  ww  .  j a  v a  2  s  . c o  m*/
 */
public void startReading() {
    try {
        BufferedReader csvReader = new BufferedReader(new InputStreamReader(labelsStream, "UTF-8"),
                BUFFER_SIZE);

        csvParser = new CSVParser(csvReader, CSVFormat.RFC4180.withHeader(FILE_HEADER));
        iterator = csvParser.iterator();

        CSVRecord headerRecord = iterator.next();

        for (int i = 0; i < FILE_HEADER.length; i++) {
            if (!FILE_HEADER[i].equals(headerRecord.get(i))) {
                throw new IOException("The header of the CSV file is wrong.");
            }
        }
    } catch (IOException e) {
        logger.error("", e);
        finishReading();
    }
}

From source file:com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaSnifferTest.java

/**
 * Runs the sniffer on a three-line CSV input ("a,b,c", "1,2,3", "4,5,6") with
 * a mocked validation helper whose {@code validateHeader} returns 10 and a
 * schema pattern without an optional header, and expects no schema (null) to
 * be returned.
 */
@Test
public void sniffBadHeader1() throws Exception {
    CSVParser p = CSVFormat.RFC4180.parse(new StringReader("a,b,c\n1,2,3\n4,5,6"));

    // Record the expected calls on the mocks; the matchers must stay inline in
    // the mocked call so that they are registered in argument order.
    EasyMock.expect(this.vh.validateHeader(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.sp),
            EasyMock.isA(CSVRecord.class))).andReturn(10);
    EasyMock.expect(this.sp.hasOptionalHeader()).andReturn(false);

    // Switch the mocks from recording to replay before exercising the sniffer.
    PowerMock.replayAll();
    CSDSchema<CSDFieldPattern> s = this.sniffer.sniff(this.sp, p, 10);
    Assert.assertEquals(null, s);
    PowerMock.verifyAll();
}

From source file:com.ibm.g11n.pipeline.example.CSVFilter.java

/**
 * Writes the resource strings of the bundle, in their sorted order, as UTF-8
 * RFC 4180 CSV with the header columns {@code key} and {@code value}.
 *
 * <p>The output is flushed at the end; the stream is not closed by this method.
 *
 * @param outStream      the stream receiving the CSV content
 * @param languageBundle the bundle whose resource strings are written
 * @param options        filter options; not used by this implementation
 */
@Override
public void write(OutputStream outStream, LanguageBundle languageBundle, FilterOptions options)
        throws IOException, ResourceFilterException {
    CSVFormat keyValueFormat = CSVFormat.RFC4180.withHeader("key", "value");
    CSVPrinter csvOut = keyValueFormat.print(
            new BufferedWriter(new OutputStreamWriter(outStream, StandardCharsets.UTF_8)));

    for (ResourceString entry : languageBundle.getSortedResourceStrings()) {
        csvOut.printRecord(entry.getKey(), entry.getValue());
    }
    csvOut.flush();
}