List of usage examples for org.apache.commons.csv CSVFormat RFC4180
CSVFormat RFC4180
To view the source code for org.apache.commons.csv.CSVFormat.RFC4180, click the Source Link.
From source file:de.upb.wdqa.wdvd.revisiontags.TagDownloader.java
/** * Reads the csv file of the TagDownloader *///from w ww. j a va 2s .c o m public static void readFile(File file) { try { logger.info("Starting to read file of TagDownloader ..."); BufferedReader reader = new BufferedReader(new InputStreamReader( new BZip2CompressorInputStream(new BufferedInputStream(new FileInputStream(file))), "UTF-8")); CSVParser parser = new CSVParser(reader, CSVFormat.RFC4180); dataStore.connect(); for (CSVRecord csvRecord : parser) { parseRecord(csvRecord); if (csvRecord.getRecordNumber() % 1000000 == 0) { logger.info("Current Record: " + csvRecord.getRecordNumber()); } } dataStore.disconnect(); parser.close(); logger.info("Tag Distribution:\n" + FrequencyUtils.formatFrequency(tagDistribution)); logger.info("Finished"); } catch (Exception e) { logger.error("", e); } }
From source file:com.ibm.g11n.pipeline.example.MultiBundleCSVFilter.java
@Override public Map<String, LanguageBundle> parse(InputStream inStream, FilterOptions options) throws IOException, ResourceFilterException { Map<String, LanguageBundleBuilder> builders = new HashMap<String, LanguageBundleBuilder>(); CSVParser parser = CSVParser.parse(inStream, StandardCharsets.UTF_8, CSVFormat.RFC4180.withHeader("module", "key", "value").withSkipHeaderRecord(true)); for (CSVRecord record : parser) { String bundle = record.get(0); String key = record.get(1); String value = record.get(2); LanguageBundleBuilder bundleBuilder = builders.get(bundle); if (bundleBuilder == null) { bundleBuilder = new LanguageBundleBuilder(true); builders.put(bundle, bundleBuilder); }//from w w w . ja v a 2 s . c o m bundleBuilder.addResourceString(key, value); } Map<String, LanguageBundle> result = new TreeMap<String, LanguageBundle>(); for (Entry<String, LanguageBundleBuilder> bundleEntry : builders.entrySet()) { String bundleName = bundleEntry.getKey(); LanguageBundle bundleData = bundleEntry.getValue().build(); result.put(bundleName, bundleData); } return result; }
From source file:com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaSnifferTest.java
/** Sniffing a parser over empty input is expected to yield no schema ({@code null}). */
@Test
public void sniffNoLine() throws Exception {
    final StringReader emptyInput = new StringReader("");
    final CSVParser emptyParser = CSVFormat.RFC4180.parse(emptyInput);
    PowerMock.replayAll();

    final CSDSchema<CSDFieldPattern> sniffed = this.sniffer.sniff(this.sp, emptyParser, 10);

    Assert.assertEquals(null, sniffed);
    PowerMock.verifyAll();
}
From source file:com.github.jferard.pgloaderutils.loader.CSVCleanerFileReader.java
/**
 * Creates a reader that is fed through an in-memory pipe.
 *
 * <p>A {@code PipedWriter} is connected to a {@code PipedReader} with a pipe
 * size of {@code BUFFER_SIZE}; an RFC 4180 {@code CSVPrinter} writes into the
 * writer end, and the reader end is kept as {@code modifiedStreamReader}.
 *
 * @param parser        the CSV parser stored for later use
 * @param recordCleaner the record cleaner stored for later use
 * @throws IOException if the pipe or the printer cannot be created
 */
public CSVCleanerFileReader(CSVParser parser, CSVRecordCleaner recordCleaner) throws IOException {
    this.parser = parser;
    this.recordCleaner = recordCleaner;

    // Both ends of the pipe: the printer produces, modifiedStreamReader consumes.
    final PipedWriter pipeInput = new PipedWriter();
    this.modifiedStreamReader = new PipedReader(pipeInput, BUFFER_SIZE);
    this.printer = new CSVPrinter(pipeInput, CSVFormat.RFC4180);

    this.logger = Logger.getLogger("Cleaner");
}
From source file:api.startup.PDFIndexer.java
/**
 * Indexes all the documents in the CSV file.
 *
 * <p>The file is parsed as RFC 4180 CSV whose first record is the header. One
 * indexing task is created per record, and all tasks are executed on a fixed
 * thread pool sized to the number of available processors. The method blocks
 * until the tasks have completed or the calling thread is interrupted.
 *
 * <p>NOTE(review): failures of individual tasks end up in the futures returned
 * by {@code invokeAll} and are not inspected here.
 *
 * @param writer - index writer
 * @param indexConfiguration - the configuration for all the indexable documents
 *        (path of the CSV file; read with the platform default charset)
 * @throws IOException if the CSV file cannot be opened or read
 */
static void indexDocs(final IndexWriter writer, String indexConfiguration) throws IOException {
    // try-with-resources releases the file handle and the parser on every path.
    try (Reader in = new FileReader(indexConfiguration);
            CSVParser parser = CSVFormat.RFC4180.withHeader().parse(in)) {

        int threadPoolSize = Runtime.getRuntime().availableProcessors();
        log.info("Indexing with " + threadPoolSize + " processors");

        List<Callable<Object>> tasks = new ArrayList<>();
        for (CSVRecord record : parser) {
            DocumentMetadata meta = new DocumentMetadata(record);
            tasks.add(() -> {
                indexDoc(writer, meta);
                return null;
            });
        }

        // The pool is created only once parsing succeeded, and is always shut
        // down afterwards so that its worker threads do not outlive this call.
        ExecutorService pool = Executors.newFixedThreadPool(threadPoolSize);
        try {
            pool.invokeAll(tasks);
        } catch (InterruptedException e) {
            log.error("Indexing was interrupted " + e.getMessage());
            // Restore the interrupt flag so callers can react to the interruption.
            Thread.currentThread().interrupt();
        } finally {
            pool.shutdown();
        }
    }
}
From source file:net.javacrumbs.ccspring.common.CsvFileLogger.java
/**
 * Appends the message to the log file as a single RFC 4180 CSV row.
 *
 * <p>The row consists of the message's severity, text and time, in that
 * order. The file is created if it does not exist yet.
 *
 * @param message the message to record
 * @throws IllegalStateException if writing to the file fails
 */
@Override
public void addMessage(Message message) {
    try {
        final String csvRow = CSVFormat.RFC4180.format(
                message.getSeverity(),
                message.getMessage(),
                message.getTime());
        Files.write(file.toPath(), singletonList(csvRow), APPEND, CREATE);
    } catch (IOException writeFailure) {
        throw new IllegalStateException(writeFailure);
    }
}
From source file:com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaValidatorTest.java
@Test public void sniffBadHeader1() throws Exception { CSVParser p = CSVFormat.RFC4180.parse(new StringReader("a,b,c\n1,2,3\n4,5,6")); EasyMock.expect(this.vh.validateHeader(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s), EasyMock.isA(CSVRecord.class))).andReturn(10); EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s), EasyMock.isA(CSVRecord.class), EasyMock.eq(1))).andReturn(100); EasyMock.expect(this.vh.validateRecord(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.s), EasyMock.isA(CSVRecord.class), EasyMock.eq(2))).andReturn(1000); EasyMock.expect(this.s.hasOptionalHeader()).andReturn(false); PowerMock.replayAll();//from ww w . j a va 2 s .co m CSDValidationResult<CSDFieldPattern> r = this.validator.validate(this.s, p); Assert.assertEquals(0, r.errorCount()); PowerMock.verifyAll(); }
From source file:de.upb.wdqa.wdvd.labels.CorpusLabelReader.java
/** * Initializes the label reader./* w ww . j a v a 2 s . c o m*/ */ public void startReading() { try { BufferedReader csvReader = new BufferedReader(new InputStreamReader(labelsStream, "UTF-8"), BUFFER_SIZE); csvParser = new CSVParser(csvReader, CSVFormat.RFC4180.withHeader(FILE_HEADER)); iterator = csvParser.iterator(); CSVRecord headerRecord = iterator.next(); for (int i = 0; i < FILE_HEADER.length; i++) { if (!FILE_HEADER[i].equals(headerRecord.get(i))) { throw new IOException("The header of the CSV file is wrong."); } } } catch (IOException e) { logger.error("", e); finishReading(); } }
From source file:com.github.jferard.pgloaderutils.sniffer.csd.CSDSchemaSnifferTest.java
@Test public void sniffBadHeader1() throws Exception { CSVParser p = CSVFormat.RFC4180.parse(new StringReader("a,b,c\n1,2,3\n4,5,6")); EasyMock.expect(this.vh.validateHeader(EasyMock.isA(CSDValidationResult.class), EasyMock.eq(this.sp), EasyMock.isA(CSVRecord.class))).andReturn(10); EasyMock.expect(this.sp.hasOptionalHeader()).andReturn(false); PowerMock.replayAll();// ww w . j a v a 2 s . co m CSDSchema<CSDFieldPattern> s = this.sniffer.sniff(this.sp, p, 10); Assert.assertEquals(null, s); PowerMock.verifyAll(); }
From source file:com.ibm.g11n.pipeline.example.CSVFilter.java
/**
 * Writes the resource strings of a language bundle as UTF-8 encoded RFC 4180
 * CSV, using a printer configured with the header {@code key,value}.
 *
 * <p>One record (key, value) is printed per resource string, in the order
 * returned by {@code getSortedResourceStrings()}. The printer is flushed at
 * the end; the output stream is not closed here.
 *
 * @param outStream      the destination stream
 * @param languageBundle the bundle whose resource strings are written
 * @param options        filter options (not used by this method)
 */
@Override
public void write(OutputStream outStream, LanguageBundle languageBundle, FilterOptions options)
        throws IOException, ResourceFilterException {
    final OutputStreamWriter utf8Out = new OutputStreamWriter(outStream, StandardCharsets.UTF_8);
    final BufferedWriter bufferedOut = new BufferedWriter(utf8Out);

    final CSVFormat keyValueFormat = CSVFormat.RFC4180.withHeader("key", "value");
    final CSVPrinter csvOut = keyValueFormat.print(bufferedOut);

    for (ResourceString entry : languageBundle.getSortedResourceStrings()) {
        csvOut.printRecord(entry.getKey(), entry.getValue());
    }

    csvOut.flush();
}