List of usage examples for the org.apache.commons.csv.CSVParser constructor CSVParser(Reader, CSVFormat)
public CSVParser(final Reader reader, final CSVFormat format) throws IOException
If you do not read all records from the given reader, you should call close() on the parser, unless you close the reader yourself.
From source file:org.apache.solr.handler.CSVRequestHandler.java
/** load the CSV input */ @Override/*from w w w. j a va 2 s. c o m*/ public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream) throws IOException { errHeader = "CSVLoader: input=" + stream.getSourceInfo(); Reader reader = null; try { reader = stream.getReader(); if (skipLines > 0) { if (!(reader instanceof BufferedReader)) { reader = new BufferedReader(reader); } BufferedReader r = (BufferedReader) reader; for (int i = 0; i < skipLines; i++) { r.readLine(); } } CSVParser parser = new CSVParser(reader, strategy); // parse the fieldnames from the header of the file if (fieldnames == null) { fieldnames = parser.getLine(); if (fieldnames == null) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Expected fieldnames in CSV input"); } prepareFields(); } // read the rest of the CSV file for (;;) { int line = parser.getLineNumber(); // for error reporting in MT mode String[] vals = null; try { vals = parser.getLine(); } catch (IOException e) { //Catch the exception and rethrow it with more line information input_err("can't read line: " + line, null, line, e); } if (vals == null) break; if (vals.length != fields.length) { input_err("expected " + fields.length + " values but got " + vals.length, vals, line); } addDoc(line, vals); } } finally { if (reader != null) { IOUtils.closeQuietly(reader); } } }
From source file:org.apache.tika.parser.isatab.ISATabAssayParser.java
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // Automatically detect the character encoding AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata, context.get(ServiceLoader.class, LOADER)); CSVParser csvParser = null;//from w w w . ja v a2 s .c o m try { Charset charset = reader.getCharset(); MediaType type = new MediaType(MediaType.application("x-isatab-assay"), charset); metadata.set(Metadata.CONTENT_TYPE, type.toString()); metadata.set(Metadata.CONTENT_ENCODING, charset.name()); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.newline(); csvParser = new CSVParser(reader, CSVFormat.TDF); xhtml.startDocument(); xhtml.newline(); xhtml.startElement("table"); xhtml.newline(); List<CSVRecord> records = csvParser.getRecords(); for (int i = 0; i < records.get(0).size(); i++) { xhtml.startElement("th"); xhtml.characters(records.get(0).get(i)); xhtml.endElement("th"); xhtml.newline(); } for (int i = 1; i < records.size(); i++) { xhtml.startElement("tr"); xhtml.newline(); for (int j = 0; j < records.get(i).size(); j++) { xhtml.startElement("td"); xhtml.characters(records.get(i).get(j)); xhtml.endElement("td"); xhtml.newline(); } xhtml.endElement("tr"); xhtml.newline(); } xhtml.endElement("table"); xhtml.newline(); xhtml.endDocument(); } finally { reader.close(); csvParser.close(); } }
From source file:org.apache.tika.parser.isatab.ISATabStudyParser.java
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // Automatically detect the character encoding AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata, context.get(ServiceLoader.class, LOADER)); CSVParser csvParser = null;/*w w w . j a v a2 s. co m*/ try { Charset charset = reader.getCharset(); MediaType type = new MediaType(MediaType.application("x-isatab-study"), charset); metadata.set(Metadata.CONTENT_TYPE, type.toString()); metadata.set(Metadata.CONTENT_ENCODING, charset.name()); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.newline(); csvParser = new CSVParser(reader, CSVFormat.TDF); xhtml.startDocument(); xhtml.newline(); xhtml.startElement("table"); xhtml.newline(); List<CSVRecord> records = csvParser.getRecords(); for (int i = 0; i < records.get(0).size(); i++) { xhtml.startElement("th"); xhtml.characters(records.get(0).get(i)); xhtml.endElement("th"); xhtml.newline(); } for (int i = 1; i < records.get(0).size(); i++) { xhtml.startElement("tr"); xhtml.newline(); for (int j = 0; j < records.get(i).size(); j++) { xhtml.startElement("td"); xhtml.characters(records.get(i).get(j)); xhtml.endElement("td"); xhtml.newline(); } xhtml.endElement("tr"); xhtml.newline(); } xhtml.endElement("table"); xhtml.newline(); xhtml.endDocument(); } finally { reader.close(); csvParser.close(); } }
From source file:org.apache.tika.parser.isatab.ISATabUtils.java
public static void parseStudy(InputStream stream, XHTMLContentHandler xhtml, Metadata metadata, ParseContext context) throws IOException, TikaException, SAXException { TikaInputStream tis = TikaInputStream.get(stream); // Automatically detect the character encoding TikaConfig tikaConfig = context.get(TikaConfig.class); if (tikaConfig == null) { tikaConfig = TikaConfig.getDefaultConfig(); }//from ww w .j a va2 s.co m try (AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(tis), metadata, tikaConfig.getEncodingDetector()); CSVParser csvParser = new CSVParser(reader, CSVFormat.TDF)) { Iterator<CSVRecord> iterator = csvParser.iterator(); xhtml.startElement("table"); xhtml.startElement("thead"); if (iterator.hasNext()) { CSVRecord record = iterator.next(); for (int i = 0; i < record.size(); i++) { xhtml.startElement("th"); xhtml.characters(record.get(i)); xhtml.endElement("th"); } } xhtml.endElement("thead"); xhtml.startElement("tbody"); while (iterator.hasNext()) { CSVRecord record = iterator.next(); xhtml.startElement("tr"); for (int j = 0; j < record.size(); j++) { xhtml.startElement("td"); xhtml.characters(record.get(j)); xhtml.endElement("td"); } xhtml.endElement("tr"); } xhtml.endElement("tbody"); xhtml.endElement("table"); } }
From source file:org.apache.tika.parser.isatab.ISATabUtils.java
public static void parseAssay(InputStream stream, XHTMLContentHandler xhtml, Metadata metadata, ParseContext context) throws IOException, TikaException, SAXException { TikaInputStream tis = TikaInputStream.get(stream); // Automatically detect the character encoding TikaConfig tikaConfig = context.get(TikaConfig.class); if (tikaConfig == null) { tikaConfig = TikaConfig.getDefaultConfig(); }//from w w w .j ava 2 s . c o m try (AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(tis), metadata, tikaConfig.getEncodingDetector()); CSVParser csvParser = new CSVParser(reader, CSVFormat.TDF)) { xhtml.startElement("table"); Iterator<CSVRecord> iterator = csvParser.iterator(); xhtml.startElement("thead"); if (iterator.hasNext()) { CSVRecord record = iterator.next(); for (int i = 0; i < record.size(); i++) { xhtml.startElement("th"); xhtml.characters(record.get(i)); xhtml.endElement("th"); } } xhtml.endElement("thead"); xhtml.startElement("tbody"); while (iterator.hasNext()) { CSVRecord record = iterator.next(); xhtml.startElement("tr"); for (int j = 0; j < record.size(); j++) { xhtml.startElement("td"); xhtml.characters(record.get(j)); xhtml.endElement("td"); } xhtml.endElement("tr"); } xhtml.endElement("tbody"); xhtml.endElement("table"); } }
From source file:org.apache.tika.parser.isatab.ISATabUtils.java
/**
 * Scans a tab-delimited investigation file and copies selected fields into metadata.
 *
 * <p>A record whose single value is entirely upper case is treated as a section
 * header. Records in a section listed in {@code sections} are added to the metadata
 * directly. Records in the study section are collected into a map until the record
 * naming {@code studyFileName} is seen; the map is then mapped to metadata and later
 * non-header records are added as well. Scanning stops at the first record of a
 * subsequent study section once the target study has been found.
 *
 * @param reader        source of the tab-delimited text; closed with the parser
 * @param metadata      receives the extracted values
 * @param studyFileName study file to look for; {@code null} disables study matching
 * @throws IOException if the parser cannot be created or closed
 */
private static void extractMetadata(Reader reader, Metadata metadata, String studyFileName)
        throws IOException {
    boolean investigationSection = false;
    boolean studySection = false;
    boolean studyTarget = false;
    Map<String, String> map = new HashMap<String, String>();

    try (CSVParser csvParser = new CSVParser(reader, CSVFormat.TDF)) {
        for (CSVRecord record : csvParser) {
            String field = record.get(0);

            // Section header: a lone, all-upper-case value switches the active section.
            boolean isSectionHeader = field.toUpperCase(Locale.ENGLISH).equals(field) && record.size() == 1;
            if (isSectionHeader) {
                investigationSection = Arrays.asList(sections).contains(field);
                studySection = (studyFileName != null) && field.equals(studySectionField);
                continue;
            }

            if (investigationSection) {
                addMetadata(field, record, metadata);
                continue;
            }

            if (studySection) {
                // The target study was already handled; a new study section ends the scan.
                if (studyTarget) {
                    break;
                }
                String value = record.get(1);
                map.put(field, value);
                studyTarget = field.equals(studyFileNameField) && value.equals(studyFileName);
                if (studyTarget) {
                    mapStudyToMetadata(map, metadata);
                    studySection = false;
                }
                continue;
            }

            if (studyTarget) {
                addMetadata(field, record, metadata);
            }
        }
    }
}
From source file:org.chanthing.csvtool.CSVTrans.java
public static void main(String[] args) throws IOException { CSVParser reader = null;//from w w w . ja v a 2 s . c o m CSVPrinter writer = null; CSVXform xFormer = null; if (args.length < 2) { System.out.println("Usage: java CSVTrans <src_csv_file> <dest_csv_file>"); return; } try { reader = new CSVParser(new FileReader(args[0]), srcFormat); writer = new CSVPrinter(new FileWriter(args[1]), destFormat); xFormer = new CSVXform(destHeaders.size(), xforms); writer.printRecord(destHeaders); // Write out headers to destination file /* * For each record in the input file */ for (CSVRecord record : reader) { List<String> destRecord = xFormer.xform(record); writer.printRecord(destRecord); } } finally { if (reader != null) { reader.close(); } if (writer != null) { writer.close(); } } }
From source file:org.easybatch.extensions.apache.common.csv.ApacheCommonCsvBatchReaderTest.java
@Before public void setUp() throws Exception { CSVFormat csvFormat = CSVFormat.DEFAULT; CSVParser parser = new CSVParser(new FileReader(this.getClass().getResource("/tweets.csv").getFile()), csvFormat);//from www .jav a 2 s.co m apacheCommonCsvBatchReader = new ApacheCommonCsvBatchReader(parser, BATCH_SIZE); }
From source file:org.easybatch.extensions.apache.common.csv.ApacheCommonCsvRecordMapperTest.java
/**
 * Parses the given text and wraps its first CSV record.
 *
 * @param stringReader supplies the CSV text
 * @param csvFormat    format used to parse the text
 * @return a record built from {@code header} and the first parsed CSV record
 * @throws IOException if the parser cannot be created
 */
private ApacheCommonCsvRecord getApacheCommonCsvRecord(StringReader stringReader, CSVFormat csvFormat)
        throws IOException {
    // Only the first record is needed, so take it straight from the iterator.
    CSVRecord firstRecord = new CSVParser(stringReader, csvFormat).iterator().next();
    return new ApacheCommonCsvRecord(header, firstRecord);
}
From source file:org.easybatch.extensions.apache.common.csv.ApacheCommonCsvRecordReaderTest.java
@Before public void setUp() throws Exception { StringReader stringReader = new StringReader("foo,bar,15,true"); CSVFormat csvFormat = CSVFormat.DEFAULT.withHeader("firstName", "lastName", "age", "married"); CSVParser parser = new CSVParser(stringReader, csvFormat); recordReader = new ApacheCommonCsvRecordReader(parser); recordReader.open();//ww w. j ava2 s . c o m }