Example usage for org.apache.commons.csv CSVParser CSVParser

Introduction

On this page you can find example usages of the org.apache.commons.csv.CSVParser constructor CSVParser(Reader, CSVFormat).

Prototype

public CSVParser(final Reader reader, final CSVFormat format) throws IOException

Source Link

Document

Customized CSV parser using the given CSVFormat

If you do not read all records from the given reader, you should call close() on the parser, unless you close the reader yourself.

Usage

From source file:org.apache.solr.handler.CSVRequestHandler.java

/**
 * Loads the CSV input from the given content stream.
 *
 * <p>Optionally skips the first {@code skipLines} lines, takes the field names from the
 * first parsed line when none are configured, and then hands every remaining line to
 * {@code addDoc}. The reader obtained from the stream is always closed quietly on exit.
 *
 * @param req    the current request (not read by this method)
 * @param rsp    the current response (not read by this method)
 * @param stream the content stream supplying the CSV text
 * @throws IOException if reading the skipped lines or the header line fails
 */
@Override
public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream) throws IOException {
    errHeader = "CSVLoader: input=" + stream.getSourceInfo();
    Reader reader = null;
    try {
        reader = stream.getReader();

        // Skipping whole lines requires readLine(), so make sure we hold a BufferedReader.
        if (skipLines > 0) {
            BufferedReader buffered = (reader instanceof BufferedReader)
                    ? (BufferedReader) reader
                    : new BufferedReader(reader);
            reader = buffered;
            int skipped = 0;
            while (skipped < skipLines) {
                buffered.readLine();
                skipped++;
            }
        }

        CSVParser parser = new CSVParser(reader, strategy);

        // No configured field names: the first line of the file is the header.
        if (fieldnames == null) {
            fieldnames = parser.getLine();
            if (fieldnames == null) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                        "Expected fieldnames in CSV input");
            }
            prepareFields();
        }

        // Consume the remaining lines until the parser signals end of input with null.
        while (true) {
            // Captured before reading so that error reports name the right line.
            int lineNumber = parser.getLineNumber();
            String[] values = null;
            try {
                values = parser.getLine();
            } catch (IOException readFailure) {
                // Report the failure together with the line information.
                input_err("can't read line: " + lineNumber, null, lineNumber, readFailure);
            }
            if (values == null) {
                break;
            }

            if (values.length != fields.length) {
                input_err("expected " + fields.length + " values but got " + values.length, values,
                        lineNumber);
            }

            addDoc(lineNumber, values);
        }
    } finally {
        if (reader != null) {
            IOUtils.closeQuietly(reader);
        }
    }
}

From source file:org.apache.tika.parser.isatab.ISATabAssayParser.java

/**
 * Parses a tab-delimited ISA-Tab assay stream and emits it as an XHTML table.
 *
 * <p>The first record is written as {@code th} cells and every following record as a
 * {@code tr} row of {@code td} cells. The detected charset is recorded in the metadata
 * as content type and content encoding.
 *
 * @param stream   the input to parse; it is wrapped in a close shield before reading
 * @param handler  receiver of the generated XHTML events
 * @param metadata updated with the content type and encoding
 * @param context  parse context used to look up the service loader
 * @throws IOException   if the input cannot be read
 * @throws SAXException  if the handler rejects an event
 * @throws TikaException if encoding detection fails
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    // Automatically detect the character encoding
    AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata,
            context.get(ServiceLoader.class, LOADER));
    CSVParser csvParser = null;

    try {
        Charset charset = reader.getCharset();
        MediaType type = new MediaType(MediaType.application("x-isatab-assay"), charset);
        metadata.set(Metadata.CONTENT_TYPE, type.toString());
        metadata.set(Metadata.CONTENT_ENCODING, charset.name());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.newline();

        csvParser = new CSVParser(reader, CSVFormat.TDF);

        xhtml.startDocument();
        xhtml.newline();

        xhtml.startElement("table");
        xhtml.newline();

        List<CSVRecord> records = csvParser.getRecords();

        // Empty input has no header record; guard so get(0) cannot throw.
        if (!records.isEmpty()) {
            CSVRecord header = records.get(0);
            for (int i = 0; i < header.size(); i++) {
                xhtml.startElement("th");
                xhtml.characters(header.get(i));
                xhtml.endElement("th");
                xhtml.newline();
            }
        }

        // Records after the header are the data rows.
        for (int i = 1; i < records.size(); i++) {
            CSVRecord row = records.get(i);
            xhtml.startElement("tr");
            xhtml.newline();

            for (int j = 0; j < row.size(); j++) {
                xhtml.startElement("td");
                xhtml.characters(row.get(j));
                xhtml.endElement("td");
                xhtml.newline();
            }

            xhtml.endElement("tr");
            xhtml.newline();
        }

        xhtml.endElement("table");
        xhtml.newline();

        xhtml.endDocument();

    } finally {
        // The parser is still null if anything failed before it was created; an unguarded
        // close() would then throw a NullPointerException that hides the real failure.
        try {
            if (csvParser != null) {
                csvParser.close();
            }
        } finally {
            reader.close();
        }
    }
}

From source file:org.apache.tika.parser.isatab.ISATabStudyParser.java

/**
 * Parses a tab-delimited ISA-Tab study stream and emits it as an XHTML table.
 *
 * <p>The first record is written as {@code th} cells and every following record as a
 * {@code tr} row of {@code td} cells. The detected charset is recorded in the metadata
 * as content type and content encoding.
 *
 * @param stream   the input to parse; it is wrapped in a close shield before reading
 * @param handler  receiver of the generated XHTML events
 * @param metadata updated with the content type and encoding
 * @param context  parse context used to look up the service loader
 * @throws IOException   if the input cannot be read
 * @throws SAXException  if the handler rejects an event
 * @throws TikaException if encoding detection fails
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    // Automatically detect the character encoding
    AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata,
            context.get(ServiceLoader.class, LOADER));
    CSVParser csvParser = null;

    try {
        Charset charset = reader.getCharset();
        MediaType type = new MediaType(MediaType.application("x-isatab-study"), charset);
        metadata.set(Metadata.CONTENT_TYPE, type.toString());
        metadata.set(Metadata.CONTENT_ENCODING, charset.name());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.newline();

        csvParser = new CSVParser(reader, CSVFormat.TDF);

        xhtml.startDocument();
        xhtml.newline();

        xhtml.startElement("table");
        xhtml.newline();

        List<CSVRecord> records = csvParser.getRecords();

        // Empty input has no header record; guard so get(0) cannot throw.
        if (!records.isEmpty()) {
            CSVRecord header = records.get(0);
            for (int i = 0; i < header.size(); i++) {
                xhtml.startElement("th");
                xhtml.characters(header.get(i));
                xhtml.endElement("th");
                xhtml.newline();
            }
        }

        // Iterate over the number of records (rows). The bound must not be the header's
        // column count, which would drop rows or index past the end of the list.
        for (int i = 1; i < records.size(); i++) {
            CSVRecord row = records.get(i);
            xhtml.startElement("tr");
            xhtml.newline();

            for (int j = 0; j < row.size(); j++) {
                xhtml.startElement("td");
                xhtml.characters(row.get(j));
                xhtml.endElement("td");
                xhtml.newline();
            }

            xhtml.endElement("tr");
            xhtml.newline();
        }

        xhtml.endElement("table");
        xhtml.newline();

        xhtml.endDocument();

    } finally {
        // The parser is still null if anything failed before it was created; an unguarded
        // close() would then throw a NullPointerException that hides the real failure.
        try {
            if (csvParser != null) {
                csvParser.close();
            }
        } finally {
            reader.close();
        }
    }
}

From source file:org.apache.tika.parser.isatab.ISATabUtils.java

/**
 * Writes a tab-delimited study stream to the given XHTML handler as a table.
 *
 * <p>The first record, if any, becomes the {@code th} cells inside {@code thead}; each
 * further record becomes one {@code tr} of {@code td} cells inside {@code tbody}.
 *
 * @param stream   the study input; read through a close shield
 * @param xhtml    handler receiving the table elements
 * @param metadata passed to the encoding-detecting reader
 * @param context  parse context that may carry a {@code TikaConfig}; the default
 *                 configuration is used when it does not
 * @throws IOException   if reading fails
 * @throws TikaException if encoding detection fails
 * @throws SAXException  if the handler rejects an event
 */
public static void parseStudy(InputStream stream, XHTMLContentHandler xhtml, Metadata metadata,
        ParseContext context) throws IOException, TikaException, SAXException {
    TikaInputStream tis = TikaInputStream.get(stream);

    // The encoding detector comes from the context's config, or the default one.
    TikaConfig configFromContext = context.get(TikaConfig.class);
    TikaConfig config = (configFromContext != null) ? configFromContext : TikaConfig.getDefaultConfig();

    try (AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(tis), metadata,
            config.getEncodingDetector()); CSVParser csvParser = new CSVParser(reader, CSVFormat.TDF)) {
        Iterator<CSVRecord> rows = csvParser.iterator();

        xhtml.startElement("table");

        // Header: only the first record, when present.
        xhtml.startElement("thead");
        if (rows.hasNext()) {
            for (String heading : rows.next()) {
                xhtml.startElement("th");
                xhtml.characters(heading);
                xhtml.endElement("th");
            }
        }
        xhtml.endElement("thead");

        // Body: every remaining record.
        xhtml.startElement("tbody");
        while (rows.hasNext()) {
            xhtml.startElement("tr");
            for (String cell : rows.next()) {
                xhtml.startElement("td");
                xhtml.characters(cell);
                xhtml.endElement("td");
            }
            xhtml.endElement("tr");
        }
        xhtml.endElement("tbody");

        xhtml.endElement("table");
    }
}

From source file:org.apache.tika.parser.isatab.ISATabUtils.java

/**
 * Writes a tab-delimited assay stream to the given XHTML handler as a table.
 *
 * <p>The first record, if any, becomes the {@code th} cells inside {@code thead}; each
 * further record becomes one {@code tr} of {@code td} cells inside {@code tbody}.
 *
 * @param stream   the assay input; read through a close shield
 * @param xhtml    handler receiving the table elements
 * @param metadata passed to the encoding-detecting reader
 * @param context  parse context that may carry a {@code TikaConfig}; the default
 *                 configuration is used when it does not
 * @throws IOException   if reading fails
 * @throws TikaException if encoding detection fails
 * @throws SAXException  if the handler rejects an event
 */
public static void parseAssay(InputStream stream, XHTMLContentHandler xhtml, Metadata metadata,
        ParseContext context) throws IOException, TikaException, SAXException {
    TikaInputStream tis = TikaInputStream.get(stream);

    // The encoding detector comes from the context's config, or the default one.
    TikaConfig configFromContext = context.get(TikaConfig.class);
    TikaConfig config = (configFromContext != null) ? configFromContext : TikaConfig.getDefaultConfig();

    try (AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(tis), metadata,
            config.getEncodingDetector()); CSVParser csvParser = new CSVParser(reader, CSVFormat.TDF)) {
        xhtml.startElement("table");

        Iterator<CSVRecord> rows = csvParser.iterator();

        // Header: only the first record, when present.
        xhtml.startElement("thead");
        if (rows.hasNext()) {
            for (String heading : rows.next()) {
                xhtml.startElement("th");
                xhtml.characters(heading);
                xhtml.endElement("th");
            }
        }
        xhtml.endElement("thead");

        // Body: every remaining record.
        xhtml.startElement("tbody");
        while (rows.hasNext()) {
            xhtml.startElement("tr");
            for (String cell : rows.next()) {
                xhtml.startElement("td");
                xhtml.characters(cell);
                xhtml.endElement("td");
            }
            xhtml.endElement("tr");
        }
        xhtml.endElement("tbody");

        xhtml.endElement("table");
    }
}

From source file:org.apache.tika.parser.isatab.ISATabUtils.java

/**
 * Reads tab-delimited records from {@code reader} and copies selected fields into
 * {@code metadata}.
 *
 * <p>A record with a single, all-upper-case field is treated as a section heading and
 * selects the current section. Records under an investigation section are added
 * directly. Records under the study section are collected in a map until the entry
 * naming {@code studyFileName} is seen, at which point the collected map is applied
 * and later records are added directly. Processing stops at the next study-section
 * record after the target study was found.
 *
 * @param reader        source of the tab-delimited text; closed with the parser
 * @param metadata      receiver of the extracted values
 * @param studyFileName name of the study file to match, or {@code null} to ignore
 *                      study sections
 * @throws IOException if the input cannot be read
 */
private static void extractMetadata(Reader reader, Metadata metadata, String studyFileName) throws IOException {
    boolean inInvestigation = false;
    boolean inStudy = false;
    boolean targetFound = false;

    Map<String, String> studyFields = new HashMap<>();

    try (CSVParser csvParser = new CSVParser(reader, CSVFormat.TDF)) {
        for (CSVRecord record : csvParser) {
            String field = record.get(0);
            boolean sectionHeading = (field.toUpperCase(Locale.ENGLISH).equals(field)) && (record.size() == 1);

            if (sectionHeading) {
                // A heading switches which section the following records belong to.
                inInvestigation = Arrays.asList(sections).contains(field);
                inStudy = (studyFileName != null) && (field.equals(studySectionField));
            } else if (inInvestigation) {
                addMetadata(field, record, metadata);
            } else if (inStudy) {
                if (targetFound) {
                    // The target study is complete once a later study section starts.
                    break;
                }
                String value = record.get(1);
                studyFields.put(field, value);
                targetFound = (field.equals(studyFileNameField)) && (value.equals(studyFileName));
                if (targetFound) {
                    mapStudyToMetadata(studyFields, metadata);
                    inStudy = false;
                }
            } else if (targetFound) {
                addMetadata(field, record, metadata);
            }
        }
    }
}

From source file:org.chanthing.csvtool.CSVTrans.java

/**
 * Command-line entry point: reads the source CSV file, transforms each record and
 * writes the result to the destination CSV file, preceded by the destination headers.
 *
 * @param args {@code args[0]} is the source CSV path and {@code args[1]} the
 *             destination CSV path; a usage message is printed when fewer are given
 * @throws IOException if either file cannot be opened, read, written or closed
 */
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.out.println("Usage: java CSVTrans <src_csv_file> <dest_csv_file>");
        return;
    }

    CSVParser reader = null;
    CSVPrinter writer = null;

    try {
        reader = new CSVParser(new FileReader(args[0]), srcFormat);
        writer = new CSVPrinter(new FileWriter(args[1]), destFormat);
        CSVXform xFormer = new CSVXform(destHeaders.size(), xforms);

        writer.printRecord(destHeaders); // Write out headers to destination file

        /*
         *  For each record in the input file
         */
        for (CSVRecord record : reader) {
            List<String> destRecord = xFormer.xform(record);
            writer.printRecord(destRecord);
        }

    } finally {
        // Close the writer even when closing the reader throws; otherwise the
        // destination file could be left open with unflushed output.
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            if (writer != null) {
                writer.close();
            }
        }
    }
}

From source file:org.easybatch.extensions.apache.common.csv.ApacheCommonCsvBatchReaderTest.java

/**
 * Creates the batch reader under test over the {@code /tweets.csv} classpath resource,
 * parsed with the default CSV format.
 */
@Before
public void setUp() throws Exception {
    String tweetsPath = this.getClass().getResource("/tweets.csv").getFile();
    CSVParser tweetsParser = new CSVParser(new FileReader(tweetsPath), CSVFormat.DEFAULT);
    apacheCommonCsvBatchReader = new ApacheCommonCsvBatchReader(tweetsParser, BATCH_SIZE);
}

From source file:org.easybatch.extensions.apache.common.csv.ApacheCommonCsvRecordMapperTest.java

/**
 * Parses the given text with the given format and wraps its first record.
 *
 * @param stringReader source of the CSV text
 * @param csvFormat    format used to parse it
 * @return the first parsed record, wrapped together with {@code header}
 * @throws IOException if the parser cannot be created
 */
private ApacheCommonCsvRecord getApacheCommonCsvRecord(StringReader stringReader, CSVFormat csvFormat)
        throws IOException {
    CSVRecord firstRecord = new CSVParser(stringReader, csvFormat).iterator().next();
    return new ApacheCommonCsvRecord(header, firstRecord);
}

From source file:org.easybatch.extensions.apache.common.csv.ApacheCommonCsvRecordReaderTest.java

/**
 * Creates and opens the record reader under test over a single in-memory CSV line,
 * parsed with explicit header names.
 */
@Before
public void setUp() throws Exception {
    CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader("firstName", "lastName", "age", "married");
    CSVParser singleLineParser = new CSVParser(new StringReader("foo,bar,15,true"), formatWithHeader);
    recordReader = new ApacheCommonCsvRecordReader(singleLineParser);
    recordReader.open();
}