Example usage for org.apache.commons.csv CSVParser CSVParser

List of usage examples for org.apache.commons.csv CSVParser CSVParser

Introduction

On this page you can find example usages of the org.apache.commons.csv.CSVParser constructor CSVParser(Reader, CSVFormat).

Prototype

public CSVParser(final Reader reader, final CSVFormat format) throws IOException 

Source Link

Document

Customized CSV parser using the given CSVFormat

If you do not read all records from the given reader, you should call close() on the parser, unless you close the reader yourself.

Usage

From source file:org.alfresco.repo.web.scripts.person.UserCSVUploadPost.java

/**
 * Parses the uploaded stream as Excel-flavoured CSV (decoded as UTF-8) and forwards the
 * resulting rows to {@code processSpreadsheetUpload}. An upload that yields no rows is ignored.
 *
 * @param input raw bytes of the uploaded CSV content
 * @param users passed through unchanged to {@code processSpreadsheetUpload}
 * @throws IOException if the CSV content cannot be read
 */
protected void processCSVUpload(InputStream input, List<Map<QName, String>> users) throws IOException {
    final CSVParser parser = new CSVParser(new InputStreamReader(input, Charset.forName("UTF-8")),
            CSVStrategy.EXCEL_STRATEGY);
    final String[][] rows = parser.getAllValues();
    if (rows == null || rows.length == 0) {
        // Nothing was parsed, so there is nothing to hand on.
        return;
    }
    processSpreadsheetUpload(rows, users);
}

From source file:org.andresoft.datasource.FileReformatTest.java

/**
 * Rewrites the Chicago food-inspection CSV so that every violation of an inspection becomes its
 * own output row, with the violation number, violation text and comments in separate columns.
 *
 * <p>All readers and writers are opened in a try-with-resources block so they are released even
 * when a record cannot be processed.
 *
 * @throws IOException if the input cannot be read or the output cannot be written
 */
@Test
public void testReformatChicagoFoodInpectionCsv() throws IOException {
    // Input columns:
    // Inspection ID | DBA Name | AKA Name| License # | Facility Type| Risk| Address| City|
    // State| Zip| Inspection Date| Inspection Type| Results| Violations| Latitude| Longitude|
    // Location
    System.setProperty("line.separator", "\n");
    File file = new File("/Development/andresoft/hadoop_data/Food_Inspections_chicago_nomalized-2.csv");

    // NOTE(review): the last output header is spelled "Loacation"; it is runtime output and is
    // kept unchanged here - confirm whether downstream consumers rely on that spelling.
    try (Reader in = new FileReader("/Development/andresoft/hadoop_data/Food_Inspections_chicago.csv");
            FileWriter fw = new FileWriter(file);
            CSVPrinter printer = CSVFormat.DEFAULT
                    .withHeader("Inspection ID", "DBA Name", "AKA Name", "License #", "Facility Type", "Risk",
                            "Address", "City", "State", "Zip", "Inspection Date", "Inspection Type", "Results",
                            "Violation Number", "Violation", "Comments", "Latitude", "Longitude", "Loacation")
                    .print(fw);
            CSVParser parser = new CSVParser(in, CSVFormat.EXCEL.withHeader())) {

        for (CSVRecord record : parser) {
            String inspectionId = record.get("Inspection ID");
            String dbaName = record.get("DBA Name");
            String akaName = record.get("AKA Name");
            String licenseNum = record.get("License #");
            String facility = record.get("Facility Type");
            String risk = record.get("Risk");
            String address = record.get("Address");
            String city = record.get("City");
            String state = record.get("State");
            String zip = record.get("Zip");
            String inspectionDate = record.get("Inspection Date");
            String inspectionType = record.get("Inspection Type");
            String results = record.get("Results");
            String violations = record.get("Violations");
            String latitude = record.get("Latitude");
            String longitude = record.get("Longitude");
            String location = record.get("Location");

            // Individual violations are separated by '|' inside the "Violations" column.
            for (String v : violations.split("\\|")) {
                String[] violationWithComment = v.split("Comments:");
                // String.split drops trailing empty strings, so an entry that is exactly
                // "Comments:" yields an empty array; treat it like a blank violation.
                String violation = violationWithComment.length > 0 ? violationWithComment[0] : "";
                String comments = violationWithComment.length == 2 ? violationWithComment[1] : "None";

                if (StringUtils.isBlank(violation)) {
                    continue;
                }

                // The text before the first '.' is parsed as the violation number; the rest is
                // written out as the violation description.
                int violationNumberEndIndex = violation.indexOf('.');
                int violationNumber = Integer.parseInt(violation.substring(0, violationNumberEndIndex).trim());

                printer.printRecord(inspectionId, dbaName, akaName, licenseNum, facility, risk, address, city,
                        state, zip, inspectionDate, inspectionType, results, violationNumber,
                        violation.substring(violationNumberEndIndex + 1), comments, latitude, longitude,
                        location);
            }
        }
    }
}

From source file:org.apache.ambari.server.api.services.serializers.CsvSerializerTest.java

/**
 * Serializes three resources without column information and checks that the CSV output parses
 * back into three records whose values all appear in the corresponding source row.
 *
 * <p>The rows use a plain value, a value containing a single quote and a value containing a
 * comma.
 *
 * @throws Exception if serialization or parsing fails
 */
@Test
public void testSerializeResources_NoColumnInfo() throws Exception {
    Result result = new ResultImpl(true);
    result.setResultStatus(new ResultStatus(ResultStatus.STATUS.OK));
    TreeNode<Resource> tree = result.getResultTree();

    // Rows are built with plain TreeMaps; each row maps property1..property4 to
    // "value<N><suffix>" (e.g. "value1a", "value1'b", "value1,c").
    List<TreeMap<String, Object>> data = new ArrayList<TreeMap<String, Object>>();
    for (String suffix : new String[] { "a", "'b", ",c" }) {
        TreeMap<String, Object> row = new TreeMap<String, Object>();
        for (int p = 1; p <= 4; p++) {
            row.put("property" + p, "value" + p + suffix);
        }
        data.add(row);
    }

    tree.setName("items");
    tree.setProperty("isCollection", "true");

    addChildResource(tree, "resource", 0, data.get(0));
    addChildResource(tree, "resource", 1, data.get(1));
    addChildResource(tree, "resource", 2, data.get(2));

    replayAll();

    // execute test; carriage returns are stripped before the output is parsed
    String csv = new CsvSerializer().serialize(result).toString().replace("\r", "");

    verifyAll();

    assertNotNull(csv);

    // try-with-resources closes the parser even when one of the assertions below fails
    try (CSVParser csvParser = new CSVParser(new StringReader(csv), CSVFormat.DEFAULT)) {
        List<CSVRecord> records = csvParser.getRecords();

        assertNotNull(records);
        assertEquals(3, records.size());

        int i = 0;
        for (CSVRecord record : records) {
            TreeMap<String, Object> actualData = data.get(i++);
            assertEquals(actualData.size(), record.size());

            for (String item : record) {
                assertTrue(actualData.containsValue(item));
            }
        }
    }
}

From source file:org.apache.any23.extractor.csv.CSVReaderBuilder.java

/**
 * Creates a non-<code>null</code> {@link org.apache.commons.csv.CSVParser} for the provided
 * <i>CSV</i> stream. The strategy guessed from the stream is used when one is found; otherwise
 * the strategy taken from the configuration is used.
 *
 * @param is {@link InputStream} of the <i>CSV</i> file whose configuration is guessed.
 * @return a {@link CSVParser} reading from <code>is</code>.
 * @throws java.io.IOException if an I/O error occurs while building the parser.
 */
public static CSVParser build(InputStream is) throws IOException {
    final CSVStrategy guessed = getBestStrategy(is);
    final CSVStrategy effective = (guessed != null) ? guessed : getCSVStrategyFromConfiguration();
    return new CSVParser(new InputStreamReader(is), effective);
}

From source file:org.apache.any23.extractor.csv.CSVReaderBuilder.java

/**
 * Checks whether the given strategy (delimiter and quotation settings) fits the stream.
 * Up to five lines are read: every line must have at least two columns, and every line after
 * the first must have the same number of columns as the first line, or exactly one more.
 *
 * <p>The stream is marked before reading and reset afterwards, whatever the outcome.
 *
 * @param is input stream to be checked
 * @param strategy strategy to be verified.
 * @return <code>true</code> if all inspected lines satisfy the column checks,
 *         <code>false</code> otherwise.
 * @throws IOException if reading from or resetting the stream fails
 */
private static boolean testStrategy(InputStream is, CSVStrategy strategy) throws IOException {
    final int minColumns = 2;
    final int maxLines = 5;

    is.mark(Integer.MAX_VALUE);
    try {
        // NOTE(review): the parser is not closed here, presumably because closing it would
        // close the underlying stream before reset() - confirm.
        final CSVParser csvParser = new CSVParser(new InputStreamReader(is), strategy);
        int expectedColumns = -1;
        for (int checked = 0; checked < maxLines; checked++) {
            final String[] fields = csvParser.getLine();
            if (fields == null) {
                // end of input reached before the line budget was used up
                break;
            }
            if (fields.length < minColumns) {
                return false;
            }
            if (expectedColumns == -1) {
                // the first line defines the reference column count
                expectedColumns = fields.length;
                continue;
            }
            // later lines may match the first line or exceed it by one column
            if (fields.length < expectedColumns || fields.length - 1 > expectedColumns) {
                return false;
            }
        }
        return true;
    } finally {
        is.reset();
    }
}

From source file:org.apache.camel.dataformat.csv.CsvDataFormat.java

/**
 * Reads CSV content from the stream and converts each line with a line converter.
 *
 * <p>If a delimiter is configured, its first character is applied to the config, and the
 * config's delimiter is then copied to the strategy. With {@code useMaps} the first line is
 * handed to the map line converter; otherwise a list converter is used and, when
 * {@code skipFirstLine} is set, one line is read and discarded.
 *
 * @param exchange used to look up the charset name for decoding the stream
 * @param inputStream the CSV content
 * @return the lazy iterator when {@code lazyLoad} is set, otherwise the result of
 *         {@code loadAllAsList}
 * @throws Exception any failure raised while setting up or reading; the reader is closed
 *         before the exception is propagated
 */
public Object unmarshal(Exchange exchange, InputStream inputStream) throws Exception {
    if (delimiter != null) {
        config.setDelimiter(delimiter.charAt(0));
    }
    strategy.setDelimiter(config.getDelimiter());

    Reader reader = null;
    try {
        reader = IOHelper.buffered(new InputStreamReader(inputStream, IOHelper.getCharsetName(exchange)));
        final CSVParser csvParser = new CSVParser(reader, strategy);

        final CsvLineConverter<?> converter;
        if (!useMaps) {
            converter = CsvLineConverters.getListConverter();
            if (skipFirstLine) {
                // consume the first line so it is not converted
                csvParser.getLine();
            }
        } else {
            converter = CsvLineConverters.getMapLineConverter(csvParser.getLine());
        }

        @SuppressWarnings({ "unchecked", "rawtypes" })
        CsvIterator<?> iterator = new CsvIterator(csvParser, reader, converter);
        if (lazyLoad) {
            return iterator;
        }
        return loadAllAsList(iterator);
    } catch (Exception e) {
        // The reader is closed only on failure; on success it is left open and passed to the
        // iterator (presumably the iterator closes it later - confirm).
        IOHelper.close(reader);
        throw e;
    }
}

From source file:org.apache.camel.dataformat.csv.CsvRecordConvertersTest.java

/**
 * Parses the single line {@code 1,2,3} with the headers A, B and C and stores the first
 * resulting record in {@code record}.
 *
 * @throws Exception if the line cannot be parsed
 */
@Before
public void setUp() throws Exception {
    final CSVFormat headerFormat = CSVFormat.DEFAULT.withHeader("A", "B", "C");
    record = new CSVParser(new StringReader("1,2,3"), headerFormat).getRecords().get(0);
}

From source file:org.apache.ctakes.jdl.data.loader.CsvLoader.java

/**
 * Opens the file and prepares a CSV parser plus the per-column formatters described by the
 * loader definition.
 *
 * <p>For every column with a non-empty format, the format class is instantiated reflectively
 * through its {@code (String)} constructor with the column's pattern. If anything fails after
 * the file has been opened, the reader is closed before the exception is propagated, so a
 * failed construction does not leak the file handle.
 *
 * @param loader
 *            the loader
 * @param file
 *            the file
 * @throws FileNotFoundException
 *             if the file cannot be opened
 * @throws RuntimeException
 *             if a column format cannot be instantiated
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public CsvLoader(final CsvLoadType loader, final File file) throws FileNotFoundException {
    InputStream inputStrem = new FileInputStream(file);
    Reader reader = new InputStreamReader(inputStrem);
    boolean initialized = false;
    try {
        char delimiter = CharUtils.toChar(loader.getDelimiter());
        // a null or empty encapsulator in the definition disables encapsulation
        char encapsulator = (loader.getEncapsulator() == null || loader.getEncapsulator().length() == 0)
                ? CSVStrategy.ENCAPSULATOR_DISABLED
                : CharUtils.toChar(loader.getEncapsulator());
        log.info(String.format("delimiter %d encapsulator %d", (int) delimiter, (int) encapsulator));
        CSVStrategy strategy = new CSVStrategy(delimiter, encapsulator, CSVStrategy.COMMENTS_DISABLED,
                CSVStrategy.ESCAPE_DISABLED, true, true, false, true);
        parser = new CSVParser(reader, strategy);
        this.loader = loader;
        formatMap = new HashMap<String, Format>();
        try {
            for (Column col : loader.getColumn()) {
                if (col.getFormat() != null && col.getFormat().length() > 0) {
                    Class cf = Class.forName(col.getFormat());
                    Constructor ccf = cf.getConstructor(String.class);
                    this.formatMap.put(col.getName(), (Format) ccf.newInstance(col.getPattern()));
                }
            }
        } catch (Exception e) {
            throw new RuntimeException("oops", e);
        }
        initialized = true;
    } finally {
        if (!initialized) {
            // Construction failed: nobody else will ever close the reader, so release it here.
            try {
                reader.close();
            } catch (java.io.IOException ignored) {
                // best effort; the original failure is the one worth reporting
            }
        }
    }
}

From source file:org.apache.mahout.utils.vectors.csv.CSVVectorIterator.java

/**
 * Creates an iterator backed by a {@link CSVParser} over the given reader.
 *
 * @param reader source of the CSV content
 * @param strategy CSV strategy handed to the parser
 */
public CSVVectorIterator(Reader reader, CSVStrategy strategy) {
    this.parser = new CSVParser(reader, strategy);
}

From source file:org.apache.nifi.csv.CSVHeaderSchemaStrategy.java

/**
 * Derives a schema from the header line of the CSV content: every header column becomes a
 * nullable STRING field.
 *
 * @param variables not used by this implementation
 * @param contentStream the CSV content; it is wrapped in a BOM-aware stream and closed when
 *        reading finishes
 * @param readSchema not used by this implementation
 * @return a schema with one STRING field per header column
 * @throws SchemaNotFoundException if no context is available, or if the header line cannot be
 *         read
 */
@Override
public RecordSchema getSchema(Map<String, String> variables, final InputStream contentStream,
        final RecordSchema readSchema) throws SchemaNotFoundException {
    if (this.context == null) {
        throw new SchemaNotFoundException(
                "Schema Access Strategy intended only for validation purposes and cannot obtain schema");
    }

    try {
        // The format is created before the stream is touched, so a bad configuration fails
        // without opening (and closing) the content stream.
        final CSVFormat headerFormat = CSVUtils.createCSVFormat(context).withFirstRecordAsHeader();
        try (final Reader contentReader = new InputStreamReader(new BOMInputStream(contentStream));
                final CSVParser headerParser = new CSVParser(contentReader, headerFormat)) {

            final Map<String, Integer> headerMap = headerParser.getHeaderMap();
            final List<RecordField> schemaFields = new ArrayList<>();
            for (final String header : headerMap.keySet()) {
                final RecordField field = new RecordField(header, RecordFieldType.STRING.getDataType(), true);
                schemaFields.add(field);
            }

            return new SimpleRecordSchema(schemaFields);
        }
    } catch (final Exception e) {
        throw new SchemaNotFoundException("Failed to read Header line from CSV", e);
    }
}