Example usage for org.apache.commons.csv CSVParser CSVParser

Introduction

On this page you can find example usages of the org.apache.commons.csv.CSVParser constructor.

Prototype

public CSVParser(final Reader reader, final CSVFormat format) throws IOException

Source Link

Document

Customized CSV parser using the given CSVFormat

If you do not read all records from the given reader, you should call #close() on the parser, unless you close the reader.

Usage

From source file:tr.edu.firat.ceng.aml.assignments.decisiontree.util.CSV2DatasetUtil.java

/**
 * Loads a CSV classpath resource and converts it into a {@link Dataset}.
 *
 * <p>Each record is expected to hold four numeric columns (sepal length/width,
 * petal length/width, in that order) followed by one class-label column.
 *
 * @param resourceName name of the resource, resolved via {@code getClass().getResource}
 * @return a dataset holding the four numeric properties and the class property
 * @throws UnsupportedEncodingException if UTF-8 is not supported by the runtime
 * @throws IOException if the resource cannot be found, opened or read
 */
public Dataset convert(String resourceName) throws UnsupportedEncodingException, IOException {
    final URL url = getClass().getResource(resourceName);
    if (url == null) {
        // getResource returns null for a missing resource; fail with a clear
        // message instead of a NullPointerException on url.openStream().
        throw new IOException("Resource not found: " + resourceName);
    }

    Reader reader = null;
    try {
        List<Property> properties = new ArrayList<Property>();
        properties.add(new NumericPropertyImpl("sepal_length"));
        properties.add(new NumericPropertyImpl("sepal_width"));
        properties.add(new NumericPropertyImpl("petal_length"));
        properties.add(new NumericPropertyImpl("petal_width"));
        ClassProperty classProperty = new ClassPropertyImpl("class");

        reader = new InputStreamReader(url.openStream(), "UTF-8");
        CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT);
        for (CSVRecord record : parser) {
            for (int i = 0; i < properties.size(); i++) {
                Property property = properties.get(i);
                if (property instanceof NumericProperty) {
                    NumericProperty numericProperty = (NumericProperty) property;
                    // Double.valueOf replaces the deprecated Double(String) constructor.
                    numericProperty.getValues().add(Double.valueOf(record.get(i)));
                }
            }
            // The class label is the column right after the numeric ones.
            classProperty.getValues().add(record.get(properties.size()));
        }

        Dataset dataset = new DatasetImpl(classProperty);
        dataset.addProperties(properties);
        return dataset;
    } finally {
        // Closing the reader also releases the parser's underlying input.
        // A failure to close is reported but deliberately not propagated.
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (IOException ex) {
            System.out.println(ex);
        }
    }
}

From source file:uk.co.droidinactu.common.file.DelimitedFile.java

/**
 * Splits a single delimited line into its field values.
 *
 * <p>The first character of {@code m_sDelimiter} is used as the separator;
 * {@code '"'} and {@code '#'} are passed as the second and third strategy
 * arguments (presumably the quote and comment-start characters - confirm
 * against the CSVStrategy API in use).
 *
 * @param line the raw line to split
 * @return the fields in order of appearance; whatever was collected before an
 *         {@link IOException} if parsing fails (the exception is only printed)
 */
public ArrayList<String> convertLineToFields(String line) {
    final ArrayList<String> fields = new ArrayList<String>();

    final char separator = this.m_sDelimiter.charAt(0);
    final CSVStrategy strategy = new CSVStrategy(separator, '\"', '#');
    strategy.setIgnoreLeadingWhitespaces(true);

    final CSVParser parser = new CSVParser(new StringReader(line), strategy);
    try {
        // Flatten every parsed row into the single result list.
        for (String[] row : parser.getAllValues()) {
            for (String field : row) {
                fields.add(field);
            }
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    return fields;
}

From source file:uk.trainwatch.ogl.naptan.loader.Importer.java

/**
 * Imports the CSV file returned by {@code getPath()} through the given connection.
 *
 * <p>The file is parsed as RFC 4180 with a header row; all records are read,
 * converted with {@code getMapper()} and handed to the consumer produced by
 * {@code insert(...)} for the statement obtained from {@code prepare(con)}.
 *
 * @param con connection passed to {@code prepare}
 * @throws IOException  if the file cannot be opened or parsed
 * @throws SQLException if preparing or closing the statement fails
 */
default void importFile(Connection con) throws IOException, SQLException {
    final Path source = getPath();
    getLog().log(Level.INFO, () -> "Importing " + source);

    // Resources are opened in this order and closed in reverse:
    // statement first, then parser, then reader.
    try (Reader in = new BufferedReader(new FileReader(source.toFile()));
            CSVParser csv = new CSVParser(in, CSVFormat.RFC4180.withHeader());
            PreparedStatement statement = prepare(con)) {
        csv.getRecords()
                .stream()
                .map(getMapper())
                .forEach(insert(statement));
    }
}

From source file:uk.trainwatch.osgb.codepoint.util.CodePointImport.java

/**
 * Rebuilds one code lookup table from a CSV file.
 *
 * <p>The table is cleared, a placeholder row {@code (0,'','')} is inserted, and
 * then one {@code (code, name)} row is inserted per distinct code found in the
 * file (the first name seen for a code wins). The work is committed at the end;
 * on any failure the transaction is rolled back before the exception is rethrown.
 *
 * @param con   connection to use; {@code commit()}/{@code rollback()} are called on it
 * @param table unqualified table name inside {@code SCHEMA}
 * @param code  extracts the code from a record
 * @param name  extracts the name from a record
 * @param file  CSV file, parsed with {@code CSVFormat.EXCEL}
 * @throws SQLException if the rollback itself fails
 * @throws UncheckedSQLException if a database operation fails
 * @throws UncheckedIOException  if the file cannot be read
 */
private void importCode(Connection con, String table, Function<CSVRecord, String> code,
        Function<CSVRecord, String> name, File file) throws SQLException {

    try (CSVParser p = new CSVParser(new FileReader(file), CSVFormat.EXCEL)) {
        LOG.log(Level.INFO, () -> "Clearing down " + table);

        SQL.deleteIdTable(con, SCHEMA, table);
        try (Statement s = con.createStatement()) {
            s.executeUpdate("INSERT INTO " + SCHEMA + "." + table + " VALUES (0,'','')");
        }

        try (PreparedStatement ps = con
                .prepareStatement("INSERT INTO " + SCHEMA + "." + table + " (code,name) VALUES(?,?)")) {

            // Collapse duplicates (first occurrence wins) before inserting.
            p.getRecords().stream().collect(Collectors.toMap(code::apply, name::apply, (a, b) -> a))
                    .forEach(SQLBiConsumer.guard((c, n) -> SQL.executeUpdate(ps, c, n)));

            con.commit();

            LOG.log(Level.INFO, () -> "Imported " + p.getRecordNumber() + " entries into code table");
        }
    } catch (UncheckedSQLException ex) {
        // Presumably what the guarded lambda raises on an insert failure
        // (verify against SQLBiConsumer.guard); without this the cleared
        // table would be left in an open, uncommitted transaction.
        con.rollback();
        throw ex;
    } catch (SQLException ex) {
        con.rollback();
        throw new UncheckedSQLException(ex);
    } catch (IOException ex) {
        // A read failure after the clear-down must not leave partial work pending.
        con.rollback();
        throw new UncheckedIOException(ex);
    }
}

From source file:uk.trainwatch.osgb.codepoint.util.CodePointImport.java

/**
 * Imports one CSV file of post codes inside a single transaction.
 *
 * <p>All records are read first, auto-commit is switched off, each record is
 * turned into a {@code PostCode} and inserted via {@code CP_SQL}, and the
 * transaction is committed. {@code lineCount} is increased by the number of
 * records parsed. Any failure rolls the transaction back and is logged at
 * SEVERE before being rethrown.
 *
 * @param con  connection used for the inserts, commit and rollback
 * @param path CSV file to import, parsed with {@code CSVFormat.DEFAULT}
 * @throws SQLException the cause of an {@code UncheckedSQLException} raised
 *         during the import, or a failure of the rollback itself
 */
private void importer(Connection con, Path path) throws SQLException {
    LOG.log(Level.INFO, () -> "Importing " + path);

    // The parser is closed before any of the catch handlers below run.
    try (CSVParser csv = new CSVParser(new FileReader(path.toFile()), CSVFormat.DEFAULT)) {
        final List<CSVRecord> rows = csv.getRecords();

        // One big transaction for the whole file.
        con.setAutoCommit(false);

        try (PreparedStatement insert = con.prepareStatement(CP_SQL)) {
            rows.stream()
                    .map(row -> new PostCode(row.get(0), Integer.parseInt(row.get(1)),
                            Integer.parseInt(row.get(2)), Integer.parseInt(row.get(3)), row.get(4),
                            row.get(5), row.get(6), row.get(7), row.get(8), row.get(9)))
                    .forEach(SQLConsumer.guard(postCode -> {
                        SQL.executeUpdate(insert, postCode.getPostCode(), postCode.getPqi(),
                                postCode.getEastings(), postCode.getNorthings(),
                                codeLookup.getOrDefault(postCode.getCountry(), 0),
                                codeLookup.getOrDefault(postCode.getCounty(), 0),
                                codeLookup.getOrDefault(postCode.getDistrict(), 0),
                                codeLookup.getOrDefault(postCode.getWard(), 0),
                                nhsLookup.getOrDefault(postCode.getNhsRegion(), 0),
                                nhsLookup.getOrDefault(postCode.getNhs(), 0));
                    }));
        }

        con.commit();

        final int parsed = rows.size();
        lineCount += parsed;
        LOG.log(Level.INFO, () -> "Parsed " + parsed);
    } catch (IOException ex) {
        con.rollback();
        LOG.log(Level.SEVERE, null, ex);
        throw new UncheckedIOException(ex);
    } catch (UncheckedSQLException ex) {
        con.rollback();
        LOG.log(Level.SEVERE, null, ex);
        // Unwrap so callers see the underlying SQL failure.
        throw ex.getCause();
    } catch (Exception ex) {
        con.rollback();
        LOG.log(Level.SEVERE, null, ex);
        throw new RuntimeException(ex);
    }
}

From source file:umich.ms.batmass.filesupport.files.types.xcms.peaks.model.XCMSPeaks.java

/**
 * Parse XCMS peaks from the file, which you can create from R after running
 * XCMS feature finding. <br/>
 * Example:<br/>
 * {@code xs <- xcmsSet(files = file_mzml, method = "massifquant", prefilter = c(1, 10000), peakwidth = c(5, 500), ppm = 20, criticalValue = 1.0, consecMissedLimit = 0, withWave = 0, nSlaves=4)} <br/>
 * {@code peaks <- ixs@peaks[1:7108,1:9]} <br/>
 * {@code write.table(peaks, sep = "\t",file = "D:/projects/XCMS/peaks.xcms.csv")}
 * <p>
 * NOTE(review): the example above writes a tab-separated file, while this
 * method parses with a comma delimiter - confirm which one is intended.
 * <p>
 * The columns {@code isotopes}, {@code adduct} and {@code pcgroup} are
 * optional; when absent, {@code adduct} is set to the empty string and the
 * other two are left untouched.
 *
 * @param path location of the peaks file
 * @return the peaks read from the file, in file order
 * @throws IllegalArgumentException if {@code path} does not exist
 * @throws IOException if the file cannot be opened or read
 */
public static XCMSPeaks create(Path path) throws IOException {
    if (!Files.exists(path)) {
        throw new IllegalArgumentException("File path for XCMS peaks does not exist.");
    }

    XCMSPeaks peaks = new XCMSPeaks();
    CSVFormat format = CSVFormat.newFormat(',').withHeader().withIgnoreSurroundingSpaces()
            .withAllowMissingColumnNames().withQuoteMode(QuoteMode.NON_NUMERIC).withQuote('"');

    // try-with-resources: the original never closed the reader or the parser.
    try (BufferedReader reader = new BufferedReader(new FileReader(path.toFile()));
            CSVParser parser = new CSVParser(reader, format)) {
        for (final CSVRecord r : parser) {
            XCMSPeak p = new XCMSPeak();
            // The first column is addressed by index, not by header name.
            p.setRowNum(Integer.parseInt(r.get(0)));
            p.setMz(Double.parseDouble(r.get("mz")));
            p.setMzMin(Double.parseDouble(r.get("mzmin")));
            p.setMzMax(Double.parseDouble(r.get("mzmax")));
            p.setRt(Double.parseDouble(r.get("rt")));
            p.setRtMin(Double.parseDouble(r.get("rtmin")));
            p.setRtMax(Double.parseDouble(r.get("rtmax")));
            p.setInto(Double.parseDouble(r.get("into")));
            p.setMaxo(Double.parseDouble(r.get("maxo")));
            p.setSample(r.get("sample"));

            // these are optional and are only added by further R package
            // called 'CAMERA' processing
            try {
                p.setIsotopes(getRecordValueForColName(r, "isotopes"));
            } catch (IllegalArgumentException ignored) {
                // optional column absent: leave isotopes unset
            }
            try {
                p.setAdduct(r.get("adduct"));
            } catch (IllegalArgumentException ignored) {
                p.setAdduct("");
            }
            try {
                p.setPcgroup(Integer.parseInt(r.get("pcgroup")));
            } catch (IllegalArgumentException ignored) {
                // optional column absent or not an integer
                // (NumberFormatException is an IllegalArgumentException): leave unset
            }

            peaks.add(p);
        }
    }

    return peaks;
}

From source file:us.parr.animl.data.DataTable.java

/**
 * Loads a delimited text file into a {@link DataTable}.
 *
 * <p>The file is read as UTF-8 through a {@code BOMInputStream}. Column types
 * are computed from the data unless {@code colTypesOverride} is given; column
 * names come from the parser's header map unless {@code colNamesOverride} is given.
 *
 * @param fileName         path of the file to read
 * @param formatType       one of "tsv", "mysql", "excel", "rfc4180" (case-insensitive);
 *                         {@code null} or any other value selects RFC 4180
 * @param colTypesOverride column types to use instead of the computed ones, or {@code null}
 * @param colNamesOverride column names to use instead of the header names, or {@code null}
 * @param hasHeaderRow     whether the first record holds the column names
 * @return the table built from the file contents
 * @throws IllegalArgumentException if anything goes wrong while opening, reading
 *         or converting the file (the original exception is kept as the cause)
 */
public static DataTable loadCSV(String fileName, String formatType, VariableType[] colTypesOverride,
        String[] colNamesOverride, boolean hasHeaderRow) {
    try {
        // Pick the base dialect once, then apply the header option in one place.
        CSVFormat format = CSVFormat.RFC4180;
        if (formatType != null) {
            switch (formatType.toLowerCase()) {
            case "tsv":
                format = CSVFormat.TDF;
                break;
            case "mysql":
                format = CSVFormat.MYSQL;
                break;
            case "excel":
                format = CSVFormat.EXCEL;
                break;
            case "rfc4180":
            default:
                break;
            }
        }
        if (hasHeaderRow) {
            format = format.withHeader();
        }

        // use apache commons io + csv to load but convert to list of String[]
        // byte-order markers are handled if present at start of file.
        List<String[]> rows = new ArrayList<>();
        int numHeaderNames;
        String[] colNames;
        // try-with-resources closes stream, reader and parser even when the
        // parser constructor or the header lookup throws.
        try (FileInputStream fis = new FileInputStream(fileName);
                Reader reader = new InputStreamReader(new BOMInputStream(fis), "UTF-8");
                CSVParser parser = new CSVParser(reader, format)) {
            // NOTE(review): getHeaderMap() may return null when the format has no
            // header; that would surface as the IllegalArgumentException below - confirm.
            Set<String> colNameSet = parser.getHeaderMap().keySet();
            numHeaderNames = colNameSet.size();
            colNames = colNameSet.toArray(new String[colNameSet.size()]);

            for (final CSVRecord record : parser) {
                String[] row = new String[record.size()];
                for (int j = 0; j < record.size(); j++) {
                    row[j] = record.get(j);
                }
                rows.add(row);
            }
        }

        VariableType[] actualTypes = computeColTypes(rows, numHeaderNames);

        if (colNamesOverride != null) {
            colNames = colNamesOverride;
        }
        if (colTypesOverride != null) {
            actualTypes = colTypesOverride;
        }
        return fromStrings(rows, actualTypes, colNames, false);
    } catch (Exception e) {
        throw new IllegalArgumentException("Can't open and/or read " + fileName, e);
    }
}

From source file:Utils.CVEUtils.java

/**
 * Find the CVE in the CSV file and return the relevant parts.
 *
 * <p>The id is compared case-insensitively against the first column of each
 * record of {@code csvfile}; the first matching record wins.
 *
 * @param cveid the CVE identifier to look up
 * @return String[] with format { cveid, cvss_risk, summary } - if no CVE
 * exists, or the file cannot be read, this returns null
 */
public String[] getCVE(String cveid) {
    CSVFormat format = CSVFormat.DEFAULT.withDelimiter(',');
    // try-with-resources: the original leaked the file handle, in particular
    // on the early return once a match was found.
    try (FileReader in = new FileReader(csvfile);
            CSVParser parser = new CSVParser(in, format)) {
        for (CSVRecord record : parser) {
            String thiscve = record.get(0);
            if (thiscve.equalsIgnoreCase(cveid)) {
                // we have found our cve, get all the details and return
                return new String[] { thiscve, record.get(1), record.get(2) };
            }
        }
    } catch (IOException ex) {
        // Best effort: a read failure is reported and treated as "not found".
        ex.printStackTrace();
    }

    // If we get here then there was no vuln with that ID, return null.
    return null;
}

From source file:xc.mst.services.normalization.NormalizationService.java

/**
 * Reads and validates the 001 and 003 organization-code settings from
 * {@code enabledSteps} and stores them in {@code orgCodeProperties001} and
 * {@code orgCodeProperties003}, keyed by source repository URL.
 *
 * <p>For each of the two sections, {@code <section>.NumberOfRows} must be an
 * integer greater than zero, and each row {@code <section>.<i>} must be a
 * comma-separated line with 3 (001) or 6 (003) values whose first value (the
 * URL) is unique within the section.
 *
 * @throws ServiceValidationException if a row count is missing or invalid, a row
 *         is missing, cannot be parsed, has the wrong number of values or repeats
 *         a URL (field validation is delegated to {@code ValidateSOCConfig})
 */
protected void setupOrganizationCodeProperties() throws ServiceValidationException {
    int num001Properties = 0;
    boolean valid = true;
    try {
        num001Properties = Integer.parseInt(enabledSteps.getProperty("001Config.NumberOfRows"));
    } catch (NumberFormatException nfe) {
        valid = false;
    }
    if (num001Properties < 1)
        valid = false;
    if (!valid) {
        throw new ServiceValidationException(
                "Service configuration file Organization Code error: 001Config.NumberOfRows must be an integer value greater than zero.");
    }
    orgCodeProperties001 = new HashMap<String, HashMap<String, String>>(num001Properties);
    // # 001Config.<row>=<Source_repository_URL>,<Supply_001_for_Bib>,<Check_for_Holdings_001>
    HashMap<String, String> uniqueURLs = new HashMap<String, String>(num001Properties);
    for (int i = 1; i <= num001Properties; i++) {
        String row = enabledSteps.getProperty("001Config." + i);
        if (row == null) {
            // A missing row would otherwise fail with a NullPointerException in StringReader.
            throw new ServiceValidationException(
                    "Service configuration file Organization Code error: missing property 001Config." + i);
        }
        CSVParser csv = new CSVParser(new StringReader(row), CSVStrategy.EXCEL_STRATEGY);
        try {
            String[] values = csv.getLine();
            // values may be null when the row holds no line at all (e.g. empty property).
            if (values == null || values.length != 3) {
                throw new ServiceValidationException(
                        "Service configuration file Organization Code error: Couldn't parse 001Config row: expecting 3 values separated by commas.");
            }

            if (uniqueURLs.containsKey(values[0])) {
                throw new ServiceValidationException(
                        "Service configuration file Organization Code error: <Source_repository_URLs> must all be unique.");
            }
            uniqueURLs.put(values[0], values[0]);

            HashMap<String, String> hm = new HashMap<String, String>(2);

            String[] yn = { "Y", "N" };
            ValidateSOCConfig(true, "Supply_001_for_Bib", values[1], yn);
            hm.put("Supply_001_for_Bib", values[1]);

            ValidateSOCConfig(true, "Check_for_Holdings_001", values[2], yn);
            hm.put("Check_for_Holdings_001", values[2]);

            orgCodeProperties001.put(values[0], hm);
        } catch (IOException e) {
            throw new ServiceValidationException(
                    "Service configuration file Organization Code error: Couldn't parse 001Config row: "
                            + e.getMessage());
        }
    }

    // 'valid' is still true here; any earlier failure has already thrown.
    int num003Properties = 0;
    try {
        num003Properties = Integer.parseInt(enabledSteps.getProperty("003Config.NumberOfRows"));
    } catch (NumberFormatException nfe) {
        valid = false;
    }
    if (num003Properties < 1)
        valid = false;
    if (!valid) {
        throw new ServiceValidationException(
                "Service configuration file Organization Code error: 003Config.NumberOfRows must be an integer value greater than zero.");
    }
    orgCodeProperties003 = new HashMap<String, HashMap<String, String>>(num003Properties);
    // # 003Config.<row>=<Source repository_URL>,<MARC_Org_Code>,<Check_Bib_003>,<Overwrite_Bib_003>,<Check_Holdings_003>,<Overwrite_Holdings_003>
    uniqueURLs = new HashMap<String, String>(num003Properties);
    for (int i = 1; i <= num003Properties; i++) {
        String row = enabledSteps.getProperty("003Config." + i);
        if (row == null) {
            throw new ServiceValidationException(
                    "Service configuration file Organization Code error: missing property 003Config." + i);
        }
        CSVParser csv = new CSVParser(new StringReader(row), CSVStrategy.EXCEL_STRATEGY);
        try {
            String[] values = csv.getLine();
            if (values == null || values.length != 6) {
                throw new ServiceValidationException(
                        "Service configuration file Organization Code error: Couldn't parse 003Config row: expecting 6 values separated by commas.");
            }

            if (uniqueURLs.containsKey(values[0])) {
                throw new ServiceValidationException(
                        "Service configuration file Organization Code error: <Source_repository_URLs> must all be unique.");
            }
            uniqueURLs.put(values[0], values[0]);

            HashMap<String, String> hm = new HashMap<String, String>(5);

            // An empty org code is reported through the validator with null value/choices.
            if (values[1].length() < 1)
                ValidateSOCConfig(false, "MARC_Org_Code", null, null);
            hm.put("MARC_Org_Code", values[1]);

            String[] ynm0 = { "Y", "N", "M", "0" };
            ValidateSOCConfig(false, "Check_Bib_003", values[2], ynm0);
            hm.put("Check_Bib_003", values[2]);

            String[] yn = { "Y", "N" };
            ValidateSOCConfig(false, "Overwrite_Bib_003", values[3], yn);
            hm.put("Overwrite_Bib_003", values[3]);

            ValidateSOCConfig(false, "Check_Holdings_003", values[4], ynm0);
            hm.put("Check_Holdings_003", values[4]);

            ValidateSOCConfig(false, "Overwrite_Holdings_003", values[5], yn);
            hm.put("Overwrite_Holdings_003", values[5]);

            orgCodeProperties003.put(values[0], hm);
        } catch (IOException e) {
            throw new ServiceValidationException(
                    "Service configuration file Organization Code error: Couldn't parse 003Config row: "
                            + e.getMessage());
        }
    }

}