Example usage for org.apache.commons.csv CSVParser CSVParser

List of usage examples for org.apache.commons.csv CSVParser CSVParser

Introduction

On this page you can find example usage for org.apache.commons.csv CSVParser CSVParser.

Prototype

public CSVParser(final Reader reader, final CSVFormat format) throws IOException 

Document

Customized CSV parser using the given CSVFormat.

If you do not read all records from the given reader, you should call #close() on the parser, unless you close the reader.
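
A minimal, self-contained sketch of this constructor is shown below; it wraps the reader and parser in try-with-resources so both are closed even when not every record is read. The file name "data.csv" is a placeholder and not taken from any of the examples that follow.

import java.io.IOException;
import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CSVParserExample {
    public static void main(String[] args) throws IOException {
        // "data.csv" is a placeholder path for this sketch
        try (Reader reader = Files.newBufferedReader(Paths.get("data.csv"));
                CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT.withHeader())) {
            // The first record is read as the header; the remaining records are data rows
            for (CSVRecord record : parser) {
                System.out.println(record);
            }
        }
    }
}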

Usage

From source file:com.itemanalysis.jmetrik.file.JmetrikFileImporter.java

/**
 * 1. Gets the file header or creates one.
 * 2. Sets the number of columns.
 * 3. Checks the type of data stored in each variable using the first rowsToScan rows.
 *    Variables are integers by default. This method will change the data type to either
 *    double or string.
 *
 */
private void setDataTypes() {
    CSVParser parser = null;
    Reader reader = null;

    try {
        reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8");

        //Get column names from variable attributes
        colNames = new String[variableAttributeMap.size()];
        int index = 0;
        Iterator<VariableName> iter = variableAttributeMap.keySet().iterator();
        VariableName tempName = null;
        while (iter.hasNext()) {
            colNames[index++] = iter.next().toString();
        }

        //Create a parser with variable names from the variable attributes
        if (hasHeader) {
            parser = new CSVParser(reader,
                    dataFileFormat.withHeader(colNames).withSkipHeaderRecord(true).withCommentMarker('#'));
        } else {
            parser = new CSVParser(reader, dataFileFormat.withHeader(colNames).withCommentMarker('#'));
        }

        //Check data types in each column.
        String value = "";
        Iterator<CSVRecord> csvIter = parser.iterator();
        CSVRecord csvRecord = null;
        double testValue = 0;
        nrow = 0;

        while (csvIter.hasNext()) {
            csvRecord = csvIter.next();

            iter = variableAttributeMap.keySet().iterator();
            while (iter.hasNext()) {
                tempName = iter.next();
                value = csvRecord.get(tempName.toString()).trim();

                //Check that string can be converted to double. If not, Change variable type.
                //Ignore missing data and other special codes
                try {
                    if (!"".equals(value) && !specialDataCodes.isMissing(value)) {
                        testValue = Double.parseDouble(value);
                        if (testValue != Math.floor(testValue)) {
                            //if any value is a double, the variable is a double
                            variableAttributeMap.get(tempName).setDataType(DataType.DOUBLE);
                        }
                    }
                } catch (NumberFormatException ex) {
                    //if any value is a String, the variable is a String
                    variableAttributeMap.get(tempName).setDataType(DataType.STRING);
                }
            }
            nrow++;
        }

    } catch (IOException ex) {
        theException = ex;
    } finally {
        try {
            if (parser != null)
                parser.close();
            if (reader != null)
                reader.close();
        } catch (IOException ex) {
            theException = ex;
            logger.fatal(ex);
        }
    }

}

From source file:com.marklogic.contentpump.DelimitedTextReader.java

protected void initParser(InputSplit inSplit) throws IOException, InterruptedException {
    setFile(((FileSplit) inSplit).getPath());
    configFileNameAsCollection(conf, file);

    fileIn = fs.open(file);
    instream = new InputStreamReader(fileIn, encoding);

    bytesRead = 0;
    fileLen = inSplit.getLength();
    if (uriName == null) {
        generateId = conf.getBoolean(CONF_INPUT_GENERATE_URI, false);
        if (generateId) {
            idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart());
        } else {
            uriId = 0;
        }
    }
    parser = new CSVParser(instream, CSVParserFormatter.getFormat(delimiter, encapsulator, true, true));
    parserIterator = parser.iterator();
}
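
CSVParserFormatter in this example is a helper class from the MarkLogic Content Pump project, not part of commons-csv. For orientation only, a comparable CSVFormat can be assembled directly from the standard builder methods; this sketch does not claim to reproduce exactly what the two boolean flags passed to getFormat configure.

    // Illustrative sketch: a format with a custom delimiter and quote character
    CSVFormat format = CSVFormat.newFormat(delimiter).withQuote(encapsulator);
    parser = new CSVParser(instream, format);
    parserIterator = parser.iterator();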

From source file:data.io.csv.CSVDataReader.java

/**
 * {@inheritDoc}
 * Note: multiple iterators on the same instance are not supported.
 */
@Override
public Iterator<MVDataEntry> iterator() {
    // When a new iterator is requested, everything should be reset
    CSVParser parser;
    try {
        dataSourceStream.reset();
        parser = new CSVParser(dataSourceStream, format);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    csvIt = parser.iterator();
    nextCSVRecord = null;
    nextEntry = null;
    return this;
}

From source file:br.edimarmanica.weir2.integration.ScoredPairs.java

public static List<ScoredPair> loadAndSort(Domain domain) {
    List<ScoredPair> pairs = new ArrayList<>();

    try (Reader in = new FileReader(new File(Paths.PATH_WEIR_V2 + "/" + domain.getPath() + "/scores.csv"))) {
        try (CSVParser parser = new CSVParser(in, CSVFormat.EXCEL.withHeader())) {
            for (CSVRecord record : parser) { // for each value
                Rule rule1 = new Rule(domain.getSiteOf(record.get("SITE1")), record.get("RULE1"));
                Rule rule2 = new Rule(domain.getSiteOf(record.get("SITE2")), record.get("RULE2"));
                double score = Double.valueOf(record.get("SCORE"));
                if (score == 1) {
                    continue;
                }
                ScoredPair pair = new ScoredPair(rule1, rule2, score);
                pairs.add(pair);
            }
        }
    } catch (FileNotFoundException ex) {
        Logger.getLogger(RulesDataTypeController.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(RulesDataTypeController.class.getName()).log(Level.SEVERE, null, ex);
    }

    Collections.sort(pairs);

    return pairs;

}

From source file:edu.si.sidora.tabularmetadata.TabularScannerTest.java

@Test
public void testOperationWithLimitedScan() throws IOException {
    try (Reader reader = new FileReader(smalltestfile);
            final CSVParser parser = new CSVParser(reader, DEFAULT.withHeader())) {
        log.debug("Found header map: {}", parser.getHeaderMap());
        final TabularScanner testScanner = new TabularScanner(parser.iterator(), mockTypeStrategy,
                mockRangeStrategy, mockEnumStrategy);
        testScanner.scan(2);
        final List<DataType> guesses = testScanner.getTypeStrategies().stream().map(Heuristic::results)
                .collect(toList());
        assertEquals("Failed to find the correct column types!", expectedResults, guesses);
    }
}

From source file:com.x460dot10.b.registrar.StartupManager.java

/**
 * Imports data/passwords.txt into <code>University.passwords</code>
 *
 * @return             Indicates import of passwords was successful
 * @throws IOException
 */
public boolean importPasswords() throws IOException {
    Boolean importPasswordsSuccessful = true;
    File file = new File("data/mockpasswords.dat");
    FileReader reader = null;
    Object nextPassword;
    ArrayList<Password> filePasswords = new ArrayList<Password>();
    try {
        reader = new FileReader(file);
        CSVFormat format = CSVFormat.DEFAULT;
        List<CSVRecord> records = new CSVParser(reader, format).getRecords();

        for (CSVRecord record : records) {
            String idAsString = record.get(0);
            Integer id = Integer.parseInt(idAsString);
            String userName = record.get(1);
            String password = record.get(2);
            nextPassword = Password.getStaticInstance(id, userName, password).clone();
            filePasswords.add((Password) nextPassword);
        }
        uni.passwordManager.importPasswords(filePasswords);
    } catch (Exception ex) {
        // TODO send error message to a log file
        System.err.println("Error: " + ex.getMessage());
        importPasswordsSuccessful = false;
    } finally {
        if (reader != null)
            reader.close();
    }
    return importPasswordsSuccessful;
}

From source file:com.alcatel_lucent.nz.wnmsextract.reader.BorgSelectionReader.java

/**
 * Main readAll method with calendar type args. Sets up data array and uses 
 * DB utilities class to bulk insert.
 */
@Override
public void readAll(Calendar start, Calendar end) {
    ArrayList<ColumnStructure> colstruct = new ArrayList<ColumnStructure>();
    colstruct.add(ColumnStructure.VC);
    colstruct.add(ColumnStructure.TS);
    colstruct.add(ColumnStructure.IT);

    ArrayList<ArrayList<String>> dmap = new ArrayList<ArrayList<String>>();
    try {
        //URL borg = new URL(BORG+getYesterday());
        URL borg = new URL(BORG + getDateSelection(start, end));
        URLConnection conn = borg.openConnection();
        BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()));

        CSVParser parser = new CSVParser(in, strategy);
        Iterator<CSVRecord> records = parser.iterator();
        // consume the header record, if present
        if (records.hasNext()) {
            records.next();
        }
        // and body
        while (records.hasNext()) {
            CSVRecord record = records.next();
            ArrayList<String> list = new ArrayList<String>();
            list.add(idConvert(record.get(1), record.get(2), record.get(3)));
            Calendar cal = Calendar.getInstance();
            cal.setTime(BORG_DATA_DF.parse(record.get(0)));
            list.add(ALUDBUtilities.ALUDB_DF.format(cal.getTime()));
            list.add(record.get(5));

            dmap.add(list);
        }
        in.close();
    } catch (ArrayIndexOutOfBoundsException aiobe) {
        System.err.println("Result not parseable " + aiobe);
        System.exit(1);
    } catch (MalformedURLException mrue) {
        System.err.println("Borg Path incorrect " + mrue);
        System.exit(1);
    } catch (IOException ioe) {
        System.err.println("Cannot read Borg file " + ioe);
        System.exit(1);
    } catch (ParseException pe) {
        System.err.println("Cannot parse Date field " + pe);
        System.exit(1);
    }

    /* bulk insert */
    ALUDBUtilities.insert(databasetype, TABLE, colstruct, dmap);

}

From source file:edu.clemson.lph.civet.addons.VspsCviFile.java

/**
 * Test only.
 */
private void printme(File fIn) {
    try {
        CSVParser parserIn = new CSVParser(new FileReader(fIn), CSVFormat.EXCEL);
        parser = new LabeledCSVParser(parserIn);
        aCols = parser.getNext();
    } catch (FileNotFoundException e) {
        logger.error(e.getMessage() + "\nCould not read file: " + fIn.getName());
    } catch (IOException e) {
        logger.error(e.getMessage() + "\nCould not read file: " + fIn.getName());
    }
    VspsCvi cvi;
    try {
        while ((cvi = nextCVI()) != null) {
            if (cvi.getStatus().equals("SAVED"))
                continue;
            VspsCviEntity orig = cvi.getOrigin();
            VspsCviEntity dest = cvi.getDestination();
            System.out.println(cvi.getCVINumber() + " created: " + cvi.getCreateDate());
            System.out
                    .println("  origin = " + orig.getName() + " " + orig.getPhone() + " " + orig.getAddress1());
            System.out.println(
                    "  destination = " + dest.getName() + " " + dest.getPhone() + " " + dest.getAddress1());
            System.out.println(cvi.getOriginState() + " " + orig.getState());
            System.out.println(
                    cvi.getVeterinarianName() + ": " + cvi.getVetFirstName() + " " + cvi.getVetLastName());
            System.out.println(cvi.getAnimals().size() + " Animals in CVI");
            System.out.println(cvi.getRemarks());
            for (List<String> aKey : cvi.getSpecies().keySet()) {
                Integer iCount = cvi.getSpecies().get(aKey);
                System.out.println(iCount + " " + aKey.get(0) + " (" + aKey.get(1) + ")");
            }
            for (VspsCviAnimal animal : cvi.getAnimals()) {
                System.out.println("\t" + animal.getSpecies() + " " + animal.getBreed() + " "
                        + animal.getGender() + " " + animal.getDateOfBirth());
                for (int i = 1; i <= 5; i++) {
                    String sIdType = animal.getIdentifierType(i);
                    if (sIdType != null)
                        System.out.println("\t\t" + sIdType + " = " + animal.getIdentifier(i));
                }
            }
        }
    } catch (IOException e) {
        logger.error(e);
    }
}

From source file:javalibs.CSVExtractor.java

private void readCSV() {
    try {
        CSVParser parser = new CSVParser(Files.newBufferedReader(Paths.get(this.inCSV)),
                CSVFormat.DEFAULT.withHeader().withIgnoreHeaderCase().withTrim());

        // Get all headers
        Map<String, Integer> rawHeaders = parser.getHeaderMap();

        // Store the inRecords
        this.inRecords = parser.getRecords();
        parser.close();

        orderHeaders(rawHeaders);
    } catch (IOException e) {
        log_.die(e);
    }
}

From source file:com.marklogic.contentpump.DelimitedTextInputFormat.java

public List<InputSplit> getSplits(JobContext job) throws IOException {
    boolean delimSplit = isSplitInput(job.getConfiguration());
    //if delimSplit is true, size of each split is determined by 
    //Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat
    List<InputSplit> splits = super.getSplits(job);
    if (!delimSplit) {
        return splits;
    }

    if (splits.size() >= SPLIT_COUNT_LIMIT) {
        //if #splits > 1 million, there is enough parallelism
        //therefore no point to split
        LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:" + SPLIT_COUNT_LIMIT);
        DefaultStringifier.store(job.getConfiguration(), false, ConfigConstants.CONF_SPLIT_INPUT);
        return splits;
    }
    // add header info into splits
    List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
    LOG.info(splits.size() + " DelimitedSplits generated");
    Configuration conf = job.getConfiguration();
    char delimiter = 0;
    ArrayList<Text> hlist = new ArrayList<Text>();
    for (InputSplit file : splits) {
        FileSplit fsplit = ((FileSplit) file);
        Path path = fsplit.getPath();
        FileSystem fs = path.getFileSystem(conf);

        if (fsplit.getStart() == 0) {
            // parse the inSplit, get the header
            FSDataInputStream fileIn = fs.open(path);

            String delimStr = conf.get(ConfigConstants.CONF_DELIMITER, ConfigConstants.DEFAULT_DELIMITER);
            if (delimStr.length() == 1) {
                delimiter = delimStr.charAt(0);
            } else {
                LOG.error("Incorrect delimitor: " + delimiter + ". Expects single character.");
            }
            String encoding = conf.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
                    MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
            InputStreamReader instream = new InputStreamReader(fileIn, encoding);
            CSVParser parser = new CSVParser(instream,
                    CSVParserFormatter.getFormat(delimiter, DelimitedTextReader.encapsulator, true, true));
            Iterator<CSVRecord> it = parser.iterator();

            String[] header = null;
            if (it.hasNext()) {
                CSVRecord record = (CSVRecord) it.next();
                Iterator<String> recordIterator = record.iterator();
                int recordSize = record.size();
                header = new String[recordSize];
                for (int i = 0; i < recordSize; i++) {
                    if (recordIterator.hasNext()) {
                        header[i] = (String) recordIterator.next();
                    } else {
                        throw new IOException("Record size doesn't match the real size");
                    }
                }

                EncodingUtil.handleBOMUTF8(header, 0);

                hlist.clear();
                for (String s : header) {
                    hlist.add(new Text(s));
                }
            }
            instream.close();
        }

        DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(hlist.toArray(new Text[hlist.size()])),
                path, fsplit.getStart(), fsplit.getLength(), fsplit.getLocations());
        populatedSplits.add(ds);
    }

    return populatedSplits;
}