Example usage for org.apache.commons.csv CSVParser CSVParser

List of usage examples for org.apache.commons.csv CSVParser CSVParser

Introduction

On this page you can find example usage of the org.apache.commons.csv CSVParser constructor CSVParser(Reader, CSVFormat).

Prototype

public CSVParser(final Reader reader, final CSVFormat format) throws IOException 

Document

Customized CSV parser using the given CSVFormat.

If you do not read all records from the given reader, you should call #close() on the parser, unless you close the reader.
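
Before the project-specific examples below, here is a minimal sketch of the constructor in isolation. The path data.csv and the column name "name" are placeholders chosen for illustration; they do not come from any of the examples. Declaring both the Reader and the CSVParser in a try-with-resources block ensures the parser is closed even when not all records are read.

import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CsvParserSketch {
    public static void main(String[] args) throws Exception {
        // placeholder input file; the first record is used as the header row
        try (Reader reader = Files.newBufferedReader(Paths.get("data.csv"), StandardCharsets.UTF_8);
                CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT.withHeader())) {
            for (CSVRecord record : parser) {
                // columns can then be read by their header name ("name" is a placeholder column)
                System.out.println(record.get("name"));
            }
        }
    }
}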

Usage

From source file:edu.washington.gs.skyline.model.quantification.QuantificationTest.java

private Map<RecordKey, Double> readExpectedRows(String filename) throws Exception {
    Map<RecordKey, Double> map = new HashMap<>();
    Reader reader = new InputStreamReader(QuantificationTest.class.getResourceAsStream(filename));
    try {
        CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader());
        for (CSVRecord record : parser.getRecords()) {
            map.put(new RecordKey(record), parseNullableDouble(record.get("NormalizedArea")));
        }
    } finally {
        reader.close();
    }
    return map;
}
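
Note that the finally block above closes only the reader. Per the constructor documentation quoted in the introduction, that is sufficient: a separate #close() on the parser is only needed when not all records are read and the reader is left open, whereas here getRecords() consumes the whole file and the reader is closed afterwards.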

From source file:de.tudarmstadt.ukp.experiments.argumentation.sequence.annotator.OnlyFilesMatchingPredictionsReader.java

public static List<Sequence> extractSequences(File tokenLevelPredictionsCsvFile1) throws IOException {
    List<Sequence> result = new ArrayList<>();
    // load the CSV
    CSVParser csvParser = new CSVParser(new FileReader(tokenLevelPredictionsCsvFile1),
            CSVFormat.DEFAULT.withCommentMarker('#'));

    String prevSeqId = null;
    Sequence currentSequence = new Sequence();

    int tokenCounter = 0;

    for (CSVRecord csvRecord : csvParser) {
        // row for particular instance (token)
        String predictedTag = csvRecord.get(1);
        String token = csvRecord.get(2);
        String seqId = csvRecord.get(3);

        TokenEntry tokenEntry = new TokenEntry(token, predictedTag);

        // if the token belongs to the previous seqId, add it to the sequence
        if (prevSeqId == null || seqId.equals(prevSeqId)) {
            currentSequence.getTokens().add(tokenEntry);
        } else {
            // otherwise start a new sequence
            result.add(currentSequence);

            currentSequence = new Sequence();
            currentSequence.getTokens().add(tokenEntry);
        }

        prevSeqId = seqId;
        tokenCounter++;
    }

    // don't forget to add the last sequence
    result.add(currentSequence);

    System.out.println("Loaded " + result.size() + " sequences with total " + tokenCounter + " tokens.");

    return result;
}

From source file:com.sojw.TableNamesFinderExecutor.java

public static List<CSVRecord> getCSVFileContents(final String filePath) throws IOException {
    final Reader in = new BufferedReader(new FileReader(filePath));
    final CSVParser parser = new CSVParser(in, CSVFormat.DEFAULT);
    List<CSVRecord> fileContentList = parser.getRecords();
    return fileContentList;
}
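
This example never closes the reader or the parser, and getRecords() materializes the whole file in memory before returning. A variant of the same method that uses try-with-resources to release both resources once the records have been read might look like the following sketch (same behavior otherwise):

public static List<CSVRecord> getCSVFileContents(final String filePath) throws IOException {
    // try-with-resources closes the BufferedReader and the CSVParser once the records are materialized
    try (Reader in = new BufferedReader(new FileReader(filePath));
            CSVParser parser = new CSVParser(in, CSVFormat.DEFAULT)) {
        return parser.getRecords();
    }
}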

From source file:core.reporting.ImportFromFile.java

/**
 * Validates the content of the CSV file against the column definition file. This method stores the valid
 * records into a buffer and logs any error found in the input file to the system log.
 *
 * @param ircdlist - record list parsed from the input file
 *
 * @return number of errors found.
 */
private int validateRecord(Iterable<CSVRecord> ircdlist) {
    // clear all previous log for import flag
    SystemLog.clearLogByFlag("ie");
    // column definition
    Iterable<CSVRecord> coldefl = null;
    try {
        Reader in = new FileReader(TResourceUtils.getFile(columnModelDef + ".csv"));
        coldefl = (new CSVParser(in, CSVFormat.EXCEL.withHeader()).getRecords());
    } catch (Exception e) {
        SystemLog.logException(e);
    }
    SimpleDateFormat dfomat = null;
    tempBuffer.clear();
    int line = 0;
    int error = 0;
    int coldeflen = 0;
    fileColumns = "";
    for (CSVRecord ircd : ircdlist) {
        Record frcd = new Record(recordModel);
        line++;
        coldeflen = 0;
        // premature return if error > coldefl
        if (error > coldeflen) {
            SystemLog.log("inputfile.msg05", "ie", "", error);
            return error;
        }
        for (CSVRecord cdr : coldefl) {
            coldeflen++;
            Object iobj = null;
            String siobj = null;
            String fieldn = null;
            // contain field name
            try {
                fieldn = cdr.get("field");
                ircd.get(fieldn);
            } catch (Exception e) {
                // if field is mandatory, log error
                if (cdr.get("mandatory").equals("true")) {
                    SystemLog.log("inputfile.msg02", "ie", "", line, cdr.get("field"));
                    error++;
                }
                continue;
            }
            // value class.
            try {
                String cls = cdr.get("class");
                // String class by default
                iobj = ircd.get(fieldn);
                if (cls.equals("Integer")) {
                    iobj = Integer.parseInt(ircd.get(fieldn));
                }
                if (cls.equals("Double")) {
                    iobj = Double.parseDouble(ircd.get(fieldn));
                }
                if (cls.equals("Date")) {
                    // date may be not present
                    dfomat = new SimpleDateFormat(cdr.get("format"));
                    Date d = iobj.equals("") ? TStringUtils.ZERODATE : dfomat.parse((String) iobj);
                    iobj = new java.sql.Date(d.getTime());
                }
            } catch (Exception e) {
                SystemLog.log("inputfile.msg03", "ie", "", line, cdr.get("field"), cdr.get("class"),
                        cdr.get("format"));
                error++;
                continue;
            }
            // valid value
            siobj = ircd.get(fieldn);
            boolean vvb = true;
            String vv = cdr.get("valid values");
            if (!vv.equals("")) {
                vvb = false;
                String[] vvlst = vv.split(";");
                for (String vvi : vvlst) {
                    vvb = (siobj.equals(vvi)) ? true : vvb;
                }
            }
            if (vvb == false) {
                SystemLog.log("inputfile.msg04", "ie", "", line, cdr.get("field"), cdr.get("valid values"));
                error++;
                continue;
            }
            // no problem? add field
            String tf = cdr.get("target_field");
            fileColumns += tf + ";";
            frcd.setFieldValue(tf, iobj);
        }
        tempBuffer.add(frcd);
    }
    fileColumns = fileColumns.substring(0, fileColumns.length() - 1);
    return error;
}

From source file:com.publictransitanalytics.scoregenerator.datalayer.directories.GTFSReadingStopTimesDirectory.java

private void parseStopTimesFile(final ImmutableSetMultimap<String, FrequencyRecord> frequencyRecordMap,
        final Reader stopTimesReader) throws FileNotFoundException, IOException, InterruptedException {

    final CSVParser parser = new CSVParser(stopTimesReader, CSVFormat.DEFAULT.withHeader());

    final SortedSetMultimap<String, RawTripStop> rawTripMap = TreeMultimap.create(Comparator.naturalOrder(),
            (stop1, stop2) -> Integer.compare(stop1.getSequence(), stop2.getSequence()));

    final Iterator<CSVRecord> stopTimesIter = parser.iterator();
    while (stopTimesIter.hasNext()) {
        final CSVRecord record = stopTimesIter.next();
        final String rawTripId = record.get("trip_id");
        final int stopSequence = Integer.valueOf(record.get("stop_sequence"));
        final String stopId = record.get("stop_id");
        final String arrivalTimeString = record.get("arrival_time");
        final TransitTime arrivalTime = (arrivalTimeString == null) ? null
                : TransitTime.parse(arrivalTimeString);
        final String departureTimeString = record.get("departure_time");
        final TransitTime departureTime = (departureTimeString == null) ? null
                : TransitTime.parse(departureTimeString);

        if (frequencyRecordMap.containsKey(rawTripId)) {
            final RawTripStop rawTripStop = new RawTripStop(arrivalTime, departureTime, stopId, rawTripId,
                    stopSequence);
            rawTripMap.put(rawTripId, rawTripStop);
        } else {
            final TripId tripId = new TripId(rawTripId);
            final TripStop tripStop = new TripStop(arrivalTime, stopId, tripId, stopSequence);
            try {
                final TripIdKey tripIdKey = new TripIdKey(rawTripId);
                tripsStore.put(tripIdKey, tripId);
                tripSequenceStore.put(new TripSequenceKey(tripIdKey, arrivalTime, stopSequence), tripStop);
                stopTimesStore.put(StopTimeKey.getWriteKey(stopId, arrivalTime), tripStop);
            } catch (final BitvantageStoreException e) {
                throw new ScoreGeneratorFatalException(e);
            }
        }
    }
    for (final String rawTripId : rawTripMap.keySet()) {
        final ImmutableSet<FrequencyRecord> frequencyRecords = frequencyRecordMap.get(rawTripId);
        for (final FrequencyRecord frequencyRecord : frequencyRecords) {

            TransitTime recurringTime = frequencyRecord.getStartTime();
            while (recurringTime.isBefore(frequencyRecord.getEndTime())) {
                final TransitTime baseArrivalTime = rawTripMap.get(rawTripId).first().getArrivalTime();
                final TripId tripId = new TripId(rawTripId, recurringTime.toString());

                for (final RawTripStop rawTripStop : rawTripMap.get(rawTripId)) {
                    final TransitTime arrivalTime = recurringTime
                            .plus(TransitTime.durationBetween(baseArrivalTime, rawTripStop.getArrivalTime()));
                    final int stopSequence = rawTripStop.getSequence();
                    final String stopId = rawTripStop.getStopId();

                    final TripStop tripStop = new TripStop(arrivalTime, stopId, tripId, stopSequence);

                    final TripIdKey tripIdKey = new TripIdKey(tripId.getRawTripId(), tripId.getQualifier());

                    try {
                        tripsStore.put(tripIdKey, tripId);
                        tripSequenceStore.put(new TripSequenceKey(tripIdKey, arrivalTime, stopSequence),
                                tripStop);
                        stopTimesStore.put(StopTimeKey.getWriteKey(stopId, arrivalTime), tripStop);
                    } catch (final BitvantageStoreException e) {
                        throw new ScoreGeneratorFatalException(e);
                    }
                }
                recurringTime = recurringTime.plus(frequencyRecord.getInterval());
            }
        }
    }
}

From source file:com.ibm.watson.app.qaclassifier.tools.GenerateTrainingAndPopulationData.java

/**
 * Reads in the question input file and creates a POJO for each question it finds. If the label associated with
 * the question does not exist in the previously loaded answer store, the question is skipped.
 *
 * @return TrainingData - full POJO of the training data
 */
private static NLClassifierTrainingData readQuestionInput(List<ManagedAnswer> store) {
    NLClassifierTrainingData data = null;

    try (FileReader reader = new FileReader(questionInput);
            CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL)) {
        // read in the csv file and get the records            
        List<CSVRecord> records = parser.getRecords();

        // now we can create the training data because we have read the records
        data = new NLClassifierTrainingData();
        data.setLanguage("en");
        for (CSVRecord r : records) {
            // order is: QuestionText, LabelId

            // check for existence of label first, if not there, skip                
            // we only add the training instance if there is an associated answer
            String text = r.get(0);
            String label = r.get(1);
            if (labelHasAnswer(label, store)) {
                data.addTrainingData(text, label);
            } else {
                System.out.println(MessageKey.AQWQAC24009E_label_not_found_in_answer_store_including_2
                        .getMessage(text, label).getFormattedMessage());
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    return data;
}

From source file:edu.washington.gs.skyline.model.quantification.QuantificationTest.java

private List<ReplicateData> readReplicates(String filename) throws Exception {
    Map<String, ReplicateData> replicates = new LinkedHashMap<>();
    Reader reader = new InputStreamReader(QuantificationTest.class.getResourceAsStream(filename));
    try {
        CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader());
        for (CSVRecord record : parser.getRecords()) {
            String fileName = record.get("FileName");
            ReplicateData replicate = replicates.get(fileName);
            if (replicate == null) {
                replicate = new ReplicateData();
                replicates.put(fileName, replicate);
            }
        }

    } finally {
        reader.close();
    }
    throw new NotImplementedException();
}

From source file:com.itemanalysis.jmetrik.file.JmetrikFileReader.java

/**
 * Opens a connection by instantiating the reader and CSVParser. It extracts the
 * header into the VariableAttributeMap and moves the cursor to the first row of data.
 *
 * @throws IOException
 */
public void openConnection() throws IOException {
    //opens file and advances cursor to beginning of data
    dataReader = new InputStreamReader(new BOMInputStream(Files.newInputStream(f)), "UTF-8");
    dataParser = new CSVParser(dataReader, CSVFormat.DEFAULT.withCommentMarker('#'));
    dataIterator = dataParser.iterator();
    setVariableAttributes(dataIterator);

    //Get Column names
    String[] colNames = new String[variableAttributes.size()];
    int index = 0;
    for (VariableName v : variableAttributes.keySet()) {
        colNames[index] = v.toString();
        index++;
    }
    dataReader.close();
    dataParser.close();

    //Advance iterator to first data record (A little inefficient because loops over header a second time)
    //This inefficiency is because CSVReader only allows the header to be set in the constructor.
    dataReader = new InputStreamReader(new BOMInputStream(Files.newInputStream(f)), "UTF-8");
    dataParser = new CSVParser(dataReader, CSVFormat.DEFAULT.withCommentMarker('#').withHeader(colNames));
    dataIterator = dataParser.iterator();
    advanceToFirstDataRecord();
}

From source file:com.itemanalysis.jmetrik.file.JmetrikFileImporter.java

/**
 * Creates a header map for the CSV file, but imposes naming conventions on the column names.
 *
 */
private void setVariableAttributes() {
    VariableAttributes variableAttributes = null;
    int position = 0;

    Reader reader = null;
    CSVParser parser = null;
    VariableName tempName = null;

    try {
        reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8");
        parser = new CSVParser(reader, dataFileFormat.withHeader());

        if (hasHeader) {
            Map<String, Integer> csvMap = parser.getHeaderMap();
            for (String s : csvMap.keySet()) {
                variableAttributes = new VariableAttributes(new VariableName(s), new VariableLabel(""),
                        DataType.INTEGER, position);
                variableAttributeMap.put(variableAttributes.getName(), variableAttributes);
                position++;
            }
        } else {
            Iterator<CSVRecord> iter = parser.iterator();
            CSVRecord csvRecord = iter.next();

            for (int i = 0; i < csvRecord.size(); i++) {
                variableAttributes = new VariableAttributes(new VariableName("v" + (i + 1)),
                        new VariableLabel(""), DataType.INTEGER, position);
                variableAttributeMap.put(variableAttributes.getName(), variableAttributes);
                position++;
            }
        }

    } catch (IOException ex) {
        theException = ex;
    } finally {
        try {
            if (parser != null)
                parser.close();
            if (reader != null)
                reader.close();
        } catch (IOException ex) {
            theException = ex;
        }
    }
}

From source file:com.marklogic.contentpump.SplitDelimitedTextReader.java

@Override
protected void initParser(InputSplit inSplit) throws IOException, InterruptedException {
    setFile(((DelimitedSplit) inSplit).getPath());
    configFileNameAsCollection(conf, file);

    // get header from the DelimitedSplit
    TextArrayWritable taw = ((DelimitedSplit) inSplit).getHeader();
    fields = taw.toStrings();
    try {
        docBuilder.configFields(conf, fields);
    } catch (IllegalArgumentException e) {
        LOG.error("Skipped file: " + file.toUri() + ", reason: " + e.getMessage());
        return;
    }

    fileIn = fs.open(file);
    lineSeparator = retrieveLineSeparator(fileIn);
    if (start != 0) {
        // in case the cut point is \n, back off 1 char to create a partial
        // line so that 1st line can be skipped
        start--;
    }

    fileIn.seek(start);

    instream = new InputStreamReader(fileIn, encoding);

    bytesRead = 0;
    fileLen = inSplit.getLength();
    if (uriName == null) {
        generateId = conf.getBoolean(CONF_INPUT_GENERATE_URI, false);
        if (generateId) {
            idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart());
        } else {
            uriId = 0;
        }
    }

    boolean found = generateId || uriId == 0;

    for (int i = 0; i < fields.length && !found; i++) {
        if (fields[i].equals(uriName)) {
            uriId = i;
            found = true;
            break;
        }
    }
    if (found == false) {
        // idname doesn't match any columns
        LOG.error("Skipped file: " + file.toUri() + ", reason: " + URI_ID + " " + uriName + " is not found");
        return;
    }

    // keep leading and trailing whitespaces to ensure accuracy of pos
    // do not skip empty line just in case the split boundary is \n
    parser = new CSVParser(instream, CSVParserFormatter.getFormat(delimiter, encapsulator, false, false));
    parserIterator = parser.iterator();

    // skip first line:
    // 1st split, skip header; other splits, skip partial line
    if (parserIterator.hasNext()) {
        String[] values = getLine();
        start += getBytesCountFromLine(values);
        pos = start;
    }
}