Example usage for org.apache.commons.csv CSVRecord get

Introduction

This page collects real-world usage examples of org.apache.commons.csv.CSVRecord.get(String).

Prototype

public String get(final String name) 

Document

Returns a value by name.
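
Before the per-project examples, here is a minimal, self-contained sketch of name-based access. The CSV content and the column names id and name are made up for illustration; the pattern of parsing with a header and calling get(String) is the one the examples below rely on.

import java.io.Reader;
import java.io.StringReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;

public class CsvRecordGetExample {
    public static void main(String[] args) throws Exception {
        // the first record supplies the names that get(String) resolves against
        Reader in = new StringReader("id,name\n1,Alice\n2,Bob\n");
        Iterable<CSVRecord> records = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(in);
        for (CSVRecord record : records) {
            // get(String) looks a value up by header name and throws
            // IllegalArgumentException for a name that is not mapped
            System.out.println(record.get("id") + " -> " + record.get("name"));
        }
    }
}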

Usage

From source file:de.inren.service.banking.BankDataServiceImpl.java

@Override
public void importTransactionCsv(byte[] bytes) throws IOException {
    Iterable<CSVRecord> records = getIngDibaCsvFormat().parse(createReader(bytes));
    Account account = new Account();
    for (CSVRecord record : records) {
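        // records 1-8 are the fixed preamble of the ING-DiBa export; everything after is a transaction entry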
        switch ((int) record.getRecordNumber()) {
        case 1: // Umsatzanzeige (transaction listing)
            break;
        case 2: // Kunde (customer)
            account.setOwner(record.get(1).trim());
            break;
        case 3: // Konto (account)
            String[] vals = record.get(1).split(":");
            account.setName(vals[0].trim());
            account.setNumber(vals[1].trim());
            account = validateAccount(account);
            break;
        case 4: //
            break;
        case 5: // Zeitraum (date range)
            break;
        case 6: // Saldo (balance)
            break;
        case 7: // Leer (blank line)
            break;
        case 8: // Überschrift (column headers)
            break;
        default: // Eintrag (entry)
            Transaction transaction = new Transaction();
            transaction.setAccountNumber(account.getNumber().trim());
            transaction.setAccountingDate(getDate(record.get(0)));
            transaction.setValutaDate(getDate(record.get(1)));
            transaction.setPrincipal(record.get(2).trim());
            transaction.setAccountingText(record.get(3).trim());
            transaction.setPurpose(record.get(4).trim());
            transaction.setAmount(getBigDecimal(record.get(5)));
            transaction.setTransactionCurrency(record.get(6).trim());
            transaction.setBalance(getBigDecimal(record.get(7)));
            transaction.setBalanceCurrency(record.get(8).trim());
            transaction.setHashCode(transaction.createHashCode());
            Transaction oldTransaction = transactionRepository.findByHashCode(transaction.getHashCode());
            // only save new transactions
            if (oldTransaction == null) {
                transactionRepository.save(transaction);
            }
        }
    }
    // Apply the categories to all transactions, including the new ones.
    // Should be optimized.
    Iterable<Category> categories = categoryRepository.findAll();
    for (Category category : categories) {
        applyCategoryToTransactions(category);
    }
}

From source file:br.edimarmanica.trinity.intrasitemapping.auto.MappingController.java

private void reading() {
    /*
     * Read the first Run02.NR_SHARED_PAGES elements of each offset
     */
    File dir = new File(Paths.PATH_TRINITY + site.getPath() + "/offset");

    int nrOffsets = dir.listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".csv");
        }
    }).length;

    for (int nrOffset = 0; nrOffset < nrOffsets; nrOffset++) {
        List<List<String>> offset = new ArrayList<>(); // each file is one offset

        try (Reader in = new FileReader(dir.getAbsoluteFile() + "/result_" + nrOffset + ".csv")) {
            try (CSVParser parser = new CSVParser(in, CSVFormat.EXCEL)) {
                int nrRegistro = 0;
                for (CSVRecord record : parser) {
                    if (nrRegistro >= Extract.NR_SHARED_PAGES) {
                        break;
                    }

                    for (int nrRegra = 0; nrRegra < record.size(); nrRegra++) {
                        if (nrRegistro == 0) {
                            List<String> regra = new ArrayList<>();
                            try {
                                regra.add(Preprocessing.filter(record.get(nrRegra)));
                            } catch (InvalidValue ex) {
                                regra.add("");
                            }
                            offset.add(regra);
                        } else {
                            try {
                                offset.get(nrRegra).add(Preprocessing.filter(record.get(nrRegra)));
                            } catch (InvalidValue ex) {
                                offset.get(nrRegra).add("");
                            }
                        }
                    }
                    nrRegistro++;
                }
            }
            offsets.add(offset);
        } catch (IOException ex) { // FileNotFoundException is an IOException, so one handler covers both
            Logger.getLogger(MappingController.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /*
     * Show what was read (debug)
     */
    /*for (int i = 1; i < offsets.size(); i++) {
    for (int j = 0; j < 5; j++) {
        System.out.print(offsets.get(i).get(0).get(j) + " - ");
    }
    System.out.println("");
    }*/
}

From source file:fr.univ_tours.li.mdjedaini.ideb.io.CsvLogLoader.java

/**
 * Loads a session from a CSV log file.
 *
 * @param arg_sessionFilePath path to the session file
 * @return the parsed Session
 */
public Session loadSession(String arg_sessionFilePath) {

    Session result = new Session();

    try {

        Reader in = new FileReader(arg_sessionFilePath);
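        // the first record supplies the column names, so record.get("cube"), get("Measures"), etc. resolve by header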
        Iterable<CSVRecord> records = CSVFormat.newFormat(';').withFirstRecordAsHeader().parse(in);

        // each record is a query
        for (CSVRecord record : records) {

            //                System.out.println("I am parsing the line: " + record);

            String cubeName = record.get("cube");
            EAB_Cube cube = this.be.getBenchmarkData().getInternalCubeList().get(cubeName);

            QueryTriplet q_tmp = new QueryTriplet(cube);

            // extract measures
            String currentMeasure = record.get("Measures");

            // only add measure if not empty
            if (!currentMeasure.equals("[]")) {
                Pattern p = Pattern.compile("([a-zA-Z_0-9][a-zA-Z_0-9 ]+)");
                Matcher m = p.matcher(currentMeasure);

                // manage multiple measures
                while (m.find()) {
                    //System.out.println("Current measure: " + currentMeasure + " --- trouve: " + m.groupCount());
                    String measure = m.group(1);
                    //                        System.out.println("Measure: " + measure);    

                    // add the current measure to the current query
                    MeasureFragment mf = new MeasureFragment(q_tmp, measure);

                    if (null == mf.getMeasure()) {
                        int i = 2; // no-op left in the original as a debugger breakpoint anchor
                    }

                    q_tmp.addMeasure(mf);
                }

            }

            // extract GBS
            String currentProjection = record.get("GroupBy");

            // only add projections if not empty
            if (!currentProjection.equals("[]")) {
                Pattern p = Pattern.compile("([a-zA-Z_0-9][a-zA-Z_0-9 ]+)");
                Matcher m = p.matcher(currentProjection);

                // manage multiple group by
                while (m.find()) {
                    //System.out.println("Group " + i + ": " + m.group(i));
                    String level = m.group(1);
                    //                        System.out.println("Level: " + level);

                    EAB_Level l_tmp = cube.getLevelByAtomicName(level);
                    ProjectionFragment pf_tmp = new ProjectionFragment(q_tmp, l_tmp);

                    if (null == pf_tmp.getLevel()) {
                        int i = 2; // no-op debugger breakpoint anchor
                    }

                    q_tmp.addProjection(pf_tmp);
                }

            }

            // extract filters
            String currentSelection = record.get("Filters");
            // only add projections if not empty
            if (!currentSelection.equals("[]")) {
                Pattern p = Pattern.compile("([a-zA-Z_0-9][a-zA-Z_0-9 ]+)=>\\[EQUAL ([a-zA-Z_0-9& ]+)\\]");
                Matcher m = p.matcher(currentSelection);

                // manage multiple occurrences
                while (m.find()) {
                    //                        System.out.println("Current selection: " + currentSelection + " --- trouve: " + m.groupCount());

                    String level = m.group(1);
                    String member = m.group(2);

                    EAB_Level l_tmp = cube.getLevelByAtomicName(level);

                    //                        System.out.println("Cube: " + cubeName);
                    //                        System.out.println("Level: " + level);
                    //                        System.out.println("Member: " + member);

                    if (null == l_tmp) {
                        int i = 2; // no-op breakpoint anchor; a null l_tmp makes the next line throw, caught by the outer catch
                    }

                    String dimName = l_tmp.getHierarchy().getDimension().getMondrianDimension().getName();
                    String hieName = l_tmp.getHierarchy().getName();

                    //hieName.spl

                    SelectionFragment sf_tmp = new SelectionFragment(q_tmp, dimName, hieName, level, member);

                    if (null != sf_tmp.getMemberValue()) {
                        q_tmp.addSelection(sf_tmp);
                    }

                }

            }

            // add the query to the session
            result.addQuery(q_tmp);

            QueryConverter qc = new QueryConverter(this.be);

            try {
                System.out.println("******************");
                System.out.println("Record:" + record);

                QueryMdx q_mdx = qc.toMdx(q_tmp);
                System.out.println("MDX with my converter:");
                System.out.println(q_mdx);
                q_mdx.execute(Boolean.TRUE);
                //                    System.out.println("-----");
                //                    System.out.println("Query: " + q_tmp);
                //                    System.out.println("-----");
                //                    System.out.println("Mdx: " + qc.toMdx(q_tmp));
                //                    System.out.println("******************");
            } catch (Exception arg_e) {
                System.out.println("******************");
                System.out.println("Exception: " + arg_e.getClass().getName());
                System.out.println("Record:" + record);
                //                    System.out.println("-----");
                //                    System.out.println("Query: " + q_tmp);
                //                    System.out.println("-----");
                //qc.toMdx(q_tmp);
                //System.out.println("******************");
                //System.err.println("Exception avec: ");
                //System.err.println("Record: " + record);
            }

        } // end foreach record

    } catch (Exception arg_e) {
        arg_e.printStackTrace();
    }

    //        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
    //        
    //        // add the name as metadata of the session
    //        result.addMetaData("name", arg_sessionFilePath);
    //        
    //        System.out.println("I am parsing the file: " + arg_sessionFilePath);
    //        
    //        // pattern for extracting cube name
    //        Pattern p = Pattern.compile("from \\[(.*?)\\].*");
    //
    //        File file   = new File(arg_sessionFilePath);
    //        
    //        
    //        try {
    //            //BufferedReader br = new BufferedReader(new FileReader(file));
    //            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(arg_sessionFilePath), "UTF-8"));
    //            String line = null;
    //            
    //            String currentQuery = "";
    //            
    //            // to parse a query, look for "select"
    //            // and take all the following lines until an empty line is reached...
    //            while ((line = br.readLine()) != null) {
    //
    //                if(line.contains("select")) {
    //                    
    //                    // look for the time before query execution
    //                    String date     = line.substring(0, 23);
    //                    Date d          = sdf.parse(date);
    //                    Long tsBefore   = d.getTime();
    //                    
    //                    // get the position of the word "select" in the line
    //                    Integer position    = line.indexOf("select");
    //                    currentQuery        = line.substring(position, line.length());
    //                    
    //                    String line_tmp = br.readLine();
    //                    while(!line_tmp.equals("")) {
    //                        currentQuery    += System.lineSeparator();
    //                        //currentQuery    += System.lineSeparator();
    //                        currentQuery    += line_tmp;
    //                        line_tmp    = br.readLine();
    //                    }
    //                    
    //                    // extract cubename from the query text
    //                    // Normally, the pattern is always found!
    //                    Matcher m = p.matcher(currentQuery);
    //                    m.find();
    //                    String cubeName = m.group(1);
    //                    
    //                    //System.out.println(currentQuery);
    //                    //System.out.println("cubeName: " + cubeName);
    //                    //System.out.println("-------");
    //                    
    //                    // look for the execution time
    //                    while(!line_tmp.contains("exec:")) {
    //                        line_tmp    = br.readLine();
    //                    }
    //                    
    //                    // here the line contains exec
    //                    // look for the time before query execution
    //                    date            = line_tmp.substring(0, 23);
    //                    d               = sdf.parse(date);
    //                    Long tsAfter    = d.getTime();
    //                    
    //                    Query q_tmp = new QueryMdx(this.be.getInternalCubeByName(cubeName), currentQuery);
    //                    
    //                    result.addQuery(q_tmp, tsBefore, tsAfter);
    //                }
    //                
    //            }
    // 
    //            br.close();
    //        } catch(Exception arg_e) {
    //            arg_e.printStackTrace();
    //        }

    return result;
}

From source file:com.hurence.logisland.service.cache.CSVKeyValueCacheService.java

@Override
// @OnEnabled
public void init(ControllerServiceInitializationContext context) throws InitializationException {
    super.init(context);
    try {

        if (context.getPropertyValue(DATABASE_FILE_URI).isSet()) {
            dbUri = context.getPropertyValue(DATABASE_FILE_URI).asString();
        }

        if (context.getPropertyValue(DATABASE_FILE_PATH).isSet()) {
            dbPath = context.getPropertyValue(DATABASE_FILE_PATH).asString();
        }

        if ((dbUri == null) && (dbPath == null)) {
            throw new Exception(
                    "You must declare " + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName());
        }

        InputStream is = null;
        if (dbUri != null) {
            logger.info("opening csv database from hdfs : " + dbUri);
            is = initFromUri(dbUri);
        }

        if (dbPath != null) {
            logger.info("opening csv database from local fs : " + dbPath);
            is = initFromPath(context, dbPath);
        }

        if (is == null) {
            throw new InitializationException("Something went wrong while initializing csv db from "
                    + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName());
        }

        // final Reader reader = new InputStreamReader(is);
        CSVFormat format = CSVFormat.DEFAULT;
        if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_EXCEL.getValue())) {
            format = CSVFormat.EXCEL;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_EXCEL_FR.getValue())) {
            format = CSVFormat.EXCEL.withDelimiter(';');
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_MYSQL.getValue())) {
            format = CSVFormat.MYSQL;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_RFC4180.getValue())) {
            format = CSVFormat.RFC4180;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_TDF.getValue())) {
            format = CSVFormat.TDF;
        }

        if (context.getPropertyValue(CSV_HEADER).isSet()) {
            String[] columnNames = context.getPropertyValue(CSV_HEADER).asString().split(",");
            for (String name : columnNames) {
                headers.get().put(name, "string");
            }
            format = format.withHeader(columnNames);
        } else if (context.getPropertyValue(FIRST_LINE_HEADER).isSet()) {
            format = format.withFirstRecordAsHeader();
        } else {
            throw new InitializationException("unable to get headers from somewhere");
        }

        Charset charset = Charset.forName("UTF-8");
        if (context.getPropertyValue(ENCODING_CHARSET).isSet()) {
            String encoding = context.getPropertyValue(ENCODING_CHARSET).asString();
            charset = Charset.forName(encoding);
        }

        rowKey = context.getPropertyValue(ROW_KEY).asString(); // the value in this column becomes the cache key
        CSVParser parser = CSVParser.parse(is, charset, format);

        long count = 0;
        try {
            final Set<String> columnNames = parser.getHeaderMap().keySet();
            for (final CSVRecord record : parser) {

                Record logislandRecord = new StandardRecord();
                for (final String column : columnNames) {
                    logislandRecord.setStringField(column, record.get(column));
                }

                set(logislandRecord.getField(rowKey).asString(), logislandRecord);
                count++;
            }
        } finally {
            logger.info("successfully loaded " + count + " records from CSV file");

            parser.close();
            is.close();
        }

    } catch (Exception e) {
        getLogger().error("Could not load database file: {}", new Object[] { e.getMessage() });
        throw new InitializationException(e);
    }
}

From source file:com.thinkbiganalytics.discovery.parsers.csv.CSVFileSchemaParser.java

private DefaultFileSchema populateSchema(CSVParser parser) {
    DefaultFileSchema fileSchema = new DefaultFileSchema();
    int i = 0;
    ArrayList<Field> fields = new ArrayList<>();
    for (CSVRecord record : parser) {
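        // sample at most the first 10 records when inferring the schema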
        if (i > 9) {
            break;
        }
        int size = record.size();
        for (int j = 0; j < size; j++) {
            DefaultField field = null;
            if (i == 0) {
                field = new DefaultField();
                if (headerRow) {
                    field.setName(record.get(j));
                } else {
                    field.setName("Col_" + (j + 1));
                }
                fields.add(field);
            } else {
                try {
                    field = (DefaultField) fields.get(j);
                    field.getSampleValues().add(StringUtils.defaultString(record.get(j), ""));

                } catch (IndexOutOfBoundsException e) {
                    LOG.warn("Sample file has potential sparse column problem at row [{}] field [{}]", i + 1,
                            j + 1);
                }
            }
        }
        i++;
    }
    fileSchema.setFields(fields);
    return fileSchema;
}

From source file:geovista.readers.csv.GeogCSVReader.java

public Object[] readFileStreaming(InputStream is, ArrayList<Integer> columns) {

    BufferedReader in = new BufferedReader(new InputStreamReader(is));
    Iterable<CSVRecord> parser = null;
    try {
        parser = CSVFormat.DEFAULT.withDelimiter(this.currDelimiter).parse(in);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }

    // materialize the records so the type-sniffing code below can index rows directly
    // (the original left fileContent null, which made everything below throw a NullPointerException)
    List<String[]> rows = new ArrayList<String[]>();
    for (CSVRecord rec : parser) {
        String[] row = new String[rec.size()];
        for (int i = 0; i < rec.size(); i++) {
            row[i] = rec.get(i);
        }
        rows.add(row);
    }

    String[] headers = null;
    String[] types = null;
    int[] dataTypes = null;
    String[][] fileContent = rows.toArray(new String[rows.size()][]);
    int dataBegin;
    Object[] data;

    types = fileContent[0]; // first line tells us types
    dataTypes = new int[types.length];
    int len;
    if (types[0].equalsIgnoreCase("int") || types[0].equalsIgnoreCase("double")
            || types[0].equalsIgnoreCase("string")) {
        dataBegin = 2;
        headers = fileContent[1];
        data = new Object[headers.length + 1];// plus one for the headers
        // themselves
        len = fileContent.length - dataBegin;
        for (int i = 0; i < headers.length; i++) {
            if (types[i].equalsIgnoreCase("int")) {
                data[i + 1] = new int[len];
                dataTypes[i] = GeogCSVReader.DATA_TYPE_INT;
            } else if (types[i].equalsIgnoreCase("double")) {
                data[i + 1] = new double[len];
                dataTypes[i] = GeogCSVReader.DATA_TYPE_DOUBLE;
            } else if (types[i].equalsIgnoreCase("string")) {
                data[i + 1] = new String[len];
                dataTypes[i] = GeogCSVReader.DATA_TYPE_STRING;
            } else {
                throw new IllegalArgumentException("GeogCSVReader.readFile, unknown type = " + types[i]);
            }
        }
    } else {
        dataBegin = 1;
        headers = fileContent[0];
        data = new Object[headers.length + 1];// plus one for the headers
        // themselves
        len = fileContent.length - dataBegin;
        for (int i = 0; i < headers.length; i++) {
            String firstString = fileContent[1][i];
            String secondString = fileContent[2][i];
            String thirdString = fileContent[3][i];
            String lastString = fileContent[fileContent.length - 1][i]; // last row, not fileContent[0].length


            if (isNumeric(firstString) && isNumeric(secondString) && isNumeric(thirdString)
                    && isNumeric(lastString)) {
                if (isInt(fileContent, i) == false) {
                    // if (isDouble(firstString) || isDouble(secondString)
                    // || isDouble(thirdString) || isDouble(lastString)) {
                    data[i + 1] = new double[len];
                    dataTypes[i] = GeogCSVReader.DATA_TYPE_DOUBLE;
                } else {
                    data[i + 1] = new int[len];
                    dataTypes[i] = GeogCSVReader.DATA_TYPE_INT;
                }
            } else {
                data[i + 1] = new String[len];
                dataTypes[i] = GeogCSVReader.DATA_TYPE_STRING;
            }
        }
    }
    data[0] = headers;

    String[] line = null;

    for (int row = dataBegin; row < len + dataBegin; row++) {

        line = fileContent[row];

        int[] ints = null;
        double[] doubles = null;
        String[] strings = null;

        for (int column = 0; column < line.length; column++) {
            String item = line[column];
            if (dataTypes[column] == GeogCSVReader.DATA_TYPE_INT) {

                if (Arrays.binarySearch(GeogCSVReader.NULL_STRINGS, item) >= 0) {
                    ints = (int[]) data[column + 1];
                    ints[row - dataBegin] = GeogCSVReader.NULL_INT;
                } else {
                    ints = (int[]) data[column + 1];
                    try {
                        ints[row - dataBegin] = Integer.parseInt(item);
                    } catch (NumberFormatException nfe) {
                        logger.warning("could not parse " + item + " in column " + column);
                        // nfe.printStackTrace();
                        ints[row - dataBegin] = GeogCSVReader.NULL_INT;
                    }
                }
            } else if (dataTypes[column] == GeogCSVReader.DATA_TYPE_DOUBLE) {
                if (Arrays.binarySearch(GeogCSVReader.NULL_STRINGS, item) >= 0) {
                    doubles = (double[]) data[column + 1];
                    doubles[row - dataBegin] = GeogCSVReader.NULL_DOUBLE;
                } else {
                    doubles = (double[]) data[column + 1];
                    doubles[row - dataBegin] = parseDouble(item);
                }
            } else if (dataTypes[column] == GeogCSVReader.DATA_TYPE_STRING) {
                strings = (String[]) data[column + 1];
                strings[row - dataBegin] = item;
            } else {
                throw new IllegalArgumentException("GeogCSVReader.readFile, unknown type = " + types[column]);
            } // end if

        } // next column
    } // next row
    return data;

}

From source file:com.raceup.fsae.test.TesterGui.java

/**
 * Parses a data file and builds a Test.
 *
 * @param pathToDataFile path to the data CSV file
 */
private void parseDataFileAndCreateTestOrFail(String pathToDataFile) {
    ArrayList<Question> questions = new ArrayList<>();
    CSVRecord[] rows = null;

    try {
        CSVParser parser = CSVFormat.DEFAULT.parse(new FileReader(pathToDataFile));
        List<CSVRecord> records = parser.getRecords(); // getRecords() drains the parser, so call it only once
        rows = records.toArray(new CSVRecord[records.size()]);
    } catch (Exception e) {
        System.err.println(e.toString());
        return; // without data there is no test to build
    }

    for (CSVRecord row : rows) { // each row represent a question
        ArrayList<Answer> answers = new ArrayList<>(); // list of answers
        if (row.size() > 1) {
            for (int i = 1; i < row.size(); i++) {
                if (row.get(i).length() > 0) {
                    answers.add(new Answer(row.get(i)));
                }
            }

            Answer correctAnswer = answers.get(0); // the correct
            // answer is always the first one
            String questionText = row.get(0);
            questions.add(
                    new Question(questionText, answers.toArray(new Answer[answers.size()]), correctAnswer)); // add to list of questions
        }
    }
    test = new Test(questions.toArray(new Question[questions.size()]));
}

From source file:com.datascience.hadoop.CsvRecordReader.java

@Override
public boolean next(LongWritable key, ListWritable<Text> value) throws IOException {
    value.clear();
    try {
        if (iterator.hasNext()) {
            CSVRecord record = iterator.next();
            position++;
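            // remember the width of the first record; strict mode requires every later record to match it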
            colLength = colLength == null ? record.size() : colLength;
            if ((!record.isConsistent() || record.size() != colLength) && strict) {
                String message = String.format("%s: %s", "inconsistent record at position", position);
                throw new CsvParseException(message);
            }

            key.set(record.getRecordNumber());

            for (int i = 0; i < record.size(); i++) {
                String item = record.get(i);
                if (item == null) {
                    value.add(null);
                } else {
                    Text text = cache[i];
                    if (text == null) {
                        text = new Text();
                        cache[i] = text;
                    }
                    text.set(item);
                    value.add(text);
                }
            }
            //position = record.getCharacterPosition();
            return true;
        }

    } catch (Exception e) {
        LOGGER.warn("failed to parse record at position: " + position);
        if (strict) {
            throw e;
        } else {
            return next(key, value);
        }
    }
    return false;
}

From source file:ch.eitchnet.csvrestendpoint.marshaller.CsvDataToJsonMarshaller.java

/**
 * Returns true if the given {@link CSVRecord} is to be selected
 *
 * @param headerMap
 *            the map containing the column headers with the column index
 * 
 * @param record
 *            the {@link CSVRecord} to check
 * 
 * @return true if the record is to be selected, false if not
 */
protected boolean isSelected(Map<String, Integer> headerMap, CSVRecord record) {

    if (StringHelper.isEmpty(this.query))
        return true;

    if (this.queryFields.isEmpty()) {

        // iterate all possible fields and see if the query matches
        for (String value : record) {
            if (value.toLowerCase().contains(this.query))
                return true;
        }
    } else {

        // iterate only the query fields
        for (String queryField : this.queryFields) {
            String value = record.get(queryField);
            if (value.toLowerCase().contains(this.query))
                return true;
        }
    }

    return false;
}

From source file:com.ibm.watson.developer_cloud.professor_languo.pipeline.QuestionSetManager.java

/**
 * This function is responsible for parsing a duplicate Stack Exchange thread TSV file produced by
 * {@link StackExchangeThreadSerializer}, and partitioning each such thread into the training set,
 * test set, or validation set. In addition, the corresponding row of the TSV file will be written
 * out to a training-, test-, or validation-set-specific TSV file in the same directory as the
 * input TSV file.
 * 
 * @param dupQuestionFile - A TSV file containing duplicate {@link StackExchangeThread} records
 * @param trainTestValidateCumulativeProbs - A CDF of the desired proportion of training, test,
 *        and validation set records
 * @throws PipelineException
 */
private void parseTsvAndPartitionRecords(File dupQuestionFile, double[] trainTestValidateCumulativeProbs)
        throws PipelineException {
    // Open the TSV file for parsing, and CSVPrinters for outputting train,
    // test, and validation set
    // TSV files
    String baseName = FilenameUtils.removeExtension(dupQuestionFile.getAbsolutePath());
    String extension = FilenameUtils.getExtension(dupQuestionFile.getAbsolutePath());
    try (FileReader reader = new FileReader(dupQuestionFile);
            CSVPrinter trainSetPrinter = new CSVPrinter(
                    new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_TRAIN_FILE_SUFFIX
                            + FilenameUtils.EXTENSION_SEPARATOR + extension),
                    CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()));
            CSVPrinter testSetPrinter = new CSVPrinter(
                    new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_TEST_FILE_SUFFIX
                            + FilenameUtils.EXTENSION_SEPARATOR + extension),
                    CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()));
            CSVPrinter validationSetPrinter = new CSVPrinter(
                    new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_VALIDATE_FILE_SUFFIX
                            + FilenameUtils.EXTENSION_SEPARATOR + extension),
                    CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()))) {

        // Parse the duplicate thread TSV file
        CSVParser parser = CSVFormat.TDF.withHeader().parse(reader);

        // Iterate over each CSV record, and place into a desired partition
        // (train, test, or
        // validation)
        Iterator<CSVRecord> recordIterator = parser.iterator();
        while (recordIterator.hasNext()) {
            CSVRecord record = recordIterator.next();

            // Get the StackExchangeThread associated with this record, and
            // create a question from it
            StackExchangeThread duplicateThread = StackExchangeThreadSerializer.deserializeThreadFromBinFile(
                    record.get(CorpusBuilder.TSV_COL_HEADER_SERIALIZED_FILE_PATH));
            StackExchangeQuestion duplicateQuestion = new StackExchangeQuestion(duplicateThread);
            String parentId = record.get(CorpusBuilder.TSV_COL_HEADER_PARENT_ID);

            // Now drop this question into a partition, and write it to a
            // corresponding TSV file
            double p = rng.nextDouble(); // Random number determines
            // partition for this record
            if (p <= trainTestValidateCumulativeProbs[0]) {
                // This record goes in the training set
                if (!addQuestionToSet(duplicateQuestion, parentId, this.trainingSet)) {
                    throw new PipelineException(
                            MessageFormat.format(Messages.getString("RetrieveAndRank.TRAINING_SET_FAILED_Q"), //$NON-NLS-1$
                                    duplicateThread.getId()));
                }
                trainSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            } else if (p <= trainTestValidateCumulativeProbs[1]) {
                // This record goes in the test set
                if (!addQuestionToSet(duplicateQuestion, parentId, this.testSet)) {
                    throw new PipelineException(
                            MessageFormat.format(Messages.getString("RetrieveAndRank.TEST_SET_FAILED_Q"), //$NON-NLS-1$
                                    duplicateThread.getId()));
                }
                testSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            } else {
                // This record goes in the validation set
                assert (p <= trainTestValidateCumulativeProbs[2]);
                if (!addQuestionToSet(duplicateQuestion, parentId, this.validationSet)) {
                    throw new PipelineException(
                            MessageFormat.format(Messages.getString("RetrieveAndRank.VALIDATION_SET_FAILED_Q"), //$NON-NLS-1$
                                    duplicateThread.getId()));
                }
                validationSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            }
        }

        // Flush all the printers prior to closing
        trainSetPrinter.flush();
        testSetPrinter.flush();
        validationSetPrinter.flush();
    } catch (IOException | IngestionException e) {
        throw new PipelineException(e);
    }
}