List of usage examples for org.apache.commons.csv CSVRecord get
public String get(final String name)
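This overload looks a value up by the column name taken from the CSV header; a companion overload, get(int), reads by column position and appears in several of the examples below. Before those full-application examples, here is a minimal, self-contained sketch of the name-based call. The file name people.csv and the columns name and age are hypothetical, chosen only to illustrate the API.

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;

public class CsvRecordGetExample {
    public static void main(String[] args) throws Exception {
        // people.csv is assumed to start with a header row such as: name,age
        try (Reader reader = Files.newBufferedReader(Paths.get("people.csv"))) {
            Iterable<CSVRecord> records = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(reader);
            for (CSVRecord record : records) {
                // get(String) returns the value of the column with that header name;
                // an unknown column name results in an IllegalArgumentException.
                String name = record.get("name");
                String age = record.get("age");
                System.out.println(name + " is " + age);
            }
        }
    }
}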
From source file:de.inren.service.banking.BankDataServiceImpl.java
@Override
public void importTransactionCsv(byte[] bytes) throws IOException {
    Iterable<CSVRecord> records = getIngDibaCsvFormat().parse(createReader(bytes));
    Account account = new Account();
    for (CSVRecord record : records) {
        switch ((int) record.getRecordNumber()) {
        case 1: // Umsatzanzeige (transaction listing)
            break;
        case 2: // Kunde (customer)
            account.setOwner(record.get(1).trim());
            break;
        case 3: // Konto (account)
            String[] vals = record.get(1).split(":");
            account.setName(vals[0].trim());
            account.setNumber(vals[1].trim());
            account = validateAccount(account);
            break;
        case 4: //
            break;
        case 5: // Zeitraum (period)
            break;
        case 6: // Saldo (balance)
            break;
        case 7: // Leer (blank line)
            break;
        case 8: // Überschrift (column headers)
            break;
        default: // Eintrag (transaction entry)
            Transaction transaction = new Transaction();
            transaction.setAccountNumber(account.getNumber().trim());
            transaction.setAccountingDate(getDate(record.get(0)));
            transaction.setValutaDate(getDate(record.get(1)));
            transaction.setPrincipal(record.get(2).trim());
            transaction.setAccountingText(record.get(3).trim());
            transaction.setPurpose(record.get(4).trim());
            transaction.setAmount(getBigDecimal(record.get(5)));
            transaction.setTransactionCurrency(record.get(6).trim());
            transaction.setBalance(getBigDecimal(record.get(7)));
            transaction.setBalanceCurrency(record.get(8).trim());
            transaction.setHashCode(transaction.createHashCode());
            Transaction oldTransaction = transactionRepository.findByHashCode(transaction.getHashCode());
            // only save new transactions
            if (oldTransaction == null) {
                transactionRepository.save(transaction);
            }
        }
    }
    // Add the categories to the new (all) transactions. Should be optimized.
    Iterable<Category> categories = categoryRepository.findAll();
    for (Category category : categories) {
        applyCategoryToTransactions(category);
    }
}
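The importer above addresses columns purely by position and uses getRecordNumber() to skip the export's preamble rows. A minimal sketch of that pattern follows; the file name statements.csv, the semicolon delimiter, the number of preamble rows, and the column positions are assumptions for illustration, not taken from the source above.

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;

public class PositionalGetExample {
    public static void main(String[] args) throws Exception {
        try (Reader reader = Files.newBufferedReader(Paths.get("statements.csv"))) {
            // The export has no header row usable by commons-csv, so columns are read by index.
            for (CSVRecord record : CSVFormat.DEFAULT.withDelimiter(';').parse(reader)) {
                if (record.getRecordNumber() <= 8) {
                    continue; // skip the preamble rows (title, customer, account, headers, ...)
                }
                String bookingDate = record.get(0); // column 0: booking date
                String amount = record.get(5);      // column 5: amount
                System.out.println(bookingDate + " -> " + amount);
            }
        }
    }
}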
From source file:br.edimarmanica.trinity.intrasitemapping.auto.MappingController.java
private void reading() {
    /**
     * Reading the first Run02.NR_SHARED_PAGES elements of each offset
     */
    File dir = new File(Paths.PATH_TRINITY + site.getPath() + "/offset");
    for (int nrOffset = 0; nrOffset < dir.listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".csv");
        }
    }).length; nrOffset++) {
        List<List<String>> offset = new ArrayList<>(); // each file is one offset
        try (Reader in = new FileReader(dir.getAbsoluteFile() + "/result_" + nrOffset + ".csv")) {
            try (CSVParser parser = new CSVParser(in, CSVFormat.EXCEL)) {
                int nrRegistro = 0;
                for (CSVRecord record : parser) {
                    if (nrRegistro >= Extract.NR_SHARED_PAGES) {
                        break;
                    }
                    for (int nrRegra = 0; nrRegra < record.size(); nrRegra++) {
                        if (nrRegistro == 0) {
                            List<String> regra = new ArrayList<>();
                            try {
                                regra.add(Preprocessing.filter(record.get(nrRegra)));
                            } catch (InvalidValue ex) {
                                regra.add("");
                            }
                            offset.add(regra);
                        } else {
                            try {
                                offset.get(nrRegra).add(Preprocessing.filter(record.get(nrRegra)));
                            } catch (InvalidValue ex) {
                                offset.get(nrRegra).add("");
                            }
                        }
                    }
                    nrRegistro++;
                }
            }
            offsets.add(offset);
        } catch (FileNotFoundException ex) {
            Logger.getLogger(MappingController.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(MappingController.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    /**
     * Printing what was read
     */
    /*for (int i = 1; i < offsets.size(); i++) {
        for (int j = 0; j < 5; j++) {
            System.out.print(offsets.get(i).get(0).get(j) + " - ");
        }
        System.out.println("");
    }*/
}
From source file:fr.univ_tours.li.mdjedaini.ideb.io.CsvLogLoader.java
/**
 *
 * @param arg_sessionFilePath
 * @return
 */
public Session loadSession(String arg_sessionFilePath) {
    Session result = new Session();

    try {
        Reader in = new FileReader(arg_sessionFilePath);
        Iterable<CSVRecord> records = CSVFormat.newFormat(';').withFirstRecordAsHeader().parse(in);

        // each record is a query
        for (CSVRecord record : records) {
            // System.out.println("I am parsing the line: " + record);

            String cubeName = record.get("cube");
            EAB_Cube cube = this.be.getBenchmarkData().getInternalCubeList().get(cubeName);
            QueryTriplet q_tmp = new QueryTriplet(cube);

            // extract measures
            String currentMeasure = record.get("Measures");
            // only add measure if not empty
            if (!currentMeasure.equals("[]")) {
                Pattern p = Pattern.compile("([a-zA-Z_0-9][a-zA-Z_0-9 ]+)");
                Matcher m = p.matcher(currentMeasure);

                // manage multiple measures
                while (m.find()) {
                    //System.out.println("Current measure: " + currentMeasure + " --- found: " + m.groupCount());
                    String measure = m.group(1);
                    // System.out.println("Measure: " + measure);

                    // add the current measure to the current query
                    MeasureFragment mf = new MeasureFragment(q_tmp, measure);
                    if (null == mf.getMeasure()) {
                        int i = 2;
                    }
                    q_tmp.addMeasure(mf);
                }
            }

            // extract GBS
            String currentProjection = record.get("GroupBy");
            // only add projections if not empty
            if (!currentProjection.equals("[]")) {
                Pattern p = Pattern.compile("([a-zA-Z_0-9][a-zA-Z_0-9 ]+)");
                Matcher m = p.matcher(currentProjection);

                // manage multiple group by
                while (m.find()) {
                    //System.out.println("Group " + i + ": " + m.group(i));
                    String level = m.group(1);
                    // System.out.println("Level: " + level);

                    EAB_Level l_tmp = cube.getLevelByAtomicName(level);
                    ProjectionFragment pf_tmp = new ProjectionFragment(q_tmp, l_tmp);
                    if (null == pf_tmp.getLevel()) {
                        int i = 2;
                    }
                    q_tmp.addProjection(pf_tmp);
                }
            }

            // extract filters
            String currentSelection = record.get("Filters");
            // only add projections if not empty
            if (!currentSelection.equals("[]")) {
                Pattern p = Pattern.compile("([a-zA-Z_0-9][a-zA-Z_0-9 ]+)=>\\[EQUAL ([a-zA-Z_0-9& ]+)\\]");
                Matcher m = p.matcher(currentSelection);

                // manage multiple occurrences
                while (m.find()) {
                    // System.out.println("Current selection: " + currentSelection + " --- found: " + m.groupCount());
                    String level = m.group(1);
                    String member = m.group(2);

                    EAB_Level l_tmp = cube.getLevelByAtomicName(level);

                    // System.out.println("Cube: " + cubeName);
                    // System.out.println("Level: " + level);
                    // System.out.println("Member: " + member);

                    if (null == l_tmp) {
                        int i = 2;
                    }

                    String dimName = l_tmp.getHierarchy().getDimension().getMondrianDimension().getName();
                    String hieName = l_tmp.getHierarchy().getName();
                    //hieName.spl

                    SelectionFragment sf_tmp = new SelectionFragment(q_tmp, dimName, hieName, level, member);
                    if (null != sf_tmp.getMemberValue()) {
                        q_tmp.addSelection(sf_tmp);
                    }
                }
            }

            // add the query to the session
            result.addQuery(q_tmp);

            QueryConverter qc = new QueryConverter(this.be);
            try {
                System.out.println("******************");
                System.out.println("Record:" + record);
                QueryMdx q_mdx = qc.toMdx(q_tmp);
                System.out.println("MDX with my converter:");
                System.out.println(q_mdx);
                q_mdx.execute(Boolean.TRUE);
                // System.out.println("-----");
                // System.out.println("Query: " + q_tmp);
                // System.out.println("-----");
                // System.out.println("Mdx: " + qc.toMdx(q_tmp));
                // System.out.println("******************");
            } catch (Exception arg_e) {
                System.out.println("******************");
                System.out.println("Exception: " + arg_e.getClass().getName());
                System.out.println("Record:" + record);
                // System.out.println("-----");
                // System.out.println("Query: " + q_tmp);
                // System.out.println("-----");
                //qc.toMdx(q_tmp);
                //System.out.println("******************");
                //System.err.println("Exception with: ");
                //System.err.println("Record: " + record);
            }
        } // end foreach record
    } catch (Exception arg_e) {
        arg_e.printStackTrace();
    }

    // SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
    //
    // // add the name as metadata of the session
    // result.addMetaData("name", arg_sessionFilePath);
    //
    // System.out.println("I am parsing the file: " + arg_sessionFilePath);
    //
    // // pattern for extracting cube name
    // Pattern p = Pattern.compile("from \\[(.*?)\\].*");
    //
    // File file = new File(arg_sessionFilePath);
    //
    // try {
    // //BufferedReader br = new BufferedReader(new FileReader(file));
    // BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(arg_sessionFilePath), "UTF-8"));
    // String line = null;
    //
    // String currentQuery = "";
    //
    // // to parse a query, look for "select"
    // // then take every following line until a blank line is reached...
    // while ((line = br.readLine()) != null) {
    //
    // if (line.contains("select")) {
    //
    // // look for the time before query execution
    // String date = line.substring(0, 23);
    // Date d = sdf.parse(date);
    // Long tsBefore = d.getTime();
    //
    // // get the position of the word "select" in the line
    // Integer position = line.indexOf("select");
    // currentQuery = line.substring(position, line.length());
    //
    // String line_tmp = br.readLine();
    // while (!line_tmp.equals("")) {
    // currentQuery += System.lineSeparator();
    // //currentQuery += System.lineSeparator();
    // currentQuery += line_tmp;
    // line_tmp = br.readLine();
    // }
    //
    // // extract cubename from the query text
    // // Normally, the pattern is always found!
    // Matcher m = p.matcher(currentQuery);
    // m.find();
    // String cubeName = m.group(1);
    //
    // //System.out.println(currentQuery);
    // //System.out.println("cubeName: " + cubeName);
    // //System.out.println("-------");
    //
    // // look for the execution time
    // while (!line_tmp.contains("exec:")) {
    // line_tmp = br.readLine();
    // }
    //
    // // here the line contains exec
    // // look for the time before query execution
    // date = line_tmp.substring(0, 23);
    // d = sdf.parse(date);
    // Long tsAfter = d.getTime();
    //
    // Query q_tmp = new QueryMdx(this.be.getInternalCubeByName(cubeName), currentQuery);
    //
    // result.addQuery(q_tmp, tsBefore, tsAfter);
    // }
    //
    // }
    //
    // br.close();
    // } catch (Exception arg_e) {
    // arg_e.printStackTrace();
    // }

    return result;
}
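The loader above reads named columns (cube, Measures, GroupBy, Filters) from a semicolon-separated log whose first record is the header. A stripped-down sketch of that access pattern follows, assuming a hypothetical session.log.csv with a compatible header; isSet(String) is used only as a defensive guard against rows shorter than the header.

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;

public class NamedColumnExample {
    public static void main(String[] args) throws Exception {
        // Semicolon-separated log with a header row, e.g. "cube;Measures;GroupBy;Filters"
        try (Reader in = Files.newBufferedReader(Paths.get("session.log.csv"))) {
            Iterable<CSVRecord> records =
                    CSVFormat.DEFAULT.withDelimiter(';').withFirstRecordAsHeader().parse(in);
            for (CSVRecord record : records) {
                // get(String) resolves the value through the header map
                String cube = record.get("cube");
                // isSet(name) checks that the column is mapped and the row actually has a value for it
                String measures = record.isSet("Measures") ? record.get("Measures") : "[]";
                System.out.println(cube + " -> " + measures);
            }
        }
    }
}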
From source file:com.hurence.logisland.service.cache.CSVKeyValueCacheService.java
@Override
// @OnEnabled
public void init(ControllerServiceInitializationContext context) throws InitializationException {
    super.init(context);
    try {
        if (context.getPropertyValue(DATABASE_FILE_URI).isSet()) {
            dbUri = context.getPropertyValue(DATABASE_FILE_URI).asString();
        }

        if (context.getPropertyValue(DATABASE_FILE_PATH).isSet()) {
            dbPath = context.getPropertyValue(DATABASE_FILE_PATH).asString();
        }

        if ((dbUri == null) && (dbPath == null)) {
            throw new Exception(
                    "You must declare " + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName());
        }

        InputStream is = null;
        if (dbUri != null) {
            logger.info("opening csv database from hdfs : " + dbUri);
            is = initFromUri(dbUri);
        }

        if (dbPath != null) {
            logger.info("opening csv database from local fs : " + dbPath);
            is = initFromPath(context, dbPath);
        }

        if (is == null) {
            throw new InitializationException("Something went wrong while initializing csv db from "
                    + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName());
        }

        // final Reader reader = new InputStreamReader(is);
        CSVFormat format = CSVFormat.DEFAULT;
        if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_EXCEL.getValue())) {
            format = CSVFormat.EXCEL;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_EXCEL_FR.getValue())) {
            format = CSVFormat.EXCEL.withDelimiter(';');
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_MYSQL.getValue())) {
            format = CSVFormat.MYSQL;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_RFC4180.getValue())) {
            format = CSVFormat.RFC4180;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_TDF.getValue())) {
            format = CSVFormat.TDF;
        }

        if (context.getPropertyValue(CSV_HEADER).isSet()) {
            String[] columnNames = context.getPropertyValue(CSV_HEADER).asString().split(",");
            for (String name : columnNames) {
                headers.get().put(name, "string");
            }
            format = format.withHeader(columnNames);
        } else if (context.getPropertyValue(FIRST_LINE_HEADER).isSet()) {
            format = format.withFirstRecordAsHeader();
        } else {
            throw new InitializationException("unable to get headers from somewhere");
        }

        Charset charset = Charset.forName("UTF-8");
        if (context.getPropertyValue(ENCODING_CHARSET).isSet()) {
            String encoding = context.getPropertyValue(ENCODING_CHARSET).asString();
            charset = Charset.forName(encoding);
        }

        rowKey = context.getPropertyValue(ROW_KEY).asString();
        CSVParser parser = CSVParser.parse(is, charset, format); // new CSVParser(reader, format);

        /*
         * CSVParser parser = null;
         * if (context.getPropertyValue(ENCODING_CHARSET).isSet()) {
         *     String encoding = context.getPropertyValue(ENCODING_CHARSET).asString();
         *     parser = CSVParser.parse(reader, Charset.forName(encoding), format);
         * } else {
         *     parser = CSVParser.parse(reader, format);
         * }
         */
        long count = 0;
        try {
            final Set<String> columnNames = parser.getHeaderMap().keySet();
            for (final CSVRecord record : parser) {
                Record logislandRecord = new StandardRecord();
                for (final String column : columnNames) {
                    logislandRecord.setStringField(column, record.get(column));
                }

                set(logislandRecord.getField(rowKey).asString(), logislandRecord);
                count++;
            }
        } finally {
            logger.info("successfully loaded " + count + " records from CSV file");
            parser.close();
            is.close();
        }

    } catch (Exception e) {
        getLogger().error("Could not load database file: {}", new Object[] { e.getMessage() });
        throw new InitializationException(e);
    }
}
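The cache service above declares the header explicitly (or takes it from the first record) and then copies every column of each record by name, driven by parser.getHeaderMap(). A minimal sketch of that header-map-driven copy follows, assuming a hypothetical lookup.csv and the column names id, host, and country.

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class HeaderMapExample {
    public static void main(String[] args) throws Exception {
        // An explicit header is declared, so the file itself does not need a header row.
        CSVFormat format = CSVFormat.DEFAULT.withHeader("id", "host", "country");
        try (Reader reader = Files.newBufferedReader(Paths.get("lookup.csv"));
             CSVParser parser = new CSVParser(reader, format)) {
            for (CSVRecord record : parser) {
                Map<String, String> row = new LinkedHashMap<>();
                // getHeaderMap() exposes the declared columns; get(column) reads each value by name.
                for (String column : parser.getHeaderMap().keySet()) {
                    row.put(column, record.get(column));
                }
                System.out.println(row.get("id") + " -> " + row);
            }
        }
    }
}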
From source file:com.thinkbiganalytics.discovery.parsers.csv.CSVFileSchemaParser.java
private DefaultFileSchema populateSchema(CSVParser parser) {
    DefaultFileSchema fileSchema = new DefaultFileSchema();
    int i = 0;
    ArrayList<Field> fields = new ArrayList<>();
    for (CSVRecord record : parser) {
        if (i > 9) {
            break;
        }
        int size = record.size();
        for (int j = 0; j < size; j++) {
            DefaultField field = null;
            if (i == 0) {
                field = new DefaultField();
                if (headerRow) {
                    field.setName(record.get(j));
                } else {
                    field.setName("Col_" + (j + 1));
                }
                fields.add(field);
            } else {
                try {
                    field = (DefaultField) fields.get(j);
                    field.getSampleValues().add(StringUtils.defaultString(record.get(j), ""));
                } catch (IndexOutOfBoundsException e) {
                    LOG.warn("Sample file has potential sparse column problem at row [?] field [?]", i + 1, j + 1);
                }
            }
        }
        i++;
    }
    fileSchema.setFields(fields);
    return fileSchema;
}
From source file:geovista.readers.csv.GeogCSVReader.java
public Object[] readFileStreaming(InputStream is, ArrayList<Integer> columns) {
    BufferedReader in = new BufferedReader(new InputStreamReader(is));
    Iterable<CSVRecord> parser = null;
    try {
        parser = CSVFormat.DEFAULT.withDelimiter(this.currDelimiter).parse(in);
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    // Collect every record so the typed parsing below has data to work with.
    ArrayList<String[]> rows = new ArrayList<String[]>();
    int count = 0;
    for (CSVRecord rec : parser) {
        // eDays.add(rec.get(0));
        // type.add(rec.get(10) + " - " + rec.get(8));
        System.out.println(rec.get(0));
        System.out.println(rec.toString());
        String[] row = new String[rec.size()];
        for (int col = 0; col < rec.size(); col++) {
            row[col] = rec.get(col);
        }
        rows.add(row);
        count++;
    }
    // CSVParser shredder = new CSVParser()
    // CSVParser shredder = new CSVParser(is);
    // shredder.setCommentStart("#;!");
    // shredder.setEscapes("nrtf", "\n\r\t\f");
    String[] headers = null;
    String[] types = null;
    int[] dataTypes = null;
    // The original code left fileContent unset ("fileContent = shredder.getAllValues()" was
    // commented out), which would fail below; the rows collected above are used instead.
    String[][] fileContent = rows.toArray(new String[rows.size()][]);
    int dataBegin;
    Object[] data;
    types = fileContent[0]; // first line tells us types
    dataTypes = new int[types.length];
    int len;
    if (types[0].equalsIgnoreCase("int") || types[0].equalsIgnoreCase("double")
            || types[0].equalsIgnoreCase("string")) {
        dataBegin = 2;
        headers = fileContent[1];
        data = new Object[headers.length + 1]; // plus one for the headers themselves
        len = fileContent.length - dataBegin;
        for (int i = 0; i < headers.length; i++) {
            if (types[i].equalsIgnoreCase("int")) {
                data[i + 1] = new int[len];
                dataTypes[i] = GeogCSVReader.DATA_TYPE_INT;
            } else if (types[i].equalsIgnoreCase("double")) {
                data[i + 1] = new double[len];
                dataTypes[i] = GeogCSVReader.DATA_TYPE_DOUBLE;
            } else if (types[i].equalsIgnoreCase("string")) {
                data[i + 1] = new String[len];
                dataTypes[i] = GeogCSVReader.DATA_TYPE_STRING;
            } else {
                throw new IllegalArgumentException("GeogCSVReader.readFile, unknown type = " + types[i]);
            }
        }
    } else {
        dataBegin = 1;
        headers = fileContent[0];
        data = new Object[headers.length + 1]; // plus one for the headers themselves
        len = fileContent.length - dataBegin;
        for (int i = 0; i < headers.length; i++) {
            String firstString = fileContent[1][i];
            String secondString = fileContent[2][i];
            String thirdString = fileContent[3][i];
            // the original indexed fileContent[fileContent[0].length][i], which can overrun;
            // the last row is what is actually meant
            String lastString = fileContent[fileContent.length - 1][i];
            if (isNumeric(firstString) && isNumeric(secondString) && isNumeric(thirdString)
                    && isNumeric(lastString)) {
                if (isInt(fileContent, i) == false) {
                    // if (isDouble(firstString) || isDouble(secondString)
                    // || isDouble(thirdString) || isDouble(lastString)) {
                    data[i + 1] = new double[len];
                    dataTypes[i] = GeogCSVReader.DATA_TYPE_DOUBLE;
                } else {
                    data[i + 1] = new int[len];
                    dataTypes[i] = GeogCSVReader.DATA_TYPE_INT;
                }
            } else {
                data[i + 1] = new String[len];
                dataTypes[i] = GeogCSVReader.DATA_TYPE_STRING;
            }
        }
    }
    data[0] = headers;
    String[] line = null;
    for (int row = dataBegin; row < len + dataBegin; row++) {
        line = fileContent[row];
        int[] ints = null;
        double[] doubles = null;
        String[] strings = null;
        for (int column = 0; column < line.length; column++) {
            String item = line[column];
            if (dataTypes[column] == GeogCSVReader.DATA_TYPE_INT) {
                if (Arrays.binarySearch(GeogCSVReader.NULL_STRINGS, item) >= 0) {
                    ints = (int[]) data[column + 1];
                    ints[row - dataBegin] = GeogCSVReader.NULL_INT;
                } else {
                    ints = (int[]) data[column + 1];
                    try {
                        ints[row - dataBegin] = Integer.parseInt(item);
                    } catch (NumberFormatException nfe) {
                        logger.warning("could not parse " + item + " in column " + column);
                        // nfe.printStackTrace();
                        ints[row - dataBegin] = GeogCSVReader.NULL_INT;
                    }
                }
            } else if (dataTypes[column] == GeogCSVReader.DATA_TYPE_DOUBLE) {
                if (Arrays.binarySearch(GeogCSVReader.NULL_STRINGS, item) >= 0) {
                    doubles = (double[]) data[column + 1];
                    doubles[row - dataBegin] = GeogCSVReader.NULL_DOUBLE;
                } else {
                    doubles = (double[]) data[column + 1];
                    doubles[row - dataBegin] = parseDouble(item);
                }
            } else if (dataTypes[column] == GeogCSVReader.DATA_TYPE_STRING) {
                strings = (String[]) data[column + 1];
                strings[row - dataBegin] = item;
            } else {
                throw new IllegalArgumentException(
                        "GeogCSVReader.readFile, unknown type = " + dataTypes[column]);
            } // end if
        } // next column
    } // next row
    return data;
}
From source file:com.raceup.fsae.test.TesterGui.java
/**
 * Parses data file, builds a Test
 *
 * @param pathToDataFile path to data csv file
 */
private void parseDataFileAndCreateTestOrFail(String pathToDataFile) {
    ArrayList<Question> questions = new ArrayList<>();
    CSVRecord[] rows = null;

    try {
        CSVParser parser = CSVFormat.DEFAULT.parse(new FileReader(pathToDataFile));
        // read all records once; a second getRecords() call would see an already-consumed parser
        List<CSVRecord> records = parser.getRecords();
        rows = records.toArray(new CSVRecord[records.size()]);
    } catch (Exception e) {
        System.err.println(e.toString());
    }

    for (CSVRecord row : rows) { // each row represents a question
        ArrayList<Answer> answers = new ArrayList<>(); // list of answers
        if (row.size() > 1) {
            for (int i = 1; i < row.size(); i++) {
                if (row.get(i).length() > 0) {
                    answers.add(new Answer(row.get(i)));
                }
            }
            Answer correctAnswer = answers.get(0); // the correct answer is always the first one
            String questionText = row.get(0);
            questions.add(new Question(questionText, answers.toArray(new Answer[answers.size()]),
                    correctAnswer)); // add to list of questions
        }
    }
    test = new Test(questions.toArray(new Question[questions.size()]));
}
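The example above materializes every record up front with getRecords(). A minimal sketch of that whole-file pattern follows, assuming a hypothetical quiz.csv in which column 0 holds the question text and the remaining columns hold answer options.

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class GetRecordsExample {
    public static void main(String[] args) throws Exception {
        try (Reader reader = Files.newBufferedReader(Paths.get("quiz.csv"));
             CSVParser parser = CSVFormat.DEFAULT.parse(reader)) {
            // getRecords() reads the remaining input into memory; the parser is consumed afterwards.
            List<CSVRecord> rows = parser.getRecords();
            for (CSVRecord row : rows) {
                String question = row.get(0);
                int answerCount = row.size() - 1; // everything after column 0 is an answer option
                System.out.println(question + " (" + answerCount + " answers)");
            }
        }
    }
}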
From source file:com.datascience.hadoop.CsvRecordReader.java
@Override
public boolean next(LongWritable key, ListWritable<Text> value) throws IOException {
    value.clear();
    try {
        if (iterator.hasNext()) {
            CSVRecord record = iterator.next();
            position++;
            colLength = colLength == null ? record.size() : colLength;
            if ((!record.isConsistent() || record.size() != colLength) && strict) {
                String message = String.format("%s: %s", "inconsistent record at position", position);
                throw new CsvParseException(message);
            }

            key.set(record.getRecordNumber());
            for (int i = 0; i < record.size(); i++) {
                String item = record.get(i);
                if (item == null) {
                    value.add(null);
                } else {
                    Text text = cache[i];
                    if (text == null) {
                        text = new Text();
                        cache[i] = text;
                    }
                    text.set(item);
                    value.add(text);
                }
            }
            //position = record.getCharacterPosition();
            return true;
        }
    } catch (Exception e) {
        LOGGER.warn("failed to parse record at position: " + position);
        if (strict) {
            throw e;
        } else {
            return next(key, value);
        }
    }
    return false;
}
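The record reader above walks each record positionally with record.size() and record.get(i). A minimal sketch of that positional iteration follows, assuming a hypothetical rows.csv.

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;

public class IndexedGetExample {
    public static void main(String[] args) throws Exception {
        try (Reader reader = Files.newBufferedReader(Paths.get("rows.csv"))) {
            for (CSVRecord record : CSVFormat.DEFAULT.parse(reader)) {
                StringBuilder line = new StringBuilder();
                // size() is the number of values in this record; get(int) returns the value at that position
                for (int i = 0; i < record.size(); i++) {
                    if (i > 0) {
                        line.append(" | ");
                    }
                    line.append(record.get(i));
                }
                System.out.println(record.getRecordNumber() + ": " + line);
            }
        }
    }
}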
From source file:ch.eitchnet.csvrestendpoint.marshaller.CsvDataToJsonMarshaller.java
/**
 * Returns true if the given {@link CSVRecord} is to be selected
 *
 * @param headerMap
 *            the map containing the column headers with the column index
 * @param record
 *            the {@link CSVRecord} to check
 *
 * @return true if the record is to be selected, false if not
 */
protected boolean isSelected(Map<String, Integer> headerMap, CSVRecord record) {
    if (StringHelper.isEmpty(this.query))
        return true;

    if (this.queryFields.isEmpty()) {
        // iterate all possible fields and see if the query matches
        for (String value : record) {
            if (value.toLowerCase().contains(this.query))
                return true;
        }
    } else {
        // iterate only the query fields
        for (String queryField : this.queryFields) {
            String value = record.get(queryField);
            if (value.toLowerCase().contains(this.query))
                return true;
        }
    }

    return false;
}
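The marshaller above shows two lookup styles side by side: iterating every value of a record (CSVRecord is Iterable over its values) versus consulting only selected columns through get(String). A small sketch of the same filtering idea, assuming a hypothetical report.csv with city and country columns and a hard-coded query term.

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;

public class RecordSearchExample {
    public static void main(String[] args) throws Exception {
        String query = "berlin";
        List<String> queryFields = Arrays.asList("city", "country");
        try (Reader reader = Files.newBufferedReader(Paths.get("report.csv"))) {
            for (CSVRecord record : CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(reader)) {
                boolean selected = false;
                if (queryFields.isEmpty()) {
                    // scan every value of the record
                    for (String value : record) {
                        if (value.toLowerCase().contains(query)) {
                            selected = true;
                            break;
                        }
                    }
                } else {
                    // or consult only the named columns via get(String)
                    for (String field : queryFields) {
                        if (record.get(field).toLowerCase().contains(query)) {
                            selected = true;
                            break;
                        }
                    }
                }
                if (selected) {
                    System.out.println(record);
                }
            }
        }
    }
}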
From source file:com.ibm.watson.developer_cloud.professor_languo.pipeline.QuestionSetManager.java
/**
 * This function is responsible for parsing a duplicate Stack Exchange thread TSV file produced by
 * {@link StackExchangeThreadSerializer}, and partitioning each such thread into the training set,
 * test set, or validation set. In addition, the corresponding row of the TSV file will be written
 * out to a training-, test-, or validation-set-specific TSV file in the same directory as the
 * input TSV file.
 *
 * @param dupQuestionFile - A TSV file containing duplicate {@link StackExchangeThread} records
 * @param trainTestValidateCumulativeProbs - A CDF of the desired proportion of training, test,
 *        and validation set records
 * @throws PipelineException
 */
private void parseTsvAndPartitionRecords(File dupQuestionFile, double[] trainTestValidateCumulativeProbs)
        throws PipelineException {
    // Open the TSV file for parsing, and CSVPrinters for outputting train, test, and validation set TSV files
    String baseName = FilenameUtils.removeExtension(dupQuestionFile.getAbsolutePath());
    String extension = FilenameUtils.getExtension(dupQuestionFile.getAbsolutePath());
    try (FileReader reader = new FileReader(dupQuestionFile);
            CSVPrinter trainSetPrinter = new CSVPrinter(
                    new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_TRAIN_FILE_SUFFIX
                            + FilenameUtils.EXTENSION_SEPARATOR + extension),
                    CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()));
            CSVPrinter testSetPrinter = new CSVPrinter(
                    new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_TEST_FILE_SUFFIX
                            + FilenameUtils.EXTENSION_SEPARATOR + extension),
                    CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()));
            CSVPrinter validationSetPrinter = new CSVPrinter(
                    new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_VALIDATE_FILE_SUFFIX
                            + FilenameUtils.EXTENSION_SEPARATOR + extension),
                    CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()))) {

        // Parse the duplicate thread TSV file
        CSVParser parser = CSVFormat.TDF.withHeader().parse(reader);

        // Iterate over each CSV record, and place into a desired partition (train, test, or validation)
        Iterator<CSVRecord> recordIterator = parser.iterator();
        while (recordIterator.hasNext()) {
            CSVRecord record = recordIterator.next();

            // Get the StackExchangeThread associated with this record, and create a question from it
            StackExchangeThread duplicateThread = StackExchangeThreadSerializer.deserializeThreadFromBinFile(
                    record.get(CorpusBuilder.TSV_COL_HEADER_SERIALIZED_FILE_PATH));
            StackExchangeQuestion duplicateQuestion = new StackExchangeQuestion(duplicateThread);
            String parentId = record.get(CorpusBuilder.TSV_COL_HEADER_PARENT_ID);

            // Now drop this question into a partition, and write it to a corresponding TSV file
            double p = rng.nextDouble(); // Random number determines partition for this record
            if (p <= trainTestValidateCumulativeProbs[0]) {
                // This record goes in the training set
                if (!addQuestionToSet(duplicateQuestion, parentId, this.trainingSet)) {
                    throw new PipelineException(
                            MessageFormat.format(Messages.getString("RetrieveAndRank.TRAINING_SET_FAILED_Q"), //$NON-NLS-1$
                                    duplicateThread.getId()));
                }
                trainSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            } else if (p <= trainTestValidateCumulativeProbs[1]) {
                // This record goes in the test set
                if (!addQuestionToSet(duplicateQuestion, parentId, this.testSet)) {
                    throw new PipelineException(
                            MessageFormat.format(Messages.getString("RetrieveAndRank.TEST_SET_FAILED_Q"), //$NON-NLS-1$
                                    duplicateThread.getId()));
                }
                testSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            } else {
                // This record goes in the validation set
                assert (p <= trainTestValidateCumulativeProbs[2]);
                if (!addQuestionToSet(duplicateQuestion, parentId, this.validationSet)) {
                    throw new PipelineException(
                            MessageFormat.format(Messages.getString("RetrieveAndRank.VALIDATION_SET_FAILED_Q"), //$NON-NLS-1$
                                    duplicateThread.getId()));
                }
                validationSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            }
        }

        // Flush all the printers prior to closing
        trainSetPrinter.flush();
        testSetPrinter.flush();
        validationSetPrinter.flush();
    } catch (IOException | IngestionException e) {
        throw new PipelineException(e);
    }
}
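The partitioner above reads a tab-separated file whose header comes from the first record (CSVFormat.TDF.withHeader()), pulls individual columns by name, and writes selected rows back out with a CSVPrinter. A compact sketch of that read-by-name, write-with-printer round trip follows; the file names threads.tsv and threads.train.tsv and the column names id, parentId, and path are assumptions for illustration.

import java.io.Reader;
import java.io.Writer;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;

public class TsvPartitionSketch {
    public static void main(String[] args) throws Exception {
        CSVFormat tsvIn = CSVFormat.TDF.withFirstRecordAsHeader();
        CSVFormat tsvOut = CSVFormat.TDF.withHeader("id", "parentId", "path");
        try (Reader reader = Files.newBufferedReader(Paths.get("threads.tsv"));
             Writer writer = Files.newBufferedWriter(Paths.get("threads.train.tsv"));
             CSVPrinter printer = new CSVPrinter(writer, tsvOut)) {
            for (CSVRecord record : tsvIn.parse(reader)) {
                // get(String) resolves each column through the header read from the first record
                String parentId = record.get("parentId");
                if (!parentId.isEmpty()) {
                    printer.printRecord(record.get("id"), parentId, record.get("path"));
                }
            }
            printer.flush();
        }
    }
}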