Example usage for org.apache.commons.csv CSVRecord size

List of usage examples for org.apache.commons.csv CSVRecord size

Introduction

On this page you can find example usage for org.apache.commons.csv CSVRecord size.

Prototype

public int size() 

Source Link

Document

Returns the number of values in this record.

Usage

From source file:nz.ac.waikato.cms.supernova.SupernovaCSV.java

/**
 * Command-line entry point.
 * <p>
 * Parses the command-line options, configures an output generator, then reads the
 * CSV file and renders one output file per unique ID. Rows belonging to the same ID
 * are expected to be consecutive: the collected measures are flushed whenever the
 * value of the ID column changes, and once more after the last row.
 *
 * @param args the command-line arguments; see the generated parser help text
 * @throws Exception if parsing the CSV or generating output fails
 */
public static void main(String[] args) throws Exception {
    ArgumentParser parser;

    parser = ArgumentParsers.newArgumentParser("I am supernova");
    parser.description("Generates output according to 'I am supernova' by Keith Soo.\n"
            + "Loads scores/percentiles from a CSV file to generate multiple outputs at once.\n"
            + "Expected four columns (name of column is irrelevant):\n"
            + "- ID: the filename (excluding path and extension)\n" + "- Measure: the measure (" + MEASURE_LIST
            + ")\n" + "- Score: the score of the measure\n" + "- Percentile: the percentile of the measure\n"
            + "\n" + "Project homepage:\n" + "https://github.com/fracpete/i-am-supernova");

    // colors (one option per measure, defaulting to a fixed palette)
    parser.addArgument("--" + AbstractOutputGenerator.OPENNESS + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.OPENNESS + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.ORANGE))
            .help("The color for '" + AbstractOutputGenerator.OPENNESS + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.ORANGE) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.EXTRAVERSION + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.EXTRAVERSION + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.YELLOW))
            .help("The color for '" + AbstractOutputGenerator.EXTRAVERSION + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.YELLOW) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.AGREEABLENESS + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.AGREEABLENESS + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.GREEN))
            .help("The color for '" + AbstractOutputGenerator.AGREEABLENESS + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.GREEN) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.CONSCIENTIOUSNESS + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.CONSCIENTIOUSNESS + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.BLUE))
            .help("The color for '" + AbstractOutputGenerator.CONSCIENTIOUSNESS + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.BLUE) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.NEUROTICISM + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.NEUROTICISM + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.RED))
            .help("The color for '" + AbstractOutputGenerator.NEUROTICISM + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.RED) + ").");

    // other parameters
    parser.addArgument("--" + CSV).metavar(CSV).type(String.class).required(true)
            .help("The CSV file containing the scores/percentiles (header must be present).");

    parser.addArgument("--" + ID).metavar(ID).type(Integer.class).setDefault(1)
            .help("The 1-based index of the column in the CSV file containing the ID for the output file.");

    parser.addArgument("--" + MEASURE).metavar(MEASURE).type(Integer.class).setDefault(2)
            .help("The 1-based index of the column in the CSV file containing the measure name.\n"
                    + "Allowed values: " + MEASURE_LIST);

    parser.addArgument("--" + SCORE).metavar(SCORE).type(Integer.class).setDefault(3)
            .help("The 1-based index of the column in the CSV file containing the scores.");

    parser.addArgument("--" + PERCENTILE).metavar(PERCENTILE).type(Integer.class).setDefault(4)
            .help("The 1-based index of the column in the CSV file containing the percentiles.");

    parser.addArgument("--" + BACKGROUND).metavar(BACKGROUND).type(String.class)
            .setDefault(ColorHelper.toHex(Color.BLACK)).help("The background color.");

    parser.addArgument("--" + OPACITY).metavar(OPACITY).type(Double.class).setDefault(0.1)
            .help("The opacity (0-1).");

    parser.addArgument("--" + MARGIN).metavar(MARGIN).type(Double.class).setDefault(0.2)
            .help("The margin in the output (0-1).");

    parser.addArgument("--" + WIDTH).metavar(WIDTH).type(Integer.class).setDefault(2000)
            .help("The width of the output.");

    parser.addArgument("--" + HEIGHT).metavar(HEIGHT).type(Integer.class).setDefault(2000)
            .help("The height of the output.");

    parser.addArgument("--" + CENTER).metavar(CENTER).type(String.class).setDefault(Incenter.class.getName())
            .help("The name of the algorithm for calculating the center of a triangle.\n" + "Available: "
                    + Registry.toString(Registry.getCenters(), true));

    parser.addArgument("--" + GENERATOR).metavar(GENERATOR).type(String.class).setDefault(PNG.class.getName())
            .help("The name of the generator class to use.\n" + "Available: "
                    + Registry.toString(Registry.getGenerators(), true));

    parser.addArgument("--" + OUTPUT).metavar(OUTPUT).type(String.class)
            .help("The directory to store the output in.");

    parser.addArgument("--" + VERBOSE).metavar(VERBOSE).type(Boolean.class).action(Arguments.storeTrue())
            .help("Whether to output logging information.");

    Namespace namespace;
    try {
        namespace = parser.parseArgs(args);
    } catch (Exception e) {
        // argparse4j signals --help via HelpScreenException, in which case the
        // help text has already been printed; for real errors print it ourselves.
        if (!(e instanceof HelpScreenException))
            parser.printHelp();
        return;
    }

    // colors: fall back to the palette defaults if a value cannot be parsed
    Map<String, Color> colors = new HashMap<>();
    colors.put(AbstractOutputGenerator.OPENNESS, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.OPENNESS + COLOR_SUFFIX), Color.ORANGE));
    colors.put(AbstractOutputGenerator.EXTRAVERSION, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.EXTRAVERSION + COLOR_SUFFIX), Color.YELLOW));
    colors.put(AbstractOutputGenerator.AGREEABLENESS, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.AGREEABLENESS + COLOR_SUFFIX), Color.GREEN));
    colors.put(AbstractOutputGenerator.CONSCIENTIOUSNESS, ColorHelper.valueOf(
            namespace.getString(AbstractOutputGenerator.CONSCIENTIOUSNESS + COLOR_SUFFIX), Color.BLUE));
    colors.put(AbstractOutputGenerator.NEUROTICISM, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.NEUROTICISM + COLOR_SUFFIX), Color.RED));

    File outdir = new File(namespace.getString(OUTPUT));

    // allow short class names by prefixing the implementation's default package
    String centerCls = namespace.getString(CENTER);
    if (!centerCls.contains("."))
        centerCls = AbstractTriangleCenterCalculation.class.getPackage().getName() + "." + centerCls;
    String generatorCls = namespace.getString(GENERATOR);
    if (!generatorCls.contains("."))
        generatorCls = AbstractOutputGenerator.class.getPackage().getName() + "." + generatorCls;
    AbstractOutputGenerator generator = (AbstractOutputGenerator) Class.forName(generatorCls).newInstance();
    generator.setVerbose(namespace.getBoolean(VERBOSE));
    generator.setColors(colors);
    generator.setBackground(ColorHelper.valueOf(namespace.getString(BACKGROUND), Color.BLACK));
    generator.setOpacity(namespace.getDouble(OPACITY));
    generator.setMargin(namespace.getDouble(MARGIN));
    generator.setCenter((AbstractTriangleCenterCalculation) Class.forName(centerCls).newInstance());
    if (generator instanceof AbstractOutputGeneratorWithDimensions) {
        AbstractOutputGeneratorWithDimensions pixel = (AbstractOutputGeneratorWithDimensions) generator;
        pixel.setWidth(namespace.getInt(WIDTH));
        pixel.setHeight(namespace.getInt(HEIGHT));
    }

    // convert the user-facing 1-based column indices to 0-based
    int colID = namespace.getInt(ID) - 1;
    int colMeasure = namespace.getInt(MEASURE) - 1;
    int colScore = namespace.getInt(SCORE) - 1;
    int colPercentile = namespace.getInt(PERCENTILE) - 1;
    // FIX: the reader and parser were previously never closed (resource leak);
    // close them deterministically via try-with-resources.
    // NOTE(review): FileReader uses the platform default charset — confirm the
    // expected CSV encoding and pass an explicit charset if it may differ.
    try (Reader reader = new FileReader(namespace.getString(CSV));
            CSVParser csvparser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
        String oldID = "";
        Map<String, List<Double>> test = new HashMap<>();
        for (CSVRecord rec : csvparser) {
            if (rec.size() < 4)
                continue; // skip rows that lack the four expected columns
            String id = rec.get(colID);
            if (!id.equals(oldID)) {
                // ID changed: flush the measures collected for the previous ID
                if (!test.isEmpty()) {
                    File outfile = new File(outdir + File.separator + oldID + "." + generator.getExtension());
                    String msg = generator.generate(test, outfile);
                    // FIX: the generator's error message was captured but never
                    // reported; include it in the diagnostic output.
                    if (msg != null)
                        System.err.println("Failed to generate output for ID: " + oldID + ": " + msg);
                }
                test.clear();
                oldID = id;
            }
            String measure = rec.get(colMeasure);
            double score = Double.parseDouble(rec.get(colScore));
            double percentile = Double.parseDouble(rec.get(colPercentile));
            test.put(measure, new ArrayList<>(Arrays.asList(new Double[] { score, percentile })));
        }
        // flush the measures of the final ID
        if (!test.isEmpty()) {
            File outfile = new File(outdir + File.separator + oldID + "." + generator.getExtension());
            String msg = generator.generate(test, outfile);
            if (msg != null)
                System.err.println("Failed to generate output for ID: " + oldID + ": " + msg);
        }
    }
}

From source file:nzilbb.agcsv.AgCsvDeserializer.java

/**
 * Deserializes the serialized data, generating one or more {@link Graph}s.
 * @return A list of valid (if incomplete) {@link Graph}s.
 * @throws SerializerNotConfiguredException if the object has not been configured.
 * @throws SerializationParametersMissingException if the parameters for this particular graph have not been set.
 * @throws SerializationException if errors occur during deserialization.
 */
public Graph[] deserialize() throws SerializerNotConfiguredException, SerializationParametersMissingException,
        SerializationException {
    // if there are errors, accumulate as many as we can before throwing SerializationException
    SerializationException errors = null;

    Graph graph = new Graph();
    graph.setId(getName());
    // add layers to the graph
    // we don't just copy the whole schema, because that would imply that all the extra layers
    // contained no annotations, which is not necessarily true
    graph.addLayer((Layer) s.getParticipantLayer().clone());
    graph.getSchema().setParticipantLayerId(s.getParticipantLayer().getId());
    graph.addLayer((Layer) s.getTurnLayer().clone());
    graph.getSchema().setTurnLayerId(s.getTurnLayer().getId());
    graph.addLayer((Layer) s.getUtteranceLayer().clone());
    graph.getSchema().setUtteranceLayerId(s.getUtteranceLayer().getId());
    graph.addLayer((Layer) s.getWordLayer().clone());
    graph.getSchema().setWordLayerId(s.getWordLayer().getId());
    for (String layerId : mDiscoveredLayers.keySet()) {
        if (mDiscoveredLayers.get(layerId) != null) {
            graph.addLayer((Layer) mDiscoveredLayers.get(layerId).clone());
        }
    } // next layer

    // anchors: columns are id, offset, confidence[, comment]
    for (CSVRecord line : mCsvData.get("anchor")) {
        if (line.get(1).equals("offset"))
            continue; // skip header line
        // FIX: use valueOf instead of the deprecated boxing constructors
        // new Double(String) / new Integer(String)
        Anchor anchor = new Anchor(line.get(0), Double.valueOf(line.get(1)), Integer.valueOf(line.get(2)));
        graph.addAnchor(anchor);
        if (line.size() > 3) {
            // optional trailing comment column
            String comment = line.get(3);
            if (comment.length() > 0) {
                anchor.put("comment", comment);
            }
        }
    } // next anchor
    mCsvData.remove("anchor");

    // layers: only read annotations for layers that were mapped to the schema
    for (String originalId : mCsvData.keySet()) {
        if (mDiscoveredLayers.get(originalId) != null) { // mapped to a schema layer
            try {
                readAnnotations(mCsvData.get(originalId), mDiscoveredLayers.get(originalId), graph);
            } catch (SerializationException exception) {
                // keep the first exception as the carrier and fold later ones into it
                if (errors == null) {
                    errors = exception;
                } else {
                    errors.addError(SerializationException.ErrorType.Other, exception.getMessage());
                }
            }
        } // mapped to a schema layer
    } // next layer

    if (errors != null)
        throw errors;
    Graph[] graphs = { graph };
    return graphs;
}

From source file:nzilbb.agcsv.AgCsvDeserializer.java

/**
 * Create annotations from the given CSV rows.
 * <p>
 * The second row (index 1) is expected to be the header; data rows start at
 * index 2. Recognized header names are mapped to their column positions, and a
 * synthetic "comment" column is assumed to sit immediately after the last
 * recognized column.
 * @param lines CSV records.
 * @param layer Layer for the annotations.
 * @param graph Graph to add the annotations to.
 * @throws SerializationException On error.
 */
public void readAnnotations(Vector<CSVRecord> lines, Layer layer, Graph graph) throws SerializationException {
    // map header columns: header name (canonical casing) -> column index
    HashMap<String, Integer> mHeadings = new HashMap<String, Integer>();
    for (int c = 0; c < lines.elementAt(1).size(); c++) {
        String sHeader = lines.elementAt(1).get(c);
        if (sHeader.equalsIgnoreCase("id"))
            mHeadings.put("id", c);
        else if (sHeader.equalsIgnoreCase("startAnchor.id"))
            mHeadings.put("startAnchor.id", c);
        else if (sHeader.equalsIgnoreCase("endAnchor.id"))
            mHeadings.put("endAnchor.id", c);
        else if (sHeader.equalsIgnoreCase("label"))
            mHeadings.put("label", c);
        else if (sHeader.equalsIgnoreCase("labelStatus"))
            mHeadings.put("labelStatus", c);
        else if (sHeader.equalsIgnoreCase("turnAnnotationId"))
            mHeadings.put("turnAnnotationId", c);
        else if (sHeader.equalsIgnoreCase("ordinalInTurn"))
            mHeadings.put("ordinalInTurn", c);
        else if (sHeader.equalsIgnoreCase("wordAnnotationId"))
            mHeadings.put("wordAnnotationId", c);
        else if (sHeader.equalsIgnoreCase("ordinalInWord"))
            mHeadings.put("ordinalInWord", c);
        else if (sHeader.equalsIgnoreCase("segmentAnnotationId"))
            mHeadings.put("segmentAnnotationId", c);
    } // next header
    // the (optional) comment column is assumed to follow the highest mapped column
    int highestHeaderIndex = 0;
    for (Integer i : mHeadings.values())
        highestHeaderIndex = Math.max(highestHeaderIndex, i);
    mHeadings.put("comment", highestHeaderIndex + 1);

    for (int i = 2; i < lines.size(); i++) {
        CSVRecord line = lines.elementAt(i);
        Annotation annotation = new Annotation(line.get(mHeadings.get("id")), line.get(mHeadings.get("label")),
                layer.getId(), line.get(mHeadings.get("startAnchor.id")),
                line.get(mHeadings.get("endAnchor.id")));
        // FIX: Integer.valueOf instead of the deprecated new Integer(String)
        annotation.setConfidence(Integer.valueOf(line.get(mHeadings.get("labelStatus"))));
        if (mHeadings.get("comment") < line.size()) {
            String comment = line.get(mHeadings.get("comment"));
            if (comment.length() > 0) {
                annotation.put("comment", comment);
            }
        }

        // parent: resolve according to the layer's place in the schema
        if (layer.getParentId().equals("graph")) {
            annotation.setParentId(graph.getId());
        } else if (layer.getParentId().equals(graph.getSchema().getTurnLayerId())) {
            if (layer.getId().equals(graph.getSchema().getUtteranceLayerId())) {
                // make sure turn exists
                Annotation turn = graph.getAnnotation(line.get(mHeadings.get("turnAnnotationId")));
                if (turn == null) {

                    // make sure participant exists
                    Annotation participant = graph.getAnnotation(annotation.getLabel());
                    if (participant == null) {
                        participant = new Annotation(annotation.getLabel(), annotation.getLabel(),
                                graph.getSchema().getParticipantLayerId());
                        graph.addAnnotation(participant);
                    }

                    turn = new Annotation(line.get(mHeadings.get("turnAnnotationId")), annotation.getLabel(),
                            graph.getSchema().getTurnLayerId(),
                            // start/end IDs are set, but the anchors themselves aren't added
                            line.get(mHeadings.get("turnAnnotationId")) + " start",
                            line.get(mHeadings.get("turnAnnotationId")) + " end", participant.getId());
                    graph.addAnnotation(turn);
                } // turn isn't there
            } // utterance layer
            annotation.setParentId(line.get(mHeadings.get("turnAnnotationId")));
        } else if (layer.getParentId().equals(graph.getSchema().getWordLayerId())) {
            annotation.setParentId(line.get(mHeadings.get("wordAnnotationId")));
        } else if (layer.getParentId().equals("segments")) {
            annotation.setParentId(line.get(mHeadings.get("segmentAnnotationId")));
        } else if (layer.getId().equals(graph.getSchema().getTurnLayerId())) { // turn layer
            // make sure participant exists
            Annotation participant = graph.getAnnotation(annotation.getLabel());
            if (participant == null) {
                participant = new Annotation(annotation.getLabel(), annotation.getLabel(),
                        graph.getSchema().getParticipantLayerId());
                graph.addAnnotation(participant);
            }
            annotation.setParentId(participant.getId());
        }

        // ordinal: only word and segment annotations carry an explicit ordinal
        if (layer.getId().equals(graph.getSchema().getWordLayerId())) {
            annotation.setOrdinal(Integer.parseInt(line.get(mHeadings.get("ordinalInTurn"))));
        } else if (layer.getId().equals("segments")) {
            annotation.setOrdinal(Integer.parseInt(line.get(mHeadings.get("ordinalInWord"))));
        }
        graph.addAnnotation(annotation);
    }
}

From source file:onlinenewspopularity.DataFormatter.java

/**
 * Reads the file and randomly populates the data
 * @return matrix list//from   w w w  . j  a v  a2  s .co  m
 * The list has the following elements:
 * 1. List of features (mx1 ArrayList)
 * 2. Target column name
 * 3. Data for training (n1xm matrix)
 * 4. Target values for training data (n1x1 matrix)
 * 5. Test data (nxm matrix)
 * 6. Target values for test data (n2x2 matrix)
 * NOTE: n1 is the length of training data set.
 *       n2 is the length of test data set.
 *       n2 = Constants.SIZE*Constants.TEST_SET_RATIO
 *       n1 = Constants.SIZE-n2
 * @throws Exception 
 */
/**
 * Reads the file and randomly populates the data.
 * @return matrix list
 * The list has the following elements:
 * 1. List of features (mx1 ArrayList)
 * 2. Target column name
 * 3. Data for training (n1xm matrix)
 * 4. Target values for training data (n1x1 matrix)
 * 5. Test data (nxm matrix)
 * 6. Target values for test data (n2x2 matrix)
 * NOTE: n1 is the length of training data set.
 *       n2 is the length of test data set.
 *       n2 = Constants.SIZE*Constants.TEST_SET_RATIO
 *       n1 = Constants.SIZE-n2
 * @throws Exception if the file cannot be read or a value cannot be parsed
 */
public List<Matrix> readData() throws Exception {
    try {
        try (Reader br = new FileReader(new File(fileName))) {
            Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(br);

            // FIX: typed collections instead of raw List (removes the
            // unchecked casts further down)
            List<String> features = new ArrayList<>();
            String predictColName;

            Iterator<CSVRecord> itr = records.iterator();
            CSVRecord header = itr.next();

            features.add(Constants.FEATURE_COL1_NAME);
            for (int i = Constants.INITIAL_FEATURE_INDEX; i < header.size() - 1; i++) {
                features.add(header.get(i).trim());
            }
            // the last column is the prediction target
            predictColName = header.get(header.size() - 1).trim();

            trainStat = new double[2][features.size()];

            double[][] data = new double[Constants.SIZE][features.size()];
            double[][] res = new double[Constants.SIZE][1];
            boolean[] validFeature = new boolean[features.size()];
            int featureCount = 1;

            for (int i = 0; i < validFeature.length; i++) {
                validFeature[i] = false; // not a valid feature by default
            }

            // pool of free row slots; each record is assigned a random slot,
            // shuffling the data into training/test positions
            List<Integer> indices = new ArrayList<>();
            int n = Constants.SIZE;
            for (int i = 0; i < n; i++) {
                indices.add(i);
            }
            Random randGen = new Random();

            validFeature[0] = true; // theta_0 is a valid feature
            int i = 0;
            for (CSVRecord record : records) {
                if (i < Constants.SIZE && !indices.isEmpty()) {
                    int index = indices.get(randGen.nextInt(indices.size()));
                    for (int j = 0; j <= features.size(); j++) {
                        if (j == 0) {
                            data[index][j] = 1.0; // intercept term
                        } else if (j == features.size()) {
                            // last column of the record is the target value
                            res[index][0] = Double.parseDouble(record.get(record.size() - 1));
                        } else {
                            data[index][j] = Double
                                    .parseDouble(record.get(j + Constants.INITIAL_FEATURE_INDEX - 1));
                            if (data[index][j] != 0) {
                                // feature becomes "valid" on first non-zero value
                                if (!validFeature[j]) {
                                    featureCount++;
                                    validFeature[j] = true;
                                }
                            }
                        }
                    }
                    // remove by value (the slot number), not by position
                    indices.remove(Integer.valueOf(index));
                } else {
                    break;
                }
                i++;
            }

            // Remove empty features (columns that were zero for every record)
            if (featureCount < features.size()) {
                List<String> featuresCopy = new ArrayList<>(features);
                double[][] newData = new double[Constants.SIZE][featureCount];
                int k = 0;
                int var = 0; // number of features removed so far, to re-index 'features'

                for (int j = 0; j < featuresCopy.size(); j++) {
                    if (validFeature[j]) {
                        for (i = 0; i < Constants.SIZE; i++) {
                            newData[i][k] = data[i][j];
                        }
                        k++;
                    } else {
                        LOGGER.log(Level.INFO, "Removing empty feature: {0}", features.get(j - var));
                        features.remove(j - var);
                        var++;
                    }
                }

                data = newData;
            }

            int testLen = (int) (Constants.TEST_SET_RATIO * Constants.SIZE);
            int trainLen = Constants.SIZE - testLen;

            Matrix tmpx = new Matrix(data);
            Matrix tmpy = new Matrix(res);

            // NOTE(review): the returned list is heterogeneous (List, String,
            // Matrix) even though the signature promises List<Matrix>; kept raw
            // to preserve the existing interface — a dedicated result type
            // would be cleaner.
            List temp = new ArrayList<>();
            temp.add(features);
            temp.add(predictColName);
            temp.add(tmpx.getMatrix(0, trainLen - 1, 0, tmpx.getColumnDimension() - 1));
            temp.add(tmpy.getMatrix(0, trainLen - 1, 0, tmpy.getColumnDimension() - 1));
            temp.add(tmpx.getMatrix(trainLen, tmpx.getRowDimension() - 1, 0, tmpx.getColumnDimension() - 1));
            temp.add(tmpy.getMatrix(trainLen, tmpy.getRowDimension() - 1, 0, tmpy.getColumnDimension() - 1));

            return temp;
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "{0}: {1}", new Object[] { e.getClass().getName(), e.getMessage() });
        throw e;
    }
}

From source file:org.apache.ambari.server.api.services.serializers.CsvSerializerTest.java

/**
 * Serializing a collection of resources without column metadata should emit one
 * CSV row per resource whose cells are exactly the resources' property values
 * (including values containing quotes and commas).
 */
@Test
public void testSerializeResources_NoColumnInfo() throws Exception {
    Result result = new ResultImpl(true);
    result.setResultStatus(new ResultStatus(ResultStatus.STATUS.OK));
    TreeNode<Resource> tree = result.getResultTree();

    // FIX: replaced double-brace initialization (which creates anonymous
    // subclasses holding a reference to the enclosing test instance) with a
    // plain helper method.
    List<TreeMap<String, Object>> data = new ArrayList<TreeMap<String, Object>>();
    data.add(propertyRow("value1a", "value2a", "value3a", "value4a"));
    data.add(propertyRow("value1'b", "value2'b", "value3'b", "value4'b"));
    data.add(propertyRow("value1,c", "value2,c", "value3,c", "value4,c"));

    tree.setName("items");
    tree.setProperty("isCollection", "true");

    addChildResource(tree, "resource", 0, data.get(0));
    addChildResource(tree, "resource", 1, data.get(1));
    addChildResource(tree, "resource", 2, data.get(2));

    replayAll();

    // execute test (normalize line endings for platform independence)
    Object o = new CsvSerializer().serialize(result).toString().replace("\r", "");

    verifyAll();

    assertNotNull(o);

    // parse the produced CSV back and compare against the input data
    StringReader reader = new StringReader(o.toString());
    CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT);
    List<CSVRecord> records = csvParser.getRecords();

    assertNotNull(records);
    assertEquals(3, records.size());

    int i = 0;
    for (CSVRecord record : records) {
        TreeMap<String, Object> actualData = data.get(i++);
        assertEquals(actualData.size(), record.size());

        for (String item : record) {
            assertTrue(actualData.containsValue(item));
        }
    }

    csvParser.close();
}

/** Builds the sorted property1..property4 map describing one test resource. */
private static TreeMap<String, Object> propertyRow(Object value1, Object value2, Object value3,
        Object value4) {
    TreeMap<String, Object> row = new TreeMap<String, Object>();
    row.put("property1", value1);
    row.put("property2", value2);
    row.put("property3", value3);
    row.put("property4", value4);
    return row;
}

From source file:org.apache.ambari.view.hive.resources.uploads.parsers.csv.CSVIterator.java

@Override
public Row next() {
    // Advance to the next CSV record and copy its cells, in order,
    // into a plain object array for the Row wrapper.
    final CSVRecord record = iterator.next();
    final Object[] cells = new Object[record.size()];
    int col = 0;
    for (final String cell : record) {
        cells[col++] = cell;
    }
    return new Row(cells);
}

From source file:org.apache.batchee.csv.CSVFormatFactory.java

/**
 * Builds a {@link CSVFormat} from string-valued configuration properties.
 * <p>
 * Starts from {@code format} (or {@code CSVFormat.DEFAULT} when null) and layers
 * each non-null option on top via the format's immutable {@code withX} methods.
 * All parameters are nullable strings; a null means "keep the base format's value".
 * Single-character options ({@code delimiter}, {@code quoteCharacter}, ...) use
 * only their first character.
 */
static CSVFormat newFormat(final String format, final String delimiter, final String quoteCharacter,
        final String quoteMode, final String commentMarker, final String escapeCharacter,
        final String ignoreSurroundingSpaces, final String ignoreEmptyLines, final String recordSeparator,
        final String nullString, final String headerComments, final String header,
        final String skipHeaderRecord, final String allowMissingColumnNames, final String readHeaders) {
    //CHECKSTYLE:ON
    CSVFormat out = format == null ? CSVFormat.DEFAULT : CSVFormat.valueOf(format);
    if (delimiter != null) {
        out = out.withDelimiter(delimiter.charAt(0));
    }
    if (quoteCharacter != null) {
        out = out.withQuote(quoteCharacter.charAt(0));
    }
    if (quoteMode != null) {
        out = out.withQuoteMode(QuoteMode.valueOf(quoteMode));
    }
    if (commentMarker != null) {
        out = out.withCommentMarker(commentMarker.charAt(0));
    }
    if (escapeCharacter != null) {
        out = out.withEscape(escapeCharacter.charAt(0));
    }
    if (ignoreSurroundingSpaces != null) {
        out = out.withIgnoreSurroundingSpaces(Boolean.parseBoolean(ignoreSurroundingSpaces));
    }
    if (ignoreEmptyLines != null) {
        out = out.withIgnoreEmptyLines(Boolean.parseBoolean(ignoreEmptyLines));
    }
    if (recordSeparator != null) {
        // the separator arrives as an escaped literal ("\n", "\r\n"); translate
        // the two common escape sequences, otherwise pass the value through
        if ("\\n".equals(recordSeparator)) {
            out = out.withRecordSeparator('\n');
        } else if ("\\r\\n".equals(recordSeparator)) {
            out = out.withRecordSeparator("\r\n");
        } else {
            out = out.withRecordSeparator(recordSeparator);
        }
    }
    if (nullString != null) {
        out = out.withNullString(nullString);
    }
    if (headerComments != null && !headerComments.trim().isEmpty()) {
        // comma-separated list, surrounding spaces stripped
        out = out.withHeaderComments(headerComments.split(" *, *"));
    }
    if (Boolean.parseBoolean(readHeaders)) {
        // read header names from the first record of the input
        out = out.withHeader();
    }
    if (header != null && !header.trim().isEmpty()) {
        try { // headers can have CSV header names so parse it there
            // parse "header" itself as a one-line CSV (fed twice: once as the
            // header row, once as data) to honor quoting inside header names
            final Iterator<CSVRecord> iterator = out.withHeader(new String[0])
                    .parse(new StringReader(header + '\n' + header)).iterator();
            final CSVRecord record = iterator.next();
            final List<String> list = new ArrayList<String>(record.size());
            for (final String h : record) {
                list.add(h);
            }
            out = out.withHeader(list.toArray(new String[record.size()]));
        } catch (final IOException e) { // can't occur actually
            // fall back to a plain comma split if parsing somehow fails
            out = out.withHeader(header.split(" *, *"));
        }
    }
    if (skipHeaderRecord != null) {
        out = out.withSkipHeaderRecord(Boolean.parseBoolean(skipHeaderRecord));
    }
    if (allowMissingColumnNames != null) {
        out = out.withAllowMissingColumnNames(Boolean.parseBoolean(allowMissingColumnNames));
    }
    return out;
}

From source file:org.apache.beam.sdk.extensions.sql.impl.schema.BeamTableUtils.java

/**
 * Parses a single CSV line into a {@link BeamRecord} matching the given SQL row type.
 * Each cell is cast to the corresponding field type via {@code autoCastField}.
 *
 * @throws IllegalArgumentException if the line's field count differs from the
 *         row type's, or if parsing fails
 */
public static BeamRecord csvLine2BeamSqlRow(CSVFormat csvFormat, String line,
        BeamRecordSqlType beamRecordSqlType) {
    List<Object> fieldsValue = new ArrayList<>(beamRecordSqlType.getFieldCount());
    try (StringReader reader = new StringReader(line)) {
        // a single line yields exactly one record
        final CSVRecord record = csvFormat.parse(reader).getRecords().get(0);

        if (record.size() != beamRecordSqlType.getFieldCount()) {
            throw new IllegalArgumentException(String.format("Expect %d fields, but actually %d",
                    beamRecordSqlType.getFieldCount(), record.size()));
        }
        for (int idx = 0; idx < beamRecordSqlType.getFieldCount(); idx++) {
            final String raw = record.get(idx);
            fieldsValue.add(autoCastField(beamRecordSqlType.getFieldTypeByIndex(idx), raw));
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("decodeRecord failed!", e);
    }
    return new BeamRecord(beamRecordSqlType, fieldsValue);
}

From source file:org.apache.nifi.csv.CSVRecordReader.java

/**
 * Reads the next CSV record from the underlying parser and converts it into a
 * {@link MapRecord} keyed by the raw field names.
 * <p>
 * Columns beyond the known field names are keyed {@code "unknown_field_index_<i>"};
 * they are skipped entirely when {@code dropUnknownFields} is set and the schema has
 * no data type for them. Returns {@code null} when the parser has no more records.
 *
 * @param coerceTypes       whether to convert each raw string value to its schema type
 * @param dropUnknownFields whether to omit fields absent from the schema
 * @return the next record, or {@code null} at end of input
 */
@Override
public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFields)
        throws IOException, MalformedRecordException {
    final RecordSchema schema = getSchema();

    final List<String> rawFieldNames = getRawFieldNames();
    final int numFieldNames = rawFieldNames.size();

    // the loop body returns after the first record; iterating resumes where the
    // parser left off on the next call
    for (final CSVRecord csvRecord : csvParser) {
        final Map<String, Object> values = new LinkedHashMap<>();
        for (int i = 0; i < csvRecord.size(); i++) {
            // columns past the known names get a synthetic name
            final String rawFieldName = numFieldNames <= i ? "unknown_field_index_" + i : rawFieldNames.get(i);
            final String rawValue = csvRecord.get(i);

            final Optional<DataType> dataTypeOption = schema.getDataType(rawFieldName);

            if (!dataTypeOption.isPresent() && dropUnknownFields) {
                continue;
            }

            final Object value;
            if (coerceTypes && dataTypeOption.isPresent()) {
                value = convert(rawValue, dataTypeOption.get(), rawFieldName);
            } else if (dataTypeOption.isPresent()) {
                // The CSV Reader is going to return all fields as Strings, because CSV doesn't have any way to
                // dictate a field type. As a result, we will use the schema that we have to attempt to convert
                // the value into the desired type if it's a simple type.
                value = convertSimpleIfPossible(rawValue, dataTypeOption.get(), rawFieldName);
            } else {
                value = rawValue;
            }

            values.put(rawFieldName, value);
        }

        return new MapRecord(schema, values, coerceTypes, dropUnknownFields);
    }

    // parser exhausted
    return null;
}

From source file:org.apache.nifi.processors.csv.ParseCSVRecord.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    // Set to true only once a record has actually been parsed; drives success/failure routing below.
    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> outputAttrs = new HashMap<>();

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            final String fromAttribute = context.getProperty(PROP_RECORD_FROM_ATTRIBUTE).getValue();

            String unparsedRecord;
            // Data source is the attribute when one is configured, otherwise the first content line.
            if (StringUtils.isNotBlank(fromAttribute)) {
                unparsedRecord = original.getAttribute(fromAttribute);
                if (StringUtils.isBlank(unparsedRecord)) {
                    // Routed to failure at the end of the method implementation.
                    return;
                }
            } else {
                // TODO expose the charset property?
                LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
                if (!iterator.hasNext()) {
                    return;
                }
                unparsedRecord = iterator.next();
            }

            final String format = context.getProperty(PROP_FORMAT).getValue();
            final String delimiter = context.getProperty(PROP_DELIMITER).evaluateAttributeExpressions(original)
                    .getValue();
            final String schemaPrefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final String valuePrefix = context.getProperty(PROP_VALUE_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final boolean trimValues = context.getProperty(PROP_TRIM_VALUES).asBoolean();

            final CSVFormat csvFormat = buildFormat(format, delimiter, false, // this is a payload, not header anymore
                    null); // no custom header

            // CSVParser is Closeable: close it deterministically instead of leaking it.
            final List<CSVRecord> records;
            try (CSVParser parser = csvFormat.parse(new StringReader(unparsedRecord))) {
                records = parser.getRecords();
            }

            // Guard against an empty parse result: previously records.get(0) would throw
            // IndexOutOfBoundsException here. Returning without setting lineFound routes
            // the FlowFile to failure instead.
            if (records.isEmpty()) {
                return;
            }
            if (records.size() > 1) {
                // TODO revisit for NiFi's native micro-batching
                throw new ProcessException("Multi-line entries not supported");
            }

            lineFound.set(true);
            CSVRecord record = records.get(0);

            Map<String, String> originalAttrs = original.getAttributes();
            // Filter delimited schema attributes only.
            Map<String, String> schemaAttrs = new HashMap<>();
            for (String key : originalAttrs.keySet()) {
                if (key.startsWith(schemaPrefix)) {
                    schemaAttrs.put(key, originalAttrs.get(key));
                }
            }

            // Put key/value pairs into attributes.
            for (int i = 0; i < record.size(); i++) {
                String columnName = schemaAttrs.get(schemaPrefix + (i + 1)); // 1-based column numbering
                if (columnName == null) {
                    // Fall back to the 1-based column index as the name.
                    columnName = String.valueOf(i + 1);
                }
                // TODO indexed schemaless parsing vs auto-schema vs user-provided schema
                String columnValue = record.get(i);
                if (trimValues) {
                    columnValue = columnValue.trim();
                }
                String attrName = (StringUtils.isBlank(valuePrefix) ? "delimited.column." : valuePrefix)
                        + columnName;
                outputAttrs.put(attrName, columnValue);
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, outputAttrs);
        session.transfer(ff, REL_SUCCESS);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}