Example usage for org.apache.commons.csv CSVRecord get

Introduction

On this page you can find example usages of the get method of org.apache.commons.csv.CSVRecord.

Prototype

public String get(final String name)

Source Link

Document

Returns a value by name.

Usage

From source file:norbert.mynemo.dataimport.scraping.CkMapping.java

/**
 * Builds a {@link CkMapping} from a single CSV record.
 *
 * <p>The record is usually produced by a parser obtained from {@link #createParser(String)}.
 *
 * @param record the record providing the {@code CK_MOVIE_HEADER} and {@code IMDB_MOVIE_HEADER}
 *        values
 * @return a mapping built from those two values
 */
public static CkMapping createMapping(CSVRecord record) {
    final String ckMovie = record.get(CK_MOVIE_HEADER);
    final String imdbMovie = record.get(IMDB_MOVIE_HEADER);
    return new CkMapping(ckMovie, imdbMovie);
}

From source file:norbert.mynemo.dataimport.scraping.CkRating.java

/**
 * Builds a {@link CkRating} from a single CSV record.
 *
 * <p>The record is usually produced by a parser obtained from {@link #createParser(String)}.
 *
 * @param record the record providing the {@code USER_HEADER}, {@code MOVIE_HEADER} and
 *        {@code VALUE_HEADER} values
 * @return a rating built from those three values
 */
public static CkRating createRating(CSVRecord record) {
    final String user = record.get(USER_HEADER);
    final String movie = record.get(MOVIE_HEADER);
    final String value = record.get(VALUE_HEADER);
    return new CkRating(user, movie, value);
}

From source file:notaql.engines.csv.CSVEngineEvaluator.java

/**
 * Evaluates the given transformation./*w w w .ja  v  a2  s .  co  m*/
 *
 * This first parses the document (with the first line being the header) and then evaluates on our framework.
 *
 * TODO: this assumes a header line. It might happen that it is not provided.
 *
 * @param transformation
 * @return
 */
@Override
public JavaRDD<ObjectValue> evaluate(Transformation transformation) {
    final SparkTransformationEvaluator evaluator = new SparkTransformationEvaluator(transformation);

    final JavaSparkContext sc = NotaQL.SparkFactory.getSparkContext();

    final CSVFormat format = CSVFormat.DEFAULT;

    final JavaRDD<String> csv = sc.textFile(path);

    final String first = csv.first();

    final CSVRecord header;
    try {
        header = format.parse(new StringReader(first)).iterator().next();
    } catch (IOException e) {
        e.printStackTrace();
        throw new AssertionError("Header could not be read for some reason.");
    }

    String[] headerCols = new String[header.size()];
    for (int i = 0; i < header.size(); i++) {
        headerCols[i] = header.get(i);
    }

    final CSVFormat headerFormat = CSVFormat.DEFAULT.withHeader(headerCols);

    final JavaRDD<CSVRecord> records = csv.filter(f -> !f.equals(first))
            .map(line -> headerFormat.parse(new StringReader(line)).iterator().next());

    final JavaRDD<Value> converted = records.map(ValueConverter::convertToNotaQL);

    final JavaRDD<Value> filtered = converted.filter(o -> transformation.satisfiesInPredicate((ObjectValue) o));

    return evaluator.process(filtered);
}

From source file:nz.ac.waikato.cms.supernova.gui.Supernova.java

/**
 * Generates the output of the "batch" tab.
 *
 * <p>Reads the CSV file selected in {@code m_BatchCSV} (first row is the header), groups
 * consecutive rows that share the same ID (column 0) and calls the output generator once per
 * group, writing to {@code <batch output dir>/<ID>.<extension>}. Rows with fewer than four
 * columns are skipped. Progress and failures are reported through {@code batchLog}; the
 * "generate" button is disabled while the method runs.
 *
 * <p>The reader and the CSV parser are closed when processing finishes, whether it succeeds or
 * fails.
 */
protected void generateBatchOutput() {
    String cls;
    AbstractOutputGenerator generator;
    int colID;
    int colMeasure;
    int colScore;
    int colPercentile;
    String oldID;
    Map<String, List<Double>> test;
    String id;
    File outfile;
    String msg;
    String measure;
    double score;
    double percentile;
    String error;

    m_BatchLog.setText("");
    m_BatchGenerate.setEnabled(false);

    try {
        cls = AbstractOutputGenerator.class.getPackage().getName() + "." + m_SingleGenerator.getSelectedItem();
        generator = (AbstractOutputGenerator) Class.forName(cls).newInstance();
    } catch (Exception e) {
        batchLog("Failed to instantiate output generator - falling back on PNG", e);
        generator = new PNG();
    }

    // try-with-resources: the file handle is released even if a row fails to parse
    try (Reader reader = new FileReader(m_BatchCSV.getCurrent());
            CSVParser csvparser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
        colID = 0;
        colMeasure = 1;
        colScore = 2;
        colPercentile = 3;
        oldID = "";
        test = new HashMap<>();
        for (CSVRecord rec : csvparser) {
            if (rec.size() < 4)
                continue;
            id = rec.get(colID);
            if (!id.equals(oldID)) {
                // the ID changed: flush the measures collected for the previous ID
                if (!test.isEmpty()) {
                    outfile = new File(m_BatchOutput.getCurrent() + File.separator + oldID + "."
                            + generator.getExtension());
                    batchLog("Generating: " + outfile, false);
                    batchLog("Using: " + test, false);
                    msg = generator.generate(test, outfile);
                    if (msg != null) {
                        error = "Failed to generate output for ID: " + oldID;
                        batchLog(error, true);
                    }
                }
                test.clear();
                oldID = id;
            }
            measure = rec.get(colMeasure);
            score = Double.parseDouble(rec.get(colScore));
            percentile = Double.parseDouble(rec.get(colPercentile));
            test.put(measure, new ArrayList<>(Arrays.asList(new Double[] { score, percentile })));
        }
        // flush the last group, which has no following ID change to trigger it
        if (!test.isEmpty()) {
            outfile = new File(
                    m_BatchOutput.getCurrent() + File.separator + oldID + "." + generator.getExtension());
            batchLog("Generating: " + outfile, false);
            batchLog("Using: " + test, false);
            msg = generator.generate(test, outfile);
            if (msg != null) {
                error = "Failed to generate output for ID: " + oldID;
                batchLog(error, true);
            }
        }
    } catch (Exception e) {
        batchLog("Failed to generate output!", e);
    }

    m_BatchGenerate.setEnabled(true);
}

From source file:nz.ac.waikato.cms.supernova.SupernovaCSV.java

/**
 * Command-line entry point: parses the options, configures the output generator and produces
 * one output file per ID found in the CSV file.
 *
 * <p>Consecutive CSV rows sharing the same ID are collected into one measure map and handed to
 * the generator when the ID changes (and once more after the last row). Rows with fewer than
 * four columns are skipped. The CSV reader and parser are closed once all rows are processed,
 * also when a row fails to parse.
 *
 * @param args the command-line arguments
 * @throws Exception if instantiating the generator/center classes or reading the CSV file fails
 */
public static void main(String[] args) throws Exception {
    ArgumentParser parser;

    parser = ArgumentParsers.newArgumentParser("I am supernova");
    parser.description("Generates output according to 'I am supernova' by Keith Soo.\n"
            + "Loads scores/percentiles from a CSV file to generate multiple outputs at once.\n"
            + "Expected four columns (name of column is irrelevant):\n"
            + "- ID: the filename (excluding path and extension)\n" + "- Measure: the measure (" + MEASURE_LIST
            + ")\n" + "- Score: the score of the measure\n" + "- Percentile: the percentile of the measure\n"
            + "\n" + "Project homepage:\n" + "https://github.com/fracpete/i-am-supernova");

    // colors
    parser.addArgument("--" + AbstractOutputGenerator.OPENNESS + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.OPENNESS + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.ORANGE))
            .help("The color for '" + AbstractOutputGenerator.OPENNESS + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.ORANGE) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.EXTRAVERSION + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.EXTRAVERSION + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.YELLOW))
            .help("The color for '" + AbstractOutputGenerator.EXTRAVERSION + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.YELLOW) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.AGREEABLENESS + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.AGREEABLENESS + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.GREEN))
            .help("The color for '" + AbstractOutputGenerator.AGREEABLENESS + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.GREEN) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.CONSCIENTIOUSNESS + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.CONSCIENTIOUSNESS + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.BLUE))
            .help("The color for '" + AbstractOutputGenerator.CONSCIENTIOUSNESS + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.BLUE) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.NEUROTICISM + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.NEUROTICISM + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.RED))
            .help("The color for '" + AbstractOutputGenerator.NEUROTICISM + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.RED) + ").");

    // other parameters
    parser.addArgument("--" + CSV).metavar(CSV).type(String.class).required(true)
            .help("The CSV file containing the scores/percentiles (header must be present).");

    parser.addArgument("--" + ID).metavar(ID).type(Integer.class).setDefault(1)
            .help("The 1-based index of the column in the CSV file containing the ID for the output file.");

    parser.addArgument("--" + MEASURE).metavar(MEASURE).type(Integer.class).setDefault(2)
            .help("The 1-based index of the column in the CSV file containing the measure name.\n"
                    + "Allowed values: " + MEASURE_LIST);

    parser.addArgument("--" + SCORE).metavar(SCORE).type(Integer.class).setDefault(3)
            .help("The 1-based index of the column in the CSV file containing the scores.");

    parser.addArgument("--" + PERCENTILE).metavar(PERCENTILE).type(Integer.class).setDefault(4)
            .help("The 1-based index of the column in the CSV file containing the percentiles.");

    parser.addArgument("--" + BACKGROUND).metavar(BACKGROUND).type(String.class)
            .setDefault(ColorHelper.toHex(Color.BLACK)).help("The background color.");

    parser.addArgument("--" + OPACITY).metavar(OPACITY).type(Double.class).setDefault(0.1)
            .help("The opacity (0-1).");

    parser.addArgument("--" + MARGIN).metavar(MARGIN).type(Double.class).setDefault(0.2)
            .help("The margin in the output (0-1).");

    parser.addArgument("--" + WIDTH).metavar(WIDTH).type(Integer.class).setDefault(2000)
            .help("The width of the output.");

    parser.addArgument("--" + HEIGHT).metavar(HEIGHT).type(Integer.class).setDefault(2000)
            .help("The height of the output.");

    parser.addArgument("--" + CENTER).metavar(CENTER).type(String.class).setDefault(Incenter.class.getName())
            .help("The name of the algorithm for calculating the center of a triangle.\n" + "Available: "
                    + Registry.toString(Registry.getCenters(), true));

    parser.addArgument("--" + GENERATOR).metavar(GENERATOR).type(String.class).setDefault(PNG.class.getName())
            .help("The name of the generator class to use.\n" + "Available: "
                    + Registry.toString(Registry.getGenerators(), true));

    parser.addArgument("--" + OUTPUT).metavar(OUTPUT).type(String.class)
            .help("The directory to store the output in.");

    parser.addArgument("--" + VERBOSE).metavar(VERBOSE).type(Boolean.class).action(Arguments.storeTrue())
            .help("Whether to output logging information.");

    Namespace namespace;
    try {
        namespace = parser.parseArgs(args);
    } catch (Exception e) {
        // the help screen request has already printed the help; only print it for real errors
        if (!(e instanceof HelpScreenException))
            parser.printHelp();
        return;
    }

    // colors
    Map<String, Color> colors = new HashMap<>();
    colors.put(AbstractOutputGenerator.OPENNESS, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.OPENNESS + COLOR_SUFFIX), Color.ORANGE));
    colors.put(AbstractOutputGenerator.EXTRAVERSION, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.EXTRAVERSION + COLOR_SUFFIX), Color.YELLOW));
    colors.put(AbstractOutputGenerator.AGREEABLENESS, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.AGREEABLENESS + COLOR_SUFFIX), Color.GREEN));
    colors.put(AbstractOutputGenerator.CONSCIENTIOUSNESS, ColorHelper.valueOf(
            namespace.getString(AbstractOutputGenerator.CONSCIENTIOUSNESS + COLOR_SUFFIX), Color.BLUE));
    colors.put(AbstractOutputGenerator.NEUROTICISM, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.NEUROTICISM + COLOR_SUFFIX), Color.RED));

    // NOTE(review): --output is neither required nor given a default above; if the namespace
    // yields null for it, new File(null) throws a NullPointerException here - confirm intent.
    File outdir = new File(namespace.getString(OUTPUT));

    // class names without a package are resolved relative to the respective base class' package
    String centerCls = namespace.getString(CENTER);
    if (!centerCls.contains("."))
        centerCls = AbstractTriangleCenterCalculation.class.getPackage().getName() + "." + centerCls;
    String generatorCls = namespace.getString(GENERATOR);
    if (!generatorCls.contains("."))
        generatorCls = AbstractOutputGenerator.class.getPackage().getName() + "." + generatorCls;
    AbstractOutputGenerator generator = (AbstractOutputGenerator) Class.forName(generatorCls).newInstance();
    generator.setVerbose(namespace.getBoolean(VERBOSE));
    generator.setColors(colors);
    generator.setBackground(ColorHelper.valueOf(namespace.getString(BACKGROUND), Color.BLACK));
    generator.setOpacity(namespace.getDouble(OPACITY));
    generator.setMargin(namespace.getDouble(MARGIN));
    generator.setCenter((AbstractTriangleCenterCalculation) Class.forName(centerCls).newInstance());
    if (generator instanceof AbstractOutputGeneratorWithDimensions) {
        AbstractOutputGeneratorWithDimensions pixel = (AbstractOutputGeneratorWithDimensions) generator;
        pixel.setWidth(namespace.getInt(WIDTH));
        pixel.setHeight(namespace.getInt(HEIGHT));
    }

    // command-line indices are 1-based, record access is 0-based
    int colID = namespace.getInt(ID) - 1;
    int colMeasure = namespace.getInt(MEASURE) - 1;
    int colScore = namespace.getInt(SCORE) - 1;
    int colPercentile = namespace.getInt(PERCENTILE) - 1;
    // try-with-resources: the file handle is released even if a row fails to parse
    try (Reader reader = new FileReader(namespace.getString(CSV));
            CSVParser csvparser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
        String oldID = "";
        Map<String, List<Double>> test = new HashMap<>();
        for (CSVRecord rec : csvparser) {
            if (rec.size() < 4)
                continue;
            String id = rec.get(colID);
            if (!id.equals(oldID)) {
                // the ID changed: flush the measures collected for the previous ID
                if (!test.isEmpty()) {
                    File outfile = new File(outdir + File.separator + oldID + "." + generator.getExtension());
                    String msg = generator.generate(test, outfile);
                    if (msg != null)
                        System.err.println("Failed to generate output for ID: " + oldID);
                }
                test.clear();
                oldID = id;
            }
            String measure = rec.get(colMeasure);
            double score = Double.parseDouble(rec.get(colScore));
            double percentile = Double.parseDouble(rec.get(colPercentile));
            test.put(measure, new ArrayList<>(Arrays.asList(new Double[] { score, percentile })));
        }
        // flush the last group, which has no following ID change to trigger it
        if (!test.isEmpty()) {
            File outfile = new File(outdir + File.separator + oldID + "." + generator.getExtension());
            String msg = generator.generate(test, outfile);
            if (msg != null)
                System.err.println("Failed to generate output for ID: " + oldID);
        }
    }
}

From source file:nzilbb.agcsv.AgCsvDeserializer.java

/**
 * Loads the serialized form of the graph, using the given set of named streams.
 * @param streams A list of named streams that contain all the transcription/annotation data required.
 * @param schema The layer schema, definining layers and the way they interrelate.
 * @return A list of parameters that require setting before {@link IDeserializer#deserialize()} can be invoked. This may be an empty list, and may include parameters with the value already set to a workable default. If there are parameters, and user interaction is possible, then the user may be presented with an interface for setting/confirming these parameters, before they are then passed to {@link IDeserializer#setParameters(ParameterSet)}.
 * @throws SerializationException If the graph could not be loaded.
 * @throws IOException On IO error./* w ww .j a v a  2 s  .  co  m*/
 * @throws SerializerNotConfiguredException If the configuration is not sufficient for deserialization.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public ParameterSet load(NamedStream[] streams, Schema schema)
        throws IOException, SerializationException, SerializerNotConfiguredException {
    if (getFieldDelimiter() == null)
        throw new SerializerNotConfiguredException("fieldDelimiter must be set.");
    ParameterSet parameters = new ParameterSet();

    // take the first csv stream, ignore all others.
    NamedStream csv = Utility.FindSingleStream(streams, ".csv", "text/csv");
    if (csv == null)
        throw new SerializationException("No CSV stream found");
    setName(csv.getName());
    setName(getName().replaceFirst("\\.csv$", "").replaceFirst("\\.ag$", ""));

    reset();

    CSVParser parser = new CSVParser(new InputStreamReader(csv.getStream()),
            CSVFormat.EXCEL.withDelimiter(fieldDelimiter.charAt(0)));
    mDiscoveredLayers = new HashMap<String, Layer>();
    Vector<CSVRecord> vRecords = new Vector<CSVRecord>();
    mCsvData.put("anchor", vRecords); // start with anchors

    // read all the lines, and extract the layer names
    for (CSVRecord line : parser) {
        // does it have only one field? - the layer name
        if (line.get(0).equals("layer")) {
            Layer layer = new Layer(line.get(1), line.get(2), Integer.parseInt(line.get(5)), true, // peers
                    false, // peersOverlap
                    false, // saturated
                    line.get(4).equals("W") ? schema.getWordLayerId() // parentId
                            : line.get(4).equals("M") ? schema.getTurnLayerId() // parentId
                                    : line.get(4).equals("F") ? "graph" : "segments", // parentId
                    true); // parentIncludes
            int layerId = Integer.parseInt(line.get(6));
            if (layerId == 11) // turn
            {
                layer.setParentId(schema.getParticipantLayerId());
            } else if (layerId == 12) // utterance
            {
                layer.setSaturated(true);
            } else if (layerId == 0) // transcription
            {
                layer.setParentId(schema.getTurnLayerId());
            } else if (layerId == 2) // orthography
            {
                layer.setPeers(false);
                layer.setSaturated(true);
            } else if (layerId == 1) // segments
            {
                layer.setSaturated(true);
            }
            layer.put("@layer_id", layerId);
            layer.put("@type", line.get(3));
            layer.put("@scope", line.get(4));
            mDiscoveredLayers.put(line.get(1), layer);
            Parameter p = new Parameter(layer.getId(), Layer.class, layer.getId(), layer.getDescription(),
                    true);
            p.setValue(schema.getLayer(layer.getId()));
            p.setPossibleValues(schema.getLayers().values());
            parameters.addParameter(p);

            // start a new set of records
            vRecords = new Vector<CSVRecord>();
            mCsvData.put(layer.getId(), vRecords);
        }
        vRecords.add(line);
    } // next line
    parser.close();

    return parameters;
}

From source file:nzilbb.agcsv.AgCsvDeserializer.java

/**
 * Deserializes the serialized data, generating one or more {@link Graph}s.
 * <p>Builds a single graph named after this deserializer, adds the participant, turn, utterance
 * and word layers plus every discovered layer, then creates the anchors from the "anchor"
 * records and the annotations from each remaining record group that maps to a discovered layer.
 * @return A list of valid (if incomplete) {@link Graph}s; this implementation returns exactly one.
 * @throws SerializerNotConfiguredException if the object has not been configured.
 * @throws SerializationParametersMissingException if the parameters for this particular graph have not been set.
 * @throws SerializationException if errors occur during deserialization.
 */
public Graph[] deserialize() throws SerializerNotConfiguredException, SerializationParametersMissingException,
        SerializationException {
    Graph graph = new Graph();
    graph.setId(getName());

    // Layers: only the structural layers and the discovered ones are added. Copying the whole
    // schema would wrongly imply that every other layer is known to contain no annotations.
    graph.addLayer((Layer) s.getParticipantLayer().clone());
    graph.getSchema().setParticipantLayerId(s.getParticipantLayer().getId());
    graph.addLayer((Layer) s.getTurnLayer().clone());
    graph.getSchema().setTurnLayerId(s.getTurnLayer().getId());
    graph.addLayer((Layer) s.getUtteranceLayer().clone());
    graph.getSchema().setUtteranceLayerId(s.getUtteranceLayer().getId());
    graph.addLayer((Layer) s.getWordLayer().clone());
    graph.getSchema().setWordLayerId(s.getWordLayer().getId());
    for (Layer discovered : mDiscoveredLayers.values()) {
        if (discovered != null) {
            graph.addLayer((Layer) discovered.clone());
        }
    }

    // Anchors: id, offset, confidence and an optional comment in the fourth field.
    for (CSVRecord row : mCsvData.get("anchor")) {
        boolean isHeaderRow = row.get(1).equals("offset");
        if (isHeaderRow) {
            continue;
        }
        String anchorId = row.get(0);
        Double offset = Double.valueOf(row.get(1));
        Integer confidence = Integer.valueOf(row.get(2));
        Anchor anchor = new Anchor(anchorId, offset, confidence);
        graph.addAnchor(anchor);
        if (row.size() > 3) {
            String comment = row.get(3);
            if (comment.length() > 0) {
                anchor.put("comment", comment);
            }
        }
    }
    mCsvData.remove("anchor");

    // Annotations: failures are collected so that as many as possible are reported together;
    // the first exception carries the messages of all later ones.
    SerializationException collected = null;
    for (String originalId : mCsvData.keySet()) {
        Layer target = mDiscoveredLayers.get(originalId);
        if (target == null) {
            continue; // not mapped to a schema layer
        }
        try {
            readAnnotations(mCsvData.get(originalId), target, graph);
        } catch (SerializationException exception) {
            if (collected != null) {
                collected.addError(SerializationException.ErrorType.Other, exception.getMessage());
            } else {
                collected = exception;
            }
        }
    }

    if (collected != null) {
        throw collected;
    }
    return new Graph[] { graph };
}

From source file:nzilbb.agcsv.AgCsvDeserializer.java

/**
 * Create annotations from the given CSV rows.
 * @param lines CSV records./* w w  w .  ja  v a  2s .c o m*/
 * @param layer Layer for the annotations.
 * @param graph Graph to add the annotations to.
 * @throws SerializationException On error.
 */
public void readAnnotations(Vector<CSVRecord> lines, Layer layer, Graph graph) throws SerializationException {
    // map header columns
    HashMap<String, Integer> mHeadings = new HashMap<String, Integer>();
    for (int c = 0; c < lines.elementAt(1).size(); c++) {
        String sHeader = lines.elementAt(1).get(c);
        if (sHeader.equalsIgnoreCase("id"))
            mHeadings.put("id", c);
        else if (sHeader.equalsIgnoreCase("startAnchor.id"))
            mHeadings.put("startAnchor.id", c);
        else if (sHeader.equalsIgnoreCase("endAnchor.id"))
            mHeadings.put("endAnchor.id", c);
        else if (sHeader.equalsIgnoreCase("label"))
            mHeadings.put("label", c);
        else if (sHeader.equalsIgnoreCase("labelStatus"))
            mHeadings.put("labelStatus", c);
        else if (sHeader.equalsIgnoreCase("turnAnnotationId"))
            mHeadings.put("turnAnnotationId", c);
        else if (sHeader.equalsIgnoreCase("ordinalInTurn"))
            mHeadings.put("ordinalInTurn", c);
        else if (sHeader.equalsIgnoreCase("wordAnnotationId"))
            mHeadings.put("wordAnnotationId", c);
        else if (sHeader.equalsIgnoreCase("ordinalInWord"))
            mHeadings.put("ordinalInWord", c);
        else if (sHeader.equalsIgnoreCase("segmentAnnotationId"))
            mHeadings.put("segmentAnnotationId", c);
    } // next header
    int highestHeaderIndex = 0;
    for (Integer i : mHeadings.values())
        highestHeaderIndex = Math.max(highestHeaderIndex, i);
    mHeadings.put("comment", highestHeaderIndex + 1);

    for (int i = 2; i < lines.size(); i++) {
        CSVRecord line = lines.elementAt(i);
        Annotation annotation = new Annotation(line.get(mHeadings.get("id")), line.get(mHeadings.get("label")),
                layer.getId(), line.get(mHeadings.get("startAnchor.id")),
                line.get(mHeadings.get("endAnchor.id")));
        annotation.setConfidence(new Integer(line.get(mHeadings.get("labelStatus"))));
        if (mHeadings.get("comment") < line.size()) {
            String comment = line.get(mHeadings.get("comment"));
            if (comment.length() > 0) {
                annotation.put("comment", comment);
            }
        }

        // parent
        if (layer.getParentId().equals("graph")) {
            annotation.setParentId(graph.getId());
        } else if (layer.getParentId().equals(graph.getSchema().getTurnLayerId())) {
            if (layer.getId().equals(graph.getSchema().getUtteranceLayerId())) {
                // make sure turn exists
                Annotation turn = graph.getAnnotation(line.get(mHeadings.get("turnAnnotationId")));
                if (turn == null) {

                    // make sure participant exists
                    Annotation participant = graph.getAnnotation(annotation.getLabel());
                    if (participant == null) {
                        participant = new Annotation(annotation.getLabel(), annotation.getLabel(),
                                graph.getSchema().getParticipantLayerId());
                        graph.addAnnotation(participant);
                    }

                    turn = new Annotation(line.get(mHeadings.get("turnAnnotationId")), annotation.getLabel(),
                            graph.getSchema().getTurnLayerId(),
                            // start/end IDs are set, but the anchor's themselves aren't added
                            line.get(mHeadings.get("turnAnnotationId")) + " start",
                            line.get(mHeadings.get("turnAnnotationId")) + " end", participant.getId());
                    graph.addAnnotation(turn);
                } // turn isn't there
            } // utterance layer
            annotation.setParentId(line.get(mHeadings.get("turnAnnotationId")));
        } else if (layer.getParentId().equals(graph.getSchema().getWordLayerId())) {
            annotation.setParentId(line.get(mHeadings.get("wordAnnotationId")));
        } else if (layer.getParentId().equals("segments")) {
            annotation.setParentId(line.get(mHeadings.get("segmentAnnotationId")));
        } else if (layer.getId().equals(graph.getSchema().getTurnLayerId())) { // turn layer
                                                                               // make sure participant exists
            Annotation participant = graph.getAnnotation(annotation.getLabel());
            if (participant == null) {
                participant = new Annotation(annotation.getLabel(), annotation.getLabel(),
                        graph.getSchema().getParticipantLayerId());
                graph.addAnnotation(participant);
            }
            annotation.setParentId(participant.getId());
        }

        // ordinal
        if (layer.getId().equals(graph.getSchema().getWordLayerId())) {
            annotation.setOrdinal(Integer.parseInt(line.get(mHeadings.get("ordinalInTurn"))));
        } else if (layer.getId().equals("segments")) {
            annotation.setOrdinal(Integer.parseInt(line.get(mHeadings.get("ordinalInWord"))));
        }
        graph.addAnnotation(annotation);
    }
}

From source file:nzilbb.csv.CsvDeserializer.java

/**
 * Deserializes the serialized data, generating one or more {@link Graph}s.
 * <p>Many data formats will only yield one graph (e.g. Transcriber
 * transcript or Praat textgrid), however there are formats that
 * are capable of storing multiple transcripts in the same file
 * (e.g. AGTK, Transana XML export), which is why this method
 * returns a list./*from  w ww . j a va2 s.co  m*/
 * <p>This deserializer generates one graph per data row in the CSV file.
 * @return A list of valid (if incomplete) {@link Graph}s. 
 * @throws SerializerNotConfiguredException if the object has not been configured.
 * @throws SerializationParametersMissingException if the parameters for this particular graph have not been set.
 * @throws SerializationException if errors occur during deserialization.
 */
public Graph[] deserialize() throws SerializerNotConfiguredException, SerializationParametersMissingException,
        SerializationException {
    if (participantLayer == null)
        throw new SerializerNotConfiguredException("Participant layer not set");
    if (turnLayer == null)
        throw new SerializerNotConfiguredException("Turn layer not set");
    if (utteranceLayer == null)
        throw new SerializerNotConfiguredException("Utterance layer not set");
    if (wordLayer == null)
        throw new SerializerNotConfiguredException("Word layer not set");
    if (schema == null)
        throw new SerializerNotConfiguredException("Layer schema not set");

    validate();

    String participantColumn = (String) parameters.get("who").getValue();
    String textColumn = (String) parameters.get("text").getValue();

    // if there are errors, accumlate as many as we can before throwing SerializationException
    SerializationException errors = null;

    Vector<Graph> graphs = new Vector<Graph>();
    Iterator<CSVRecord> records = getParser().iterator();
    while (records.hasNext()) {
        CSVRecord record = records.next();
        Graph graph = new Graph();
        if (parameters == null || parameters.get("id") == null || parameters.get("id").getValue() == null) {
            graph.setId(getName() + "-" + record.getRecordNumber());
        } else {
            graph.setId(record.get((String) parameters.get("id").getValue()));
        }
        graph.setOffsetUnits(Constants.UNIT_CHARACTERS);

        // creat the 0 anchor to prevent graph tagging from creating one with no confidence
        Anchor firstAnchor = graph.getOrCreateAnchorAt(0.0, Constants.CONFIDENCE_MANUAL);
        Anchor lastAnchor = firstAnchor;

        // add layers to the graph
        // we don't just copy the whole schema, because that would imply that all the extra layers
        // contained no annotations, which is not necessarily true
        graph.addLayer((Layer) participantLayer.clone());
        graph.getSchema().setParticipantLayerId(participantLayer.getId());
        graph.addLayer((Layer) turnLayer.clone());
        graph.getSchema().setTurnLayerId(turnLayer.getId());
        graph.addLayer((Layer) utteranceLayer.clone());
        graph.getSchema().setUtteranceLayerId(utteranceLayer.getId());
        graph.addLayer((Layer) wordLayer.clone());
        graph.getSchema().setWordLayerId(wordLayer.getId());
        if (parameters != null) {
            for (Parameter p : parameters.values()) {
                if (p.getValue() instanceof Layer) {
                    Layer layer = (Layer) p.getValue();
                    if (layer != null && graph.getLayer(layer.getId()) == null) { // haven't added this layer yet
                        graph.addLayer((Layer) layer.clone());
                    }
                }
            }
        }

        // participant/author
        Annotation participant = graph.createTag(graph, schema.getParticipantLayerId(),
                record.get(participantColumn));

        // meta-data
        for (String header : getHeaderMap().keySet()) {
            if (header.trim().length() == 0)
                continue;
            Parameter p = parameters.get("header_" + getHeaderMap().get(header));
            if (p != null && p.getValue() != null) {
                Layer layer = (Layer) p.getValue();
                String value = record.get(header);
                if (layer.getParentId().equals(schema.getRoot().getId())) // graph tag
                {
                    graph.createTag(graph, layer.getId(), value);
                } else // participant tag
                {
                    graph.createTag(participant, layer.getId(), value);
                }
            } // parameter set
        } // next header

        // text
        Annotation turn = new Annotation(null, participant.getLabel(), getTurnLayer().getId());
        graph.addAnnotation(turn);
        turn.setParent(participant);
        turn.setStart(graph.getOrCreateAnchorAt(0.0, Constants.CONFIDENCE_MANUAL));
        Annotation line = new Annotation(null, turn.getLabel(), getUtteranceLayer().getId());
        line.setParentId(turn.getId());
        line.setStart(turn.getStart());
        int iLastPosition = 0;

        String sLine = record.get(textColumn).trim();
        int iNumChars = sLine.length();
        line = new Annotation(null, sLine, getUtteranceLayer().getId());
        line.setParentId(turn.getId());
        line.setStart(turn.getStart());
        Anchor end = graph.getOrCreateAnchorAt(((double) iNumChars + 1), Constants.CONFIDENCE_MANUAL);
        line.setEnd(end);
        graph.addAnnotation(line);

        // ensure we have an utterance tokenizer
        if (getTokenizer() == null) {
            setTokenizer(new SimpleTokenizer(getUtteranceLayer().getId(), getWordLayer().getId()));
        }
        try {
            tokenizer.transform(graph);
        } catch (TransformationException exception) {
            if (errors == null)
                errors = new SerializationException();
            if (errors.getCause() == null)
                errors.initCause(exception);
            errors.addError(SerializationException.ErrorType.Tokenization, exception.getMessage());
        }
        graph.commit();

        OrthographyClumper clumper = new OrthographyClumper(wordLayer.getId(), utteranceLayer.getId());
        try {
            // clump non-orthographic 'words' with real words
            clumper.transform(graph);
            graph.commit();
        } catch (TransformationException exception) {
            if (errors == null)
                errors = new SerializationException();
            if (errors.getCause() == null)
                errors.initCause(exception);
            errors.addError(SerializationException.ErrorType.Tokenization, exception.getMessage());
        }

        if (errors != null)
            throw errors;

        // set end anchors of graph tags
        for (Annotation a : graph.list(getParticipantLayer().getId())) {
            a.setStartId(firstAnchor.getId());
            a.setEndId(lastAnchor.getId());
        }

        graph.commit();

        graphs.add(graph);
    } // next record      

    return graphs.toArray(new Graph[0]);
}

From source file:onlinenewspopularity.DataFormatter.java

/**
 * Reads the CSV file and stores each data row at a randomly chosen row of the data matrix.
 * The first record is treated as the header. Feature columns that are zero in every row
 * read are dropped, and a log entry is written for each dropped feature.
 * @return matrix list
 * The list has the following elements:
 * 1. List of features (mx1 ArrayList)
 * 2. Target column name
 * 3. Data for training (n1xm matrix)
 * 4. Target values for training data (n1x1 matrix)
 * 5. Test data (n2xm matrix)
 * 6. Target values for test data (n2x1 matrix)
 * NOTE: n1 is the length of training data set.
 *       n2 is the length of test data set.
 *       n2 = Constants.SIZE*Constants.TEST_SET_RATIO
 *       n1 = Constants.SIZE-n2
 *       Elements 1 and 2 are not {@code Matrix} instances even though the declared element
 *       type is {@code Matrix}; callers must read them through a raw or {@code Object} view.
 * @throws Exception if the file cannot be read or parsed; the failure is logged and rethrown.
 */
public List<Matrix> readData() throws Exception {
    try (Reader br = new FileReader(new File(fileName))) {
        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(br);

        Iterator<CSVRecord> itr = records.iterator();
        CSVRecord header = itr.next();

        // feature names: the constant first column, then the header columns up to (but
        // excluding) the last one, which names the target
        List<String> features = new ArrayList<>();
        features.add(Constants.FEATURE_COL1_NAME);
        for (int col = Constants.INITIAL_FEATURE_INDEX; col < header.size() - 1; col++) {
            features.add(header.get(col).trim());
        }
        String predictColName = header.get(header.size() - 1).trim();

        trainStat = new double[2][features.size()];

        double[][] data = new double[Constants.SIZE][features.size()];
        double[][] res = new double[Constants.SIZE][1];

        // a feature is valid once a non-zero value has been seen for it
        boolean[] validFeature = new boolean[features.size()];
        validFeature[0] = true; //theta_0 is a valid feature
        int featureCount = 1;

        // pool of destination rows that have not been used yet
        List<Integer> indices = new ArrayList<>(Constants.SIZE);
        for (int row = 0; row < Constants.SIZE; row++) {
            indices.add(row);
        }
        Random randGen = new Random();

        for (CSVRecord record : records) {
            if (indices.isEmpty()) {
                break; // Constants.SIZE rows have been read
            }

            // draw a free row uniformly at random; moving the last entry into the drawn
            // slot removes it in O(1) instead of searching and shifting the list
            int pick = randGen.nextInt(indices.size());
            int index = indices.get(pick);
            int last = indices.size() - 1;
            indices.set(pick, indices.get(last));
            indices.remove(last);

            data[index][0] = 1.0;
            for (int j = 1; j < features.size(); j++) {
                data[index][j] = Double.parseDouble(record.get(j + Constants.INITIAL_FEATURE_INDEX - 1));
                if (data[index][j] != 0 && !validFeature[j]) {
                    featureCount++;
                    validFeature[j] = true;
                }
            }
            res[index][0] = Double.parseDouble(record.get(record.size() - 1));
        }

        //Remove empty features
        if (featureCount < features.size()) {
            double[][] newData = new double[Constants.SIZE][featureCount];
            List<String> keptFeatures = new ArrayList<>(featureCount);

            for (int j = 0; j < validFeature.length; j++) {
                if (validFeature[j]) {
                    int target = keptFeatures.size();
                    for (int row = 0; row < Constants.SIZE; row++) {
                        newData[row][target] = data[row][j];
                    }
                    keptFeatures.add(features.get(j));
                } else {
                    LOGGER.log(Level.INFO, "Removing empty feature: {0}", features.get(j));
                }
            }

            features.clear();
            features.addAll(keptFeatures);
            data = newData;
        }

        int testLen = (int) (Constants.TEST_SET_RATIO * Constants.SIZE);
        int trainLen = Constants.SIZE - testLen;

        Matrix tmpx = new Matrix(data);
        Matrix tmpy = new Matrix(res);

        List<Object> parts = new ArrayList<>();
        parts.add(features);
        parts.add(predictColName);
        parts.add(tmpx.getMatrix(0, trainLen - 1, 0, tmpx.getColumnDimension() - 1));
        parts.add(tmpy.getMatrix(0, trainLen - 1, 0, tmpy.getColumnDimension() - 1));
        parts.add(tmpx.getMatrix(trainLen, tmpx.getRowDimension() - 1, 0, tmpx.getColumnDimension() - 1));
        parts.add(tmpy.getMatrix(trainLen, tmpy.getRowDimension() - 1, 0, tmpy.getColumnDimension() - 1));

        // The method has always returned a heterogeneous list behind the List<Matrix>
        // signature; the conversion is made explicit here so the layout stays unchanged
        // for existing callers.
        @SuppressWarnings({ "unchecked", "rawtypes" })
        List<Matrix> result = (List) parts;
        return result;
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "{0}: {1}", new Object[] { e.getClass().getName(), e.getMessage() });
        throw e;
    }
}