Example usage for org.apache.commons.csv CSVRecord get

List of usage examples for org.apache.commons.csv CSVRecord get

Introduction

On this page you can find example usage of org.apache.commons.csv CSVRecord get.

Prototype

public String get(final String name) 

Source Link

Document

Returns a value by name.

Usage

From source file:norbert.mynemo.dataimport.scraping.CkMapping.java

/**
 * Builds a mapping from a CSV record. The record is usually produced by a parser
 * obtained from the {@link #createParser(String)} method.
 */
public static CkMapping createMapping(CSVRecord record) {
    String ckMovie = record.get(CK_MOVIE_HEADER);
    String imdbMovie = record.get(IMDB_MOVIE_HEADER);
    return new CkMapping(ckMovie, imdbMovie);
}

From source file:norbert.mynemo.dataimport.scraping.CkRating.java

/**
 * Builds a rating from a CSV record. The record is usually produced by a parser
 * obtained from the {@link #createParser(String)} method.
 */
public static CkRating createRating(CSVRecord record) {
    String user = record.get(USER_HEADER);
    String movie = record.get(MOVIE_HEADER);
    String value = record.get(VALUE_HEADER);
    return new CkRating(user, movie, value);
}

From source file:notaql.engines.csv.CSVEngineEvaluator.java

/**
 * Evaluates the given transformation./*w w w .ja  v  a2  s .  co  m*/
 *
 * This first parses the document (with the first line being the header) and then evaluates on our framework.
 *
 * TODO: this assumes a header line. It might happen that it is not provided.
 *
 * @param transformation
 * @return
 */
@Override
public JavaRDD<ObjectValue> evaluate(Transformation transformation) {
    final SparkTransformationEvaluator evaluator = new SparkTransformationEvaluator(transformation);

    final JavaSparkContext sc = NotaQL.SparkFactory.getSparkContext();

    final CSVFormat format = CSVFormat.DEFAULT;

    final JavaRDD<String> csv = sc.textFile(path);

    final String first = csv.first();

    final CSVRecord header;
    try {
        header = format.parse(new StringReader(first)).iterator().next();
    } catch (IOException e) {
        e.printStackTrace();
        throw new AssertionError("Header could not be read for some reason.");
    }

    String[] headerCols = new String[header.size()];
    for (int i = 0; i < header.size(); i++) {
        headerCols[i] = header.get(i);
    }

    final CSVFormat headerFormat = CSVFormat.DEFAULT.withHeader(headerCols);

    final JavaRDD<CSVRecord> records = csv.filter(f -> !f.equals(first))
            .map(line -> headerFormat.parse(new StringReader(line)).iterator().next());

    final JavaRDD<Value> converted = records.map(ValueConverter::convertToNotaQL);

    final JavaRDD<Value> filtered = converted.filter(o -> transformation.satisfiesInPredicate((ObjectValue) o));

    return evaluator.process(filtered);
}

From source file:nz.ac.waikato.cms.supernova.gui.Supernova.java

/**
 * Generates the output of the "batch" tab.
 * <p>
 * Reads the batch CSV file (rows of at least four columns: ID, measure, score,
 * percentile), groups consecutive rows by ID, and generates one output file per ID.
 */
protected void generateBatchOutput() {
    AbstractOutputGenerator generator;

    m_BatchLog.setText("");
    m_BatchGenerate.setEnabled(false);

    try {
        String cls = AbstractOutputGenerator.class.getPackage().getName() + "."
                + m_SingleGenerator.getSelectedItem();
        generator = (AbstractOutputGenerator) Class.forName(cls).newInstance();
    } catch (Exception e) {
        // fall back to a sensible default rather than aborting the batch run
        batchLog("Failed to instantiate output generator - falling back on PNG", e);
        generator = new PNG();
    }

    // fixed column layout: ID, measure, score, percentile
    int colID = 0;
    int colMeasure = 1;
    int colScore = 2;
    int colPercentile = 3;
    String oldID = "";
    Map<String, List<Double>> test = new HashMap<>();
    // try-with-resources ensures the reader/parser get closed even on failure
    try (Reader reader = new FileReader(m_BatchCSV.getCurrent());
            CSVParser csvparser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
        for (CSVRecord rec : csvparser) {
            if (rec.size() < 4)
                continue;
            String id = rec.get(colID);
            if (!id.equals(oldID)) {
                // a new ID starts: flush everything accumulated for the previous one
                generateBatchFile(generator, oldID, test);
                test.clear();
                oldID = id;
            }
            String measure = rec.get(colMeasure);
            double score = Double.parseDouble(rec.get(colScore));
            double percentile = Double.parseDouble(rec.get(colPercentile));
            test.put(measure, new ArrayList<>(Arrays.asList(new Double[] { score, percentile })));
        }
        // flush the records accumulated for the last ID in the file
        generateBatchFile(generator, oldID, test);
    } catch (Exception e) {
        batchLog("Failed to generate output!", e);
    }

    m_BatchGenerate.setEnabled(true);
}

/**
 * Writes the accumulated measures for a single ID to an output file.
 * Does nothing if no measures have been accumulated.
 *
 * @param generator the output generator to use
 * @param id        the ID the measures belong to (used as the file name)
 * @param test      the accumulated measure -&gt; [score, percentile] map
 * @throws Exception if the generator fails hard
 */
private void generateBatchFile(AbstractOutputGenerator generator, String id, Map<String, List<Double>> test)
        throws Exception {
    if (test.isEmpty())
        return;
    File outfile = new File(m_BatchOutput.getCurrent() + File.separator + id + "." + generator.getExtension());
    batchLog("Generating: " + outfile, false);
    batchLog("Using: " + test, false);
    String msg = generator.generate(test, outfile);
    if (msg != null) {
        batchLog("Failed to generate output for ID: " + id, true);
    }
}

From source file:nz.ac.waikato.cms.supernova.SupernovaCSV.java

/**
 * Command-line entry point: parses the arguments, configures the output
 * generator and generates one output file per ID found in the CSV file.
 *
 * @param args the command-line arguments
 * @throws Exception if reading the CSV file or generating the output fails
 */
public static void main(String[] args) throws Exception {
    ArgumentParser parser;

    parser = ArgumentParsers.newArgumentParser("I am supernova");
    parser.description("Generates output according to 'I am supernova' by Keith Soo.\n"
            + "Loads scores/percentiles from a CSV file to generate multiple outputs at once.\n"
            + "Expected four columns (name of column is irrelevant):\n"
            + "- ID: the filename (excluding path and extension)\n" + "- Measure: the measure (" + MEASURE_LIST
            + ")\n" + "- Score: the score of the measure\n" + "- Percentile: the percentile of the measure\n"
            + "\n" + "Project homepage:\n" + "https://github.com/fracpete/i-am-supernova");

    // colors
    parser.addArgument("--" + AbstractOutputGenerator.OPENNESS + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.OPENNESS + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.ORANGE))
            .help("The color for '" + AbstractOutputGenerator.OPENNESS + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.ORANGE) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.EXTRAVERSION + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.EXTRAVERSION + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.YELLOW))
            .help("The color for '" + AbstractOutputGenerator.EXTRAVERSION + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.YELLOW) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.AGREEABLENESS + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.AGREEABLENESS + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.GREEN))
            .help("The color for '" + AbstractOutputGenerator.AGREEABLENESS + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.GREEN) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.CONSCIENTIOUSNESS + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.CONSCIENTIOUSNESS + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.BLUE))
            .help("The color for '" + AbstractOutputGenerator.CONSCIENTIOUSNESS + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.BLUE) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.NEUROTICISM + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.NEUROTICISM + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.RED))
            .help("The color for '" + AbstractOutputGenerator.NEUROTICISM + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.RED) + ").");

    // other parameters
    parser.addArgument("--" + CSV).metavar(CSV).type(String.class).required(true)
            .help("The CSV file containing the scores/percentiles (header must be present).");

    parser.addArgument("--" + ID).metavar(ID).type(Integer.class).setDefault(1)
            .help("The 1-based index of the column in the CSV file containing the ID for the output file.");

    parser.addArgument("--" + MEASURE).metavar(MEASURE).type(Integer.class).setDefault(2)
            .help("The 1-based index of the column in the CSV file containing the measure name.\n"
                    + "Allowed values: " + MEASURE_LIST);

    parser.addArgument("--" + SCORE).metavar(SCORE).type(Integer.class).setDefault(3)
            .help("The 1-based index of the column in the CSV file containing the scores.");

    parser.addArgument("--" + PERCENTILE).metavar(PERCENTILE).type(Integer.class).setDefault(4)
            .help("The 1-based index of the column in the CSV file containing the percentiles.");

    parser.addArgument("--" + BACKGROUND).metavar(BACKGROUND).type(String.class)
            .setDefault(ColorHelper.toHex(Color.BLACK)).help("The background color.");

    parser.addArgument("--" + OPACITY).metavar(OPACITY).type(Double.class).setDefault(0.1)
            .help("The opacity (0-1).");

    parser.addArgument("--" + MARGIN).metavar(MARGIN).type(Double.class).setDefault(0.2)
            .help("The margin in the output (0-1).");

    parser.addArgument("--" + WIDTH).metavar(WIDTH).type(Integer.class).setDefault(2000)
            .help("The width of the output.");

    parser.addArgument("--" + HEIGHT).metavar(HEIGHT).type(Integer.class).setDefault(2000)
            .help("The height of the output.");

    parser.addArgument("--" + CENTER).metavar(CENTER).type(String.class).setDefault(Incenter.class.getName())
            .help("The name of the algorithm for calculating the center of a triangle.\n" + "Available: "
                    + Registry.toString(Registry.getCenters(), true));

    parser.addArgument("--" + GENERATOR).metavar(GENERATOR).type(String.class).setDefault(PNG.class.getName())
            .help("The name of the generator class to use.\n" + "Available: "
                    + Registry.toString(Registry.getGenerators(), true));

    parser.addArgument("--" + OUTPUT).metavar(OUTPUT).type(String.class)
            .help("The directory to store the output in.");

    parser.addArgument("--" + VERBOSE).metavar(VERBOSE).type(Boolean.class).action(Arguments.storeTrue())
            .help("Whether to output logging information.");

    Namespace namespace;
    try {
        namespace = parser.parseArgs(args);
    } catch (Exception e) {
        // help requests already printed the help screen; print it for real errors only
        if (!(e instanceof HelpScreenException))
            parser.printHelp();
        return;
    }

    // colors
    Map<String, Color> colors = new HashMap<>();
    colors.put(AbstractOutputGenerator.OPENNESS, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.OPENNESS + COLOR_SUFFIX), Color.ORANGE));
    colors.put(AbstractOutputGenerator.EXTRAVERSION, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.EXTRAVERSION + COLOR_SUFFIX), Color.YELLOW));
    colors.put(AbstractOutputGenerator.AGREEABLENESS, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.AGREEABLENESS + COLOR_SUFFIX), Color.GREEN));
    colors.put(AbstractOutputGenerator.CONSCIENTIOUSNESS, ColorHelper.valueOf(
            namespace.getString(AbstractOutputGenerator.CONSCIENTIOUSNESS + COLOR_SUFFIX), Color.BLUE));
    colors.put(AbstractOutputGenerator.NEUROTICISM, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.NEUROTICISM + COLOR_SUFFIX), Color.RED));

    File outdir = new File(namespace.getString(OUTPUT));

    // resolve short class names against the default packages
    String centerCls = namespace.getString(CENTER);
    if (!centerCls.contains("."))
        centerCls = AbstractTriangleCenterCalculation.class.getPackage().getName() + "." + centerCls;
    String generatorCls = namespace.getString(GENERATOR);
    if (!generatorCls.contains("."))
        generatorCls = AbstractOutputGenerator.class.getPackage().getName() + "." + generatorCls;
    AbstractOutputGenerator generator = (AbstractOutputGenerator) Class.forName(generatorCls).newInstance();
    generator.setVerbose(namespace.getBoolean(VERBOSE));
    generator.setColors(colors);
    generator.setBackground(ColorHelper.valueOf(namespace.getString(BACKGROUND), Color.BLACK));
    generator.setOpacity(namespace.getDouble(OPACITY));
    generator.setMargin(namespace.getDouble(MARGIN));
    generator.setCenter((AbstractTriangleCenterCalculation) Class.forName(centerCls).newInstance());
    if (generator instanceof AbstractOutputGeneratorWithDimensions) {
        AbstractOutputGeneratorWithDimensions pixel = (AbstractOutputGeneratorWithDimensions) generator;
        pixel.setWidth(namespace.getInt(WIDTH));
        pixel.setHeight(namespace.getInt(HEIGHT));
    }

    // convert 1-based user-facing column indices to 0-based record indices
    int colID = namespace.getInt(ID) - 1;
    int colMeasure = namespace.getInt(MEASURE) - 1;
    int colScore = namespace.getInt(SCORE) - 1;
    int colPercentile = namespace.getInt(PERCENTILE) - 1;
    String oldID = "";
    Map<String, List<Double>> test = new HashMap<>();
    // try-with-resources closes the reader/parser even if a row fails to parse
    try (Reader reader = new FileReader(namespace.getString(CSV));
            CSVParser csvparser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
        for (CSVRecord rec : csvparser) {
            if (rec.size() < 4)
                continue;
            String id = rec.get(colID);
            if (!id.equals(oldID)) {
                // a new ID starts: flush everything accumulated for the previous one
                if (!test.isEmpty()) {
                    File outfile = new File(outdir + File.separator + oldID + "." + generator.getExtension());
                    String msg = generator.generate(test, outfile);
                    if (msg != null)
                        System.err.println("Failed to generate output for ID: " + oldID);
                }
                test.clear();
                oldID = id;
            }
            String measure = rec.get(colMeasure);
            double score = Double.parseDouble(rec.get(colScore));
            double percentile = Double.parseDouble(rec.get(colPercentile));
            test.put(measure, new ArrayList<>(Arrays.asList(new Double[] { score, percentile })));
        }
    }
    // flush the records accumulated for the last ID in the file
    if (!test.isEmpty()) {
        File outfile = new File(outdir + File.separator + oldID + "." + generator.getExtension());
        String msg = generator.generate(test, outfile);
        if (msg != null)
            System.err.println("Failed to generate output for ID: " + oldID);
    }
}

From source file:nzilbb.agcsv.AgCsvDeserializer.java

/**
 * Loads the serialized form of the graph, using the given set of named streams.
 * @param streams A list of named streams that contain all the transcription/annotation data required.
 * @param schema The layer schema, definining layers and the way they interrelate.
 * @return A list of parameters that require setting before {@link IDeserializer#deserialize()} can be invoked. This may be an empty list, and may include parameters with the value already set to a workable default. If there are parameters, and user interaction is possible, then the user may be presented with an interface for setting/confirming these parameters, before they are then passed to {@link IDeserializer#setParameters(ParameterSet)}.
 * @throws SerializationException If the graph could not be loaded.
 * @throws IOException On IO error./* w ww .j a v a  2 s  .  co  m*/
 * @throws SerializerNotConfiguredException If the configuration is not sufficient for deserialization.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public ParameterSet load(NamedStream[] streams, Schema schema)
        throws IOException, SerializationException, SerializerNotConfiguredException {
    if (getFieldDelimiter() == null)
        throw new SerializerNotConfiguredException("fieldDelimiter must be set.");
    ParameterSet parameters = new ParameterSet();

    // take the first csv stream, ignore all others.
    NamedStream csv = Utility.FindSingleStream(streams, ".csv", "text/csv");
    if (csv == null)
        throw new SerializationException("No CSV stream found");
    setName(csv.getName());
    setName(getName().replaceFirst("\\.csv$", "").replaceFirst("\\.ag$", ""));

    reset();

    CSVParser parser = new CSVParser(new InputStreamReader(csv.getStream()),
            CSVFormat.EXCEL.withDelimiter(fieldDelimiter.charAt(0)));
    mDiscoveredLayers = new HashMap<String, Layer>();
    Vector<CSVRecord> vRecords = new Vector<CSVRecord>();
    mCsvData.put("anchor", vRecords); // start with anchors

    // read all the lines, and extract the layer names
    for (CSVRecord line : parser) {
        // does it have only one field? - the layer name
        if (line.get(0).equals("layer")) {
            Layer layer = new Layer(line.get(1), line.get(2), Integer.parseInt(line.get(5)), true, // peers
                    false, // peersOverlap
                    false, // saturated
                    line.get(4).equals("W") ? schema.getWordLayerId() // parentId
                            : line.get(4).equals("M") ? schema.getTurnLayerId() // parentId
                                    : line.get(4).equals("F") ? "graph" : "segments", // parentId
                    true); // parentIncludes
            int layerId = Integer.parseInt(line.get(6));
            if (layerId == 11) // turn
            {
                layer.setParentId(schema.getParticipantLayerId());
            } else if (layerId == 12) // utterance
            {
                layer.setSaturated(true);
            } else if (layerId == 0) // transcription
            {
                layer.setParentId(schema.getTurnLayerId());
            } else if (layerId == 2) // orthography
            {
                layer.setPeers(false);
                layer.setSaturated(true);
            } else if (layerId == 1) // segments
            {
                layer.setSaturated(true);
            }
            layer.put("@layer_id", layerId);
            layer.put("@type", line.get(3));
            layer.put("@scope", line.get(4));
            mDiscoveredLayers.put(line.get(1), layer);
            Parameter p = new Parameter(layer.getId(), Layer.class, layer.getId(), layer.getDescription(),
                    true);
            p.setValue(schema.getLayer(layer.getId()));
            p.setPossibleValues(schema.getLayers().values());
            parameters.addParameter(p);

            // start a new set of records
            vRecords = new Vector<CSVRecord>();
            mCsvData.put(layer.getId(), vRecords);
        }
        vRecords.add(line);
    } // next line
    parser.close();

    return parameters;
}

From source file:nzilbb.agcsv.AgCsvDeserializer.java

/**
 * Deserializes the serialized data, generating one or more {@link Graph}s.
 * @return A list of valid (if incomplete) {@link Graph}s.
 * @throws SerializerNotConfiguredException if the object has not been configured.
 * @throws SerializationParametersMissingException if the parameters for this particular graph have not been set.
 * @throws SerializationException if errors occur during deserialization.
 */
public Graph[] deserialize() throws SerializerNotConfiguredException, SerializationParametersMissingException,
        SerializationException {
    // if there are errors, accumulate as many as we can before throwing SerializationException
    SerializationException errors = null;

    Graph graph = new Graph();
    graph.setId(getName());
    // add layers to the graph
    // we don't just copy the whole schema, because that would imply that all the extra layers
    // contained no annotations, which is not necessarily true
    graph.addLayer((Layer) s.getParticipantLayer().clone());
    graph.getSchema().setParticipantLayerId(s.getParticipantLayer().getId());
    graph.addLayer((Layer) s.getTurnLayer().clone());
    graph.getSchema().setTurnLayerId(s.getTurnLayer().getId());
    graph.addLayer((Layer) s.getUtteranceLayer().clone());
    graph.getSchema().setUtteranceLayerId(s.getUtteranceLayer().getId());
    graph.addLayer((Layer) s.getWordLayer().clone());
    graph.getSchema().setWordLayerId(s.getWordLayer().getId());
    for (String layerId : mDiscoveredLayers.keySet()) {
        if (mDiscoveredLayers.get(layerId) != null) {
            graph.addLayer((Layer) mDiscoveredLayers.get(layerId).clone());
        }
    } // next layer

    // anchors
    for (CSVRecord line : mCsvData.get("anchor")) {
        if (line.get(1).equals("offset"))
            continue; // skip header line
        // use valueOf instead of the deprecated boxing constructors
        Anchor anchor = new Anchor(line.get(0), Double.valueOf(line.get(1)), Integer.valueOf(line.get(2)));
        graph.addAnchor(anchor);
        if (line.size() > 3) {
            String comment = line.get(3);
            if (comment.length() > 0) {
                anchor.put("comment", comment);
            }
        }
    } // next anchor
    mCsvData.remove("anchor");

    // layers
    for (String originalId : mCsvData.keySet()) {
        if (mDiscoveredLayers.get(originalId) != null) { // mapped to a schema layer
            try {
                readAnnotations(mCsvData.get(originalId), mDiscoveredLayers.get(originalId), graph);
            } catch (SerializationException exception) {
                // keep the first exception as the carrier, fold later ones into it
                if (errors == null) {
                    errors = exception;
                } else {
                    errors.addError(SerializationException.ErrorType.Other, exception.getMessage());
                }
            }
        } // mapped to a schema layer
    } // next layer

    if (errors != null)
        throw errors;
    Graph[] graphs = { graph };
    return graphs;
}

From source file:nzilbb.agcsv.AgCsvDeserializer.java

/**
 * Create annotations from the given CSV rows.
 * @param lines CSV records./* w w  w .  ja  v a  2s .c o m*/
 * @param layer Layer for the annotations.
 * @param graph Graph to add the annotations to.
 * @throws SerializationException On error.
 */
public void readAnnotations(Vector<CSVRecord> lines, Layer layer, Graph graph) throws SerializationException {
    // map header columns
    HashMap<String, Integer> mHeadings = new HashMap<String, Integer>();
    for (int c = 0; c < lines.elementAt(1).size(); c++) {
        String sHeader = lines.elementAt(1).get(c);
        if (sHeader.equalsIgnoreCase("id"))
            mHeadings.put("id", c);
        else if (sHeader.equalsIgnoreCase("startAnchor.id"))
            mHeadings.put("startAnchor.id", c);
        else if (sHeader.equalsIgnoreCase("endAnchor.id"))
            mHeadings.put("endAnchor.id", c);
        else if (sHeader.equalsIgnoreCase("label"))
            mHeadings.put("label", c);
        else if (sHeader.equalsIgnoreCase("labelStatus"))
            mHeadings.put("labelStatus", c);
        else if (sHeader.equalsIgnoreCase("turnAnnotationId"))
            mHeadings.put("turnAnnotationId", c);
        else if (sHeader.equalsIgnoreCase("ordinalInTurn"))
            mHeadings.put("ordinalInTurn", c);
        else if (sHeader.equalsIgnoreCase("wordAnnotationId"))
            mHeadings.put("wordAnnotationId", c);
        else if (sHeader.equalsIgnoreCase("ordinalInWord"))
            mHeadings.put("ordinalInWord", c);
        else if (sHeader.equalsIgnoreCase("segmentAnnotationId"))
            mHeadings.put("segmentAnnotationId", c);
    } // next header
    int highestHeaderIndex = 0;
    for (Integer i : mHeadings.values())
        highestHeaderIndex = Math.max(highestHeaderIndex, i);
    mHeadings.put("comment", highestHeaderIndex + 1);

    for (int i = 2; i < lines.size(); i++) {
        CSVRecord line = lines.elementAt(i);
        Annotation annotation = new Annotation(line.get(mHeadings.get("id")), line.get(mHeadings.get("label")),
                layer.getId(), line.get(mHeadings.get("startAnchor.id")),
                line.get(mHeadings.get("endAnchor.id")));
        annotation.setConfidence(new Integer(line.get(mHeadings.get("labelStatus"))));
        if (mHeadings.get("comment") < line.size()) {
            String comment = line.get(mHeadings.get("comment"));
            if (comment.length() > 0) {
                annotation.put("comment", comment);
            }
        }

        // parent
        if (layer.getParentId().equals("graph")) {
            annotation.setParentId(graph.getId());
        } else if (layer.getParentId().equals(graph.getSchema().getTurnLayerId())) {
            if (layer.getId().equals(graph.getSchema().getUtteranceLayerId())) {
                // make sure turn exists
                Annotation turn = graph.getAnnotation(line.get(mHeadings.get("turnAnnotationId")));
                if (turn == null) {

                    // make sure participant exists
                    Annotation participant = graph.getAnnotation(annotation.getLabel());
                    if (participant == null) {
                        participant = new Annotation(annotation.getLabel(), annotation.getLabel(),
                                graph.getSchema().getParticipantLayerId());
                        graph.addAnnotation(participant);
                    }

                    turn = new Annotation(line.get(mHeadings.get("turnAnnotationId")), annotation.getLabel(),
                            graph.getSchema().getTurnLayerId(),
                            // start/end IDs are set, but the anchor's themselves aren't added
                            line.get(mHeadings.get("turnAnnotationId")) + " start",
                            line.get(mHeadings.get("turnAnnotationId")) + " end", participant.getId());
                    graph.addAnnotation(turn);
                } // turn isn't there
            } // utterance layer
            annotation.setParentId(line.get(mHeadings.get("turnAnnotationId")));
        } else if (layer.getParentId().equals(graph.getSchema().getWordLayerId())) {
            annotation.setParentId(line.get(mHeadings.get("wordAnnotationId")));
        } else if (layer.getParentId().equals("segments")) {
            annotation.setParentId(line.get(mHeadings.get("segmentAnnotationId")));
        } else if (layer.getId().equals(graph.getSchema().getTurnLayerId())) { // turn layer
                                                                               // make sure participant exists
            Annotation participant = graph.getAnnotation(annotation.getLabel());
            if (participant == null) {
                participant = new Annotation(annotation.getLabel(), annotation.getLabel(),
                        graph.getSchema().getParticipantLayerId());
                graph.addAnnotation(participant);
            }
            annotation.setParentId(participant.getId());
        }

        // ordinal
        if (layer.getId().equals(graph.getSchema().getWordLayerId())) {
            annotation.setOrdinal(Integer.parseInt(line.get(mHeadings.get("ordinalInTurn"))));
        } else if (layer.getId().equals("segments")) {
            annotation.setOrdinal(Integer.parseInt(line.get(mHeadings.get("ordinalInWord"))));
        }
        graph.addAnnotation(annotation);
    }
}

From source file:nzilbb.csv.CsvDeserializer.java

/**
 * Deserializes the serialized data, generating one or more {@link Graph}s.
 * <p>Many data formats will only yield one graph (e.g. Transcriber
 * transcript or Praat textgrid), however there are formats that
 * are capable of storing multiple transcripts in the same file
 * (e.g. AGTK, Transana XML export), which is why this method
 * returns a list./*from  w ww . j a va2 s.co  m*/
 * <p>This deserializer generates one graph per data row in the CSV file.
 * @return A list of valid (if incomplete) {@link Graph}s. 
 * @throws SerializerNotConfiguredException if the object has not been configured.
 * @throws SerializationParametersMissingException if the parameters for this particular graph have not been set.
 * @throws SerializationException if errors occur during deserialization.
 */
public Graph[] deserialize() throws SerializerNotConfiguredException, SerializationParametersMissingException,
        SerializationException {
    if (participantLayer == null)
        throw new SerializerNotConfiguredException("Participant layer not set");
    if (turnLayer == null)
        throw new SerializerNotConfiguredException("Turn layer not set");
    if (utteranceLayer == null)
        throw new SerializerNotConfiguredException("Utterance layer not set");
    if (wordLayer == null)
        throw new SerializerNotConfiguredException("Word layer not set");
    if (schema == null)
        throw new SerializerNotConfiguredException("Layer schema not set");

    validate();

    String participantColumn = (String) parameters.get("who").getValue();
    String textColumn = (String) parameters.get("text").getValue();

    // if there are errors, accumlate as many as we can before throwing SerializationException
    SerializationException errors = null;

    Vector<Graph> graphs = new Vector<Graph>();
    Iterator<CSVRecord> records = getParser().iterator();
    while (records.hasNext()) {
        CSVRecord record = records.next();
        Graph graph = new Graph();
        if (parameters == null || parameters.get("id") == null || parameters.get("id").getValue() == null) {
            graph.setId(getName() + "-" + record.getRecordNumber());
        } else {
            graph.setId(record.get((String) parameters.get("id").getValue()));
        }
        graph.setOffsetUnits(Constants.UNIT_CHARACTERS);

        // creat the 0 anchor to prevent graph tagging from creating one with no confidence
        Anchor firstAnchor = graph.getOrCreateAnchorAt(0.0, Constants.CONFIDENCE_MANUAL);
        Anchor lastAnchor = firstAnchor;

        // add layers to the graph
        // we don't just copy the whole schema, because that would imply that all the extra layers
        // contained no annotations, which is not necessarily true
        graph.addLayer((Layer) participantLayer.clone());
        graph.getSchema().setParticipantLayerId(participantLayer.getId());
        graph.addLayer((Layer) turnLayer.clone());
        graph.getSchema().setTurnLayerId(turnLayer.getId());
        graph.addLayer((Layer) utteranceLayer.clone());
        graph.getSchema().setUtteranceLayerId(utteranceLayer.getId());
        graph.addLayer((Layer) wordLayer.clone());
        graph.getSchema().setWordLayerId(wordLayer.getId());
        if (parameters != null) {
            for (Parameter p : parameters.values()) {
                if (p.getValue() instanceof Layer) {
                    Layer layer = (Layer) p.getValue();
                    if (layer != null && graph.getLayer(layer.getId()) == null) { // haven't added this layer yet
                        graph.addLayer((Layer) layer.clone());
                    }
                }
            }
        }

        // participant/author
        Annotation participant = graph.createTag(graph, schema.getParticipantLayerId(),
                record.get(participantColumn));

        // meta-data
        for (String header : getHeaderMap().keySet()) {
            if (header.trim().length() == 0)
                continue;
            Parameter p = parameters.get("header_" + getHeaderMap().get(header));
            if (p != null && p.getValue() != null) {
                Layer layer = (Layer) p.getValue();
                String value = record.get(header);
                if (layer.getParentId().equals(schema.getRoot().getId())) // graph tag
                {
                    graph.createTag(graph, layer.getId(), value);
                } else // participant tag
                {
                    graph.createTag(participant, layer.getId(), value);
                }
            } // parameter set
        } // next header

        // text
        Annotation turn = new Annotation(null, participant.getLabel(), getTurnLayer().getId());
        graph.addAnnotation(turn);
        turn.setParent(participant);
        turn.setStart(graph.getOrCreateAnchorAt(0.0, Constants.CONFIDENCE_MANUAL));
        Annotation line = new Annotation(null, turn.getLabel(), getUtteranceLayer().getId());
        line.setParentId(turn.getId());
        line.setStart(turn.getStart());
        int iLastPosition = 0;

        String sLine = record.get(textColumn).trim();
        int iNumChars = sLine.length();
        line = new Annotation(null, sLine, getUtteranceLayer().getId());
        line.setParentId(turn.getId());
        line.setStart(turn.getStart());
        Anchor end = graph.getOrCreateAnchorAt(((double) iNumChars + 1), Constants.CONFIDENCE_MANUAL);
        line.setEnd(end);
        graph.addAnnotation(line);

        // ensure we have an utterance tokenizer
        if (getTokenizer() == null) {
            setTokenizer(new SimpleTokenizer(getUtteranceLayer().getId(), getWordLayer().getId()));
        }
        try {
            tokenizer.transform(graph);
        } catch (TransformationException exception) {
            if (errors == null)
                errors = new SerializationException();
            if (errors.getCause() == null)
                errors.initCause(exception);
            errors.addError(SerializationException.ErrorType.Tokenization, exception.getMessage());
        }
        graph.commit();

        OrthographyClumper clumper = new OrthographyClumper(wordLayer.getId(), utteranceLayer.getId());
        try {
            // clump non-orthographic 'words' with real words
            clumper.transform(graph);
            graph.commit();
        } catch (TransformationException exception) {
            if (errors == null)
                errors = new SerializationException();
            if (errors.getCause() == null)
                errors.initCause(exception);
            errors.addError(SerializationException.ErrorType.Tokenization, exception.getMessage());
        }

        if (errors != null)
            throw errors;

        // set end anchors of graph tags
        for (Annotation a : graph.list(getParticipantLayer().getId())) {
            a.setStartId(firstAnchor.getId());
            a.setEndId(lastAnchor.getId());
        }

        graph.commit();

        graphs.add(graph);
    } // next record      

    return graphs.toArray(new Graph[0]);
}

From source file:onlinenewspopularity.DataFormatter.java

/**
 * Reads the file and randomly populates the data.
 * @return matrix list
 * The list has the following elements:
 * 1. List of features (mx1 ArrayList)
 * 2. Target column name
 * 3. Data for training (n1xm matrix)
 * 4. Target values for training data (n1x1 matrix)
 * 5. Test data (nxm matrix)
 * 6. Target values for test data (n2x2 matrix)
 * NOTE: n1 is the length of training data set.
 *       n2 is the length of test data set.
 *       n2 = Constants.SIZE*Constants.TEST_SET_RATIO
 *       n1 = Constants.SIZE-n2
 * @throws Exception if the file cannot be read or a value cannot be parsed
 */
public List<Matrix> readData() throws Exception {
    try {
        try (Reader br = new FileReader(new File(fileName))) {
            Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(br);

            List<String> features = new ArrayList<>();
            String predictColName;

            // consume the header row; the record iterator continues after it
            Iterator<CSVRecord> itr = records.iterator();
            CSVRecord header = itr.next();

            // feature 0 is the intercept column; the last column is the target
            features.add(Constants.FEATURE_COL1_NAME);
            for (int i = Constants.INITIAL_FEATURE_INDEX; i < header.size() - 1; i++) {
                features.add(header.get(i).trim());
            }
            predictColName = header.get(header.size() - 1).trim();

            trainStat = new double[2][features.size()];

            double[][] data = new double[Constants.SIZE][features.size()];
            double[][] res = new double[Constants.SIZE][1];
            // boolean[] elements default to false, i.e. "not a valid feature"
            boolean[] validFeature = new boolean[features.size()];
            int featureCount = 1;

            // pool of still-unused row slots; rows are assigned at random
            List<Integer> indices = new ArrayList<>(Constants.SIZE);
            int n = Constants.SIZE;
            for (int i = 0; i < n; i++) {
                indices.add(i);
            }
            Random randGen = new Random();

            validFeature[0] = true; // theta_0 is a valid feature
            int i = 0;
            for (CSVRecord record : records) {
                if (i < Constants.SIZE && !indices.isEmpty()) {
                    int index = indices.get(randGen.nextInt(indices.size()));
                    for (int j = 0; j <= features.size(); j++) {
                        if (j == 0) {
                            data[index][j] = 1.0; // intercept term
                        } else if (j == features.size()) {
                            // last CSV column is the target value
                            res[index][0] = Double.parseDouble(record.get(record.size() - 1));
                        } else {
                            data[index][j] = Double
                                    .parseDouble(record.get(j + Constants.INITIAL_FEATURE_INDEX - 1));
                            // a feature becomes "valid" the first time it is non-zero
                            if (data[index][j] != 0 && !validFeature[j]) {
                                featureCount++;
                                validFeature[j] = true;
                            }
                        }
                    }
                    indices.remove(Integer.valueOf(index)); // remove by value, not position
                } else {
                    break;
                }
                i++;
            }

            // Remove empty (all-zero) features, compacting data columns left
            if (featureCount < features.size()) {
                List<String> featuresCopy = new ArrayList<>(features);
                double[][] newData = new double[Constants.SIZE][featureCount];
                int k = 0;
                int var = 0; // count of removals so far, to offset indices into 'features'

                for (int j = 0; j < featuresCopy.size(); j++) {
                    if (validFeature[j]) {
                        for (i = 0; i < Constants.SIZE; i++) {
                            newData[i][k] = data[i][j];
                        }
                        k++;
                    } else {
                        LOGGER.log(Level.INFO, "Removing empty feature: {0}", features.get(j - var));
                        features.remove(j - var);
                        var++;
                    }
                }

                data = newData;
            }

            int testLen = (int) (Constants.TEST_SET_RATIO * Constants.SIZE);
            int trainLen = Constants.SIZE - testLen;

            Matrix tmpx = new Matrix(data);
            Matrix tmpy = new Matrix(res);

            // The returned list is heterogeneous (feature names, target name,
            // then four matrices) even though the declared element type is
            // Matrix — callers index and cast per the Javadoc contract.
            List<Object> temp = new ArrayList<>();
            temp.add(features);
            temp.add(predictColName);
            temp.add(tmpx.getMatrix(0, trainLen - 1, 0, tmpx.getColumnDimension() - 1));
            temp.add(tmpy.getMatrix(0, trainLen - 1, 0, tmpy.getColumnDimension() - 1));
            temp.add(tmpx.getMatrix(trainLen, tmpx.getRowDimension() - 1, 0, tmpx.getColumnDimension() - 1));
            temp.add(tmpy.getMatrix(trainLen, tmpy.getRowDimension() - 1, 0, tmpy.getColumnDimension() - 1));

            @SuppressWarnings("unchecked")
            List<Matrix> result = (List<Matrix>) (List<?>) temp;
            return result;
        }
    } catch (Exception e) {
        // log at the boundary, then rethrow so callers still see the failure
        LOGGER.log(Level.WARNING, "{0}: {1}", new Object[] { e.getClass().getName(), e.getMessage() });
        throw e;
    }
}