List of usage examples for org.apache.commons.csv.CSVRecord.size()
public int size()
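Before the full examples below, a minimal sketch of the typical use of size(): it returns the number of values in the record, which is handy for guarding against short rows before calling get(int). The file name "data.csv" and the two-column assumption are placeholders for illustration, not part of any example below.

import java.io.FileReader;
import java.io.Reader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CSVRecordSizeExample {
    public static void main(String[] args) throws Exception {
        // "data.csv" is a placeholder; any CSV file with at least two columns works
        try (Reader reader = new FileReader("data.csv");
             CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT)) {
            for (CSVRecord record : parser) {
                // size() returns the number of values in this record
                if (record.size() < 2) {
                    continue; // skip rows that are missing columns
                }
                System.out.println(record.size() + " values: " + record.get(0) + ", " + record.get(1));
            }
        }
    }
}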
From source file:nz.ac.waikato.cms.supernova.SupernovaCSV.java
public static void main(String[] args) throws Exception {
    ArgumentParser parser;
    parser = ArgumentParsers.newArgumentParser("I am supernova");
    parser.description("Generates output according to 'I am supernova' by Keith Soo.\n"
            + "Loads scores/percentiles from a CSV file to generate multiple outputs at once.\n"
            + "Expected four columns (name of column is irrelevant):\n"
            + "- ID: the filename (excluding path and extension)\n"
            + "- Measure: the measure (" + MEASURE_LIST + ")\n"
            + "- Score: the score of the measure\n"
            + "- Percentile: the percentile of the measure\n"
            + "\n"
            + "Project homepage:\n"
            + "https://github.com/fracpete/i-am-supernova");

    // colors
    parser.addArgument("--" + AbstractOutputGenerator.OPENNESS + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.OPENNESS + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.ORANGE))
            .help("The color for '" + AbstractOutputGenerator.OPENNESS + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.ORANGE) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.EXTRAVERSION + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.EXTRAVERSION + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.YELLOW))
            .help("The color for '" + AbstractOutputGenerator.EXTRAVERSION + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.YELLOW) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.AGREEABLENESS + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.AGREEABLENESS + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.GREEN))
            .help("The color for '" + AbstractOutputGenerator.AGREEABLENESS + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.GREEN) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.CONSCIENTIOUSNESS + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.CONSCIENTIOUSNESS + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.BLUE))
            .help("The color for '" + AbstractOutputGenerator.CONSCIENTIOUSNESS + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.BLUE) + ").");
    parser.addArgument("--" + AbstractOutputGenerator.NEUROTICISM + COLOR_SUFFIX)
            .metavar(AbstractOutputGenerator.NEUROTICISM + COLOR_SUFFIX).type(String.class)
            .setDefault(ColorHelper.toHex(Color.RED))
            .help("The color for '" + AbstractOutputGenerator.NEUROTICISM + "' in hex format (e.g., "
                    + ColorHelper.toHex(Color.RED) + ").");

    // other parameters
    parser.addArgument("--" + CSV).metavar(CSV).type(String.class).required(true)
            .help("The CSV file containing the scores/percentiles (header must be present).");
    parser.addArgument("--" + ID).metavar(ID).type(Integer.class).setDefault(1)
            .help("The 1-based index of the column in the CSV file containing the ID for the output file.");
    parser.addArgument("--" + MEASURE).metavar(MEASURE).type(Integer.class).setDefault(2)
            .help("The 1-based index of the column in the CSV file containing the measure name.\n"
                    + "Allowed values: " + MEASURE_LIST);
    parser.addArgument("--" + SCORE).metavar(SCORE).type(Integer.class).setDefault(3)
            .help("The 1-based index of the column in the CSV file containing the scores.");
    parser.addArgument("--" + PERCENTILE).metavar(PERCENTILE).type(Integer.class).setDefault(4)
            .help("The 1-based index of the column in the CSV file containing the percentiles.");
    parser.addArgument("--" + BACKGROUND).metavar(BACKGROUND).type(String.class)
            .setDefault(ColorHelper.toHex(Color.BLACK)).help("The background color.");
    parser.addArgument("--" + OPACITY).metavar(OPACITY).type(Double.class).setDefault(0.1)
            .help("The opacity (0-1).");
    parser.addArgument("--" + MARGIN).metavar(MARGIN).type(Double.class).setDefault(0.2)
            .help("The margin in the output (0-1).");
    parser.addArgument("--" + WIDTH).metavar(WIDTH).type(Integer.class).setDefault(2000)
            .help("The width of the output.");
    parser.addArgument("--" + HEIGHT).metavar(HEIGHT).type(Integer.class).setDefault(2000)
            .help("The height of the output.");
    parser.addArgument("--" + CENTER).metavar(CENTER).type(String.class).setDefault(Incenter.class.getName())
            .help("The name of the algorithm for calculating the center of a triangle.\n"
                    + "Available: " + Registry.toString(Registry.getCenters(), true));
    parser.addArgument("--" + GENERATOR).metavar(GENERATOR).type(String.class).setDefault(PNG.class.getName())
            .help("The name of the generator class to use.\n"
                    + "Available: " + Registry.toString(Registry.getGenerators(), true));
    parser.addArgument("--" + OUTPUT).metavar(OUTPUT).type(String.class)
            .help("The directory to store the output in.");
    parser.addArgument("--" + VERBOSE).metavar(VERBOSE).type(Boolean.class).action(Arguments.storeTrue())
            .help("Whether to output logging information.");

    Namespace namespace;
    try {
        namespace = parser.parseArgs(args);
    } catch (Exception e) {
        if (!(e instanceof HelpScreenException))
            parser.printHelp();
        return;
    }

    // colors
    Map<String, Color> colors = new HashMap<>();
    colors.put(AbstractOutputGenerator.OPENNESS, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.OPENNESS + COLOR_SUFFIX), Color.ORANGE));
    colors.put(AbstractOutputGenerator.EXTRAVERSION, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.EXTRAVERSION + COLOR_SUFFIX), Color.YELLOW));
    colors.put(AbstractOutputGenerator.AGREEABLENESS, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.AGREEABLENESS + COLOR_SUFFIX), Color.GREEN));
    colors.put(AbstractOutputGenerator.CONSCIENTIOUSNESS, ColorHelper.valueOf(
            namespace.getString(AbstractOutputGenerator.CONSCIENTIOUSNESS + COLOR_SUFFIX), Color.BLUE));
    colors.put(AbstractOutputGenerator.NEUROTICISM, ColorHelper
            .valueOf(namespace.getString(AbstractOutputGenerator.NEUROTICISM + COLOR_SUFFIX), Color.RED));

    File outdir = new File(namespace.getString(OUTPUT));

    String centerCls = namespace.getString(CENTER);
    if (!centerCls.contains("."))
        centerCls = AbstractTriangleCenterCalculation.class.getPackage().getName() + "." + centerCls;
    String generatorCls = namespace.getString(GENERATOR);
    if (!generatorCls.contains("."))
        generatorCls = AbstractOutputGenerator.class.getPackage().getName() + "." + generatorCls;

    AbstractOutputGenerator generator = (AbstractOutputGenerator) Class.forName(generatorCls).newInstance();
    generator.setVerbose(namespace.getBoolean(VERBOSE));
    generator.setColors(colors);
    generator.setBackground(ColorHelper.valueOf(namespace.getString(BACKGROUND), Color.BLACK));
    generator.setOpacity(namespace.getDouble(OPACITY));
    generator.setMargin(namespace.getDouble(MARGIN));
    generator.setCenter((AbstractTriangleCenterCalculation) Class.forName(centerCls).newInstance());
    if (generator instanceof AbstractOutputGeneratorWithDimensions) {
        AbstractOutputGeneratorWithDimensions pixel = (AbstractOutputGeneratorWithDimensions) generator;
        pixel.setWidth(namespace.getInt(WIDTH));
        pixel.setHeight(namespace.getInt(HEIGHT));
    }

    int colID = namespace.getInt(ID) - 1;
    int colMeasure = namespace.getInt(MEASURE) - 1;
    int colScore = namespace.getInt(SCORE) - 1;
    int colPercentile = namespace.getInt(PERCENTILE) - 1;

    Reader reader = new FileReader(namespace.getString(CSV));
    CSVParser csvparser = new CSVParser(reader, CSVFormat.EXCEL.withHeader());
    String oldID = "";
    Map<String, List<Double>> test = new HashMap<>();
    for (CSVRecord rec : csvparser) {
        // skip rows that don't have all four columns
        if (rec.size() < 4)
            continue;
        String id = rec.get(colID);
        if (!id.equals(oldID)) {
            if (!test.isEmpty()) {
                File outfile = new File(outdir + File.separator + oldID + "." + generator.getExtension());
                String msg = generator.generate(test, outfile);
                if (msg != null)
                    System.err.println("Failed to generate output for ID: " + oldID);
            }
            test.clear();
            oldID = id;
        }
        String measure = rec.get(colMeasure);
        double score = Double.parseDouble(rec.get(colScore));
        double percentile = Double.parseDouble(rec.get(colPercentile));
        test.put(measure, new ArrayList<>(Arrays.asList(new Double[] { score, percentile })));
    }
    if (!test.isEmpty()) {
        File outfile = new File(outdir + File.separator + oldID + "." + generator.getExtension());
        String msg = generator.generate(test, outfile);
        if (msg != null)
            System.err.println("Failed to generate output for ID: " + oldID);
    }
}
From source file:nzilbb.agcsv.AgCsvDeserializer.java
/**
 * Deserializes the serialized data, generating one or more {@link Graph}s.
 * @return A list of valid (if incomplete) {@link Graph}s.
 * @throws SerializerNotConfiguredException if the object has not been configured.
 * @throws SerializationParametersMissingException if the parameters for this particular graph have not been set.
 * @throws SerializationException if errors occur during deserialization.
 */
public Graph[] deserialize()
        throws SerializerNotConfiguredException, SerializationParametersMissingException, SerializationException {
    // if there are errors, accumulate as many as we can before throwing SerializationException
    SerializationException errors = null;

    Graph graph = new Graph();
    graph.setId(getName());

    // add layers to the graph
    // we don't just copy the whole schema, because that would imply that all the extra layers
    // contained no annotations, which is not necessarily true
    graph.addLayer((Layer) s.getParticipantLayer().clone());
    graph.getSchema().setParticipantLayerId(s.getParticipantLayer().getId());
    graph.addLayer((Layer) s.getTurnLayer().clone());
    graph.getSchema().setTurnLayerId(s.getTurnLayer().getId());
    graph.addLayer((Layer) s.getUtteranceLayer().clone());
    graph.getSchema().setUtteranceLayerId(s.getUtteranceLayer().getId());
    graph.addLayer((Layer) s.getWordLayer().clone());
    graph.getSchema().setWordLayerId(s.getWordLayer().getId());
    for (String layerId : mDiscoveredLayers.keySet()) {
        if (mDiscoveredLayers.get(layerId) != null) {
            graph.addLayer((Layer) mDiscoveredLayers.get(layerId).clone());
        }
    } // next layer

    // anchors
    for (CSVRecord line : mCsvData.get("anchor")) {
        if (line.get(1).equals("offset"))
            continue; // skip header line
        Anchor anchor = new Anchor(line.get(0), new Double(line.get(1)), new Integer(line.get(2)));
        graph.addAnchor(anchor);
        if (line.size() > 3) {
            String comment = line.get(3);
            if (comment.length() > 0) {
                anchor.put("comment", comment);
            }
        }
    } // next anchor
    mCsvData.remove("anchor");

    // layers
    for (String originalId : mCsvData.keySet()) {
        if (mDiscoveredLayers.get(originalId) != null) { // mapped to a schema layer
            try {
                readAnnotations(mCsvData.get(originalId), mDiscoveredLayers.get(originalId), graph);
            } catch (SerializationException exception) {
                if (errors == null) {
                    errors = exception;
                } else {
                    errors.addError(SerializationException.ErrorType.Other, exception.getMessage());
                }
            }
        } // mapped to a schema layer
    } // next layer

    if (errors != null)
        throw errors;

    Graph[] graphs = { graph };
    return graphs;
}
From source file:nzilbb.agcsv.AgCsvDeserializer.java
/**
 * Create annotations from the given CSV rows.
 * @param lines CSV records.
 * @param layer Layer for the annotations.
 * @param graph Graph to add the annotations to.
 * @throws SerializationException On error.
 */
public void readAnnotations(Vector<CSVRecord> lines, Layer layer, Graph graph) throws SerializationException {
    // map header columns
    HashMap<String, Integer> mHeadings = new HashMap<String, Integer>();
    for (int c = 0; c < lines.elementAt(1).size(); c++) {
        String sHeader = lines.elementAt(1).get(c);
        if (sHeader.equalsIgnoreCase("id"))
            mHeadings.put("id", c);
        else if (sHeader.equalsIgnoreCase("startAnchor.id"))
            mHeadings.put("startAnchor.id", c);
        else if (sHeader.equalsIgnoreCase("endAnchor.id"))
            mHeadings.put("endAnchor.id", c);
        else if (sHeader.equalsIgnoreCase("label"))
            mHeadings.put("label", c);
        else if (sHeader.equalsIgnoreCase("labelStatus"))
            mHeadings.put("labelStatus", c);
        else if (sHeader.equalsIgnoreCase("turnAnnotationId"))
            mHeadings.put("turnAnnotationId", c);
        else if (sHeader.equalsIgnoreCase("ordinalInTurn"))
            mHeadings.put("ordinalInTurn", c);
        else if (sHeader.equalsIgnoreCase("wordAnnotationId"))
            mHeadings.put("wordAnnotationId", c);
        else if (sHeader.equalsIgnoreCase("ordinalInWord"))
            mHeadings.put("ordinalInWord", c);
        else if (sHeader.equalsIgnoreCase("segmentAnnotationId"))
            mHeadings.put("segmentAnnotationId", c);
    } // next header
    int highestHeaderIndex = 0;
    for (Integer i : mHeadings.values())
        highestHeaderIndex = Math.max(highestHeaderIndex, i);
    mHeadings.put("comment", highestHeaderIndex + 1);

    for (int i = 2; i < lines.size(); i++) {
        CSVRecord line = lines.elementAt(i);
        Annotation annotation = new Annotation(line.get(mHeadings.get("id")),
                line.get(mHeadings.get("label")), layer.getId(),
                line.get(mHeadings.get("startAnchor.id")), line.get(mHeadings.get("endAnchor.id")));
        annotation.setConfidence(new Integer(line.get(mHeadings.get("labelStatus"))));
        if (mHeadings.get("comment") < line.size()) {
            String comment = line.get(mHeadings.get("comment"));
            if (comment.length() > 0) {
                annotation.put("comment", comment);
            }
        }

        // parent
        if (layer.getParentId().equals("graph")) {
            annotation.setParentId(graph.getId());
        } else if (layer.getParentId().equals(graph.getSchema().getTurnLayerId())) {
            if (layer.getId().equals(graph.getSchema().getUtteranceLayerId())) {
                // make sure turn exists
                Annotation turn = graph.getAnnotation(line.get(mHeadings.get("turnAnnotationId")));
                if (turn == null) {
                    // make sure participant exists
                    Annotation participant = graph.getAnnotation(annotation.getLabel());
                    if (participant == null) {
                        participant = new Annotation(annotation.getLabel(), annotation.getLabel(),
                                graph.getSchema().getParticipantLayerId());
                        graph.addAnnotation(participant);
                    }
                    turn = new Annotation(line.get(mHeadings.get("turnAnnotationId")), annotation.getLabel(),
                            graph.getSchema().getTurnLayerId(),
                            // start/end IDs are set, but the anchors themselves aren't added
                            line.get(mHeadings.get("turnAnnotationId")) + " start",
                            line.get(mHeadings.get("turnAnnotationId")) + " end",
                            participant.getId());
                    graph.addAnnotation(turn);
                } // turn isn't there
            } // utterance layer
            annotation.setParentId(line.get(mHeadings.get("turnAnnotationId")));
        } else if (layer.getParentId().equals(graph.getSchema().getWordLayerId())) {
            annotation.setParentId(line.get(mHeadings.get("wordAnnotationId")));
        } else if (layer.getParentId().equals("segments")) {
            annotation.setParentId(line.get(mHeadings.get("segmentAnnotationId")));
        } else if (layer.getId().equals(graph.getSchema().getTurnLayerId())) { // turn layer
            // make sure participant exists
            Annotation participant = graph.getAnnotation(annotation.getLabel());
            if (participant == null) {
                participant = new Annotation(annotation.getLabel(), annotation.getLabel(),
                        graph.getSchema().getParticipantLayerId());
                graph.addAnnotation(participant);
            }
            annotation.setParentId(participant.getId());
        }

        // ordinal
        if (layer.getId().equals(graph.getSchema().getWordLayerId())) {
            annotation.setOrdinal(Integer.parseInt(line.get(mHeadings.get("ordinalInTurn"))));
        } else if (layer.getId().equals("segments")) {
            annotation.setOrdinal(Integer.parseInt(line.get(mHeadings.get("ordinalInWord"))));
        }
        graph.addAnnotation(annotation);
    }
}
From source file:onlinenewspopularity.DataFormatter.java
/**
 * Reads the file and randomly populates the data.
 * @return matrix list
 * The list has the following elements:
 * 1. List of features (mx1 ArrayList)
 * 2. Target column name
 * 3. Data for training (n1xm matrix)
 * 4. Target values for training data (n1x1 matrix)
 * 5. Test data (n2xm matrix)
 * 6. Target values for test data (n2x1 matrix)
 * NOTE: n1 is the length of the training data set.
 * n2 is the length of the test data set.
 * n2 = Constants.SIZE*Constants.TEST_SET_RATIO
 * n1 = Constants.SIZE-n2
 * @throws Exception
 */
public List<Matrix> readData() throws Exception {
    try {
        try (Reader br = new FileReader(new File(fileName))) {
            Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(br);
            List features = new ArrayList<>();
            String predictColName;

            Iterator<CSVRecord> itr = records.iterator();
            CSVRecord header = itr.next();
            features.add(Constants.FEATURE_COL1_NAME);
            for (int i = Constants.INITIAL_FEATURE_INDEX; i < header.size() - 1; i++) {
                features.add(header.get(i).trim());
            }
            predictColName = header.get(header.size() - 1).trim();

            trainStat = new double[2][features.size()];

            double[][] data = new double[Constants.SIZE][features.size()];
            double[][] res = new double[Constants.SIZE][1];
            boolean[] validFeature = new boolean[features.size()];
            int featureCount = 1;
            for (int i = 0; i < validFeature.length; i++) {
                validFeature[i] = Boolean.FALSE; // not a valid feature by default
            }

            List indices = new ArrayList<>();
            int n = Constants.SIZE;
            for (int i = 0; i < n; i++) {
                indices.add(i);
            }
            Random randGen = new Random();

            validFeature[0] = Boolean.TRUE; // theta_0 is a valid feature
            int i = 0;
            for (CSVRecord record : records) {
                if (i < Constants.SIZE && !indices.isEmpty()) {
                    int index = (int) indices.get(randGen.nextInt(indices.size()));
                    for (int j = 0; j <= features.size(); j++) {
                        if (j == 0) {
                            data[index][j] = 1.0;
                        } else if (j == features.size()) {
                            res[index][0] = Double.parseDouble(record.get(record.size() - 1));
                        } else {
                            data[index][j] = Double
                                    .parseDouble(record.get(j + Constants.INITIAL_FEATURE_INDEX - 1));
                            if (data[index][j] != 0) {
                                if (validFeature[j] == Boolean.FALSE) {
                                    featureCount++;
                                    validFeature[j] = Boolean.TRUE;
                                }
                            }
                        }
                    }
                    indices.remove((Object) index);
                } else {
                    break;
                }
                i++;
            }

            // remove empty features
            if (featureCount < features.size()) {
                List featuresCopy = new ArrayList<>();
                featuresCopy.addAll(features);
                double[][] newData = new double[Constants.SIZE][featureCount];
                int k = 0;
                int var = 0;
                for (int j = 0; j < featuresCopy.size(); j++) {
                    if (validFeature[j] == Boolean.TRUE) {
                        for (i = 0; i < Constants.SIZE; i++) {
                            newData[i][k] = data[i][j];
                        }
                        k++;
                    } else {
                        LOGGER.log(Level.INFO, "Removing empty feature: {0}", features.get(j - var));
                        features.remove(j - var);
                        var++;
                    }
                }
                data = newData;
            }

            int testLen = (int) (Constants.TEST_SET_RATIO * Constants.SIZE);
            int trainLen = Constants.SIZE - testLen;

            Matrix tmpx = new Matrix(data);
            Matrix tmpy = new Matrix(res);

            List temp = new ArrayList<>();
            temp.add(features);
            temp.add(predictColName);
            temp.add(tmpx.getMatrix(0, trainLen - 1, 0, tmpx.getColumnDimension() - 1));
            temp.add(tmpy.getMatrix(0, trainLen - 1, 0, tmpy.getColumnDimension() - 1));
            temp.add(tmpx.getMatrix(trainLen, tmpx.getRowDimension() - 1, 0, tmpx.getColumnDimension() - 1));
            temp.add(tmpy.getMatrix(trainLen, tmpy.getRowDimension() - 1, 0, tmpy.getColumnDimension() - 1));
            return temp;
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "{0}: {1}", new Object[] { e.getClass().getName(), e.getMessage() });
        throw e;
    }
}
From source file:org.apache.ambari.server.api.services.serializers.CsvSerializerTest.java
@Test
public void testSerializeResources_NoColumnInfo() throws Exception {
    Result result = new ResultImpl(true);
    result.setResultStatus(new ResultStatus(ResultStatus.STATUS.OK));
    TreeNode<Resource> tree = result.getResultTree();

    List<TreeMap<String, Object>> data = new ArrayList<TreeMap<String, Object>>() {
        {
            add(new TreeMap<String, Object>() {
                {
                    put("property1", "value1a");
                    put("property2", "value2a");
                    put("property3", "value3a");
                    put("property4", "value4a");
                }
            });
            add(new TreeMap<String, Object>() {
                {
                    put("property1", "value1'b");
                    put("property2", "value2'b");
                    put("property3", "value3'b");
                    put("property4", "value4'b");
                }
            });
            add(new TreeMap<String, Object>() {
                {
                    put("property1", "value1,c");
                    put("property2", "value2,c");
                    put("property3", "value3,c");
                    put("property4", "value4,c");
                }
            });
        }
    };

    tree.setName("items");
    tree.setProperty("isCollection", "true");

    addChildResource(tree, "resource", 0, data.get(0));
    addChildResource(tree, "resource", 1, data.get(1));
    addChildResource(tree, "resource", 2, data.get(2));

    replayAll();

    // execute test
    Object o = new CsvSerializer().serialize(result).toString().replace("\r", "");

    verifyAll();

    assertNotNull(o);

    StringReader reader = new StringReader(o.toString());
    CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT);
    List<CSVRecord> records = csvParser.getRecords();
    assertNotNull(records);
    assertEquals(3, records.size());

    int i = 0;
    for (CSVRecord record : records) {
        TreeMap<String, Object> actualData = data.get(i++);
        assertEquals(actualData.size(), record.size());
        for (String item : record) {
            assertTrue(actualData.containsValue(item));
        }
    }

    csvParser.close();
}
From source file:org.apache.ambari.view.hive.resources.uploads.parsers.csv.CSVIterator.java
@Override
public Row next() {
    CSVRecord row = iterator.next();
    Object[] values = new Object[row.size()];
    for (int i = 0; i < values.length; i++) {
        values[i] = row.get(i);
    }
    Row r = new Row(values);
    return r;
}
From source file:org.apache.batchee.csv.CSVFormatFactory.java
static CSVFormat newFormat(final String format, final String delimiter, final String quoteCharacter,
        final String quoteMode, final String commentMarker, final String escapeCharacter,
        final String ignoreSurroundingSpaces, final String ignoreEmptyLines, final String recordSeparator,
        final String nullString, final String headerComments, final String header,
        final String skipHeaderRecord, final String allowMissingColumnNames, final String readHeaders) {
    //CHECKSTYLE:ON
    CSVFormat out = format == null ? CSVFormat.DEFAULT : CSVFormat.valueOf(format);
    if (delimiter != null) {
        out = out.withDelimiter(delimiter.charAt(0));
    }
    if (quoteCharacter != null) {
        out = out.withQuote(quoteCharacter.charAt(0));
    }
    if (quoteMode != null) {
        out = out.withQuoteMode(QuoteMode.valueOf(quoteMode));
    }
    if (commentMarker != null) {
        out = out.withCommentMarker(commentMarker.charAt(0));
    }
    if (escapeCharacter != null) {
        out = out.withEscape(escapeCharacter.charAt(0));
    }
    if (ignoreSurroundingSpaces != null) {
        out = out.withIgnoreSurroundingSpaces(Boolean.parseBoolean(ignoreSurroundingSpaces));
    }
    if (ignoreEmptyLines != null) {
        out = out.withIgnoreEmptyLines(Boolean.parseBoolean(ignoreEmptyLines));
    }
    if (recordSeparator != null) {
        if ("\\n".equals(recordSeparator)) {
            out = out.withRecordSeparator('\n');
        } else if ("\\r\\n".equals(recordSeparator)) {
            out = out.withRecordSeparator("\r\n");
        } else {
            out = out.withRecordSeparator(recordSeparator);
        }
    }
    if (nullString != null) {
        out = out.withNullString(nullString);
    }
    if (headerComments != null && !headerComments.trim().isEmpty()) {
        out = out.withHeaderComments(headerComments.split(" *, *"));
    }
    if (Boolean.parseBoolean(readHeaders)) {
        out = out.withHeader();
    }
    if (header != null && !header.trim().isEmpty()) {
        try {
            // the header value can itself be CSV (quoted names etc.), so parse it as CSV
            final Iterator<CSVRecord> iterator = out.withHeader(new String[0])
                    .parse(new StringReader(header + '\n' + header)).iterator();
            final CSVRecord record = iterator.next();
            final List<String> list = new ArrayList<String>(record.size());
            for (final String h : record) {
                list.add(h);
            }
            out = out.withHeader(list.toArray(new String[record.size()]));
        } catch (final IOException e) { // can't actually occur
            out = out.withHeader(header.split(" *, *"));
        }
    }
    if (skipHeaderRecord != null) {
        out = out.withSkipHeaderRecord(Boolean.parseBoolean(skipHeaderRecord));
    }
    if (allowMissingColumnNames != null) {
        out = out.withAllowMissingColumnNames(Boolean.parseBoolean(allowMissingColumnNames));
    }
    return out;
}
From source file:org.apache.beam.sdk.extensions.sql.impl.schema.BeamTableUtils.java
public static BeamRecord csvLine2BeamSqlRow(CSVFormat csvFormat, String line,
        BeamRecordSqlType beamRecordSqlType) {
    List<Object> fieldsValue = new ArrayList<>(beamRecordSqlType.getFieldCount());
    try (StringReader reader = new StringReader(line)) {
        CSVParser parser = csvFormat.parse(reader);
        CSVRecord rawRecord = parser.getRecords().get(0);

        if (rawRecord.size() != beamRecordSqlType.getFieldCount()) {
            throw new IllegalArgumentException(String.format("Expect %d fields, but actually %d",
                    beamRecordSqlType.getFieldCount(), rawRecord.size()));
        } else {
            for (int idx = 0; idx < beamRecordSqlType.getFieldCount(); idx++) {
                String raw = rawRecord.get(idx);
                fieldsValue.add(autoCastField(beamRecordSqlType.getFieldTypeByIndex(idx), raw));
            }
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("decodeRecord failed!", e);
    }
    return new BeamRecord(beamRecordSqlType, fieldsValue);
}
From source file:org.apache.nifi.csv.CSVRecordReader.java
@Override
public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFields)
        throws IOException, MalformedRecordException {
    final RecordSchema schema = getSchema();

    final List<String> rawFieldNames = getRawFieldNames();
    final int numFieldNames = rawFieldNames.size();

    for (final CSVRecord csvRecord : csvParser) {
        final Map<String, Object> values = new LinkedHashMap<>();
        for (int i = 0; i < csvRecord.size(); i++) {
            final String rawFieldName = numFieldNames <= i ? "unknown_field_index_" + i : rawFieldNames.get(i);
            final String rawValue = csvRecord.get(i);

            final Optional<DataType> dataTypeOption = schema.getDataType(rawFieldName);

            if (!dataTypeOption.isPresent() && dropUnknownFields) {
                continue;
            }

            final Object value;
            if (coerceTypes && dataTypeOption.isPresent()) {
                value = convert(rawValue, dataTypeOption.get(), rawFieldName);
            } else if (dataTypeOption.isPresent()) {
                // The CSV Reader is going to return all fields as Strings, because CSV doesn't have any way to
                // dictate a field type. As a result, we will use the schema that we have to attempt to convert
                // the value into the desired type if it's a simple type.
                value = convertSimpleIfPossible(rawValue, dataTypeOption.get(), rawFieldName);
            } else {
                value = rawValue;
            }

            values.put(rawFieldName, value);
        }

        return new MapRecord(schema, values, coerceTypes, dropUnknownFields);
    }

    return null;
}
From source file:org.apache.nifi.processors.csv.ParseCSVRecord.java
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> outputAttrs = new HashMap<>();

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            final String fromAttribute = context.getProperty(PROP_RECORD_FROM_ATTRIBUTE).getValue();

            String unparsedRecord;
            // data source is the attribute
            if (StringUtils.isNotBlank(fromAttribute)) {
                unparsedRecord = original.getAttribute(fromAttribute);
                if (StringUtils.isBlank(unparsedRecord)) {
                    // will be routed to failure at the end of the method implementation
                    return;
                }
            } else {
                // data source is the content
                // TODO expose the charset property?
                LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
                if (!iterator.hasNext()) {
                    return;
                }
                unparsedRecord = iterator.next();
            }
            lineFound.set(true);

            final String format = context.getProperty(PROP_FORMAT).getValue();
            final String delimiter = context.getProperty(PROP_DELIMITER).evaluateAttributeExpressions(original)
                    .getValue();
            final String schemaPrefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final String valuePrefix = context.getProperty(PROP_VALUE_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final boolean trimValues = context.getProperty(PROP_TRIM_VALUES).asBoolean();

            final CSVFormat csvFormat = buildFormat(format, delimiter,
                    false, // this is a payload, not a header anymore
                    null); // no custom header

            final CSVParser parser = csvFormat.parse(new StringReader(unparsedRecord));
            List<CSVRecord> records = parser.getRecords();
            if (records.size() > 1) {
                // TODO revisit for NiFi's native micro-batching
                throw new ProcessException("Multi-line entries not supported");
            }

            CSVRecord record = records.get(0);

            Map<String, String> originalAttrs = original.getAttributes();
            // filter delimited schema attributes only
            Map<String, String> schemaAttrs = new HashMap<>();
            for (String key : originalAttrs.keySet()) {
                if (key.startsWith(schemaPrefix)) {
                    schemaAttrs.put(key, originalAttrs.get(key));
                }
            }

            // put key/value pairs into attributes
            for (int i = 0; i < record.size(); i++) {
                String columnName = schemaAttrs.get(schemaPrefix + (i + 1)); // 1-based column numbering
                if (columnName == null) {
                    // 1-based column index
                    columnName = String.valueOf(i + 1);
                }
                // TODO indexed schemaless parsing vs auto-schema vs user-provided schema
                String columnValue = record.get(i);
                if (trimValues) {
                    columnValue = columnValue.trim();
                }
                String attrName = (StringUtils.isBlank(valuePrefix) ? "delimited.column." : valuePrefix) + columnName;
                outputAttrs.put(attrName, columnValue);
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, outputAttrs);
        session.transfer(ff, REL_SUCCESS);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}