List of usage examples for org.apache.commons.csv CSVParser CSVParser
public CSVParser(final Reader reader, final CSVFormat format) throws IOException
If you do not read all records from the given reader, you should call #close() on the parser, unless you close the reader yourself.
From source file:com.itemanalysis.jmetrik.file.JmetrikFileImporter.java
/** * 1. Gets the file header or creates one. * 2. Sets the number of columns// ww w . j av a 2 s .c om * 3. Checks the type of data stored in each variable using the first rowsToScan rows. * Variables are integers by default. This method will change the data type to either * double or string. * */ private void setDataTypes() { CSVParser parser = null; Reader reader = null; try { reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8"); //Get column names from variable attributes colNames = new String[variableAttributeMap.size()]; int index = 0; Iterator<VariableName> iter = variableAttributeMap.keySet().iterator(); VariableName tempName = null; while (iter.hasNext()) { colNames[index++] = iter.next().toString(); } //Create a parser with variable names from the variable attributes if (hasHeader) { parser = new CSVParser(reader, dataFileFormat.withHeader(colNames).withSkipHeaderRecord(true).withCommentMarker('#')); } else { parser = new CSVParser(reader, dataFileFormat.withHeader(colNames).withCommentMarker('#')); } //Check data types in each column. String value = ""; Iterator<CSVRecord> csvIter = parser.iterator(); CSVRecord csvRecord = null; double testValue = 0; nrow = 0; while (csvIter.hasNext()) { csvRecord = csvIter.next(); iter = variableAttributeMap.keySet().iterator(); while (iter.hasNext()) { tempName = iter.next(); value = csvRecord.get(tempName.toString()).trim(); //Check that string can be converted to double. If not, Change variable type. 
//Ignore missing data and other special codes try { if (!"".equals(value) && !specialDataCodes.isMissing(value)) { testValue = Double.parseDouble(value); if (testValue != Math.floor(testValue)) { //if any value is a double, the variable is a double variableAttributeMap.get(tempName).setDataType(DataType.DOUBLE); } } } catch (NumberFormatException ex) { //if any value is a String, the variable is a String variableAttributeMap.get(tempName).setDataType(DataType.STRING); } } nrow++; } } catch (IOException ex) { theException = ex; } finally { try { if (parser != null) parser.close(); if (reader != null) reader.close(); } catch (IOException ex) { theException = ex; logger.fatal(ex); } } }
From source file:com.marklogic.contentpump.DelimitedTextReader.java
protected void initParser(InputSplit inSplit) throws IOException, InterruptedException { setFile(((FileSplit) inSplit).getPath()); configFileNameAsCollection(conf, file); fileIn = fs.open(file);//from w w w . j a v a 2 s .c o m instream = new InputStreamReader(fileIn, encoding); bytesRead = 0; fileLen = inSplit.getLength(); if (uriName == null) { generateId = conf.getBoolean(CONF_INPUT_GENERATE_URI, false); if (generateId) { idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart()); } else { uriId = 0; } } parser = new CSVParser(instream, CSVParserFormatter.getFormat(delimiter, encapsulator, true, true)); parserIterator = parser.iterator(); }
From source file:data.io.csv.CSVDataReader.java
/** * {@inheritDoc}/*from ww w. ja va 2 s.com*/ * Note : multiple iterators on the same instance are not supported */ @Override public Iterator<MVDataEntry> iterator() { // When a new iterator is requested, everything should be reset CSVParser parser; try { dataSourceStream.reset(); parser = new CSVParser(dataSourceStream, format); } catch (IOException e) { throw new RuntimeException(e); } csvIt = parser.iterator(); nextCSVRecord = null; nextEntry = null; return this; }
From source file:br.edimarmanica.weir2.integration.ScoredPairs.java
public static List<ScoredPair> loadAndSort(Domain domain) { List<ScoredPair> pairs = new ArrayList<>(); try (Reader in = new FileReader(new File(Paths.PATH_WEIR_V2 + "/" + domain.getPath() + "/scores.csv"))) { try (CSVParser parser = new CSVParser(in, CSVFormat.EXCEL.withHeader())) { for (CSVRecord record : parser) { //para cada value Rule rule1 = new Rule(domain.getSiteOf(record.get("SITE1")), record.get("RULE1")); Rule rule2 = new Rule(domain.getSiteOf(record.get("SITE2")), record.get("RULE2")); double score = Double.valueOf(record.get("SCORE")); if (score == 1) { continue; }//from ww w . j av a 2s . co m ScoredPair pair = new ScoredPair(rule1, rule2, score); pairs.add(pair); } } } catch (FileNotFoundException ex) { Logger.getLogger(RulesDataTypeController.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(RulesDataTypeController.class.getName()).log(Level.SEVERE, null, ex); } Collections.sort(pairs); return pairs; }
From source file:edu.si.sidora.tabularmetadata.TabularScannerTest.java
@Test public void testOperationWithLimitedScan() throws IOException { try (Reader reader = new FileReader(smalltestfile); final CSVParser parser = new CSVParser(reader, DEFAULT.withHeader())) { log.debug("Found header map: {}", parser.getHeaderMap()); final TabularScanner testScanner = new TabularScanner(parser.iterator(), mockTypeStrategy, mockRangeStrategy, mockEnumStrategy); testScanner.scan(2);//ww w .j a v a 2 s .com final List<DataType> guesses = testScanner.getTypeStrategies().stream().map(Heuristic::results) .collect(toList()); assertEquals("Failed to find the correct column types!", expectedResults, guesses); } }
From source file:com.x460dot10.b.registrar.StartupManager.java
/** * Imports data/passwords.txt into <code>University.passwords</code> * * @return Indicates import of passwords was successful * @throws IOException /*www . j a va 2 s .c o m*/ */ public boolean importPasswords() throws IOException { Boolean importPasswordsSuccessful = true; File file = new File("data/mockpasswords.dat"); FileReader reader = null; Object nextPassword; ArrayList<Password> filePasswords = new ArrayList<Password>(); try { reader = new FileReader(file); CSVFormat format = CSVFormat.DEFAULT; List<CSVRecord> records = new CSVParser(reader, format).getRecords(); for (CSVRecord record : records) { String idAsString = record.values[0]; Integer id = Integer.parseInt(idAsString); String userName = record.values[1]; String password = record.values[2]; nextPassword = Password.getStaticInstance(id, userName, password).clone(); filePasswords.add((Password) nextPassword); } uni.passwordManager.importPasswords(filePasswords); } catch (Exception ex) { // TODO send error message to a log file System.err.println("Error: " + ex.getMessage()); importPasswordsSuccessful = false; } finally { if (reader != null) reader.close(); } return importPasswordsSuccessful; }
From source file:com.alcatel_lucent.nz.wnmsextract.reader.BorgSelectionReader.java
/**
 * Main readAll method with calendar-type args. Sets up the data array and
 * uses the DB utilities class to bulk insert.
 *
 * NOTE(review): parser.getLine() is not the Apache Commons CSV API; this
 * CSVParser presumably comes from a different library (e.g. Ostermiller
 * Utils) — confirm the import before assuming Commons CSV semantics.
 * NOTE(review): every failure path calls System.exit(1), which terminates
 * the whole JVM — verify this is intended for a reader class.
 */
@Override
public void readAll(Calendar start, Calendar end) {
    // Column layout for the bulk insert — presumably varchar, timestamp,
    // integer (VC/TS/IT); confirm against ColumnStructure.
    ArrayList<ColumnStructure> colstruct = new ArrayList<ColumnStructure>();
    colstruct.add(ColumnStructure.VC);
    colstruct.add(ColumnStructure.TS);
    colstruct.add(ColumnStructure.IT);
    // One inner list per row to insert.
    ArrayList<ArrayList<String>> dmap = new ArrayList<ArrayList<String>>();
    try {
        //URL borg = new URL(BORG+getYesterday());
        URL borg = new URL(BORG + getDateSelection(start, end));
        URLConnection conn = borg.openConnection();
        BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()));
        CSVParser parser = new CSVParser(in, strategy);
        //if header
        //[just consumes header]
        //String[] header = parser.getLine();
        //and body
        String[] line = null;
        while ((line = parser.getLine()) != null) {
            ArrayList<String> list = new ArrayList<String>();
            // Composite id built from columns 1-3.
            list.add(idConvert(line[1], line[2], line[3]));
            Calendar cal = Calendar.getInstance();
            // Re-format the source timestamp (column 0) into the ALUDB format.
            cal.setTime(BORG_DATA_DF.parse(line[0]));
            list.add(ALUDBUtilities.ALUDB_DF.format(cal.getTime()));
            list.add(line[5]);
            dmap.add(list);
        }
        in.close();
    } catch (ArrayIndexOutOfBoundsException aiobe) {
        // A short row (fewer than 6 fields) lands here.
        System.err.println("Result not parseable " + aiobe);
        System.exit(1);
    } catch (MalformedURLException mrue) {
        System.err.println("Borg Path incorrect " + mrue);
        System.exit(1);
    } catch (IOException ioe) {
        System.err.println("Cannot read Borg file " + ioe);
        System.exit(1);
    } catch (ParseException pe) {
        System.err.println("Cannot parse Date field " + pe);
        System.exit(1);
    }
    /* bulk insert */
    ALUDBUtilities.insert(databasetype, TABLE, colstruct, dmap);
}
From source file:edu.clemson.lph.civet.addons.VspsCviFile.java
/**
 * Test only: reads a VSPS CVI export file and dumps each non-SAVED CVI's
 * origin, destination, vet, species counts, remarks and animals to stdout.
 *
 * @param fIn the CSV file to read
 */
private void printme(File fIn) {
    try {
        CSVParser parserIn = new CSVParser(new FileReader(fIn), CSVFormat.EXCEL);
        parser = new LabeledCSVParser(parserIn);
        // Consume the header row into the column-label array.
        aCols = parser.getNext();
    } catch (FileNotFoundException e) {
        logger.error(e.getMessage() + "\nCould not read file: " + fIn.getName());
    } catch (IOException e) {
        logger.error(e.getMessage() + "\nCould not read file: " + fIn.getName());
    }
    VspsCvi cvi;
    try {
        while ((cvi = nextCVI()) != null) {
            // Skip CVIs still in SAVED status.
            if (cvi.getStatus().equals("SAVED"))
                continue;
            VspsCviEntity orig = cvi.getOrigin();
            VspsCviEntity dest = cvi.getDestination();
            System.out.println(cvi.getCVINumber() + " created: " + cvi.getCreateDate());
            System.out
                    .println(" origin = " + orig.getName() + " " + orig.getPhone() + " " + orig.getAddress1());
            System.out.println(
                    " destination = " + dest.getName() + " " + dest.getPhone() + " " + dest.getAddress1());
            System.out.println(cvi.getOriginState() + " " + orig.getState());
            System.out.println(
                    cvi.getVeterinarianName() + ": " + cvi.getVetFirstName() + " " + cvi.getVetLastName());
            System.out.println(cvi.getAnimals().size() + " Animals in CVI");
            System.out.println(cvi.getRemarks());
            // Species map is keyed by a two-element list; value is the count.
            for (List<String> aKey : cvi.getSpecies().keySet()) {
                Integer iCount = cvi.getSpecies().get(aKey);
                System.out.println(iCount + " " + aKey.get(0) + " (" + aKey.get(1) + ")");
            }
            for (VspsCviAnimal animal : cvi.getAnimals()) {
                System.out.println("\t" + animal.getSpecies() + " " + animal.getBreed() + " "
                        + animal.getGender() + " " + animal.getDateOfBirth());
                // Each animal may carry up to five identifiers (1-based).
                for (int i = 1; i <= 5; i++) {
                    String sIdType = animal.getIdentifierType(i);
                    if (sIdType != null)
                        System.out.println("\t\t" + sIdType + " = " + animal.getIdentifier(i));
                }
            }
        }
    } catch (IOException e) {
        logger.error(e);
    }
}
From source file:javalibs.CSVExtractor.java
private void readCSV() { try {// www. j a v a 2 s . co m CSVParser parser = new CSVParser(Files.newBufferedReader(Paths.get(this.inCSV)), CSVFormat.DEFAULT.withHeader().withIgnoreHeaderCase().withTrim()); // Get all headers Map<String, Integer> rawHeaders = parser.getHeaderMap(); // Store the inRecords this.inRecords = parser.getRecords(); parser.close(); orderHeaders(rawHeaders); } catch (IOException e) { log_.die(e); } }
From source file:com.marklogic.contentpump.DelimitedTextInputFormat.java
public List<InputSplit> getSplits(JobContext job) throws IOException { boolean delimSplit = isSplitInput(job.getConfiguration()); //if delimSplit is true, size of each split is determined by //Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat List<InputSplit> splits = super.getSplits(job); if (!delimSplit) { return splits; }/*from www.j av a 2 s . c om*/ if (splits.size() >= SPLIT_COUNT_LIMIT) { //if #splits > 1 million, there is enough parallelism //therefore no point to split LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:" + SPLIT_COUNT_LIMIT); DefaultStringifier.store(job.getConfiguration(), false, ConfigConstants.CONF_SPLIT_INPUT); return splits; } // add header info into splits List<InputSplit> populatedSplits = new ArrayList<InputSplit>(); LOG.info(splits.size() + " DelimitedSplits generated"); Configuration conf = job.getConfiguration(); char delimiter = 0; ArrayList<Text> hlist = new ArrayList<Text>(); for (InputSplit file : splits) { FileSplit fsplit = ((FileSplit) file); Path path = fsplit.getPath(); FileSystem fs = path.getFileSystem(conf); if (fsplit.getStart() == 0) { // parse the inSplit, get the header FSDataInputStream fileIn = fs.open(path); String delimStr = conf.get(ConfigConstants.CONF_DELIMITER, ConfigConstants.DEFAULT_DELIMITER); if (delimStr.length() == 1) { delimiter = delimStr.charAt(0); } else { LOG.error("Incorrect delimitor: " + delimiter + ". 
Expects single character."); } String encoding = conf.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING, MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING); InputStreamReader instream = new InputStreamReader(fileIn, encoding); CSVParser parser = new CSVParser(instream, CSVParserFormatter.getFormat(delimiter, DelimitedTextReader.encapsulator, true, true)); Iterator<CSVRecord> it = parser.iterator(); String[] header = null; if (it.hasNext()) { CSVRecord record = (CSVRecord) it.next(); Iterator<String> recordIterator = record.iterator(); int recordSize = record.size(); header = new String[recordSize]; for (int i = 0; i < recordSize; i++) { if (recordIterator.hasNext()) { header[i] = (String) recordIterator.next(); } else { throw new IOException("Record size doesn't match the real size"); } } EncodingUtil.handleBOMUTF8(header, 0); hlist.clear(); for (String s : header) { hlist.add(new Text(s)); } } instream.close(); } DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(hlist.toArray(new Text[hlist.size()])), path, fsplit.getStart(), fsplit.getLength(), fsplit.getLocations()); populatedSplits.add(ds); } return populatedSplits; }