List of usage examples for the org.apache.commons.csv CSVPrinter constructor:

public CSVPrinter(final Appendable out, final CSVFormat format) throws IOException
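Before the project examples below, here is a minimal, self-contained sketch of the constructor on its own. Everything in it (the class name, the record values) is illustrative rather than taken from the sources that follow; any Appendable, such as a Writer or a StringBuilder, works as the first argument.

    import java.io.IOException;
    import java.io.StringWriter;

    import org.apache.commons.csv.CSVFormat;
    import org.apache.commons.csv.CSVPrinter;

    public class CsvPrinterBasics {
        public static void main(String[] args) throws IOException {
            StringWriter out = new StringWriter();
            // CSVPrinter implements Closeable, so try-with-resources closes it for us.
            try (CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT)) {
                printer.printRecord("id", "name"); // header row
                printer.printRecord(1, "alice");   // values are formatted via toString()
                printer.flush(); // flush explicitly; whether close() flushes depends on version/format settings
            }
            System.out.print(out);
        }
    }

The constructor throws IOException and rejects null arguments, which is why every example on this page either declares or catches that exception.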
From source file:de.speexx.jira.jan.command.issuequery.CsvCreator.java
void printHeader(final List<FieldNamePath> currentFieldNames, final List<FieldName> historyFieldNames,
        final TemporalChangeOutput temporalOutput) {
    assert !Objects.isNull(currentFieldNames);
    assert !Objects.isNull(historyFieldNames);
    assert !Objects.isNull(temporalOutput);

    final List<String> headerNames = new ArrayList<>();
    currentFieldNames.stream().map(FieldNamePath::asString)
            .map(name -> name.replaceAll(FieldNamePath.DELIMITER, FIELDNAMEPATH_DELIMITER_REPALCEMENT))
            .forEach(name -> headerNames.add(name));
    historyFieldNames.stream().map(FieldName::asString).forEach(name -> {
        headerNames.add(HOSTORICAL_FROM_PREFIX + name);
        if (temporalOutput != NONE) {
            if (temporalOutput == BOTH || temporalOutput == TIME) {
                headerNames.add(HOSTORICAL_CHANGE_DATETIME_PREFIX + name);
            }
            if (temporalOutput == BOTH || temporalOutput == DURATION) {
                headerNames.add(HOSTORICAL_DURATION_PREFIX + name);
            }
        }
        headerNames.add(HOSTORICAL_TO_PREFIX + name);
    });
    try {
        final CSVPrinter csvPrinter = new CSVPrinter(
                new OutputStreamWriter(System.out, StandardCharsets.UTF_8), RFC4180);
        csvPrinter.printRecord(headerNames.toArray());
        csvPrinter.flush();
    } catch (final IOException e) {
        throw new JiraAnalyzeException(e);
    }
}
From source file:canreg.client.dataentry.Convert.java
public static boolean convertData(canreg.client.gui.management.CanReg4MigrationInternalFrame.MigrationTask task,
        String filepath, String datafile, String regcode) {
    Connection conn;
    Statement stmt;
    ResultSet rs_hdr;
    ResultSet rs_data;
    boolean success = false;
    int totalrowcount = 0;
    int rowsImported = 0;
    String csv = filepath + Globals.FILE_SEPARATOR + regcode + ".csv";
    CSVPrinter printer;
    try {
        debugOut("Migrating data " + datafile);
        pconn = (ParadoxConnection) DriverManager
                .getConnection("jdbc:paradox:///" + filepath.replaceAll("\\\\", "/"));
        final ParadoxTable table = TableData.listTables(pconn, datafile).get(0);
        totalrowcount = table.getRowCount();

        SystemDescription sd = new SystemDescription(Globals.CANREG_SERVER_SYSTEM_CONFIG_FOLDER
                + Globals.FILE_SEPARATOR + regcode + ".xml");
        DatabaseVariablesListElement[] variableListElements;
        variableListElements = sd.getDatabaseVariableListElements();
        ArrayList<String> dbvle = new ArrayList();
        ArrayList<String> cols = new ArrayList();

        // Handle variable names that collide with reserved words: such names carry
        // a trailing underscore in the database, and the underscores are removed here.
        for (DatabaseVariablesListElement variable : variableListElements) {
            if (variable.getShortName().endsWith("_")) {
                dbvle.add(variable.getShortName().replace("_", ""));
            } else {
                dbvle.add(variable.getShortName());
            }
        }

        conn = DriverManager.getConnection("jdbc:paradox:///" + filepath.replaceAll("\\\\", "/"));
        final DatabaseMetaData meta = conn.getMetaData();
        rs_hdr = meta.getColumns("", "", datafile, "%");

        // Compare the variables in the file with those in the database.
        while (rs_hdr.next()) {
            for (String dbvar : dbvle) {
                if (rs_hdr.getString("COLUMN_NAME").equals(dbvar)
                        || rs_hdr.getString("COLUMN_NAME").replaceAll(" ", "_").equals(dbvar)) {
                    cols.add(rs_hdr.getString("COLUMN_NAME"));
                }
            }
        }

        String[] strheader = new String[cols.size()];
        String query = "SELECT ";
        for (int i = 0; i < cols.size(); i++) {
            strheader[i] = cols.get(i).toString();
            if (i == cols.size() - 1) {
                query += "\"" + strheader[i] + "\"";
            } else {
                query += "\"" + strheader[i] + "\",";
            }
        }
        query += " FROM \"" + datafile + "\"";

        CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withHeader(strheader).withDelimiter(',');
        debugOut(query);
        printer = new CSVPrinter(new FileWriter(csv), format);

        int hdrsize = strheader.length;
        Object[] strdata = new String[hdrsize];
        stmt = conn.createStatement();
        rs_data = stmt.executeQuery(query);

        if (Globals.DEBUG) {
            Statement stmt2 = conn.createStatement();
            String q = "SELECT RecNum FROM \"" + datafile + "\"";
            ResultSet rs_all_data = stmt2.executeQuery(q);
            debugOut(rs_all_data.toString());
        }

        while (rs_data.next()) {
            for (int i = 1; i < rs_data.getMetaData().getColumnCount() + 1; i++) {
                switch (rs_data.getMetaData().getColumnType(i)) {
                case 4: // java.sql.Types.INTEGER
                    strdata[i - 1] = Integer.toString(rs_data.getShort(i));
                    break;
                case 12: // java.sql.Types.VARCHAR
                    strdata[i - 1] = StringEscapeUtils.escapeCsv(rs_data.getString(i));
                    break;
                }
            }
            printer.printRecord(strdata);
            rowsImported++;
        }
        printer.flush();
        printer.close();
        success = true;
    } catch (SQLException ex) {
        Logger.getLogger(Convert.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(Convert.class.getName()).log(Level.SEVERE, null, ex);
    }
    success = success && (rowsImported == totalrowcount);
    return success;
}
From source file:edu.harvard.mcz.imagecapture.RunnableJobReportDialog.java
protected void serializeTableModel() {
    PrintWriter out = null;
    CSVPrinter writer = null;
    try {
        int cols = jTable.getModel().getColumnCount();
        CSVFormat csvFormat = CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL)
                .withHeaderComments(jTextArea.getText());
        TableModel model = jTable.getModel();
        switch (cols) {
        case 9:
            csvFormat = CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL)
                    .withHeader(model.getColumnName(0), model.getColumnName(1), model.getColumnName(2),
                            model.getColumnName(3), model.getColumnName(4), model.getColumnName(5),
                            model.getColumnName(6), model.getColumnName(7), model.getColumnName(8))
                    .withCommentMarker('*').withHeaderComments(jTextArea.getText());
            break;
        case 6:
            csvFormat = CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL)
                    .withHeader(model.getColumnName(0), model.getColumnName(1), model.getColumnName(2),
                            model.getColumnName(3), model.getColumnName(4), model.getColumnName(5))
                    .withCommentMarker('*').withHeaderComments(jTextArea.getText());
            break;
        case 5:
            csvFormat = CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL)
                    .withHeader(model.getColumnName(0), model.getColumnName(1), model.getColumnName(2),
                            model.getColumnName(3), model.getColumnName(4))
                    .withCommentMarker('*').withHeaderComments(jTextArea.getText());
            break;
        case 4:
            csvFormat = CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL)
                    .withHeader(model.getColumnName(0), model.getColumnName(1), model.getColumnName(2),
                            model.getColumnName(3))
                    .withCommentMarker('*').withHeaderComments(jTextArea.getText());
            break;
        }
        log.debug(jTextArea.getText());
        log.debug(csvFormat.getHeaderComments());
        Date now = new Date();
        // Note: "MM" is months; the original pattern "yyyymmdd" would have inserted minutes.
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMdd_HHmmss");
        String time = dateFormat.format(now);
        String filename = "jobreport_" + time + ".csv";
        out = new PrintWriter(filename);
        writer = new CSVPrinter(out, csvFormat);
        writer.flush();
        int rows = jTable.getModel().getRowCount();
        for (int i = 0; i < rows; i++) {
            ArrayList<String> values = new ArrayList<String>();
            for (int col = 0; col < cols; col++) {
                values.add((String) jTable.getModel().getValueAt(i, col));
            }
            writer.printRecord(values);
        }
        writer.flush();
        writer.close();
        JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(),
                "Saved report to file: " + filename, "Report to CSV file", JOptionPane.OK_OPTION);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            out.close();
        } catch (Exception e) {
        }
        try {
            writer.close();
        } catch (Exception e) {
        }
    }
}
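A side note on the example above: Commons CSV only writes header comments when the format also has a comment marker configured, which the switch cases do via withCommentMarker('*') but the initial fallback format (used when the column count matches no case) does not. A minimal sketch of a comment-capable format, with illustrative values:

    CSVFormat fmt = CSVFormat.DEFAULT
            .withQuoteMode(QuoteMode.ALL)
            .withCommentMarker('*') // without this, withHeaderComments(...) is silently ignored
            .withHeaderComments("report generated ...");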
From source file:com.rcv.ResultsWriter.java
private void generateSummarySpreadsheet(Map<Integer, Map<String, BigDecimal>> roundTallies, String precinct,
        String outputPath) throws IOException {
    String csvPath = outputPath + ".csv";
    Logger.log(Level.INFO, "Generating summary spreadsheets: %s...", csvPath);

    // Get all candidates sorted by their first-round tally. This determines the display order.
    Map<String, BigDecimal> firstRoundTally = roundTallies.get(1);
    List<String> sortedCandidates = sortCandidatesByTally(firstRoundTally);

    // totalActiveVotesPerRound maps each round to the total votes cast in that round
    Map<Integer, BigDecimal> totalActiveVotesPerRound = new HashMap<>();
    for (int round = 1; round <= numRounds; round++) {
        // tallies maps each candidate to their tally for the current round
        Map<String, BigDecimal> tallies = roundTallies.get(round);
        // total contains the total votes for all candidates in this round;
        // it is used for calculating other derived data
        BigDecimal total = BigDecimal.ZERO;
        for (BigDecimal tally : tallies.values()) {
            total = total.add(tally);
        }
        totalActiveVotesPerRound.put(round, total);
    }

    // csvPrinter will be used to write output to the CSV file
    CSVPrinter csvPrinter;
    try {
        BufferedWriter writer = Files.newBufferedWriter(Paths.get(csvPath));
        csvPrinter = new CSVPrinter(writer, CSVFormat.DEFAULT);
    } catch (IOException exception) {
        Logger.log(Level.SEVERE, "Error creating CSV file: %s\n%s", csvPath, exception.toString());
        throw exception;
    }

    // print contest info
    addHeaderRows(csvPrinter, precinct);

    // add a row header for the round column labels
    csvPrinter.print("Rounds");
    for (int round = 1; round <= numRounds; round++) {
        String label = String.format("Round %d", round);
        csvPrinter.print(label);
    }
    csvPrinter.println();

    // actions don't make sense in individual precinct results
    if (precinct == null || precinct.isEmpty()) {
        addActionRows(csvPrinter);
    }

    final BigDecimal totalActiveVotesFirstRound = totalActiveVotesPerRound.get(1);

    // For each candidate, output their total votes in each round.
    for (String candidate : sortedCandidates) {
        String candidateDisplayName = this.config.getNameForCandidateID(candidate);
        csvPrinter.print(candidateDisplayName);
        for (int round = 1; round <= numRounds; round++) {
            BigDecimal thisRoundTally = roundTallies.get(round).get(candidate);
            // not all candidates may have a tally in every round
            if (thisRoundTally == null) {
                thisRoundTally = BigDecimal.ZERO;
            }
            csvPrinter.print(thisRoundTally.toString());
        }
        csvPrinter.println();
    }

    // row for the inactive CVR counts
    csvPrinter.print("Inactive ballots");
    for (int round = 1; round <= numRounds; round++) {
        BigDecimal thisRoundInactive = BigDecimal.ZERO;
        if (round > 1) {
            // The exhausted count is the difference between the total votes in round 1
            // and the total votes in the current round.
            thisRoundInactive = totalActiveVotesFirstRound.subtract(totalActiveVotesPerRound.get(round))
                    .subtract(roundToResidualSurplus.get(round));
        }
        csvPrinter.print(thisRoundInactive.toString());
    }
    csvPrinter.println();

    // Row for residual surplus (if needed). We check whether any residual surplus accumulated
    // over the course of the tabulation by testing whether the value in the final round is positive.
    if (roundToResidualSurplus.get(numRounds).signum() == 1) {
        csvPrinter.print("Residual surplus");
        for (int round = 1; round <= numRounds; round++) {
            csvPrinter.print(roundToResidualSurplus.get(round).toString());
        }
        csvPrinter.println();
    }

    // write the CSV to disk
    try {
        csvPrinter.flush();
        csvPrinter.close();
    } catch (IOException exception) {
        Logger.log(Level.SEVERE, "Error saving file: %s\n%s", outputPath, exception.toString());
        throw exception;
    }
}
From source file:cz.lbenda.dataman.db.ExportTableData.java
/** Write rows as CSV.
 * @param sqlQueryRows rows to write
 * @param writer destination for the data */
public static void writeSqlQueryRowsToCSV(SQLQueryRows sqlQueryRows, Writer writer) throws IOException {
    CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(Constants.CSV_NEW_LINE_SEPARATOR);
    CSVPrinter csvFilePrinter = new CSVPrinter(writer, csvFileFormat);
    csvFilePrinter
            .printRecord(sqlQueryRows.getMetaData().getColumns().stream().map(ColumnDesc::getName).toArray());
    for (RowDesc row : sqlQueryRows.getRows()) {
        csvFilePrinter.printRecord(
                sqlQueryRows.getMetaData().getColumns().stream().map(row::getColumnValueStr).toArray());
    }
}
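This method leaves flushing and closing to the caller, and several examples on this page call flush() and close() by hand in ways that can leak on exceptions. Since CSVPrinter implements Closeable, try-with-resources is the safer pattern when the printer owns its destination; a minimal sketch with an illustrative file name:

    try (CSVPrinter printer = new CSVPrinter(new FileWriter("rows.csv"), CSVFormat.DEFAULT)) {
        printer.printRecord("col1", "col2");
        printer.printRecord("a", "b");
        printer.flush(); // flush explicitly; close() alone may suffice depending on the library version
    }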
From source file:de.tudarmstadt.ukp.dkpro.tc.svmhmm.util.SVMHMMUtils.java
/**
 * Given a confusion matrix, writes it in CSV and LaTeX form to the task's output directory,
 * and also prints evaluations (F-measure, precision, recall).
 *
 * @param context task context
 * @param confusionMatrix confusion matrix
 * @param filePrefix prefix of output files
 * @throws java.io.IOException
 */
public static void writeOutputResults(TaskContext context, ConfusionMatrix confusionMatrix, String filePrefix)
        throws IOException {
    // store the results as a LaTeX confusion matrix
    String confMatrixFileTex = (filePrefix != null ? filePrefix : "") + "confusionMatrix.tex";
    File evaluationFileLaTeX = new File(
            context.getStorageLocation(Constants.TEST_TASK_OUTPUT_KEY, StorageService.AccessMode.READWRITE),
            confMatrixFileTex);
    FileUtils.writeStringToFile(evaluationFileLaTeX, confusionMatrix.toStringLatex());

    // and as a CSV confusion matrix
    String confMatrixFileCsv = (filePrefix != null ? filePrefix : "") + "confusionMatrix.csv";
    File evaluationFileCSV = new File(
            context.getStorageLocation(Constants.TEST_TASK_OUTPUT_KEY, StorageService.AccessMode.READWRITE),
            confMatrixFileCsv);
    CSVPrinter csvPrinter = new CSVPrinter(new FileWriter(evaluationFileCSV), CSVFormat.DEFAULT);
    csvPrinter.printRecords(confusionMatrix.toStringMatrix());
    IOUtils.closeQuietly(csvPrinter);

    // and the results
    File evaluationFile = new File(
            context.getStorageLocation(Constants.TEST_TASK_OUTPUT_KEY, StorageService.AccessMode.READWRITE),
            new SVMHMMAdapter()
                    .getFrameworkFilename(TCMachineLearningAdapter.AdapterNameEntries.evaluationFile));
    PrintWriter pw = new PrintWriter(evaluationFile);
    pw.println(confusionMatrix.printNiceResults());
    pw.println(confusionMatrix.printLabelPrecRecFm());
    pw.println(confusionMatrix.printClassDistributionGold());
    IOUtils.closeQuietly(pw);
}
From source file:com.act.lcms.db.analysis.StandardIonAnalysis.java
public static void main(String[] args) throws Exception {
    Options opts = new Options();
    for (Option.Builder b : OPTION_BUILDERS) {
        opts.addOption(b.build());
    }

    CommandLine cl = null;
    try {
        CommandLineParser parser = new DefaultParser();
        cl = parser.parse(opts, args);
    } catch (ParseException e) {
        System.err.format("Argument parsing failed: %s\n", e.getMessage());
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        System.exit(1);
    }

    if (cl.hasOption("help")) {
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        return;
    }

    File lcmsDir = new File(cl.getOptionValue(OPTION_DIRECTORY));
    if (!lcmsDir.isDirectory()) {
        System.err.format("File at %s is not a directory\n", lcmsDir.getAbsolutePath());
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        System.exit(1);
    }

    try (DB db = DB.openDBFromCLI(cl)) {
        ScanFile.insertOrUpdateScanFilesInDirectory(db, lcmsDir);
        StandardIonAnalysis analysis = new StandardIonAnalysis();
        HashMap<Integer, Plate> plateCache = new HashMap<>();
        String plateBarcode = cl.getOptionValue(OPTION_STANDARD_PLATE_BARCODE);
        String inputChemicals = cl.getOptionValue(OPTION_STANDARD_CHEMICAL);
        String medium = cl.getOptionValue(OPTION_MEDIUM);

        // If a standard chemical is specified, do standard LCMS ion selection analysis.
        if (inputChemicals != null && !inputChemicals.equals("")) {
            String[] chemicals;
            if (!inputChemicals.contains(",")) {
                chemicals = new String[1];
                chemicals[0] = inputChemicals;
            } else {
                chemicals = inputChemicals.split(",");
            }
            String outAnalysis = cl.getOptionValue(OPTION_OUTPUT_PREFIX) + "." + CSV_FORMAT;
            String plottingDirectory = cl.getOptionValue(OPTION_PLOTTING_DIR);
            String[] headerStrings = { "Molecule", "Plate Bar Code", "LCMS Detection Results" };
            CSVPrinter printer = new CSVPrinter(new FileWriter(outAnalysis),
                    CSVFormat.DEFAULT.withHeader(headerStrings));

            for (String inputChemical : chemicals) {
                List<StandardWell> standardWells;
                Plate queryPlate = Plate.getPlateByBarcode(db,
                        cl.getOptionValue(OPTION_STANDARD_PLATE_BARCODE));
                if (plateBarcode != null && medium != null) {
                    standardWells = analysis.getStandardWellsForChemicalInSpecificPlateAndMedium(db,
                            inputChemical, queryPlate.getId(), medium);
                } else if (plateBarcode != null) {
                    standardWells = analysis.getStandardWellsForChemicalInSpecificPlate(db, inputChemical,
                            queryPlate.getId());
                } else {
                    standardWells = analysis.getStandardWellsForChemical(db, inputChemical);
                }
                if (standardWells.size() == 0) {
                    throw new RuntimeException("Found no LCMS wells for " + inputChemical);
                }

                // Sort in descending order of media, where MeOH- and water-related media are
                // promoted to the top and anything derived from yeast media is demoted.
                // (The original comparator returned only 1 or 0; the symmetric case is handled
                // here as well so the Comparator contract holds.)
                Collections.sort(standardWells, new Comparator<StandardWell>() {
                    @Override
                    public int compare(StandardWell o1, StandardWell o2) {
                        boolean y1 = StandardWell.doesMediaContainYeastExtract(o1.getMedia());
                        boolean y2 = StandardWell.doesMediaContainYeastExtract(o2.getMedia());
                        if (y1 && !y2) {
                            return 1;
                        } else if (!y1 && y2) {
                            return -1;
                        } else {
                            return 0;
                        }
                    }
                });

                Map<StandardWell, StandardIonResult> wellToIonRanking = StandardIonAnalysis
                        .getBestMetlinIonsForChemical(inputChemical, lcmsDir, db, standardWells,
                                plottingDirectory);
                if (wellToIonRanking.size() != standardWells.size()
                        && !cl.hasOption(OPTION_OVERRIDE_NO_SCAN_FILE_FOUND)) {
                    throw new Exception("Could not find a scan file associated with one of the standard wells");
                }

                for (StandardWell well : wellToIonRanking.keySet()) {
                    LinkedHashMap<String, XZ> snrResults = wellToIonRanking.get(well).getAnalysisResults();
                    String snrRankingResults = "";
                    int numResultsToShow = 0;
                    Plate plateForWellToAnalyze = Plate.getPlateById(db, well.getPlateId());
                    for (Map.Entry<String, XZ> ionToSnrAndTime : snrResults.entrySet()) {
                        if (numResultsToShow > 3) {
                            break;
                        }
                        String ion = ionToSnrAndTime.getKey();
                        XZ snrAndTime = ionToSnrAndTime.getValue();
                        snrRankingResults += String.format(ion + " (%.2f SNR at %.2fs); ",
                                snrAndTime.getIntensity(), snrAndTime.getTime());
                        numResultsToShow++;
                    }
                    String[] resultSet = { inputChemical,
                            plateForWellToAnalyze.getBarcode() + " " + well.getCoordinatesString() + " "
                                    + well.getMedia() + " " + well.getConcentration(),
                            snrRankingResults };
                    printer.printRecord(resultSet);
                }
            }

            try {
                printer.flush();
                printer.close();
            } catch (IOException e) {
                System.err.println("Error while flushing/closing csv writer.");
                e.printStackTrace();
            }
        } else {
            // Get the set of chemicals that includes the construct and all its intermediates.
            Pair<ConstructEntry, List<ChemicalAssociatedWithPathway>> constructAndPathwayChems = analysis
                    .getChemicalsForConstruct(db, cl.getOptionValue(OPTION_CONSTRUCT));
            System.out.format("Construct: %s\n", constructAndPathwayChems.getLeft().getCompositionId());

            for (ChemicalAssociatedWithPathway pathwayChem : constructAndPathwayChems.getRight()) {
                System.out.format(" Pathway chem %s\n", pathwayChem.getChemical());

                // Get all the standard wells for the pathway chemicals. These wells contain only
                // the chemical added with controlled solutions (i.e. no organism or other
                // chemicals in the solution).
                List<StandardWell> standardWells;
                if (plateBarcode != null) {
                    Plate queryPlate = Plate.getPlateByBarcode(db,
                            cl.getOptionValue(OPTION_STANDARD_PLATE_BARCODE));
                    standardWells = analysis.getStandardWellsForChemicalInSpecificPlate(db,
                            pathwayChem.getChemical(), queryPlate.getId());
                } else {
                    standardWells = analysis.getStandardWellsForChemical(db, pathwayChem.getChemical());
                }

                for (StandardWell wellToAnalyze : standardWells) {
                    List<StandardWell> negativeControls = analysis.getViableNegativeControlsForStandardWell(db,
                            wellToAnalyze);
                    Map<StandardWell, List<ScanFile>> allViableScanFiles = analysis
                            .getViableScanFilesForStandardWells(db, wellToAnalyze, negativeControls);
                    List<String> primaryStandardScanFileNames = new ArrayList<>();
                    for (ScanFile scanFile : allViableScanFiles.get(wellToAnalyze)) {
                        primaryStandardScanFileNames.add(scanFile.getFilename());
                    }
                    Plate plate = plateCache.get(wellToAnalyze.getPlateId());
                    if (plate == null) {
                        plate = Plate.getPlateById(db, wellToAnalyze.getPlateId());
                        plateCache.put(plate.getId(), plate);
                    }
                    System.out.format(" Standard well: %s @ %s, '%s'%s%s\n", plate.getBarcode(),
                            wellToAnalyze.getCoordinatesString(), wellToAnalyze.getChemical(),
                            wellToAnalyze.getMedia() == null ? ""
                                    : String.format(" in %s", wellToAnalyze.getMedia()),
                            wellToAnalyze.getConcentration() == null ? ""
                                    : String.format(" @ %s", wellToAnalyze.getConcentration()));
                    System.out.format(" Scan files: %s\n",
                            StringUtils.join(primaryStandardScanFileNames, ", "));

                    for (StandardWell negCtrlWell : negativeControls) {
                        plate = plateCache.get(negCtrlWell.getPlateId());
                        if (plate == null) {
                            plate = Plate.getPlateById(db, negCtrlWell.getPlateId());
                            plateCache.put(plate.getId(), plate);
                        }
                        List<String> negativeControlScanFileNames = new ArrayList<>();
                        for (ScanFile scanFile : allViableScanFiles.get(negCtrlWell)) {
                            negativeControlScanFileNames.add(scanFile.getFilename());
                        }
                        System.out.format(" Viable negative: %s @ %s, '%s'%s%s\n", plate.getBarcode(),
                                negCtrlWell.getCoordinatesString(), negCtrlWell.getChemical(),
                                negCtrlWell.getMedia() == null ? ""
                                        : String.format(" in %s", negCtrlWell.getMedia()),
                                negCtrlWell.getConcentration() == null ? ""
                                        : String.format(" @ %s", negCtrlWell.getConcentration()));
                        System.out.format(" Scan files: %s\n",
                                StringUtils.join(negativeControlScanFileNames, ", "));
                        // TODO: do something useful with the standard wells and their scan files,
                        // and then stop all the printing.
                    }
                }
            }
        }
    }
}
From source file:act.installer.wikipedia.ImportantChemicalsWikipedia.java
/**
 * This function writes the important chemicals set to a TSV file.
 * @param outputPath a String indicating where the file should be written (including its name)
 */
public void writeToTSV(String outputPath) {
    try {
        BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath));
        CSVPrinter printer = new CSVPrinter(writer, TSV_FORMAT);
        printer.printComment("This file has been generated by the ImportantChemicalsWikipedia.java script.");
        printer.printComment("Format: WIKIPEDIA<tab><wikipedia url><tab><inchi><tab><metadata>");
        for (ImportantChemical importantChemical : importantChemicalsWikipedia) {
            List<String> nextLine = new ArrayList<>();
            nextLine.add(importantChemical.getType());
            nextLine.add(importantChemical.getDbid());
            nextLine.add(importantChemical.getInchi());
            nextLine.add(mapper.writeValueAsString(importantChemical.getMetadata()));
            printer.printRecord(nextLine);
        }
        printer.flush();
        writer.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
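As with the header-comment example earlier, printComment(...) is a no-op unless the format has a comment marker set, so the TSV_FORMAT constant used above presumably configures one. A minimal sketch of such a format (the '#' marker is an assumption, not taken from this source):

    CSVFormat tsvWithComments = CSVFormat.TDF.withCommentMarker('#');
    // printer.printComment("note") now emits a "# note" line;
    // without the marker the call would be silently skipped.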
From source file:com.ibm.watson.developer_cloud.professor_languo.pipeline.QuestionSetManager.java
/**
 * This function is responsible for parsing a duplicate Stack Exchange thread TSV file produced by
 * {@link StackExchangeThreadSerializer}, and partitioning each such thread into the training set,
 * test set, or validation set. In addition, the corresponding row of the TSV file will be written
 * out to a training-, test-, or validation-set-specific TSV file in the same directory as the
 * input TSV file.
 *
 * @param dupQuestionFile - A TSV file containing duplicate {@link StackExchangeThread} records
 * @param trainTestValidateCumulativeProbs - A CDF of the desired proportion of training, test,
 *        and validation set records
 * @throws PipelineException
 */
private void parseTsvAndPartitionRecords(File dupQuestionFile, double[] trainTestValidateCumulativeProbs)
        throws PipelineException {
    // Open the TSV file for parsing, and CSVPrinters for outputting train, test, and
    // validation set TSV files
    String baseName = FilenameUtils.removeExtension(dupQuestionFile.getAbsolutePath());
    String extension = FilenameUtils.getExtension(dupQuestionFile.getAbsolutePath());
    try (FileReader reader = new FileReader(dupQuestionFile);
            CSVPrinter trainSetPrinter = new CSVPrinter(
                    new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_TRAIN_FILE_SUFFIX
                            + FilenameUtils.EXTENSION_SEPARATOR + extension),
                    CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()));
            CSVPrinter testSetPrinter = new CSVPrinter(
                    new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_TEST_FILE_SUFFIX
                            + FilenameUtils.EXTENSION_SEPARATOR + extension),
                    CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()));
            CSVPrinter validationSetPrinter = new CSVPrinter(
                    new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_VALIDATE_FILE_SUFFIX
                            + FilenameUtils.EXTENSION_SEPARATOR + extension),
                    CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()))) {

        // Parse the duplicate thread TSV file
        CSVParser parser = CSVFormat.TDF.withHeader().parse(reader);

        // Iterate over each CSV record, and place it into the desired partition
        // (train, test, or validation)
        Iterator<CSVRecord> recordIterator = parser.iterator();
        while (recordIterator.hasNext()) {
            CSVRecord record = recordIterator.next();

            // Get the StackExchangeThread associated with this record, and create a question from it
            StackExchangeThread duplicateThread = StackExchangeThreadSerializer.deserializeThreadFromBinFile(
                    record.get(CorpusBuilder.TSV_COL_HEADER_SERIALIZED_FILE_PATH));
            StackExchangeQuestion duplicateQuestion = new StackExchangeQuestion(duplicateThread);
            String parentId = record.get(CorpusBuilder.TSV_COL_HEADER_PARENT_ID);

            // Now drop this question into a partition, and write it to a corresponding TSV file
            double p = rng.nextDouble(); // Random number determines the partition for this record
            if (p <= trainTestValidateCumulativeProbs[0]) {
                // This record goes in the training set
                if (!addQuestionToSet(duplicateQuestion, parentId, this.trainingSet)) {
                    throw new PipelineException(MessageFormat.format(
                            Messages.getString("RetrieveAndRank.TRAINING_SET_FAILED_Q"), //$NON-NLS-1$
                            duplicateThread.getId()));
                }
                trainSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            } else if (p <= trainTestValidateCumulativeProbs[1]) {
                // This record goes in the test set
                if (!addQuestionToSet(duplicateQuestion, parentId, this.testSet)) {
                    throw new PipelineException(MessageFormat.format(
                            Messages.getString("RetrieveAndRank.TEST_SET_FAILED_Q"), //$NON-NLS-1$
                            duplicateThread.getId()));
                }
                testSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            } else {
                // This record goes in the validation set
                assert (p <= trainTestValidateCumulativeProbs[2]);
                if (!addQuestionToSet(duplicateQuestion, parentId, this.validationSet)) {
                    throw new PipelineException(MessageFormat.format(
                            Messages.getString("RetrieveAndRank.VALIDATION_SET_FAILED_Q"), //$NON-NLS-1$
                            duplicateThread.getId()));
                }
                validationSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            }
        }

        // Flush all the printers prior to closing
        trainSetPrinter.flush();
        testSetPrinter.flush();
        validationSetPrinter.flush();
    } catch (IOException | IngestionException e) {
        throw new PipelineException(e);
    }
}
From source file:at.alladin.rmbt.statisticServer.export.ExportResource.java
@Get
public Representation request(final String entity) {
    // Before doing anything, check whether a cached file already exists and is new enough.
    String property = System.getProperty("java.io.tmpdir");
    final String filename_zip;
    final String filename_csv;

    // allow filtering by month/year
    int year = -1;
    int month = -1;
    int hours = -1;
    boolean hoursExport = false;
    boolean dateExport = false;

    if (getRequest().getAttributes().containsKey("hours")) { // export by hours
        try {
            hours = Integer.parseInt(getRequest().getAttributes().get("hours").toString());
        } catch (NumberFormatException ex) {
            // nothing -> just fall back
        }
        if (hours <= 7 * 24 && hours >= 1) { // limit to 1 week (avoid DoS)
            hoursExport = true;
        }
    } else if (!hoursExport && getRequest().getAttributes().containsKey("year")) { // export by month/year
        try {
            year = Integer.parseInt(getRequest().getAttributes().get("year").toString());
            month = Integer.parseInt(getRequest().getAttributes().get("month").toString());
        } catch (NumberFormatException ex) {
            // nothing -> just fall back
        }
        if (year < 2099 && month > 0 && month <= 12 && year > 2000) {
            dateExport = true;
        }
    }

    if (hoursExport) {
        filename_zip = FILENAME_ZIP_HOURS.replace("%HOURS%", String.format("%03d", hours));
        filename_csv = FILENAME_CSV_HOURS.replace("%HOURS%", String.format("%03d", hours));
        cacheThresholdMs = 5 * 60 * 1000; // 5 minutes
    } else if (dateExport) {
        filename_zip = FILENAME_ZIP.replace("%YEAR%", Integer.toString(year)).replace("%MONTH%",
                String.format("%02d", month));
        filename_csv = FILENAME_CSV.replace("%YEAR%", Integer.toString(year)).replace("%MONTH%",
                String.format("%02d", month));
        cacheThresholdMs = 23 * 60 * 60 * 1000; // 23 hours
    } else {
        filename_zip = FILENAME_ZIP_CURRENT;
        filename_csv = FILENAME_CSV_CURRENT;
        cacheThresholdMs = 3 * 60 * 60 * 1000; // 3 hours
    }

    final File cachedFile = new File(property + File.separator + ((zip) ? filename_zip : filename_csv));
    final File generatingFile = new File(
            property + File.separator + ((zip) ? filename_zip : filename_csv) + "_tmp");
    if (cachedFile.exists()) {
        // check whether the file has been recently created OR a file is currently being created
        if (((cachedFile.lastModified() + cacheThresholdMs) > (new Date()).getTime())
                || (generatingFile.exists()
                        && (generatingFile.lastModified() + cacheThresholdMs) > (new Date()).getTime())) {
            // if so, return the cached file instead of a cost-intensive new one
            final OutputRepresentation result = new OutputRepresentation(
                    zip ? MediaType.APPLICATION_ZIP : MediaType.TEXT_CSV) {
                @Override
                public void write(OutputStream out) throws IOException {
                    InputStream is = new FileInputStream(cachedFile);
                    IOUtils.copy(is, out);
                    out.close();
                }
            };
            if (zip) {
                final Disposition disposition = new Disposition(Disposition.TYPE_ATTACHMENT);
                disposition.setFilename(filename_zip);
                result.setDisposition(disposition);
            }
            return result;
        }
    }

    final String timeClause;
    if (dateExport)
        timeClause = " AND (EXTRACT (month FROM t.time AT TIME ZONE 'UTC') = " + month
                + ") AND (EXTRACT (year FROM t.time AT TIME ZONE 'UTC') = " + year + ") ";
    else if (hoursExport)
        timeClause = " AND time > now() - interval '" + hours + " hours' ";
    else
        timeClause = " AND time > current_date - interval '31 days' ";

    final String sql = "SELECT"
            + " ('P' || t.open_uuid) open_uuid,"
            + " ('O' || t.open_test_uuid) open_test_uuid,"
            + " to_char(t.time AT TIME ZONE 'UTC', 'YYYY-MM-DD HH24:MI:SS') time_utc,"
            + " nt.group_name cat_technology,"
            + " nt.name network_type,"
            + " (CASE WHEN (t.geo_accuracy < ?) AND (t.geo_provider != 'manual') AND (t.geo_provider != 'geocoder') THEN"
            + " t.geo_lat"
            + " WHEN (t.geo_accuracy < ?) THEN"
            + " ROUND(t.geo_lat*1111)/1111"
            + " ELSE null"
            + " END) lat,"
            + " (CASE WHEN (t.geo_accuracy < ?) AND (t.geo_provider != 'manual') AND (t.geo_provider != 'geocoder') THEN"
            + " t.geo_long"
            + " WHEN (t.geo_accuracy < ?) THEN"
            + " ROUND(t.geo_long*741)/741 "
            + " ELSE null"
            + " END) long,"
            + " (CASE WHEN ((t.geo_provider = 'manual') OR (t.geo_provider = 'geocoder')) THEN"
            + " 'rastered'" // make raster transparent
            + " ELSE t.geo_provider"
            + " END) loc_src,"
            + " (CASE WHEN (t.geo_accuracy < ?) AND (t.geo_provider != 'manual') AND (t.geo_provider != 'geocoder')"
            + " THEN round(t.geo_accuracy::float * 10)/10 "
            + " WHEN (t.geo_accuracy < 100) AND ((t.geo_provider = 'manual') OR (t.geo_provider = 'geocoder')) THEN 100" // limit accuracy to 100m
            + " WHEN (t.geo_accuracy < ?) THEN round(t.geo_accuracy::float * 10)/10"
            + " ELSE null END) loc_accuracy, "
            + " (CASE WHEN (t.zip_code < 1000 OR t.zip_code > 9999) THEN null ELSE t.zip_code END) zip_code,"
            + " t.gkz gkz,"
            + " t.country_location country_location,"
            + " t.speed_download download_kbit,"
            + " t.speed_upload upload_kbit,"
            + " round(t.ping_median::float / 100000)/10 ping_ms,"
            + " t.lte_rsrp,"
            + " t.lte_rsrq,"
            + " ts.name server_name,"
            + " duration test_duration,"
            + " num_threads,"
            + " t.plattform platform,"
            + " COALESCE(adm.fullname, t.model) model,"
            + " client_software_version client_version,"
            + " network_operator network_mcc_mnc,"
            + " network_operator_name network_name,"
            + " network_sim_operator sim_mcc_mnc,"
            + " nat_type,"
            + " public_ip_asn asn,"
            + " client_public_ip_anonymized ip_anonym,"
            + " (ndt.s2cspd*1000)::int ndt_download_kbit,"
            + " (ndt.c2sspd*1000)::int ndt_upload_kbit,"
            + " COALESCE(t.implausible, false) implausible,"
            + " t.signal_strength"
            + " FROM test t"
            + " LEFT JOIN network_type nt ON nt.uid=t.network_type"
            + " LEFT JOIN device_map adm ON adm.codename=t.model"
            + " LEFT JOIN test_server ts ON ts.uid=t.server_id"
            + " LEFT JOIN test_ndt ndt ON t.uid=ndt.test_id"
            + " WHERE t.deleted = false"
            + timeClause
            + " AND status = 'FINISHED'"
            + " ORDER BY t.uid";

    final String[] columns;
    final List<String[]> data = new ArrayList<>();
    PreparedStatement ps = null;
    ResultSet rs = null;
    try {
        ps = conn.prepareStatement(sql);

        // insert filter for accuracy
        double accuracy = Double.parseDouble(settings.getString("RMBT_GEO_ACCURACY_DETAIL_LIMIT"));
        ps.setDouble(1, accuracy);
        ps.setDouble(2, accuracy);
        ps.setDouble(3, accuracy);
        ps.setDouble(4, accuracy);
        ps.setDouble(5, accuracy);
        ps.setDouble(6, accuracy);

        if (!ps.execute())
            return null;
        rs = ps.getResultSet();

        final ResultSetMetaData meta = rs.getMetaData();
        final int colCnt = meta.getColumnCount();
        columns = new String[colCnt];
        for (int i = 0; i < colCnt; i++)
            columns[i] = meta.getColumnName(i + 1);

        while (rs.next()) {
            final String[] line = new String[colCnt];
            for (int i = 0; i < colCnt; i++) {
                final Object obj = rs.getObject(i + 1);
                line[i] = obj == null ? null : obj.toString();
            }
            data.add(line);
        }
    } catch (final SQLException e) {
        e.printStackTrace();
        return null;
    } finally {
        try {
            if (rs != null)
                rs.close();
            if (ps != null)
                ps.close();
        } catch (final SQLException e) {
            e.printStackTrace();
        }
    }

    final OutputRepresentation result = new OutputRepresentation(
            zip ? MediaType.APPLICATION_ZIP : MediaType.TEXT_CSV) {
        @Override
        public void write(OutputStream out) throws IOException {
            // Cache in a file: create a temporary file first, to handle errors
            // while fulfilling a request.
            String property = System.getProperty("java.io.tmpdir");
            final File cachedFile = new File(
                    property + File.separator + ((zip) ? filename_zip : filename_csv) + "_tmp");
            OutputStream outf = new FileOutputStream(cachedFile);

            if (zip) {
                final ZipOutputStream zos = new ZipOutputStream(outf);
                final ZipEntry zeLicense = new ZipEntry("LIZENZ.txt");
                zos.putNextEntry(zeLicense);
                final InputStream licenseIS = getClass().getResourceAsStream("DATA_LICENSE.txt");
                IOUtils.copy(licenseIS, zos);
                licenseIS.close();
                final ZipEntry zeCsv = new ZipEntry(filename_csv);
                zos.putNextEntry(zeCsv);
                outf = zos;
            }

            final OutputStreamWriter osw = new OutputStreamWriter(outf);
            final CSVPrinter csvPrinter = new CSVPrinter(osw, csvFormat);

            for (final String c : columns)
                csvPrinter.print(c);
            csvPrinter.println();

            for (final String[] line : data) {
                for (final String f : line)
                    csvPrinter.print(f);
                csvPrinter.println();
            }
            csvPrinter.flush();

            if (zip)
                outf.close();

            // If we reach this code, the data is now cached in a temporary tmp-file,
            // so rename the file for "production use".
            // Concurrency issues should be solved by the operating system.
            File newCacheFile = new File(property + File.separator + ((zip) ? filename_zip : filename_csv));
            Files.move(cachedFile.toPath(), newCacheFile.toPath(), StandardCopyOption.ATOMIC_MOVE,
                    StandardCopyOption.REPLACE_EXISTING);
            FileInputStream fis = new FileInputStream(newCacheFile);
            IOUtils.copy(fis, out);
            fis.close();
            out.close();
        }
    };
    if (zip) {
        final Disposition disposition = new Disposition(Disposition.TYPE_ATTACHMENT);
        disposition.setFilename(filename_zip);
        result.setDisposition(disposition);
    }
    return result;
}