Example usage for java.io BufferedWriter write

List of usage examples for java.io BufferedWriter write

Introduction

On this page you can find example usages of java.io.BufferedWriter write, collected from open-source projects.

Prototype

public void write(int c) throws IOException 

Document

Writes a single character.
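
Before the project-specific usages below, here is a minimal, self-contained sketch of the write(int c) overload itself. The file name out.txt and the class name are placeholders for illustration. Note that most real-world call sites, including the examples on this page, use the write(String) overload; the try-with-resources form shown here is the idiomatic way to make sure the writer is closed and flushed:

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;

public class WriteSingleCharExample {
    public static void main(String[] args) throws IOException {
        // try-with-resources closes (and thereby flushes) the writer,
        // even if one of the write calls throws
        try (BufferedWriter bw = new BufferedWriter(new FileWriter("out.txt"))) {
            bw.write('A'); // a char widens to int: writes the single character 'A'
            bw.write(66);  // 66 is the code point of 'B'
            bw.newLine();  // platform-specific line separator
        }
    }
}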

Usage

From source file:com.nextdoor.bender.S3SnsNotifier.java

public static void main(String[] args) throws ParseException, InterruptedException, IOException {
    formatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'").withZoneUTC();

    /*
     * Parse cli arguments
     */
    Options options = new Options();
    options.addOption(Option.builder().longOpt("bucket").hasArg().required()
            .desc("Name of S3 bucket to list s3 objects from").build());
    options.addOption(Option.builder().longOpt("key-file").hasArg().required()
            .desc("Local file of S3 keys to process").build());
    options.addOption(
            Option.builder().longOpt("sns-arn").hasArg().required().desc("SNS arn to publish to").build());
    options.addOption(Option.builder().longOpt("throttle-ms").hasArg()
            .desc("Amount of ms to wait between publishing to SNS").build());
    options.addOption(Option.builder().longOpt("processed-file").hasArg()
            .desc("Local file to use to store procssed S3 object names").build());
    options.addOption(Option.builder().longOpt("skip-processed").hasArg(false)
            .desc("Whether to skip S3 objects that have been processed").build());
    options.addOption(
            Option.builder().longOpt("dry-run").hasArg(false).desc("If set do not publish to SNS").build());

    CommandLineParser parser = new DefaultParser();
    CommandLine cmd = parser.parse(options, args);

    String bucket = cmd.getOptionValue("bucket");
    String keyFile = cmd.getOptionValue("key-file");
    String snsArn = cmd.getOptionValue("sns-arn");
    String processedFile = cmd.getOptionValue("processed-file", null);
    boolean skipProcessed = cmd.hasOption("skip-processed");
    dryRun = cmd.hasOption("dry-run");
    long throttle = Long.parseLong(cmd.getOptionValue("throttle-ms", "-1"));

    if (processedFile != null) {
        File file = new File(processedFile);

        if (!file.exists()) {
            logger.debug("creating local file to store processed s3 object names: " + processedFile);
            file.createNewFile();
        }
    }

    /*
     * Import S3 keys that have been processed
     */
    if (skipProcessed && processedFile != null) {
        try (BufferedReader br = new BufferedReader(new FileReader(processedFile))) {
            String line;
            while ((line = br.readLine()) != null) {
                alreadyPublished.add(line.trim());
            }
        }
    }

    /*
     * Setup writer for file containing processed S3 keys
     */
    FileWriter fw = null;
    BufferedWriter bw = null;
    if (processedFile != null) {
        fw = new FileWriter(processedFile, true);
        bw = new BufferedWriter(fw);
    }

    /*
     * Create clients
     */
    AmazonS3Client s3Client = new AmazonS3Client();
    AmazonSNSClient snsClient = new AmazonSNSClient();

    /*
     * Get S3 object list
     */
    try (BufferedReader br = new BufferedReader(new FileReader(keyFile))) {
        String line;
        while ((line = br.readLine()) != null) {
            String key = line.trim();

            if (alreadyPublished.contains(key)) {
                logger.info("skipping " + key);
                continue; // without this, already-published keys would be re-published
            }

            ObjectMetadata om = s3Client.getObjectMetadata(bucket, key);

            S3EventNotification s3Notification = getS3Notification(key, bucket, om.getContentLength());

            String json = s3Notification.toJson();

            /*
             * Publish to SNS
             */
            if (publish(snsArn, json, snsClient, key) && processedFile != null) {
                bw.write(key + "\n");
                bw.flush();
            }

            if (throttle != -1) {
                Thread.sleep(throttle);
            }

        }
    }

    if (processedFile != null) {
        bw.close();
        fw.close();
    }
}

From source file:de.ipbhalle.metfrag.main.CommandLineTool.java

/**
 * @param args
 * @throws Exception 
 */
public static void main(String[] args) {

    CommandLineParser parser = new PosixParser();

    // create the Options
    Options options = new Options();

    options.addOption("d", "database", true, "database: " + Databases.getString() + " (default: kegg)");
    options.addOption("l", "localdb", true,
            "use a local database together with a settings file for candidate search (default: not used) note: only usable if pubchem database is selected (-d)");
    options.addOption("a", "mzabs", true,
            "allowed absolute (Da) mass deviation of fragment and peak masses (default: 0.01)");
    options.addOption("p", "mzppm", true,
            "allowed relative (ppm) mass deviation of fragment and peak masses (default: 10)");
    options.addOption("s", "searchppm", true,
            "relative (ppm) mass deviation used for candidate search in given compound database (-d) (default: 10; not used by default if sdf database is selected (-d))\n");
    options.addOption("n", "exactmass", true,
            "neutral mass of measured compound used for candidate search in database (-d) (mandatory)");
    options.addOption("b", "biological", false,
            "only consider compounds including CHNOPS atoms (not used by default)");
    options.addOption("i", "databaseids", true,
            "database ids of compounds used for in silico fragmentation (separated by ,) (not used by default; not used if sdf database is selected (-d)) note: given ids must be valid ids of given database (-d)");
    options.addOption("t", "treedepth", true,
            "treedepth used for in silico fragmentation (default: 2) note: high values result in high computation time");
    options.addOption("M", "mode", true,
            "mode used for measured ms/ms spectrum:\n" + Modes.getString() + "(default: 3)");
    options.addOption("f", "formula", true,
            "molecular formula of measured compound used for candidate search in database (-d) (not used by default; not used if sdf database is selected (-d))");
    options.addOption("B", "breakrings", false,
            "allow splitting of aromatic rings of candidate structures during in silico fragmentation (not used by default)");
    options.addOption("F", "storefragments", false,
            "store in silico generated fragments of candidate molecules (not used by default)");
    options.addOption("R", "resultspath", true, "directory where result files are stored (default: /tmp)");
    options.addOption("L", "sdffile", true,
            "location of the local sdf file (mandatory if sdf database (-d) is selected)");
    options.addOption("h", "help", false, "print help");
    options.addOption("D", "spectrumfile", true,
            "file containing peak data (mandatory) note: commandline options overwrite parameters given in the spectrum data file");
    options.addOption("T", "threads", true,
            "number of threads used for fragment calculation (default: number of available cpu cores)");
    options.addOption("c", "chemspidertoken", true,
            "Token for ChemSpider database search (not used by default; only necessary (mandatory) if ChemSpider database (-d) is selected)");
    options.addOption("v", "verbose", false,
            "get more output information during the processing (not used by default)");
    options.addOption("S", "samplename", true,
            "name of the sample measured (mandatory) note: result files are stored with given value");
    options.addOption("P", "saveparameters", false, "save used parameters (not used by default)");
    options.addOption("e", "printexamplespecfile", false,
            "print an example spectrum data file (not used by default)");
    options.addOption("C", "charge", true,
            "charge used in combination with mode (-M):\n" + Charges.getString() + " (default: 1)");
    options.addOption("r", "range", true,
            "range of candidates that will be processed: N (first N), M-N (from M to N), M- (from M), -N (till N); if N is greater than the number of candidates it will be set accordingly");

    // parse the command line arguments
    CommandLine line = null;
    try {
        line = parser.parse(options, args);
    } catch (ParseException e1) {
        System.out.println(e1.getMessage());
        System.out.println("Error: Could not parse option parameters.");
        System.out.println("Use help -h (--help) for information.");
        System.exit(1);
    }

    if (line == null) {
        System.out.println("Error: Could not parse option parameters.");
        System.out.println("Use help -h (--help) for information.");
        System.exit(1);
    }
    if (checkInitialParamsPresent(line, options))
        System.exit(0);

    if (!checkSpectrumFile(line)) {
        System.out.println("Error: Option parameters are not set correctly.");
        System.out.println("Use help -h (--help) for information.");
        System.exit(1);
    }
    if (!parseSpectrumFile(spectrumfile)) {
        System.out.println("Error: Could not correctly parse the spectrum data file.");
        System.out.println("Use help -h (--help) for information.");
        System.exit(1);
    }

    int successfulSet = setParameters(line, options);
    if (successfulSet == 2)
        System.exit(0);
    if (successfulSet != 0) {
        System.out.println("Error: Option parameters are not set correctly.");
        System.out.println("Use help -h (--help) for information.");
        System.exit(1);
    }

    boolean successfulChecked = true;
    if (successfulSet == 0)
        successfulChecked = checkParameters();
    if (saveParametersIsSet) {
        try {
            BufferedWriter bwriter = new BufferedWriter(new FileWriter(new File(
                    resultspath + System.getProperty("file.separator") + "parameters_" + sampleName + ".txt")));
            bwriter.write(getParameters());
            bwriter.close();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    try {
        boolean isPositive = true;
        if (charge.getValue() == 2)
            isPositive = false;
        spec = new WrapperSpectrum(peaksString, mode.getValueWithOffset(), exactMass.getValue(), isPositive);
    } catch (Exception e) {
        System.out.println("Error: Could not parse spectrum correctly. Check the given peak list.");
        System.exit(1);
    }

    if (!successfulChecked) {
        System.out.println("Error: Option parameters are not set correctly.");
        System.out.println("Use help -h (--help) for information.");
        System.exit(1);
    }

    List<MetFragResult> results = null;
    String pathToStoreFrags = "";
    if (storeFragments)
        pathToStoreFrags = resultspath;
    //run metfrag when all checks were successful

    if (usesdf) {
        try {
            if (verbose) {
                System.out.println("start fragmenter with local database");
                System.out.println("using database " + database);
            }
            results = MetFrag.startConvenienceSDF(spec, mzabs.getValue(), mzppm.getValue(),
                    searchppm.getValue(), true, breakRings, treeDepth.getValue(), true, true, true, false,
                    Integer.MAX_VALUE, true, sdfFile, "", null, searchppmIsSet, pathToStoreFrags,
                    numberThreads.getValue(), verbose, sampleName, onlyBiologicalCompounds);
        } catch (Exception e) {
            System.out.println("Error: " + e.getMessage());
            System.out.println("Error: Could not perform in silico fragmentation step.");
            System.exit(1);
        }
    } else {
        try {
            if (verbose) {
                if (!localdbIsSet)
                    System.out.println("start fragmenter with web database");
                else
                    System.out.println("start fragmenter with local database");
                System.out.println("using database " + database);
            }
            results = MetFrag.startConvenience(database, databaseIDs, formula, exactMass.getValue(), spec,
                    useProxy, mzabs.getValue(), mzppm.getValue(), searchppm.getValue(), true, breakRings,
                    treeDepth.getValue(), true, false, true, false, startindex.getValue(), endindex.getValue(),
                    true, pathToStoreFrags, numberThreads.getValue(), chemSpiderToken, verbose, sampleName,
                    localdb, onlyBiologicalCompounds, dblink, dbuser, dbpass, uniquebyinchi);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("Error: " + e.getMessage());
            System.out.println("Error: Could not perform in silico fragmentation step.");
            System.exit(1);
        }
    }

    saveResults(results);

}

From source file:edu.cmu.lti.oaqa.knn4qa.apps.QueryGenNMSLIB.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption(CommonParams.QUERY_FILE_PARAM, null, true, CommonParams.QUERY_FILE_DESC);
    options.addOption(CommonParams.MEMINDEX_PARAM, null, true, CommonParams.MEMINDEX_DESC);
    options.addOption(CommonParams.KNN_QUERIES_PARAM, null, true, CommonParams.KNN_QUERIES_DESC);
    options.addOption(CommonParams.NMSLIB_FIELDS_PARAM, null, true, CommonParams.NMSLIB_FIELDS_DESC);
    options.addOption(CommonParams.MAX_NUM_QUERY_PARAM, null, true, CommonParams.MAX_NUM_QUERY_DESC);
    options.addOption(CommonParams.SEL_PROB_PARAM, null, true, CommonParams.SEL_PROB_DESC);

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    BufferedWriter knnQueries = null;

    int maxNumQuery = Integer.MAX_VALUE;

    Float selProb = null;

    try {
        CommandLine cmd = parser.parse(options, args);
        String queryFile = null;

        if (cmd.hasOption(CommonParams.QUERY_FILE_PARAM)) {
            queryFile = cmd.getOptionValue(CommonParams.QUERY_FILE_PARAM);
        } else {
            Usage("Specify 'query file'", options);
        }

        String knnQueriesFile = cmd.getOptionValue(CommonParams.KNN_QUERIES_PARAM);

        if (null == knnQueriesFile)
            Usage("Specify '" + CommonParams.KNN_QUERIES_DESC + "'", options);

        String tmpn = cmd.getOptionValue(CommonParams.MAX_NUM_QUERY_PARAM);
        if (tmpn != null) {
            try {
                maxNumQuery = Integer.parseInt(tmpn);
            } catch (NumberFormatException e) {
                Usage("Maximum number of queries isn't integer: '" + tmpn + "'", options);
            }
        }

        String tmps = cmd.getOptionValue(CommonParams.NMSLIB_FIELDS_PARAM);
        if (null == tmps)
            Usage("Specify '" + CommonParams.NMSLIB_FIELDS_DESC + "'", options);
        String nmslibFieldList[] = tmps.split(",");

        knnQueries = new BufferedWriter(new FileWriter(knnQueriesFile));
        knnQueries.write("isQueryFile=1");
        knnQueries.newLine();
        knnQueries.newLine();

        String memIndexPref = cmd.getOptionValue(CommonParams.MEMINDEX_PARAM);

        if (null == memIndexPref) {
            Usage("Specify '" + CommonParams.MEMINDEX_DESC + "'", options);
        }

        String tmpf = cmd.getOptionValue(CommonParams.SEL_PROB_PARAM);

        if (tmpf != null) {
            try {
                selProb = Float.parseFloat(tmpf);
            } catch (NumberFormatException e) {
                Usage("A selection probability isn't a number in the range (0,1)'" + tmpf + "'", options);
            }
            if (selProb < Float.MIN_NORMAL || selProb + Float.MIN_NORMAL >= 1)
                Usage("A selection probability isn't a number in the range (0,1)'" + tmpf + "'", options);
        }

        BufferedReader inpText = new BufferedReader(
                new InputStreamReader(CompressUtils.createInputStream(queryFile)));

        String docText = XmlHelper.readNextXMLIndexEntry(inpText);

        NmslibQueryGenerator queryGen = new NmslibQueryGenerator(nmslibFieldList, memIndexPref);

        Random rnd = new Random();

        for (int docNum = 1; docNum <= maxNumQuery
                && docText != null; ++docNum, docText = XmlHelper.readNextXMLIndexEntry(inpText)) {
            if (selProb != null) {
                if (rnd.nextFloat() > selProb)
                    continue;
            }

            Map<String, String> docFields = null;

            try {
                docFields = XmlHelper.parseXMLIndexEntry(docText);

                String queryObjStr = queryGen.getStrObjForKNNService(docFields);

                knnQueries.append(queryObjStr);
                knnQueries.newLine();
            } catch (SAXException e) {
                System.err.println("Parsing error, offending DOC:" + NL + docText + " doc # " + docNum);
                throw new Exception("Parsing error.");
            }
        }

        knnQueries.close();
    } catch (ParseException e) {
        Usage("Cannot parse arguments", options);
        if (null != knnQueries)
            try {
                knnQueries.close();
            } catch (IOException e1) {
                e1.printStackTrace();
            }
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        try {
            if (knnQueries != null)
                knnQueries.close();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
        System.exit(1);
    }

    System.out.println("Terminated successfully!");
}

From source file:apps.quantification.LearnQuantificationSVMLight.java

public static void main(String[] args) throws IOException {
    String cmdLineSyntax = LearnQuantificationSVMLight.class.getName()
            + " [OPTIONS] <path to svm_light_learn> <path to svm_light_classify> <trainingIndexDirectory> <outputDirectory>";

    Options options = new Options();

    OptionBuilder.withArgName("f");
    OptionBuilder.withDescription("Number of folds");
    OptionBuilder.withLongOpt("f");
    OptionBuilder.isRequired(true);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("c");
    OptionBuilder.withDescription("The c value for svm_light (default 1)");
    OptionBuilder.withLongOpt("c");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("k");
    OptionBuilder.withDescription("Kernel type (default 0: linear, 1: polynomial, 2: RBF, 3: sigmoid)");
    OptionBuilder.withLongOpt("k");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("t");
    OptionBuilder.withDescription("Path for temporary files");
    OptionBuilder.withLongOpt("t");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("v");
    OptionBuilder.withDescription("Verbose output");
    OptionBuilder.withLongOpt("v");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("s");
    OptionBuilder.withDescription("Don't delete temporary training file in svm_light format (default: delete)");
    OptionBuilder.withLongOpt("s");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    SvmLightLearnerCustomizer classificationLearnerCustomizer = null;
    SvmLightClassifierCustomizer classificationCustomizer = null;

    int folds = -1;

    GnuParser parser = new GnuParser();
    String[] remainingArgs = null;
    try {
        CommandLine line = parser.parse(options, args);

        remainingArgs = line.getArgs();

        classificationLearnerCustomizer = new SvmLightLearnerCustomizer(remainingArgs[0]);
        classificationCustomizer = new SvmLightClassifierCustomizer(remainingArgs[1]);

        folds = Integer.parseInt(line.getOptionValue("f"));

        if (line.hasOption("c"))
            classificationLearnerCustomizer.setC(Float.parseFloat(line.getOptionValue("c")));

        if (line.hasOption("k")) {
            System.out.println("Kernel type: " + line.getOptionValue("k"));
            classificationLearnerCustomizer.setKernelType(Integer.parseInt(line.getOptionValue("k")));
        }

        if (line.hasOption("v"))
            classificationLearnerCustomizer.printSvmLightOutput(true);

        if (line.hasOption("s"))
            classificationLearnerCustomizer.setDeleteTrainingFiles(false);

        if (line.hasOption("t")) {
            classificationLearnerCustomizer.setTempPath(line.getOptionValue("t"));
            classificationCustomizer.setTempPath(line.getOptionValue("t"));
        }

    } catch (Exception exp) {
        System.err.println("Parsing failed.  Reason: " + exp.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }

    assert (classificationLearnerCustomizer != null);

    if (remainingArgs.length != 4) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }

    String indexFile = remainingArgs[2];

    File file = new File(indexFile);

    String indexName = file.getName();
    String indexPath = file.getParent();

    String outputPath = remainingArgs[3];

    SvmLightLearner classificationLearner = new SvmLightLearner();

    classificationLearner.setRuntimeCustomizer(classificationLearnerCustomizer);

    FileSystemStorageManager fssm = new FileSystemStorageManager(indexPath, false);
    fssm.open();

    IIndex training = TroveReadWriteHelper.readIndex(fssm, indexName, TroveContentDBType.Full,
            TroveClassificationDBType.Full);

    final TextualProgressBar progressBar = new TextualProgressBar("Learning the quantifiers");

    IOperationStatusListener status = new IOperationStatusListener() {

        @Override
        public void operationStatus(double percentage) {
            progressBar.signal((int) percentage);
        }
    };

    QuantificationLearner quantificationLearner = new QuantificationLearner(folds, classificationLearner,
            classificationLearnerCustomizer, classificationCustomizer, ClassificationMode.PER_CATEGORY,
            new LogisticFunction(), status);

    IQuantifier[] quantifiers = quantificationLearner.learn(training);

    File executableFile = new File(classificationLearnerCustomizer.getSvmLightLearnPath());
    IDataManager classifierDataManager = new SvmLightDataManager(new SvmLightClassifierCustomizer(
            executableFile.getParentFile().getAbsolutePath() + Os.pathSeparator() + "svm_light_classify"));
    String description = "_SVMLight_C-" + classificationLearnerCustomizer.getC() + "_K-"
            + classificationLearnerCustomizer.getKernelType();
    if (classificationLearnerCustomizer.getAdditionalParameters().length() > 0)
        description += "_" + classificationLearnerCustomizer.getAdditionalParameters();
    String quantifierPrefix = indexName + "_Quantifier-" + folds + description;

    FileSystemStorageManager fssmo = new FileSystemStorageManager(
            outputPath + File.separatorChar + quantifierPrefix, true);
    fssmo.open();
    QuantificationLearner.write(quantifiers, fssmo, classifierDataManager);
    fssmo.close();

    BufferedWriter bfs = new BufferedWriter(
            new FileWriter(outputPath + File.separatorChar + quantifierPrefix + "_rates.txt"));
    TShortDoubleHashMap simpleTPRs = quantificationLearner.getSimpleTPRs();
    TShortDoubleHashMap simpleFPRs = quantificationLearner.getSimpleFPRs();
    TShortDoubleHashMap scaledTPRs = quantificationLearner.getScaledTPRs();
    TShortDoubleHashMap scaledFPRs = quantificationLearner.getScaledFPRs();

    ContingencyTableSet contingencyTableSet = quantificationLearner.getContingencyTableSet();

    short[] cats = simpleTPRs.keys();
    for (int i = 0; i < cats.length; ++i) {
        short cat = cats[i];
        String catName = training.getCategoryDB().getCategoryName(cat);
        ContingencyTable contingencyTable = contingencyTableSet.getCategoryContingencyTable(cat);
        double simpleTPR = simpleTPRs.get(cat);
        double simpleFPR = simpleFPRs.get(cat);
        double scaledTPR = scaledTPRs.get(cat);
        double scaledFPR = scaledFPRs.get(cat);
        String line = quantifierPrefix + "\ttrain\tsimple\t" + catName + "\t" + cat + "\t"
                + contingencyTable.tp() + "\t" + contingencyTable.fp() + "\t" + contingencyTable.fn() + "\t"
                + contingencyTable.tn() + "\t" + simpleTPR + "\t" + simpleFPR + "\n";
        bfs.write(line);
        line = quantifierPrefix + "\ttrain\tscaled\t" + catName + "\t" + cat + "\t" + contingencyTable.tp()
                + "\t" + contingencyTable.fp() + "\t" + contingencyTable.fn() + "\t" + contingencyTable.tn()
                + "\t" + scaledTPR + "\t" + scaledFPR + "\n";
        bfs.write(line);
    }
    bfs.close();
}

From source file:apps.quantification.LearnQuantificationSVMPerf.java

public static void main(String[] args) throws IOException {
    String cmdLineSyntax = LearnQuantificationSVMPerf.class.getName()
            + " [OPTIONS] <path to svm_perf_learn> <path to svm_perf_classify> <trainingIndexDirectory> <outputDirectory>";

    Options options = new Options();

    OptionBuilder.withArgName("f");
    OptionBuilder.withDescription("Number of folds");
    OptionBuilder.withLongOpt("f");
    OptionBuilder.isRequired(true);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("c");
    OptionBuilder.withDescription("The c value for svm_perf (default 0.01)");
    OptionBuilder.withLongOpt("c");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("t");
    OptionBuilder.withDescription("Path for temporary files");
    OptionBuilder.withLongOpt("t");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("l");
    OptionBuilder.withDescription("The loss function to optimize (default 2):\n"
            + "               0  Zero/one loss: 1 if vector of predictions contains error, 0 otherwise.\n"
            + "               1  F1: 100 minus the F1-score in percent.\n"
            + "               2  Errorrate: Percentage of errors in prediction vector.\n"
            + "               3  Prec/Rec Breakeven: 100 minus PRBEP in percent.\n"
            + "               4  Prec@p: 100 minus precision at p in percent.\n"
            + "               5  Rec@p: 100 minus recall at p in percent.\n"
            + "               10  ROCArea: Percentage of swapped pos/neg pairs (i.e. 100 - ROCArea).");
    OptionBuilder.withLongOpt("l");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("w");
    OptionBuilder.withDescription("Choice of structural learning algorithm (default 9):\n"
            + "               0: n-slack algorithm described in [2]\n"
            + "               1: n-slack algorithm with shrinking heuristic\n"
            + "               2: 1-slack algorithm (primal) described in [5]\n"
            + "               3: 1-slack algorithm (dual) described in [5]\n"
            + "               4: 1-slack algorithm (dual) with constraint cache [5]\n"
            + "               9: custom algorithm in svm_struct_learn_custom.c");
    OptionBuilder.withLongOpt("w");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("p");
    OptionBuilder.withDescription("The value of p used by the prec@p and rec@p loss functions (default 0)");
    OptionBuilder.withLongOpt("p");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("v");
    OptionBuilder.withDescription("Verbose output");
    OptionBuilder.withLongOpt("v");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("s");
    OptionBuilder.withDescription("Don't delete temporary training file in svm_perf format (default: delete)");
    OptionBuilder.withLongOpt("s");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    SvmPerfLearnerCustomizer classificationLearnerCustomizer = null;
    SvmPerfClassifierCustomizer classificationCustomizer = null;

    int folds = -1;

    GnuParser parser = new GnuParser();
    String[] remainingArgs = null;
    try {
        CommandLine line = parser.parse(options, args);

        remainingArgs = line.getArgs();

        classificationLearnerCustomizer = new SvmPerfLearnerCustomizer(remainingArgs[0]);
        classificationCustomizer = new SvmPerfClassifierCustomizer(remainingArgs[1]);

        folds = Integer.parseInt(line.getOptionValue("f"));

        if (line.hasOption("c"))
            classificationLearnerCustomizer.setC(Float.parseFloat(line.getOptionValue("c")));

        if (line.hasOption("w"))
            classificationLearnerCustomizer.setW(Integer.parseInt(line.getOptionValue("w")));

        if (line.hasOption("p"))
            classificationLearnerCustomizer.setP(Integer.parseInt(line.getOptionValue("p")));

        if (line.hasOption("l"))
            classificationLearnerCustomizer.setL(Integer.parseInt(line.getOptionValue("l")));

        if (line.hasOption("v"))
            classificationLearnerCustomizer.printSvmPerfOutput(true);

        if (line.hasOption("s"))
            classificationLearnerCustomizer.setDeleteTrainingFiles(false);

        if (line.hasOption("t")) {
            classificationLearnerCustomizer.setTempPath(line.getOptionValue("t"));
            classificationCustomizer.setTempPath(line.getOptionValue("t"));
        }

    } catch (Exception exp) {
        System.err.println("Parsing failed.  Reason: " + exp.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }

    assert (classificationLearnerCustomizer != null);

    if (remainingArgs.length != 4) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }

    String indexFile = remainingArgs[2];

    File file = new File(indexFile);

    String indexName = file.getName();
    String indexPath = file.getParent();

    String outputPath = remainingArgs[3];

    SvmPerfLearner classificationLearner = new SvmPerfLearner();

    classificationLearner.setRuntimeCustomizer(classificationLearnerCustomizer);

    FileSystemStorageManager fssm = new FileSystemStorageManager(indexPath, false);
    fssm.open();

    IIndex training = TroveReadWriteHelper.readIndex(fssm, indexName, TroveContentDBType.Full,
            TroveClassificationDBType.Full);

    final TextualProgressBar progressBar = new TextualProgressBar("Learning the quantifiers");

    IOperationStatusListener status = new IOperationStatusListener() {

        @Override
        public void operationStatus(double percentage) {
            progressBar.signal((int) percentage);
        }
    };

    QuantificationLearner quantificationLearner = new QuantificationLearner(folds, classificationLearner,
            classificationLearnerCustomizer, classificationCustomizer, ClassificationMode.PER_CATEGORY,
            new LogisticFunction(), status);

    IQuantifier[] quantifiers = quantificationLearner.learn(training);

    File executableFile = new File(classificationLearnerCustomizer.getSvmPerfLearnPath());
    IDataManager classifierDataManager = new SvmPerfDataManager(new SvmPerfClassifierCustomizer(
            executableFile.getParentFile().getAbsolutePath() + Os.pathSeparator() + "svm_perf_classify"));
    String description = "_SVMPerf_C-" + classificationLearnerCustomizer.getC() + "_W-"
            + classificationLearnerCustomizer.getW() + "_L-" + classificationLearnerCustomizer.getL();
    if (classificationLearnerCustomizer.getL() == 4 || classificationLearnerCustomizer.getL() == 5)
        description += "_P-" + classificationLearnerCustomizer.getP();
    if (classificationLearnerCustomizer.getAdditionalParameters().length() > 0)
        description += "_" + classificationLearnerCustomizer.getAdditionalParameters();
    String quantifierPrefix = indexName + "_Quantifier-" + folds + description;

    FileSystemStorageManager fssmo = new FileSystemStorageManager(
            outputPath + File.separatorChar + quantifierPrefix, true);
    fssmo.open();
    QuantificationLearner.write(quantifiers, fssmo, classifierDataManager);
    fssmo.close();

    BufferedWriter bfs = new BufferedWriter(
            new FileWriter(outputPath + File.separatorChar + quantifierPrefix + "_rates.txt"));
    TShortDoubleHashMap simpleTPRs = quantificationLearner.getSimpleTPRs();
    TShortDoubleHashMap simpleFPRs = quantificationLearner.getSimpleFPRs();
    TShortDoubleHashMap scaledTPRs = quantificationLearner.getScaledTPRs();
    TShortDoubleHashMap scaledFPRs = quantificationLearner.getScaledFPRs();

    ContingencyTableSet contingencyTableSet = quantificationLearner.getContingencyTableSet();

    short[] cats = simpleTPRs.keys();
    for (int i = 0; i < cats.length; ++i) {
        short cat = cats[i];
        String catName = training.getCategoryDB().getCategoryName(cat);
        ContingencyTable contingencyTable = contingencyTableSet.getCategoryContingencyTable(cat);
        double simpleTPR = simpleTPRs.get(cat);
        double simpleFPR = simpleFPRs.get(cat);
        double scaledTPR = scaledTPRs.get(cat);
        double scaledFPR = scaledFPRs.get(cat);
        String line = quantifierPrefix + "\ttrain\tsimple\t" + catName + "\t" + cat + "\t"
                + contingencyTable.tp() + "\t" + contingencyTable.fp() + "\t" + contingencyTable.fn() + "\t"
                + contingencyTable.tn() + "\t" + simpleTPR + "\t" + simpleFPR + "\n";
        bfs.write(line);
        line = quantifierPrefix + "\ttrain\tscaled\t" + catName + "\t" + cat + "\t" + contingencyTable.tp()
                + "\t" + contingencyTable.fp() + "\t" + contingencyTable.fn() + "\t" + contingencyTable.tn()
                + "\t" + scaledTPR + "\t" + scaledFPR + "\n";
        bfs.write(line);
    }
    bfs.close();
}

From source file:metaTile.Main.java

/**
 * @param args
 * @throws IOException 
 */
public static void main(String[] args) throws IOException {
    try {
        /* parse the command line arguments */
        // create the command line parser
        CommandLineParser parser = new PosixParser();

        // create the Options
        Options options = new Options();
        options.addOption("i", "input", true, "File to read original tile list from.");
        options.addOption("o", "output", true, "File to write shorter meta-tile list to.");
        options.addOption("m", "metatiles", true,
                "Number of tiles in x and y direction to group into one meta-tile.");

        // parse the command line arguments
        CommandLine commandLine = parser.parse(options, args);

        if (!commandLine.hasOption("input") || !commandLine.hasOption("output")
                || !commandLine.hasOption("metatiles"))
            printUsage(options);

        String inputFileName = commandLine.getOptionValue("input");
        String outputFileName = commandLine.getOptionValue("output");
        int metaTileSize = Integer.parseInt(commandLine.getOptionValue("metatiles"));

        ArrayList<RenderingTile> tiles = new ArrayList<RenderingTile>();

        BufferedReader tileListReader = new BufferedReader(new FileReader(new File(inputFileName)));

        BufferedWriter renderMetatileListWriter = new BufferedWriter(new FileWriter(new File(outputFileName)));

        String line = tileListReader.readLine();
        while (line != null) {
            String[] columns = line.split("/");

            if (columns.length == 3)
                tiles.add(new RenderingTile(Integer.parseInt(columns[0]), Integer.parseInt(columns[1]),
                        Integer.parseInt(columns[2])));

            line = tileListReader.readLine();
        }

        tileListReader.close();

        int hits = 0;

        // tiles which we are already rendering as the top-left corner of metaTileSize x metaTileSize meta-tiles
        HashSet<RenderingTile> whitelist = new HashSet<RenderingTile>();

        // for each tile in the list see if it has a meta-tile in the whitelist already
        for (int i = 0; i < tiles.size(); i++) {
            boolean hit = false; // by default we aren't already rendering this tile as part of another metatile
            for (int dx = 0; dx < metaTileSize; dx++) {
                for (int dy = 0; dy < metaTileSize; dy++) {
                    RenderingTile candidate = new RenderingTile(tiles.get(i).z, tiles.get(i).x - dx,
                            tiles.get(i).y - dy);
                    if (whitelist.contains(candidate)) {
                        hit = true;
                        // now exit the two for loops iterating over tiles inside a meta-tile
                        dx = metaTileSize;
                        dy = metaTileSize;
                    }
                }
            }

            // if this tile doesn't already have a meta-tile in the whitelist, add it
            if (hit == false) {
                hits++;
                renderMetatileListWriter.write(tiles.get(i).toString() + "/" + metaTileSize + "\n");
                whitelist.add(tiles.get(i));
            }
        }
        renderMetatileListWriter.close();
        System.out.println(
                "Reduced " + tiles.size() + " tiles into " + hits + " metatiles of size " + metaTileSize);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:cc.redberry.core.performance.StableSort.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {

        // warm up the JVM so that JIT compilation does not distort the timings
        BitsStreamGenerator bitsStreamGenerator = new Well19937c();

        for (int i = 0; i < 1000; ++i)
            nextArray(1000, bitsStreamGenerator);

        System.out.println("!");
        BufferedWriter timMeanOut = new BufferedWriter(
                new FileWriter("/home/stas/Projects/stableSort/timMean.dat"));
        BufferedWriter insertionMeanOut = new BufferedWriter(
                new FileWriter("/home/stas/Projects/stableSort/insertionMean.dat"));

        BufferedWriter timMaxOut = new BufferedWriter(
                new FileWriter("/home/stas/Projects/stableSort/timMax.dat"));
        BufferedWriter insertionMaxOut = new BufferedWriter(
                new FileWriter("/home/stas/Projects/stableSort/insertionMax.dat"));

        BufferedWriter timSigOut = new BufferedWriter(
                new FileWriter("/home/stas/Projects/stableSort/timSig.dat"));
        BufferedWriter insertionSigOut = new BufferedWriter(
                new FileWriter("/home/stas/Projects/stableSort/insertionSig.dat"));

        DescriptiveStatistics timSort;
        DescriptiveStatistics insertionSort;

        int tries = 200;
        int arrayLength = 0;
        for (; arrayLength < 1000; ++arrayLength) {

            int[] coSort = nextArray(arrayLength, bitsStreamGenerator);

            timSort = new DescriptiveStatistics();
            insertionSort = new DescriptiveStatistics();
            for (int i = 0; i < tries; ++i) {
                int[] t1 = nextArray(arrayLength, bitsStreamGenerator);
                int[] t2 = t1.clone();

                long start = System.currentTimeMillis();
                ArraysUtils.timSort(t1, coSort);
                long stop = System.currentTimeMillis();
                timSort.addValue(stop - start);

                start = System.currentTimeMillis();
                ArraysUtils.insertionSort(t2, coSort);
                stop = System.currentTimeMillis();
                insertionSort.addValue(stop - start);
            }
            timMeanOut.write(arrayLength + "\t" + timSort.getMean() + "\n");
            insertionMeanOut.write(arrayLength + "\t" + insertionSort.getMean() + "\n");

            timMaxOut.write(arrayLength + "\t" + timSort.getMax() + "\n");
            insertionMaxOut.write(arrayLength + "\t" + insertionSort.getMax() + "\n");

            timSigOut.write(arrayLength + "\t" + timSort.getStandardDeviation() + "\n");
            insertionSigOut.write(arrayLength + "\t" + insertionSort.getStandardDeviation() + "\n");
        }
        timMeanOut.close();
        insertionMeanOut.close();
        timMaxOut.close();
        insertionMaxOut.close();
        timSigOut.close();
        insertionSigOut.close();
    } catch (IOException ex) {
        Logger.getLogger(StableSort.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:edu.cmu.lti.oaqa.knn4qa.apps.BuildRetrofitLexicons.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption(CommonParams.GIZA_ROOT_DIR_PARAM, null, true, CommonParams.GIZA_ROOT_DIR_DESC);
    options.addOption(CommonParams.GIZA_ITER_QTY_PARAM, null, true, CommonParams.GIZA_ITER_QTY_DESC);
    options.addOption(CommonParams.MEMINDEX_PARAM, null, true, CommonParams.MEMINDEX_DESC);
    options.addOption(OUT_FILE_PARAM, null, true, OUT_FILE_DESC);
    options.addOption(MIN_PROB_PARAM, null, true, MIN_PROB_DESC);
    options.addOption(FORMAT_PARAM, null, true, FORMAT_DESC);

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    try {
        CommandLine cmd = parser.parse(options, args);
        String gizaRootDir = cmd.getOptionValue(CommonParams.GIZA_ROOT_DIR_PARAM);
        int gizaIterQty = -1;

        if (cmd.hasOption(CommonParams.GIZA_ITER_QTY_PARAM)) {
            gizaIterQty = Integer.parseInt(cmd.getOptionValue(CommonParams.GIZA_ITER_QTY_PARAM));
        } else {
            Usage("Specify: " + CommonParams.GIZA_ITER_QTY_PARAM, options);
        }
        String outFileName = cmd.getOptionValue(OUT_FILE_PARAM);
        if (null == outFileName) {
            Usage("Specify: " + OUT_FILE_PARAM, options);
        }

        String indexDir = cmd.getOptionValue(CommonParams.MEMINDEX_PARAM);

        if (null == indexDir) {
            Usage("Specify: " + CommonParams.MEMINDEX_DESC, options);
        }

        FormatType outType = FormatType.kOrig;

        String outTypeStr = cmd.getOptionValue(FORMAT_PARAM);

        if (null != outTypeStr) {
            if (outTypeStr.equals(ORIG_TYPE)) {
                outType = FormatType.kOrig;
            } else if (outTypeStr.equals(WEIGHTED_TYPE)) {
                outType = FormatType.kWeighted;
            } else if (outTypeStr.equals(UNWEIGHTED_TYPE)) {
                outType = FormatType.kUnweighted;
            } else {
                Usage("Unknown format type: " + outTypeStr, options);
            }
        }

        float minProb = 0;

        if (cmd.hasOption(MIN_PROB_PARAM)) {
            minProb = Float.parseFloat(cmd.getOptionValue(MIN_PROB_PARAM));
        } else {
            Usage("Specify: " + MIN_PROB_PARAM, options);
        }

        System.out.println(String.format(
                "Saving lexicon to '%s' (output format '%s'), keep only entries with translation probability >= %f",
                outFileName, outType.toString(), minProb));

        // We use unlemmatized text here, because the lemmatized dictionary is going to be mostly a subset of the unlemmatized one.
        InMemForwardIndex textIndex = new InMemForwardIndex(FeatureExtractor.indexFileName(indexDir,
                FeatureExtractor.mFieldNames[FeatureExtractor.TEXT_UNLEMM_FIELD_ID]));
        InMemForwardIndexFilterAndRecoder filterAndRecoder = new InMemForwardIndexFilterAndRecoder(textIndex);

        String prefix = gizaRootDir + "/" + FeatureExtractor.mFieldNames[FeatureExtractor.TEXT_UNLEMM_FIELD_ID]
                + "/";
        GizaVocabularyReader answVoc = new GizaVocabularyReader(prefix + "source.vcb", filterAndRecoder);
        GizaVocabularyReader questVoc = new GizaVocabularyReader(prefix + "target.vcb", filterAndRecoder);

        GizaTranTableReaderAndRecoder gizaTable = new GizaTranTableReaderAndRecoder(false, // we don't need to flip the table for the purpose 
                prefix + "/output.t1." + gizaIterQty, filterAndRecoder, answVoc, questVoc,
                (float) FeatureExtractor.DEFAULT_PROB_SELF_TRAN, minProb);
        BufferedWriter outFile = new BufferedWriter(new FileWriter(outFileName));

        for (int srcWordId = 0; srcWordId <= textIndex.getMaxWordId(); ++srcWordId) {
            GizaOneWordTranRecs tranRecs = gizaTable.getTranProbs(srcWordId);

            if (null != tranRecs) {
                String wordSrc = textIndex.getWord(srcWordId);
                StringBuffer sb = new StringBuffer();
                sb.append(wordSrc);

                for (int k = 0; k < tranRecs.mDstIds.length; ++k) {
                    float prob = tranRecs.mProbs[k];
                    if (prob >= minProb) {
                        int dstWordId = tranRecs.mDstIds[k];

                        if (dstWordId == srcWordId && outType != FormatType.kWeighted)
                            continue; // Don't duplicate the word, unless it's probability weighted

                        sb.append(' ');
                        String dstWord = textIndex.getWord(dstWordId);
                        if (null == dstWord) {
                            throw new Exception(
                                    "Bug or inconsistent data: Couldn't retriev a word for wordId = "
                                            + dstWordId);
                        }
                        if (dstWord.indexOf(':') >= 0)
                            throw new Exception(
                                    "Illegal dictionary word '" + dstWord + "' b/c it contains ':'");
                        sb.append(dstWord);
                        if (outType != FormatType.kOrig) {
                            sb.append(':');
                            sb.append(outType == FormatType.kWeighted ? prob : 1);
                        }
                    }
                }

                outFile.write(sb.toString());
                outFile.newLine();
            }
        }

        outFile.close();
    } catch (ParseException e) {
        e.printStackTrace();
        Usage("Cannot parse arguments", options);
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }

    System.out.println("Terminated successfully!");

}

From source file:edu.cmu.lti.oaqa.knn4qa.apps.FilterTranTable.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption(INPUT_PARAM, null, true, INPUT_DESC);
    options.addOption(OUTPUT_PARAM, null, true, OUTPUT_DESC);
    options.addOption(CommonParams.MEM_FWD_INDEX_PARAM, null, true, CommonParams.MEM_FWD_INDEX_DESC);
    options.addOption(CommonParams.GIZA_ITER_QTY_PARAM, null, true, CommonParams.GIZA_ITER_QTY_DESC);
    options.addOption(CommonParams.GIZA_ROOT_DIR_PARAM, null, true, CommonParams.GIZA_ROOT_DIR_DESC);
    options.addOption(CommonParams.MIN_PROB_PARAM, null, true, CommonParams.MIN_PROB_DESC);
    options.addOption(CommonParams.MAX_WORD_QTY_PARAM, null, true, CommonParams.MAX_WORD_QTY_PARAM);

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    try {
        CommandLine cmd = parser.parse(options, args);

        String outputFile = null;

        outputFile = cmd.getOptionValue(OUTPUT_PARAM);
        if (null == outputFile) {
            Usage("Specify 'A name of the output file'", options);
        }

        String gizaRootDir = cmd.getOptionValue(CommonParams.GIZA_ROOT_DIR_PARAM);
        if (null == gizaRootDir) {
            Usage("Specify '" + CommonParams.GIZA_ROOT_DIR_DESC + "'", options);
        }

        String gizaIterQty = cmd.getOptionValue(CommonParams.GIZA_ITER_QTY_PARAM);

        if (null == gizaIterQty) {
            Usage("Specify '" + CommonParams.GIZA_ITER_QTY_DESC + "'", options);
        }

        float minProb = 0;

        String tmpf = cmd.getOptionValue(CommonParams.MIN_PROB_PARAM);

        if (tmpf != null) {
            minProb = Float.parseFloat(tmpf);
        }

        int maxWordQty = Integer.MAX_VALUE;

        String tmpi = cmd.getOptionValue(CommonParams.MAX_WORD_QTY_PARAM);

        if (null != tmpi) {
            maxWordQty = Integer.parseInt(tmpi);
        }

        String memFwdIndxName = cmd.getOptionValue(CommonParams.MEM_FWD_INDEX_PARAM);
        if (null == memFwdIndxName) {
            Usage("Specify '" + CommonParams.MEM_FWD_INDEX_DESC + "'", options);
        }

        System.out.println("Filtering index: " + memFwdIndxName + " max # of frequent words: " + maxWordQty
                + " min. probability:" + minProb);

        VocabularyFilterAndRecoder filter = new FrequentIndexWordFilterAndRecoder(memFwdIndxName, maxWordQty);

        String srcVocFile = CompressUtils.findFileVariant(gizaRootDir + "/source.vcb");

        System.out.println("Source vocabulary file: " + srcVocFile);

        GizaVocabularyReader srcVoc = new GizaVocabularyReader(srcVocFile, filter);

        String dstVocFile = CompressUtils.findFileVariant(gizaRootDir + "/target.vcb");

        System.out.println("Target vocabulary file: " + dstVocFile);

        GizaVocabularyReader dstVoc = new GizaVocabularyReader(CompressUtils.findFileVariant(dstVocFile),
                filter);

        String inputFile = CompressUtils.findFileVariant(gizaRootDir + "/output.t1." + gizaIterQty);

        BufferedReader finp = new BufferedReader(
                new InputStreamReader(CompressUtils.createInputStream(inputFile)));

        BufferedWriter fout = new BufferedWriter(
                new OutputStreamWriter(CompressUtils.createOutputStream(outputFile)));

        try {
            String line;
            int prevSrcId = -1;
            int wordQty = 0;
            long addedQty = 0;
            long totalQty = 0;
            boolean isNotFiltered = false;

            for (totalQty = 0; (line = finp.readLine()) != null;) {
                ++totalQty;
                // Skip empty lines
                line = line.trim();
                if (line.isEmpty())
                    continue;

                GizaTranRec rec = new GizaTranRec(line);

                if (rec.mSrcId != prevSrcId) {
                    ++wordQty;
                }
                if (totalQty % REPORT_INTERVAL_QTY == 0) {
                    System.out.println(String.format(
                            "Processed %d lines (%d source word entries) from '%s', added %d lines", totalQty,
                            wordQty, inputFile, addedQty));
                }

                // isNotFiltered should be set after procOneWord
                if (rec.mSrcId != prevSrcId) {
                    if (rec.mSrcId == 0)
                        isNotFiltered = true;
                    else {
                        String wordSrc = srcVoc.getWord(rec.mSrcId);
                        isNotFiltered = filter == null || (wordSrc != null && filter.checkWord(wordSrc));
                    }
                }

                prevSrcId = rec.mSrcId;

                if (rec.mProb >= minProb && isNotFiltered) {
                    String wordDst = dstVoc.getWord(rec.mDstId);

                    if (filter == null || (wordDst != null && filter.checkWord(wordDst))) {
                        fout.write(rec.mSrcId + " " + rec.mDstId + " " + rec.mProb);
                        fout.newLine();
                        addedQty++;
                    }
                }
            }

            System.out.println(
                    String.format("Processed %d lines (%d source word entries) from '%s', added %d lines",
                            totalQty, wordQty, inputFile, addedQty));

        } finally {
            finp.close();
            fout.close();
        }
    } catch (ParseException e) {
        Usage("Cannot parse arguments", options);
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
}

From source file:edu.cmu.lti.oaqa.knn4qa.apps.CollectionDiffer.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption("i1", null, true, "Input file 1");
    options.addOption("i2", null, true, "Input file 2");
    options.addOption("o", null, true, "Output file");

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    try {
        CommandLine cmd = parser.parse(options, args);

        InputStream input1 = null, input2 = null;

        if (cmd.hasOption("i1")) {
            input1 = CompressUtils.createInputStream(cmd.getOptionValue("i1"));
        } else {
            Usage("Specify 'Input file 1'");
        }
        if (cmd.hasOption("i2")) {
            input2 = CompressUtils.createInputStream(cmd.getOptionValue("i2"));
        } else {
            Usage("Specify 'Input file 2'");
        }

        HashSet<String> hSubj = new HashSet<String>();

        BufferedWriter out = null;

        if (cmd.hasOption("o")) {
            String outFile = cmd.getOptionValue("o");

            out = new BufferedWriter(new OutputStreamWriter(CompressUtils.createOutputStream(outFile)));
        } else {
            Usage("Specify 'Output file'");
        }

        XmlIterator inpIter2 = new XmlIterator(input2, YahooAnswersReader.DOCUMENT_TAG);

        int docNum = 1;
        for (String oneRec = inpIter2.readNext(); !oneRec.isEmpty(); oneRec = inpIter2.readNext(), ++docNum) {
            if (docNum % 10000 == 0) {
                System.out.println(String.format(
                        "Loaded and memorized questions for %d documents from the second input file", docNum));
            }
            ParsedQuestion q = YahooAnswersParser.parse(oneRec, false);
            hSubj.add(q.mQuestion);
        }

        XmlIterator inpIter1 = new XmlIterator(input1, YahooAnswersReader.DOCUMENT_TAG);

        System.out.println("=============================================");
        System.out.println("Memoization is done... now let's diff!!!");
        System.out.println("=============================================");

        docNum = 1;
        int skipOverlapQty = 0, skipErrorQty = 0;
        for (String oneRec = inpIter1.readNext(); !oneRec.isEmpty(); ++docNum, oneRec = inpIter1.readNext()) {
            if (docNum % 10000 == 0) {
                System.out.println(String.format("Processed %d documents from the first input file", docNum));
            }

            oneRec = oneRec.trim() + System.getProperty("line.separator");

            ParsedQuestion q = null;
            try {
                q = YahooAnswersParser.parse(oneRec, false);
            } catch (Exception e) {
                // If <bestanswer>...</bestanswer> is missing we may end up here...
                // This is a bit funny, because this element is supposed to be mandatory,
                // but it's not.
                System.err.println("Skipping due to parsing error, exception: " + e);
                skipErrorQty++;
                continue;
            }
            if (hSubj.contains(q.mQuestion.trim())) {
                //System.out.println(String.format("Skipping uri='%s', question='%s'", q.mQuestUri, q.mQuestion));
                skipOverlapQty++;
                continue;
            }

            out.write(oneRec);
        }
        System.out.println(
                String.format("Processed %d documents, skipped because of overlap/errors %d/%d documents",
                        docNum - 1, skipOverlapQty, skipErrorQty));
        out.close();
    } catch (ParseException e) {
        Usage("Cannot parse arguments");
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
}