Example usage for java.io BufferedWriter close

List of usage examples for java.io BufferedWriter close

Introduction

In this page you can find the example usage for java.io BufferedWriter close.

Prototype

@SuppressWarnings("try")
    public void close() throws IOException 

Source Link

Usage

From source file:edu.cmu.lti.oaqa.knn4qa.apps.ParsedPost.java

public static void main(String args[]) {

    Options options = new Options();

    options.addOption(INPUT_PARAM, null, true, INPUT_DESC);
    options.addOption(OUTPUT_PARAM, null, true, OUTPUT_DESC);
    options.addOption(CommonParams.MAX_NUM_REC_PARAM, null, true, CommonParams.MAX_NUM_REC_DESC);
    options.addOption(DEBUG_PRINT_PARAM, null, false, DEBUG_PRINT_DESC);
    options.addOption(EXCLUDE_CODE_PARAM, null, false, EXCLUDE_CODE_DESC);

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    HashMap<String, ParsedPost> hQuestions = new HashMap<String, ParsedPost>();

    try {//from ww w . j  ava  2 s  .  com
        CommandLine cmd = parser.parse(options, args);

        String inputFile = cmd.getOptionValue(INPUT_PARAM);

        if (null == inputFile)
            Usage("Specify: " + INPUT_PARAM, options);

        String outputFile = cmd.getOptionValue(OUTPUT_PARAM);

        if (null == outputFile)
            Usage("Specify: " + OUTPUT_PARAM, options);

        InputStream input = CompressUtils.createInputStream(inputFile);
        BufferedWriter output = new BufferedWriter(new FileWriter(new File(outputFile)));

        int maxNumRec = Integer.MAX_VALUE;

        String tmp = cmd.getOptionValue(CommonParams.MAX_NUM_REC_PARAM);

        if (tmp != null)
            maxNumRec = Integer.parseInt(tmp);

        boolean debug = cmd.hasOption(DEBUG_PRINT_PARAM);

        boolean excludeCode = cmd.hasOption(EXCLUDE_CODE_PARAM);

        System.out.println("Processing at most " + maxNumRec + " records, excluding code? " + excludeCode);

        XmlIterator xi = new XmlIterator(input, ROOT_POST_TAG);

        String elem;

        output.write("<?xml version='1.0' encoding='UTF-8'?><ystfeed>\n");

        for (int num = 1; num <= maxNumRec && !(elem = xi.readNext()).isEmpty(); ++num) {
            ParsedPost post = null;
            try {
                post = parsePost(elem, excludeCode);

                if (!post.mAcceptedAnswerId.isEmpty()) {
                    hQuestions.put(post.mId, post);
                } else if (post.mpostIdType.equals("2")) {
                    String parentId = post.mParentId;
                    String id = post.mId;
                    if (!parentId.isEmpty()) {
                        ParsedPost parentPost = hQuestions.get(parentId);
                        if (parentPost != null && parentPost.mAcceptedAnswerId.equals(id)) {
                            output.write(createYahooAnswersQuestion(parentPost, post));
                            hQuestions.remove(parentId);
                        }
                    }
                }

            } catch (Exception e) {
                e.printStackTrace();
                throw new Exception("Error parsing record # " + num + ", error message: " + e);
            }
            if (debug) {
                System.out.println(String.format("%s parentId=%s acceptedAnswerId=%s type=%s", post.mId,
                        post.mParentId, post.mAcceptedAnswerId, post.mpostIdType));
                System.out.println("================================");
                if (!post.mTitle.isEmpty()) {
                    System.out.println(post.mTitle);
                    System.out.println("--------------------------------");
                }
                System.out.println(post.mBody);
                System.out.println("================================");
            }
        }

        output.write("</ystfeed>\n");

        input.close();
        output.close();

    } catch (ParseException e) {
        Usage("Cannot parse arguments", options);
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }

}

From source file:edu.cmu.lti.oaqa.knn4qa.apps.QueryGenNMSLIB.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption(CommonParams.QUERY_FILE_PARAM, null, true, CommonParams.QUERY_FILE_DESC);
    options.addOption(CommonParams.MEMINDEX_PARAM, null, true, CommonParams.MEMINDEX_DESC);
    options.addOption(CommonParams.KNN_QUERIES_PARAM, null, true, CommonParams.KNN_QUERIES_DESC);
    options.addOption(CommonParams.NMSLIB_FIELDS_PARAM, null, true, CommonParams.NMSLIB_FIELDS_DESC);
    options.addOption(CommonParams.MAX_NUM_QUERY_PARAM, null, true, CommonParams.MAX_NUM_QUERY_DESC);
    options.addOption(CommonParams.SEL_PROB_PARAM, null, true, CommonParams.SEL_PROB_DESC);

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    BufferedWriter knnQueries = null;

    int maxNumQuery = Integer.MAX_VALUE;

    Float selProb = null;//  www .  j a v a 2s  .c  o m

    try {
        CommandLine cmd = parser.parse(options, args);
        String queryFile = null;

        if (cmd.hasOption(CommonParams.QUERY_FILE_PARAM)) {
            queryFile = cmd.getOptionValue(CommonParams.QUERY_FILE_PARAM);
        } else {
            Usage("Specify 'query file'", options);
        }

        String knnQueriesFile = cmd.getOptionValue(CommonParams.KNN_QUERIES_PARAM);

        if (null == knnQueriesFile)
            Usage("Specify '" + CommonParams.KNN_QUERIES_DESC + "'", options);

        String tmpn = cmd.getOptionValue(CommonParams.MAX_NUM_QUERY_PARAM);
        if (tmpn != null) {
            try {
                maxNumQuery = Integer.parseInt(tmpn);
            } catch (NumberFormatException e) {
                Usage("Maximum number of queries isn't integer: '" + tmpn + "'", options);
            }
        }

        String tmps = cmd.getOptionValue(CommonParams.NMSLIB_FIELDS_PARAM);
        if (null == tmps)
            Usage("Specify '" + CommonParams.NMSLIB_FIELDS_DESC + "'", options);
        String nmslibFieldList[] = tmps.split(",");

        knnQueries = new BufferedWriter(new FileWriter(knnQueriesFile));
        knnQueries.write("isQueryFile=1");
        knnQueries.newLine();
        knnQueries.newLine();

        String memIndexPref = cmd.getOptionValue(CommonParams.MEMINDEX_PARAM);

        if (null == memIndexPref) {
            Usage("Specify '" + CommonParams.MEMINDEX_DESC + "'", options);
        }

        String tmpf = cmd.getOptionValue(CommonParams.SEL_PROB_PARAM);

        if (tmpf != null) {
            try {
                selProb = Float.parseFloat(tmpf);
            } catch (NumberFormatException e) {
                Usage("A selection probability isn't a number in the range (0,1)'" + tmpf + "'", options);
            }
            if (selProb < Float.MIN_NORMAL || selProb + Float.MIN_NORMAL >= 1)
                Usage("A selection probability isn't a number in the range (0,1)'" + tmpf + "'", options);
        }

        BufferedReader inpText = new BufferedReader(
                new InputStreamReader(CompressUtils.createInputStream(queryFile)));

        String docText = XmlHelper.readNextXMLIndexEntry(inpText);

        NmslibQueryGenerator queryGen = new NmslibQueryGenerator(nmslibFieldList, memIndexPref);

        Random rnd = new Random();

        for (int docNum = 1; docNum <= maxNumQuery
                && docText != null; ++docNum, docText = XmlHelper.readNextXMLIndexEntry(inpText)) {
            if (selProb != null) {
                if (rnd.nextFloat() > selProb)
                    continue;
            }

            Map<String, String> docFields = null;

            try {
                docFields = XmlHelper.parseXMLIndexEntry(docText);

                String queryObjStr = queryGen.getStrObjForKNNService(docFields);

                knnQueries.append(queryObjStr);
                knnQueries.newLine();
            } catch (SAXException e) {
                System.err.println("Parsing error, offending DOC:" + NL + docText + " doc # " + docNum);
                throw new Exception("Parsing error.");
            }
        }

        knnQueries.close();
    } catch (ParseException e) {
        Usage("Cannot parse arguments", options);
        if (null != knnQueries)
            try {
                knnQueries.close();
            } catch (IOException e1) {
                e1.printStackTrace();
            }
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        try {
            if (knnQueries != null)
                knnQueries.close();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
        System.exit(1);
    }

    System.out.println("Terminated successfully!");
}

From source file:edu.cmu.lti.oaqa.annographix.apps.SolrQueryApp.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption("u", null, true, "Solr URI");
    options.addOption("q", null, true, "Query");
    options.addOption("n", null, true, "Max # of results");
    options.addOption("o", null, true, "An optional TREC-style output file");
    options.addOption("w", null, false, "Do a warm-up query call, before each query");

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    BufferedWriter trecOutFile = null;

    try {/*from w w w .j  a  v  a2s.  com*/
        CommandLine cmd = parser.parse(options, args);
        String queryFile = null, solrURI = null;

        if (cmd.hasOption("u")) {
            solrURI = cmd.getOptionValue("u");
        } else {
            Usage("Specify Solr URI");
        }

        SolrServerWrapper solr = new SolrServerWrapper(solrURI);

        if (cmd.hasOption("q")) {
            queryFile = cmd.getOptionValue("q");
        } else {
            Usage("Specify Query file");
        }

        int numRet = 100;

        if (cmd.hasOption("n")) {
            numRet = Integer.parseInt(cmd.getOptionValue("n"));
        }

        if (cmd.hasOption("o")) {
            trecOutFile = new BufferedWriter(new FileWriter(new File(cmd.getOptionValue("o"))));
        }

        List<String> fieldList = new ArrayList<String>();
        fieldList.add(UtilConst.ID_FIELD);
        fieldList.add(UtilConst.SCORE_FIELD);

        double totalTime = 0;
        double retQty = 0;

        ArrayList<Double> queryTimes = new ArrayList<Double>();

        boolean bDoWarmUp = cmd.hasOption("w");

        if (bDoWarmUp) {
            System.out.println("Using a warmup step!");
        }

        int queryQty = 0;
        for (String t : FileUtils.readLines(new File(queryFile))) {
            t = t.trim();
            if (t.isEmpty())
                continue;
            int ind = t.indexOf('|');
            if (ind < 0)
                throw new Exception("Wrong format, line: '" + t + "'");
            String qID = t.substring(0, ind);
            String q = t.substring(ind + 1);

            SolrDocumentList res = null;

            if (bDoWarmUp) {
                res = solr.runQuery(q, fieldList, numRet);
            }

            Long tm1 = System.currentTimeMillis();
            res = solr.runQuery(q, fieldList, numRet);
            Long tm2 = System.currentTimeMillis();
            retQty += res.getNumFound();
            System.out.println(qID + " Obtained: " + res.getNumFound() + " entries in " + (tm2 - tm1) + " ms");
            double delta = (tm2 - tm1);
            totalTime += delta;
            queryTimes.add(delta);
            ++queryQty;

            if (trecOutFile != null) {

                ArrayList<SolrRes> resArr = new ArrayList<SolrRes>();
                for (SolrDocument doc : res) {
                    String id = (String) doc.getFieldValue(UtilConst.ID_FIELD);
                    float score = (Float) doc.getFieldValue(UtilConst.SCORE_FIELD);
                    resArr.add(new SolrRes(id, "", score));
                }
                SolrRes[] results = resArr.toArray(new SolrRes[resArr.size()]);
                Arrays.sort(results);

                SolrEvalUtils.saveTrecResults(qID, results, trecOutFile, TREC_RUN, results.length);
            }
        }
        double devTime = 0, meanTime = totalTime / queryQty;
        for (int i = 0; i < queryQty; ++i) {
            double d = queryTimes.get(i) - meanTime;
            devTime += d * d;
        }
        devTime = Math.sqrt(devTime / (queryQty - 1));
        System.out.println(String.format("Query time, mean/standard dev: %.2f/%.2f (ms)", meanTime, devTime));
        System.out.println(String.format("Avg # of docs returned: %.2f", retQty / queryQty));

        solr.close();
        trecOutFile.close();
    } catch (ParseException e) {
        Usage("Cannot parse arguments");
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }

}

From source file:apps.LuceneQuery.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption("d", null, true, "index directory");
    options.addOption("i", null, true, "input file");
    options.addOption("s", null, true, "stop word file");
    options.addOption("n", null, true, "max # of results");
    options.addOption("o", null, true, "a TREC-style output file");
    options.addOption("r", null, true, "an optional QREL file, if specified,"
            + "we save results only for queries for which we find at least one relevant entry.");

    options.addOption("prob", null, true, "question sampling probability");
    options.addOption("max_query_qty", null, true, "a maximum number of queries to run");
    options.addOption("bm25_b", null, true, "BM25 parameter: b");
    options.addOption("bm25_k1", null, true, "BM25 parameter: k1");
    options.addOption("bm25fixed", null, false, "use the fixed BM25 similarity");

    options.addOption("seed", null, true, "random seed");

    Joiner commaJoin = Joiner.on(',');
    Joiner spaceJoin = Joiner.on(' ');

    options.addOption("source_type", null, true,
            "query source type: " + commaJoin.join(SourceFactory.getQuerySourceList()));

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    QrelReader qrels = null;//from   w w w .j a v  a2  s . com

    try {

        CommandLine cmd = parser.parse(options, args);

        String indexDir = null;

        if (cmd.hasOption("d")) {
            indexDir = cmd.getOptionValue("d");
        } else {
            Usage("Specify 'index directory'", options);
        }

        String inputFileName = null;

        if (cmd.hasOption("i")) {
            inputFileName = cmd.getOptionValue("i");
        } else {
            Usage("Specify 'input file'", options);
        }

        DictNoComments stopWords = null;

        if (cmd.hasOption("s")) {
            String stopWordFileName = cmd.getOptionValue("s");
            stopWords = new DictNoComments(new File(stopWordFileName), true /* lowercasing */);
            System.out.println("Using the stopword file: " + stopWordFileName);
        }

        String sourceName = cmd.getOptionValue("source_type");

        if (sourceName == null)
            Usage("Specify document source type", options);

        int numRet = 100;

        if (cmd.hasOption("n")) {
            numRet = Integer.parseInt(cmd.getOptionValue("n"));
            System.out.println("Retrieving at most " + numRet + " candidate entries.");
        }

        String trecOutFileName = null;

        if (cmd.hasOption("o")) {
            trecOutFileName = cmd.getOptionValue("o");
        } else {
            Usage("Specify 'a TREC-style output file'", options);
        }

        double fProb = 1.0f;

        if (cmd.hasOption("prob")) {
            try {
                fProb = Double.parseDouble(cmd.getOptionValue("prob"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'question sampling probability'", options);
            }
        }

        if (fProb <= 0 || fProb > 1) {
            Usage("Question sampling probability should be >0 and <=1", options);
        }

        System.out.println("Sample the following fraction of questions: " + fProb);

        float bm25_k1 = UtilConst.BM25_K1_DEFAULT, bm25_b = UtilConst.BM25_B_DEFAULT;

        if (cmd.hasOption("bm25_k1")) {
            try {
                bm25_k1 = Float.parseFloat(cmd.getOptionValue("bm25_k1"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_k1'", options);
            }
        }

        if (cmd.hasOption("bm25_b")) {
            try {
                bm25_b = Float.parseFloat(cmd.getOptionValue("bm25_b"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_b'", options);
            }
        }

        long seed = 0;

        String tmpl = cmd.getOptionValue("seed");

        if (tmpl != null)
            seed = Long.parseLong(tmpl);

        System.out.println("Using seed: " + seed);

        Random randGen = new Random(seed);

        System.out.println(String.format("BM25 parameters k1=%f b=%f ", bm25_k1, bm25_b));

        boolean useFixedBM25 = cmd.hasOption("bm25fixed");

        EnglishAnalyzer analyzer = new EnglishAnalyzer();
        Similarity similarity = null;

        if (useFixedBM25) {
            System.out.println(String.format("Using fixed BM25Simlarity, k1=%f b=%f", bm25_k1, bm25_b));
            similarity = new BM25SimilarityFix(bm25_k1, bm25_b);
        } else {
            System.out.println(String.format("Using Lucene BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b));
            similarity = new BM25Similarity(bm25_k1, bm25_b);
        }

        int maxQueryQty = Integer.MAX_VALUE;

        if (cmd.hasOption("max_query_qty")) {
            try {
                maxQueryQty = Integer.parseInt(cmd.getOptionValue("max_query_qty"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'max_query_qty'", options);
            }
        }

        System.out.println(String.format("Executing at most %d queries", maxQueryQty));

        if (cmd.hasOption("r")) {
            String qrelFile = cmd.getOptionValue("r");
            System.out.println("Using the qrel file: '" + qrelFile
                    + "', queries not returning a relevant entry will be ignored.");
            qrels = new QrelReader(qrelFile);
        }

        System.out.println(String.format("Using indexing directory %s", indexDir));

        LuceneCandidateProvider candProvider = new LuceneCandidateProvider(indexDir, analyzer, similarity);
        TextCleaner textCleaner = new TextCleaner(stopWords);

        QuerySource inpQuerySource = SourceFactory.createQuerySource(sourceName, inputFileName);
        QueryEntry inpQuery = null;

        BufferedWriter trecOutFile = new BufferedWriter(new FileWriter(new File(trecOutFileName)));

        int questNum = 0, questQty = 0;

        long totalTimeMS = 0;

        while ((inpQuery = inpQuerySource.next()) != null) {
            if (questQty >= maxQueryQty)
                break;
            ++questNum;

            String queryID = inpQuery.mQueryId;

            if (randGen.nextDouble() <= fProb) {
                ++questQty;

                String tokQuery = spaceJoin.join(textCleaner.cleanUp(inpQuery.mQueryText));
                String query = TextCleaner.luceneSafeCleanUp(tokQuery).trim();

                ResEntry[] results = null;

                if (query.isEmpty()) {
                    results = new ResEntry[0];
                    System.out.println(String.format("WARNING, empty query id = '%s'", inpQuery.mQueryId));
                } else {

                    try {
                        long start = System.currentTimeMillis();

                        results = candProvider.getCandidates(questNum, query, numRet);

                        long end = System.currentTimeMillis();
                        long searchTimeMS = end - start;
                        totalTimeMS += searchTimeMS;

                        System.out.println(String.format(
                                "Obtained results for the query # %d (answered %d queries), queryID %s the search took %d ms, we asked for max %d entries got %d",
                                questNum, questQty, queryID, searchTimeMS, numRet, results.length));

                    } catch (ParseException e) {
                        e.printStackTrace();
                        System.err.println(
                                "Error parsing query: " + query + " orig question is :" + inpQuery.mQueryText);
                        System.exit(1);
                    }
                }

                boolean bSave = true;

                if (qrels != null) {
                    boolean bOk = false;
                    for (ResEntry r : results) {
                        String label = qrels.get(queryID, r.mDocId);
                        if (candProvider.isRelevLabel(label, 1)) {
                            bOk = true;
                            break;
                        }
                    }
                    if (!bOk)
                        bSave = false;
                }

                //            System.out.println(String.format("Ranking results the query # %d queryId='%s' save results? %b", 
                //                                              questNum, queryID, bSave));          
                if (bSave) {
                    saveTrecResults(queryID, results, trecOutFile, TREC_RUN, numRet);
                }
            }

            if (questNum % 1000 == 0)
                System.out.println(String.format("Proccessed %d questions", questNum));

        }

        System.out.println(String.format("Proccessed %d questions, the search took %f MS on average", questQty,
                (float) totalTimeMS / questQty));

        trecOutFile.close();

    } catch (ParseException e) {
        e.printStackTrace();
        Usage("Cannot parse arguments: " + e, options);
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
}

From source file:net.java.sen.tools.MkSenDic.java

/**
 * Build sen dictionary./*www  .j a v  a  2 s  .co  m*/
 * 
 * @param args
 *            custom dictionary files. see dic/build.xml.
 */
public static void main(String args[]) {
    ResourceBundle rb = ResourceBundle.getBundle("dictionary");
    DictionaryMaker dm1 = new DictionaryMaker();
    DictionaryMaker dm2 = new DictionaryMaker();
    DictionaryMaker dm3 = new DictionaryMaker();

    // 1st field information of connect file.
    Vector rule1 = new Vector();

    // 2nd field information of connect file.
    Vector rule2 = new Vector();

    // 3rd field information of connect file.
    Vector rule3 = new Vector();

    // 4th field information of connect file.
    // this field shows cost of morpheme connection
    // [size3*(x3*size2+x2)+x1]
    // [size3*(Attr1*size2+Attr2)+Attl]
    short score[] = new short[20131];

    long start = System.currentTimeMillis();

    // /////////////////////////////////////////
    //
    // Step1. Loading connetion file.
    //
    log.info("(1/7): reading connection matrix ... ");
    try {
        log.info("connection file = " + rb.getString("text_connection_file"));
        log.info("charset = " + rb.getString("dic.charset"));
        CSVParser csvparser = new CSVParser(new FileInputStream(rb.getString("text_connection_file")),
                rb.getString("dic.charset"));
        String t[];
        int line = 0;
        while ((t = csvparser.nextTokens()) != null) {
            if (t.length < 4) {
                log.warn("invalid line in " + rb.getString("text_connection_file") + ":" + line);
                log.warn(rb.getString("text_connection_file") + "may be broken.");
                break;
            }
            dm1.add(t[0]);
            rule1.add(t[0]);

            dm2.add(t[1]);
            rule2.add(t[1]);

            dm3.add(t[2]);
            rule3.add(t[2]);

            if (line == score.length) {
                score = resize(score);
            }

            score[line++] = (short) Integer.parseInt(t[3]);
        }

        // /////////////////////////////////////////
        //
        // Step2. Building internal dictionary
        //
        log.info("(2/7): building type dictionary ... ");
        dm1.build();
        dm2.build();
        dm3.build();

        // if you want check specified morpheme, you uncomment and modify
        // following line:
        /*
         * System.out.print("22="); dm3.getById(22);
         * System.out.print("368="); dm3.getById(368);
         * 
         * System.out.println(dm3.getDicId("?????*,*,*,*,?"));
         * DictionaryMaker.debug = true;
         * System.out.println(dm3.getDicId("?????*,*,*,*,?"));
         * System.out.println(dm3.getDicIdNoCache("?????*,*,*,*,?"));
         */

    } catch (IOException e) {
        e.printStackTrace();
        System.exit(0);
    }

    // -------------------------------------------------

    int size1 = dm1.size();
    int size2 = dm2.size();
    int size3 = dm3.size();
    int ruleSize = rule1.size();
    short matrix[] = new short[size1 * size2 * size3];
    short default_cost = (short) Integer.parseInt(rb.getString("default_connection_cost"));

    // /////////////////////////////////////////
    //
    // Step3. Writing Connection Matrix
    //
    log.info("(3/7): writing conection matrix (" + size1 + " x " + size2 + " x " + size3 + " = "
            + size1 * size2 * size3 + ") ...");

    for (int i = 0; i < (int) (size1 * size2 * size3); i++)
        matrix[i] = default_cost;

    for (int i = 0; i < ruleSize; i++) {
        Vector r1 = dm1.getRuleIdList((String) rule1.get(i));
        Vector r2 = dm2.getRuleIdList((String) rule2.get(i));
        Vector r3 = dm3.getRuleIdList((String) rule3.get(i));

        for (Iterator i1 = r1.iterator(); i1.hasNext();) {
            int ii1 = ((Integer) i1.next()).intValue();
            for (Iterator i2 = r2.iterator(); i2.hasNext();) {
                int ii2 = ((Integer) i2.next()).intValue();
                for (Iterator i3 = r3.iterator(); i3.hasNext();) {
                    int ii3 = ((Integer) i3.next()).intValue();
                    int pos = size3 * (size2 * ii1 + ii2) + ii3;
                    matrix[pos] = score[i];
                }
            }
        }
    }

    try {
        DataOutputStream out = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(rb.getString("matrix_file"))));
        out.writeShort(size1);
        out.writeShort(size2);
        out.writeShort(size3);
        for (int i1 = 0; i1 < size1; i1++)
            for (int i2 = 0; i2 < size2; i2++)
                for (int i3 = 0; i3 < size3; i3++) {
                    out.writeShort(matrix[size3 * (size2 * i1 + i2) + i3]);
                    // if (matrix[size3 * (size2 * i1 + i2) + i3] !=
                    // default_cost) {
                    // }
                }
        out.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(0);
    }

    matrix = null;
    score = null;

    // -------------------------------------------------

    int pos_start = Integer.parseInt(rb.getString("pos_start"));
    int pos_size = Integer.parseInt(rb.getString("pos_size"));

    int di = 0;
    int offset = 0;
    ArrayList dicList = new ArrayList();

    // /////////////////////////////////////////
    //
    // Step4. Reading Morpheme Information
    //
    log.info("(4/7): reading morpheme information ... ");
    String t = null;
    String[] csv = null;
    try {
        // writer for feature file.
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(rb.getString("pos_file")), rb.getString("sen.charset")));

        log.info("load dic: " + rb.getString("text_dic_file"));
        BufferedReader dicStream = null;
        int custom_dic = -1;
        if (args.length == 0) {
            dicStream = new BufferedReader(new InputStreamReader(
                    new FileInputStream(rb.getString("text_dic_file")), rb.getString("dic.charset")));
        } else {
            custom_dic = 0;
            dicStream = new BufferedReader(
                    new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset")));
        }

        int line = 0;

        CSVData key_b = new CSVData();
        CSVData pos_b = new CSVData();

        while (true) {
            t = dicStream.readLine();
            if (t == null) {
                dicStream.close();
                custom_dic++;
                if (args.length == custom_dic) {
                    break;
                } else {
                    // read custum dictionary
                    log.info("load dic: " + "args[custum_dic]");
                    dicStream = new BufferedReader(new InputStreamReader(new FileInputStream(args[custom_dic]),
                            rb.getString("dic.charset")));
                }
                continue;
            }

            CSVParser parser = new CSVParser(t);
            csv = parser.nextTokens();
            if (csv.length < (pos_size + pos_start)) {
                throw new RuntimeException("format error:" + t);
            }

            key_b.clear();
            pos_b.clear();
            for (int i = pos_start; i < (pos_start + pos_size - 1); i++) {
                key_b.append(csv[i]);
                pos_b.append(csv[i]);
            }

            key_b.append(csv[pos_start + pos_size - 1]);
            pos_b.append(csv[pos_start + pos_size - 1]);

            for (int i = pos_start + pos_size; i < (csv.length - 1); i++) {
                pos_b.append(csv[i]);
            }
            pos_b.append(csv[csv.length - 1]);

            CToken token = new CToken();

            token.rcAttr2 = (short) dm1.getDicId(key_b.toString());
            token.rcAttr1 = (short) dm2.getDicId(key_b.toString());
            token.lcAttr = (short) dm3.getDicId(key_b.toString());
            token.posid = 0;
            token.posID = offset;
            token.length = (short) csv[0].length();
            token.cost = (short) Integer.parseInt(csv[1]);

            dicList.add(new PairObject(csv[0], token));

            byte b[] = pos_b.toString().getBytes(rb.getString("sen.charset"));
            offset += (b.length + 1);
            String pos_b_str = pos_b.toString();
            bw.write(pos_b_str, 0, pos_b_str.length());
            // bw.write(b, 0, b.length);
            bw.write(0);
            if (++di % 50000 == 0)
                log.info("" + di + "... ");
        }
        bw.close();
        // ----end of writing feature.cha ----
    } catch (Exception e) {
        log.error("Error: " + t);
        e.printStackTrace();
        System.exit(1);
    }

    rule1 = null;
    rule2 = null;
    rule3 = null;

    // /////////////////////////////////////////
    //
    // Step5. Sort lexs and write to file
    //
    log.info("(5/7): sorting lex... ");

    int value[] = new int[dicList.size()];
    char key[][] = new char[dicList.size()][];
    int spos = 0;
    int dsize = 0;
    int bsize = 0;
    String prev = "";
    Collections.sort(dicList);

    // /////////////////////////////////////////
    //
    // Step6. Writing Token Information
    //
    log.info("(6/7): writing token... ");
    try {
        // writer for token file.
        DataOutputStream out = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(rb.getString("token_file"))));

        // writing 'bos' and 'eos' and 'unknown' token.
        CToken token = new CToken();
        token.rcAttr2 = (short) dm1.getDicId(rb.getString("bos_pos"));
        token.rcAttr1 = (short) dm2.getDicId(rb.getString("bos_pos"));
        token.lcAttr = (short) dm3.getDicId(rb.getString("bos_pos"));
        token.write(out);

        token.rcAttr2 = (short) dm1.getDicId(rb.getString("eos_pos"));
        token.rcAttr1 = (short) dm2.getDicId(rb.getString("eos_pos"));
        token.lcAttr = (short) dm3.getDicId(rb.getString("eos_pos"));
        token.write(out);

        token.rcAttr2 = (short) dm1.getDicId(rb.getString("unknown_pos"));
        token.rcAttr1 = (short) dm2.getDicId(rb.getString("unknown_pos"));
        token.lcAttr = (short) dm3.getDicId(rb.getString("unknown_pos"));
        token.posID = -1;
        token.write(out);
        log.info("key size = " + key.length);
        for (int i = 0; i < key.length; i++) {
            String k = (String) ((PairObject) dicList.get(i)).key;
            if (!prev.equals(k) && i != 0) {
                key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray();
                value[dsize] = bsize + (spos << 8);
                dsize++;
                bsize = 1;
                spos = i;
            } else {
                bsize++;
            }
            prev = (String) ((PairObject) dicList.get(i)).key;
            ((CToken) (((PairObject) dicList.get(i)).value)).write(out);
        }
        out.flush();
        out.close();
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray();

    value[dsize] = bsize + (spos << 8);
    dsize++;

    dm1 = null;
    dm2 = null;
    dm3 = null;
    dicList = null;

    // /////////////////////////////////////////
    //
    // Step7. Build Double Array
    //
    log.info("(7/7): building Double-Array (size = " + dsize + ") ...");

    DoubleArrayTrie da = new DoubleArrayTrie();

    da.build(key, null, value, dsize);
    try {
        da.save(rb.getString("double_array_file"));
    } catch (Exception e) {
        e.printStackTrace();
    }

    log.info("total time = " + (System.currentTimeMillis() - start) / 1000 + "[ms]");
}

From source file:TwitterClustering.java

public static void main(String[] args) throws FileNotFoundException, IOException {
    // TODO code application logic here

    File outFile = new File(args[3]);
    Scanner s = new Scanner(new File(args[1])).useDelimiter(",");
    JSONParser parser = new JSONParser();
    Set<Cluster> clusterSet = new HashSet<Cluster>();
    HashMap<String, Tweet> tweets = new HashMap();
    FileWriter fw = new FileWriter(outFile.getAbsoluteFile());
    BufferedWriter bw = new BufferedWriter(fw);

    // init//from  w  w  w  . j a va 2 s .c o m
    try {

        Object obj = parser.parse(new FileReader(args[2]));

        JSONArray jsonArray = (JSONArray) obj;

        for (int i = 0; i < jsonArray.size(); i++) {

            Tweet twt = new Tweet();
            JSONObject jObj = (JSONObject) jsonArray.get(i);
            String text = jObj.get("text").toString();

            long sum = 0;
            for (int y = 0; y < text.toCharArray().length; y++) {

                sum += (int) text.toCharArray()[y];
            }

            String[] token = text.split(" ");
            String tID = jObj.get("id").toString();

            Set<String> mySet = new HashSet<String>(Arrays.asList(token));
            twt.setAttributeValue(sum);
            twt.setText(mySet);
            twt.setTweetID(tID);
            tweets.put(tID, twt);

        }

        // preparing initial clusters
        int i = 0;
        while (s.hasNext()) {
            String id = s.next();// id
            Tweet t = tweets.get(id.trim());
            clusterSet.add(new Cluster(i + 1, t, new LinkedList()));
            i++;
        }

        Iterator it = tweets.entrySet().iterator();

        for (int l = 0; l < 2; l++) { // limit to 25 iterations

            while (it.hasNext()) {
                Map.Entry me = (Map.Entry) it.next();

                // calculate distance to each centroid
                Tweet p = (Tweet) me.getValue();
                HashMap<Cluster, Float> distMap = new HashMap();

                for (Cluster clust : clusterSet) {

                    distMap.put(clust, jaccardDistance(p.getText(), clust.getCentroid().getText()));
                }

                HashMap<Cluster, Float> sorted = (HashMap<Cluster, Float>) sortByValue(distMap);

                sorted.keySet().iterator().next().getMembers().add(p);

            }

            // calculate new centroid and update Clusterset
            for (Cluster clust : clusterSet) {

                TreeMap<String, Long> tDistMap = new TreeMap();

                Tweet newCentroid = null;
                Long avgSumDist = new Long(0);
                for (int j = 0; j < clust.getMembers().size(); j++) {

                    avgSumDist += clust.getMembers().get(j).getAttributeValue();
                    tDistMap.put(clust.getMembers().get(j).getTweetID(),
                            clust.getMembers().get(j).getAttributeValue());
                }
                if (clust.getMembers().size() != 0) {
                    avgSumDist /= (clust.getMembers().size());
                }

                ArrayList<Long> listValues = new ArrayList<Long>(tDistMap.values());

                if (tDistMap.containsValue(findClosestNumber(listValues, avgSumDist))) {
                    // found closest
                    newCentroid = tweets
                            .get(getKeyByValue(tDistMap, findClosestNumber(listValues, avgSumDist)));
                    clust.setCentroid(newCentroid);
                }

            }

        }
        // create an iterator
        Iterator iterator = clusterSet.iterator();

        // check values
        while (iterator.hasNext()) {

            Cluster c = (Cluster) iterator.next();
            bw.write(c.getId() + "\t");
            System.out.print(c.getId() + "\t");

            for (Tweet t : c.getMembers()) {
                bw.write(t.getTweetID() + ", ");
                System.out.print(t.getTweetID() + ",");

            }
            bw.write("\n");
            System.out.println("");
        }

        System.out.println("");

        System.out.println("SSE " + sumSquaredErrror(clusterSet));

    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        bw.close();
        fw.close();
    }
}

From source file:apps.Source2XML.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption("i", null, true, "input file");
    options.addOption("o", null, true, "output file");
    options.addOption("reparse_xml", null, false, "reparse each XML entry to ensure the parser doesn't fail");

    Joiner commaJoin = Joiner.on(',');

    options.addOption("source_type", null, true,
            "document source type: " + commaJoin.join(SourceFactory.getDocSourceList()));

    Joiner spaceJoin = Joiner.on(' ');

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    BufferedWriter outputFile = null;

    int docNum = 0;

    if (USE_LEMMATIZER && USE_STEMMER) {
        System.err.println("Bug/inconsistent code: cann't use the stemmer and lemmatizer at the same time!");
        System.exit(1);/*from w w  w . j a  v a  2 s.  c  om*/
    }

    //Stemmer stemmer = new Stemmer();
    KrovetzStemmer stemmer = new KrovetzStemmer();

    System.out.println("Using Stanford NLP?        " + USE_STANFORD);
    System.out.println("Using Stanford lemmatizer? " + USE_LEMMATIZER);
    System.out.println("Using stemmer?             " + USE_STEMMER
            + (USE_STEMMER ? " (class: " + stemmer.getClass().getCanonicalName() + ")" : ""));

    try {
        CommandLine cmd = parser.parse(options, args);

        String inputFileName = null, outputFileName = null;

        if (cmd.hasOption("i")) {
            inputFileName = cmd.getOptionValue("i");
        } else {
            Usage("Specify 'input file'", options);
        }

        if (cmd.hasOption("o")) {
            outputFileName = cmd.getOptionValue("o");
        } else {
            Usage("Specify 'output file'", options);
        }

        outputFile = new BufferedWriter(
                new OutputStreamWriter(CompressUtils.createOutputStream(outputFileName)));

        String sourceName = cmd.getOptionValue("source_type");

        if (sourceName == null)
            Usage("Specify document source type", options);

        boolean reparseXML = options.hasOption("reparse_xml");

        DocumentSource inpDocSource = SourceFactory.createDocumentSource(sourceName, inputFileName);
        DocumentEntry inpDoc = null;
        TextCleaner textCleaner = new TextCleaner(
                new DictNoComments(new File("data/stopwords.txt"), true /* lower case */), USE_STANFORD,
                USE_LEMMATIZER);

        Map<String, String> outputMap = new HashMap<String, String>();

        outputMap.put(UtilConst.XML_FIELD_DOCNO, null);
        outputMap.put(UtilConst.XML_FIELD_TEXT, null);

        XmlHelper xmlHlp = new XmlHelper();

        if (reparseXML)
            System.out.println("Will reparse every XML entry to verify correctness!");

        while ((inpDoc = inpDocSource.next()) != null) {
            ++docNum;

            ArrayList<String> toks = textCleaner.cleanUp(inpDoc.mDocText);
            ArrayList<String> goodToks = new ArrayList<String>();
            for (String s : toks)
                if (s.length() <= MAX_WORD_LEN && // Exclude long and short words
                        s.length() >= MIN_WORD_LEN && isGoodWord(s))
                    goodToks.add(USE_STEMMER ? stemmer.stem(s) : s);

            String partlyCleanedText = spaceJoin.join(goodToks);
            String cleanText = XmlHelper.removeInvaildXMLChars(partlyCleanedText);
            // isGoodWord combiend with Stanford tokenizer should be quite restrictive already
            //cleanText = replaceSomePunct(cleanText);

            outputMap.replace(UtilConst.XML_FIELD_DOCNO, inpDoc.mDocId);
            outputMap.replace(UtilConst.XML_FIELD_TEXT, cleanText);

            String xml = xmlHlp.genXMLIndexEntry(outputMap);

            if (reparseXML) {
                try {
                    XmlHelper.parseDocWithoutXMLDecl(xml);
                } catch (Exception e) {
                    System.err.println("Error re-parsing xml for document ID: " + inpDoc.mDocId);
                    System.exit(1);
                }
            }

            /*
            {
              System.out.println(inpDoc.mDocId);
              System.out.println("=====================");
              System.out.println(partlyCleanedText);
              System.out.println("=====================");
              System.out.println(cleanText);
            } 
            */

            try {
                outputFile.write(xml);
                outputFile.write(NL);
            } catch (Exception e) {
                e.printStackTrace();
                System.err.println("Error processing/saving a document!");
            }

            if (docNum % 1000 == 0)
                System.out.println(String.format("Processed %d documents", docNum));
        }

    } catch (ParseException e) {
        e.printStackTrace();
        Usage("Cannot parse arguments" + e, options);
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    } finally {
        System.out.println(String.format("Processed %d documents", docNum));

        try {
            if (null != outputFile) {
                outputFile.close();
                System.out.println("Output file is closed! all seems to be fine...");
            }
        } catch (IOException e) {
            System.err.println("IO exception: " + e);
            e.printStackTrace();
        }
    }
}

From source file:apps.LuceneIndexer.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption("i", null, true, "input file");
    options.addOption("o", null, true, "output directory");
    options.addOption("r", null, true, "optional output TREC-format QREL file");

    options.addOption("bm25_b", null, true, "BM25 parameter: b");
    options.addOption("bm25_k1", null, true, "BM25 parameter: k1");
    options.addOption("bm25fixed", null, false, "use the fixed BM25 similarity");

    Joiner commaJoin = Joiner.on(',');
    Joiner spaceJoin = Joiner.on(' ');

    options.addOption("source_type", null, true,
            "document source type: " + commaJoin.join(SourceFactory.getDocSourceList()));

    // If you increase this value, you may need to modify the following line in *.sh file
    // export MAVEN_OPTS="-Xms8192m -server"
    double ramBufferSizeMB = 1024 * 8; // 8 GB

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    IndexWriter indexWriter = null;/*from   w w w  .  ja  va  2s. c o m*/
    BufferedWriter qrelWriter = null;

    int docNum = 0;

    try {
        CommandLine cmd = parser.parse(options, args);

        String inputFileName = null, outputDirName = null, qrelFileName = null;

        if (cmd.hasOption("i")) {
            inputFileName = cmd.getOptionValue("i");
        } else {
            Usage("Specify 'input file'", options);
        }

        if (cmd.hasOption("o")) {
            outputDirName = cmd.getOptionValue("o");
        } else {
            Usage("Specify 'index directory'", options);
        }

        if (cmd.hasOption("r")) {
            qrelFileName = cmd.getOptionValue("r");
        }

        String sourceName = cmd.getOptionValue("source_type");

        if (sourceName == null)
            Usage("Specify document source type", options);

        if (qrelFileName != null)
            qrelWriter = new BufferedWriter(new FileWriter(qrelFileName));

        File outputDir = new File(outputDirName);
        if (!outputDir.exists()) {
            if (!outputDir.mkdirs()) {
                System.out.println("couldn't create " + outputDir.getAbsolutePath());
                System.exit(1);
            }
        }
        if (!outputDir.isDirectory()) {
            System.out.println(outputDir.getAbsolutePath() + " is not a directory!");
            System.exit(1);
        }
        if (!outputDir.canWrite()) {
            System.out.println("Can't write to " + outputDir.getAbsolutePath());
            System.exit(1);
        }

        boolean useFixedBM25 = cmd.hasOption("bm25fixed");

        float bm25_k1 = UtilConst.BM25_K1_DEFAULT, bm25_b = UtilConst.BM25_B_DEFAULT;

        if (cmd.hasOption("bm25_k1")) {
            try {
                bm25_k1 = Float.parseFloat(cmd.getOptionValue("bm25_k1"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_k1'", options);
            }
        }

        if (cmd.hasOption("bm25_b")) {
            try {
                bm25_b = Float.parseFloat(cmd.getOptionValue("bm25_b"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_b'", options);
            }
        }

        EnglishAnalyzer analyzer = new EnglishAnalyzer();
        FSDirectory indexDir = FSDirectory.open(Paths.get(outputDirName));
        IndexWriterConfig indexConf = new IndexWriterConfig(analyzer);

        /*
            OpenMode.CREATE creates a new index or overwrites an existing one.
            https://lucene.apache.org/core/6_0_0/core/org/apache/lucene/index/IndexWriterConfig.OpenMode.html#CREATE
        */
        indexConf.setOpenMode(OpenMode.CREATE);
        indexConf.setRAMBufferSizeMB(ramBufferSizeMB);

        System.out.println(String.format("BM25 parameters k1=%f b=%f ", bm25_k1, bm25_b));

        if (useFixedBM25) {
            System.out.println(String.format("Using fixed BM25Simlarity, k1=%f b=%f", bm25_k1, bm25_b));
            indexConf.setSimilarity(new BM25SimilarityFix(bm25_k1, bm25_b));
        } else {
            System.out.println(String.format("Using Lucene BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b));
            indexConf.setSimilarity(new BM25Similarity(bm25_k1, bm25_b));
        }

        indexWriter = new IndexWriter(indexDir, indexConf);

        DocumentSource inpDocSource = SourceFactory.createDocumentSource(sourceName, inputFileName);
        DocumentEntry inpDoc = null;
        TextCleaner textCleaner = new TextCleaner(null);

        while ((inpDoc = inpDocSource.next()) != null) {
            ++docNum;

            Document luceneDoc = new Document();
            ArrayList<String> cleanedToks = textCleaner.cleanUp(inpDoc.mDocText);
            String cleanText = spaceJoin.join(cleanedToks);

            //        System.out.println(inpDoc.mDocId);
            //        System.out.println(cleanText);
            //        System.out.println("==============================");

            luceneDoc.add(new StringField(UtilConst.FIELD_ID, inpDoc.mDocId, Field.Store.YES));
            luceneDoc.add(new TextField(UtilConst.FIELD_TEXT, cleanText, Field.Store.YES));
            indexWriter.addDocument(luceneDoc);

            if (inpDoc.mIsRel != null && qrelWriter != null) {
                saveQrelOneEntry(qrelWriter, inpDoc.mQueryId, inpDoc.mDocId, inpDoc.mIsRel ? MAX_GRADE : 0);
            }
            if (docNum % 1000 == 0)
                System.out.println(String.format("Indexed %d documents", docNum));

        }

    } catch (ParseException e) {
        e.printStackTrace();
        Usage("Cannot parse arguments" + e, options);
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    } finally {
        System.out.println(String.format("Indexed %d documents", docNum));

        try {
            if (null != indexWriter)
                indexWriter.close();
            if (null != qrelWriter)
                qrelWriter.close();
        } catch (IOException e) {
            System.err.println("IO exception: " + e);
            e.printStackTrace();
        }
    }
}

From source file:diffhunter.DiffHunter.java

/**
 * @param args the command line arguments
 * @throws org.apache.commons.cli.ParseException
 * @throws java.io.IOException/*from   ww  w .ja  v a  2  s  . co  m*/
 */
public static void main(String[] args) throws ParseException, IOException {

    //String test_ = Paths.get("J:\\VishalData\\additional\\", "Sasan" + "_BDB").toAbsolutePath().toString();

    // TODO code application logic here
    /*args = new String[]
    {
    "-i", "-b", "J:\\VishalData\\additional\\Ptbp2_E18_5_cortex_CLIP_mm9_plus_strand_sorted.bed", "-r", "J:\\VishalData\\additional\\mouse_mm9.txt", "-o", "J:\\VishalData"
    };*/

    /*args = new String[]
    {
    "-c", "-r", "J:\\VishalData\\additional\\mouse_mm9.txt", "-1", "J:\\VishalData\\Ptbp2_Adult_testis_CLIP_mm9_plus_strand_sorted_BDB", "-2", "J:\\VishalData\\Ptbp2_E18_5_cortex_CLIP_mm9_plus_strand_sorted_BDB", "-w", "200", "-s", "50", "-o", "J:\\VishalData"
    };*/
    Options options = new Options();

    // add t option
    options.addOption("i", "index", false, "Indexing BED files.");
    options.addOption("b", "bed", true, "bed file to be indexed");
    options.addOption("o", "output", true, "Folder that the index/comparison file will be created.");
    options.addOption("r", "reference", true, "Reference annotation file to be used for indexing");
    options.addOption("c", "compare", false, "Finding differences between two conditions");
    options.addOption("1", "first", true, "First sample index location");
    options.addOption("2", "second", true, "Second sample index location");
    options.addOption("w", "window", true, "Length of window for identifying differences");
    options.addOption("s", "sliding", true, "Length of sliding");

    CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);

    boolean indexing = false;
    boolean comparing = false;

    //Indexing!
    if (cmd.hasOption("i")) {
        //if(cmd.hasOption("1"))
        //System.err.println("sasan");

        //System.out.println("sasa");
        indexing = true;

    } else if (cmd.hasOption("c")) {
        //System.err.println("");
        comparing = true;

    } else {
        //System.err.println("Option is not deteced.");
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("diffhunter", options);
        return;
    }

    //Indexing is selected
    //
    if (indexing == true) {
        //Since indexing is true.
        //User have to provide file for indexing.
        if (!(cmd.hasOption("o") || cmd.hasOption("r") || cmd.hasOption("b"))) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("diffhunter", options);
            return;
        }
        String bedfile_ = cmd.getOptionValue("b");
        String reference_file = cmd.getOptionValue("r");
        String folder_loc = cmd.getOptionValue("o");

        String sample_name = FilenameUtils.getBaseName(bedfile_);

        try (Database B2 = BerkeleyDB_Box.Get_BerkeleyDB(
                Paths.get(folder_loc, sample_name + "_BDB").toAbsolutePath().toString(), true, sample_name)) {
            Indexer indexing_ = new Indexer(reference_file);
            indexing_.Make_Index(B2, bedfile_,
                    Paths.get(folder_loc, sample_name + "_BDB").toAbsolutePath().toString());
            B2.close();

        }
    } else if (comparing == true) {
        if (!(cmd.hasOption("o") || cmd.hasOption("w") || cmd.hasOption("s") || cmd.hasOption("1")
                || cmd.hasOption("2"))) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("diffhunter", options);
            return;
        }
        String folder_loc = cmd.getOptionValue("o");
        int window_ = Integer.parseInt(cmd.getOptionValue("w"));
        //int window_=600;

        int slide_ = Integer.parseInt(cmd.getOptionValue("s"));

        String first = cmd.getOptionValue("1").replace("_BDB", "");
        String second = cmd.getOptionValue("2").replace("_BDB", "");
        String reference_file = cmd.getOptionValue("r");
        //String folder_loc=cmd.getOptionValue("o");

        String sample_name_first = FilenameUtils.getBaseName(first);
        String sample_name_second = FilenameUtils.getBaseName(second);

        Database B1 = BerkeleyDB_Box.Get_BerkeleyDB(first + "_BDB", false, sample_name_first);
        Database B2 = BerkeleyDB_Box.Get_BerkeleyDB(second + "_BDB", false, sample_name_second);

        List<String> first_condition_genes = Files
                .lines(Paths.get(first + "_BDB", sample_name_first + ".txt").toAbsolutePath())
                .collect(Collectors.toList());
        List<String> second_condition_genes = Files
                .lines(Paths.get(second + "_BDB", sample_name_second + ".txt").toAbsolutePath())
                .collect(Collectors.toList());
        System.out.println("First and second condition are loaded!!! ");
        List<String> intersection_ = new ArrayList<>(first_condition_genes);
        intersection_.retainAll(second_condition_genes);

        BufferedWriter output = new BufferedWriter(
                new FileWriter(Paths.get(folder_loc, "differences_" + window_ + "_s" + slide_ + "_c" + ".txt")
                        .toAbsolutePath().toString(), false));
        List<Result_Window> final_results = Collections.synchronizedList(new ArrayList<>());
        Worker_New worker_class = new Worker_New();
        worker_class.Read_Reference(reference_file);

        while (!intersection_.isEmpty()) {
            List<String> selected_genes = new ArrayList<>();
            //if (intersection_.size()<=10000){selected_genes.addAll(intersection_.subList(0, intersection_.size()));}
            //else selected_genes.addAll(intersection_.subList(0, 10000));
            if (intersection_.size() <= intersection_.size()) {
                selected_genes.addAll(intersection_.subList(0, intersection_.size()));
            } else {
                selected_genes.addAll(intersection_.subList(0, intersection_.size()));
            }
            intersection_.removeAll(selected_genes);
            //System.out.println("Intersection count is:"+intersection_.size());
            //final List<Result_Window> resultssss_=new ArrayList<>();
            IntStream.range(0, selected_genes.size()).parallel().forEach(i -> {
                System.out.println(selected_genes.get(i) + "\tprocessing......");
                String gene_of_interest = selected_genes.get(i);//"ENSG00000142657|PGD";//intersection_.get(6);////"ENSG00000163395|IGFN1";//"ENSG00000270066|SCARNA2";
                int start = worker_class.dic_genes.get(gene_of_interest).start_loc;
                int end = worker_class.dic_genes.get(gene_of_interest).end_loc;

                Map<Integer, Integer> first_ = Collections.EMPTY_MAP;
                try {
                    first_ = BerkeleyDB_Box.Get_Coord_Read(B1, gene_of_interest);
                } catch (IOException | ClassNotFoundException ex) {
                    Logger.getLogger(DiffHunter.class.getName()).log(Level.SEVERE, null, ex);
                }

                Map<Integer, Integer> second_ = Collections.EMPTY_MAP;
                try {
                    second_ = BerkeleyDB_Box.Get_Coord_Read(B2, gene_of_interest);
                } catch (IOException | ClassNotFoundException ex) {
                    Logger.getLogger(DiffHunter.class.getName()).log(Level.SEVERE, null, ex);
                }
                List<Window> top_windows_first = worker_class.Get_Top_Windows(window_, first_, slide_);
                List<Window> top_windows_second = worker_class.Get_Top_Windows(window_, second_, slide_);
                //System.out.println("passed for window peak call for gene \t"+selected_genes.get(i));
                // System.out.println("top_window_first_Count\t"+top_windows_first.size());
                // System.out.println("top_window_second_Count\t"+top_windows_second.size());
                if (top_windows_first.isEmpty() && top_windows_second.isEmpty()) {
                    return;
                }

                List<Result_Window> res_temp = new Worker_New().Get_Significant_Windows(gene_of_interest, start,
                        end, top_windows_first, top_windows_second, second_, first_, sample_name_first,
                        sample_name_second, 0.01);
                if (!res_temp.isEmpty()) {
                    final_results.addAll(res_temp);//final_results.addAll(worker_class.Get_Significant_Windows(gene_of_interest, start, end, top_windows_first, top_windows_second, second_, first_, first_condition, second_condition, 0.01));

                } //System.out.println(selected_genes.get(i)+"\tprocessed.");

            });

            /*selected_genes.parallelStream().forEach(i ->
             {
                    
                    
             });*/
            List<Double> pvals = new ArrayList<>();

            for (int i = 0; i < final_results.size(); i++) {
                pvals.add(final_results.get(i).p_value);
            }
            List<Double> qvals = MultipleTestCorrection.benjaminiHochberg(pvals);

            System.out.println("Writing to file...");
            output.append("Gene_Symbol\tContributing_Sample\tStart\tEnd\tOddsRatio\tp_Value\tFDR");
            output.newLine();

            for (int i = 0; i < final_results.size(); i++) {
                Result_Window item = final_results.get(i);
                output.append(item.associated_gene_symbol + "\t" + item.contributing_windows + "\t"
                        + item.start_loc + "\t" + item.end_loc + "\t" + item.oddsratio_ + "\t" + item.p_value
                        + "\t" + qvals.get(i)); //+ "\t" + item.average_other_readcount_cotributing + "\t" + item.average_other_readcount_cotributing + "\t" + item.average_window_readcount_non + "\t" + item.average_other_readcount_non);
                output.newLine();
            }

            /* for (Result_Window item : final_results)
             {
            output.append(item.associated_gene_symbol + "\t" + item.contributing_windows + "\t" + item.start_loc + "\t" + item.end_loc + "\t" + item.oddsratio_ + "\t" + item.p_value); //+ "\t" + item.average_other_readcount_cotributing + "\t" + item.average_other_readcount_cotributing + "\t" + item.average_window_readcount_non + "\t" + item.average_other_readcount_non);
            output.newLine();
             }
               */
            final_results.clear();

        }
        output.close();

    }
    System.out.println("Done.");

}

From source file:com.cws.esolutions.security.main.PasswordUtility.java

public static void main(final String[] args) {
    final String methodName = PasswordUtility.CNAME + "#main(final String[] args)";

    if (DEBUG) {// www . j  a  v a  2 s. c  o  m
        DEBUGGER.debug("Value: {}", methodName);
    }

    if (args.length == 0) {
        HelpFormatter usage = new HelpFormatter();
        usage.printHelp(PasswordUtility.CNAME, options, true);

        System.exit(1);
    }

    BufferedReader bReader = null;
    BufferedWriter bWriter = null;

    try {
        // load service config first !!
        SecurityServiceInitializer.initializeService(PasswordUtility.SEC_CONFIG, PasswordUtility.LOG_CONFIG,
                false);

        if (DEBUG) {
            DEBUGGER.debug("Options options: {}", options);

            for (String arg : args) {
                DEBUGGER.debug("Value: {}", arg);
            }
        }

        CommandLineParser parser = new PosixParser();
        CommandLine commandLine = parser.parse(options, args);

        if (DEBUG) {
            DEBUGGER.debug("CommandLineParser parser: {}", parser);
            DEBUGGER.debug("CommandLine commandLine: {}", commandLine);
            DEBUGGER.debug("CommandLine commandLine.getOptions(): {}", (Object[]) commandLine.getOptions());
            DEBUGGER.debug("CommandLine commandLine.getArgList(): {}", commandLine.getArgList());
        }

        final SecurityConfigurationData secConfigData = PasswordUtility.svcBean.getConfigData();
        final SecurityConfig secConfig = secConfigData.getSecurityConfig();
        final PasswordRepositoryConfig repoConfig = secConfigData.getPasswordRepo();
        final SystemConfig systemConfig = secConfigData.getSystemConfig();

        if (DEBUG) {
            DEBUGGER.debug("SecurityConfigurationData secConfig: {}", secConfigData);
            DEBUGGER.debug("SecurityConfig secConfig: {}", secConfig);
            DEBUGGER.debug("RepositoryConfig secConfig: {}", repoConfig);
            DEBUGGER.debug("SystemConfig systemConfig: {}", systemConfig);
        }

        if (commandLine.hasOption("encrypt")) {
            if ((StringUtils.isBlank(repoConfig.getPasswordFile()))
                    || (StringUtils.isBlank(repoConfig.getSaltFile()))) {
                System.err.println("The password/salt files are not configured. Entries will not be stored!");
            }

            File passwordFile = FileUtils.getFile(repoConfig.getPasswordFile());
            File saltFile = FileUtils.getFile(repoConfig.getSaltFile());

            if (DEBUG) {
                DEBUGGER.debug("File passwordFile: {}", passwordFile);
                DEBUGGER.debug("File saltFile: {}", saltFile);
            }

            final String entryName = commandLine.getOptionValue("entry");
            final String username = commandLine.getOptionValue("username");
            final String password = commandLine.getOptionValue("password");
            final String salt = RandomStringUtils.randomAlphanumeric(secConfig.getSaltLength());

            if (DEBUG) {
                DEBUGGER.debug("String entryName: {}", entryName);
                DEBUGGER.debug("String username: {}", username);
                DEBUGGER.debug("String password: {}", password);
                DEBUGGER.debug("String salt: {}", salt);
            }

            final String encodedSalt = PasswordUtils.base64Encode(salt);
            final String encodedUserName = PasswordUtils.base64Encode(username);
            final String encryptedPassword = PasswordUtils.encryptText(password, salt,
                    secConfig.getSecretAlgorithm(), secConfig.getIterations(), secConfig.getKeyBits(),
                    secConfig.getEncryptionAlgorithm(), secConfig.getEncryptionInstance(),
                    systemConfig.getEncoding());
            final String encodedPassword = PasswordUtils.base64Encode(encryptedPassword);

            if (DEBUG) {
                DEBUGGER.debug("String encodedSalt: {}", encodedSalt);
                DEBUGGER.debug("String encodedUserName: {}", encodedUserName);
                DEBUGGER.debug("String encodedPassword: {}", encodedPassword);
            }

            if (commandLine.hasOption("store")) {
                try {
                    new File(passwordFile.getParent()).mkdirs();
                    new File(saltFile.getParent()).mkdirs();

                    boolean saltFileExists = (saltFile.exists()) ? true : saltFile.createNewFile();

                    if (DEBUG) {
                        DEBUGGER.debug("saltFileExists: {}", saltFileExists);
                    }

                    // write the salt out first
                    if (!(saltFileExists)) {
                        throw new IOException("Unable to create salt file");
                    }

                    boolean passwordFileExists = (passwordFile.exists()) ? true : passwordFile.createNewFile();

                    if (!(passwordFileExists)) {
                        throw new IOException("Unable to create password file");
                    }

                    if (commandLine.hasOption("replace")) {
                        File[] files = new File[] { saltFile, passwordFile };

                        if (DEBUG) {
                            DEBUGGER.debug("File[] files: {}", (Object) files);
                        }

                        for (File file : files) {
                            if (DEBUG) {
                                DEBUGGER.debug("File: {}", file);
                            }

                            String currentLine = null;
                            File tmpFile = new File(FileUtils.getTempDirectory() + "/" + "tmpFile");

                            if (DEBUG) {
                                DEBUGGER.debug("File tmpFile: {}", tmpFile);
                            }

                            bReader = new BufferedReader(new FileReader(file));
                            bWriter = new BufferedWriter(new FileWriter(tmpFile));

                            while ((currentLine = bReader.readLine()) != null) {
                                if (!(StringUtils.equals(currentLine.trim().split(",")[0], entryName))) {
                                    bWriter.write(currentLine + System.getProperty("line.separator"));
                                    bWriter.flush();
                                }
                            }

                            bWriter.close();

                            FileUtils.deleteQuietly(file);
                            FileUtils.copyFile(tmpFile, file);
                            FileUtils.deleteQuietly(tmpFile);
                        }
                    }

                    FileUtils.writeStringToFile(saltFile, entryName + "," + encodedUserName + "," + encodedSalt
                            + System.getProperty("line.separator"), true);
                    FileUtils.writeStringToFile(passwordFile, entryName + "," + encodedUserName + ","
                            + encodedPassword + System.getProperty("line.separator"), true);
                } catch (IOException iox) {
                    ERROR_RECORDER.error(iox.getMessage(), iox);
                }
            }

            System.out.println("Entry Name " + entryName + " stored.");
        }

        if (commandLine.hasOption("decrypt")) {
            String saltEntryName = null;
            String saltEntryValue = null;
            String decryptedPassword = null;
            String passwordEntryName = null;

            if ((StringUtils.isEmpty(commandLine.getOptionValue("entry"))
                    && (StringUtils.isEmpty(commandLine.getOptionValue("username"))))) {
                throw new ParseException("No entry or username was provided to decrypt.");
            }

            if (StringUtils.isEmpty(commandLine.getOptionValue("username"))) {
                throw new ParseException("no entry provided to decrypt");
            }

            String entryName = commandLine.getOptionValue("entry");
            String username = commandLine.getOptionValue("username");

            if (DEBUG) {
                DEBUGGER.debug("String entryName: {}", entryName);
                DEBUGGER.debug("String username: {}", username);
            }

            File passwordFile = FileUtils.getFile(repoConfig.getPasswordFile());
            File saltFile = FileUtils.getFile(repoConfig.getSaltFile());

            if (DEBUG) {
                DEBUGGER.debug("File passwordFile: {}", passwordFile);
                DEBUGGER.debug("File saltFile: {}", saltFile);
            }

            if ((!(saltFile.canRead())) || (!(passwordFile.canRead()))) {
                throw new IOException(
                        "Unable to read configured password/salt file. Please check configuration and/or permissions.");
            }

            for (String lineEntry : FileUtils.readLines(saltFile, systemConfig.getEncoding())) {
                saltEntryName = lineEntry.split(",")[0];

                if (DEBUG) {
                    DEBUGGER.debug("String saltEntryName: {}", saltEntryName);
                }

                if (StringUtils.equals(saltEntryName, entryName)) {
                    saltEntryValue = PasswordUtils.base64Decode(lineEntry.split(",")[2]);

                    break;
                }
            }

            if (StringUtils.isEmpty(saltEntryValue)) {
                throw new SecurityException("No entries were found that matched the provided information");
            }

            for (String lineEntry : FileUtils.readLines(passwordFile, systemConfig.getEncoding())) {
                passwordEntryName = lineEntry.split(",")[0];

                if (DEBUG) {
                    DEBUGGER.debug("String passwordEntryName: {}", passwordEntryName);
                }

                if (StringUtils.equals(passwordEntryName, saltEntryName)) {
                    String decodedPassword = PasswordUtils.base64Decode(lineEntry.split(",")[2]);

                    decryptedPassword = PasswordUtils.decryptText(decodedPassword, saltEntryValue,
                            secConfig.getSecretAlgorithm(), secConfig.getIterations(), secConfig.getKeyBits(),
                            secConfig.getEncryptionAlgorithm(), secConfig.getEncryptionInstance(),
                            systemConfig.getEncoding());

                    break;
                }
            }

            if (StringUtils.isEmpty(decryptedPassword)) {
                throw new SecurityException("No entries were found that matched the provided information");
            }

            System.out.println(decryptedPassword);
        } else if (commandLine.hasOption("encode")) {
            System.out.println(PasswordUtils.base64Encode((String) commandLine.getArgList().get(0)));
        } else if (commandLine.hasOption("decode")) {
            System.out.println(PasswordUtils.base64Decode((String) commandLine.getArgList().get(0)));
        }
    } catch (IOException iox) {
        ERROR_RECORDER.error(iox.getMessage(), iox);

        System.err.println("An error occurred during processing: " + iox.getMessage());
        System.exit(1);
    } catch (ParseException px) {
        ERROR_RECORDER.error(px.getMessage(), px);

        System.err.println("An error occurred during processing: " + px.getMessage());
        System.exit(1);
    } catch (SecurityException sx) {
        ERROR_RECORDER.error(sx.getMessage(), sx);

        System.err.println("An error occurred during processing: " + sx.getMessage());
        System.exit(1);
    } catch (SecurityServiceException ssx) {
        ERROR_RECORDER.error(ssx.getMessage(), ssx);
        System.exit(1);
    } finally {
        try {
            if (bReader != null) {
                bReader.close();
            }

            if (bWriter != null) {
                bReader.close();
            }
        } catch (IOException iox) {
        }
    }

    System.exit(0);
}