Example usage for org.apache.lucene.search TopScoreDocCollector create

Introduction

This page collects example usages of org.apache.lucene.search.TopScoreDocCollector.create.

Prototype

public static TopScoreDocCollector create(int numHits, int totalHitsThreshold) 

Document

Creates a new TopScoreDocCollector given the number of hits to collect and the number of hits to count accurately.
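
Note that the usage examples below were written against Lucene 4.x, where the second argument was a boolean (docsScoredInOrder); in the signature shown above (Lucene 8+) it is an int totalHitsThreshold, the number of hits to count exactly before the total becomes a lower bound. The following is a minimal sketch against the current two-int signature; the index path "/tmp/index" and the "contents" field are illustrative placeholders, not taken from the examples below.

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;

public class CreateSketch {
    public static void main(String[] args) throws Exception {
        // open an existing index (path is an illustrative placeholder)
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // keep the top 10 hits; count totals exactly only up to 1,000 matches
            TopScoreDocCollector collector = TopScoreDocCollector.create(10, 1000);
            searcher.search(new TermQuery(new Term("contents", "lucene")), collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            System.out.println("hits returned: " + hits.length);
        }
    }
}

Past the totalHitsThreshold, the reported total hit count is a lower bound rather than an exact count, which lets the collector skip scoring non-competitive documents.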

Usage

From source file:IR.LuceneModel.java

public static void main(String[] args) throws IOException {
    System.out.println(
            "Enter the FULL path where the index will be created: (e.g. /Usr/index or c:\\temp\\index)");

    String indexLocation = null;
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    String s = br.readLine();

    LuceneModel indexer = null;
    try {
        indexLocation = s;
        indexer = new LuceneModel(s);
    } catch (Exception ex) {
        System.out.println("Cannot create index..." + ex.getMessage());
        System.exit(-1);
    }

    // ===================================================
    // read input from the user until they enter q to quit
    // ===================================================
    while (!s.equalsIgnoreCase("q")) {
        try {
            System.out.println(
                    "Enter the FULL path to add into the index (q=quit): (e.g. /home/mydir/docs or c:\\Users\\mydir\\docs)");
            System.out.println("[Acceptable file types: .xml, .html, .html, .txt]");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }

            // try to add file into the index
            indexer.indexFileOrDirectory(s);
        } catch (Exception e) {
            System.out.println("Error indexing " + s + " : " + e.getMessage());
        }
    }

    // ===================================================
    // after adding, we always have to call closeIndex();
    // otherwise the index is not created
    // ===================================================
    indexer.closeIndex();

    // =========================================================
    // Now search
    // =========================================================
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector; // a fresh collector is created for each query below
    s = "";
    ScoreDoc[] hits;
    while (!s.equalsIgnoreCase("q")) {
        try {
            System.out.println("Enter the search query (q=quit):");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }

            File queryFile = new File(s);
            BufferedReader r = new BufferedReader(new FileReader(queryFile));

            String query;
            int count = 0;
            String q1 = "LuceneResults.txt";

            File luceneFile = new File(q1);
            luceneFile.createNewFile();
            FileWriter writer = new FileWriter(luceneFile);

            while ((query = r.readLine()) != null) {
                try {
                    count++;
                    collector = TopScoreDocCollector.create(100, true);
                    QueryParser parser = new QueryParser(Version.LUCENE_47, "contents", analyzer);
                    Query q = parser.parse(query.replace('/', ' '));
                    searcher.search(q, collector);

                    hits = collector.topDocs().scoreDocs;

                    int query_id;
                    query_id = count; // change this for new query 

                    System.out.println("Found " + hits.length + " hits.");

                    for (int i = 0; i < hits.length; ++i) {
                        int docId = hits[i].doc;
                        Document d = searcher.doc(docId);
                        // use replace(), not replaceAll(): the pattern argument is a
                        // regex, where "." would match any character
                        System.out.println(query_id + ". " + d.get("path").replace(".html", "") + " "
                                + (i + 1) + " " + hits[i].score + " LuceneModel");
                        writer.write(query_id + " Q0 " + d.get("path").replace(".html", "")
                                + " " + (i + 1) + " " + hits[i].score + " LuceneModel\n");
                        writer.flush();

                    }

                } catch (Exception e) {
                    e.printStackTrace();
                    continue;
                }
                // 5. term stats -- note that "s" still holds the query-file path
                // here, so these statistics refer to that string, not a query term
                Term termInstance = new Term("contents", s);
                long termFreq = reader.totalTermFreq(termInstance);
                long docCount = reader.docFreq(termInstance);
                System.out.println(s + " Term Frequency " + termFreq + " - Document Frequency " + docCount);
            }
            r.close();
            writer.close();
        } catch (Exception e) {
            System.out.println("Error searching " + s + " : " + e.getMessage());
            break;
        }

    }

}

From source file:it.cnr.isti.hpc.dexter.lucene.LuceneHelper.java

License:Apache License

/**
 * @param query
 *            - a query
 * @param field
 *            - the field where to search the query
 * @return number of documents containing the text in query in the given
 *         fields
 */
public int getFreq(String query, String field) {
    Query q = null;
    searcher = getSearcher();
    TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);

    Text t = new Text(query).disableStopwords();
    PhraseQuery pq = new PhraseQuery();
    int i = 0;
    for (String term : t.getTerms()) {
        pq.add(new Term(field, term), i++);
    }
    q = pq;
    logger.debug(q.toString());
    try {
        searcher.search(q, collector);
    } catch (IOException e) {
        logger.error("querying the index: {} ", e.toString());
        return -1;
    }
    return collector.getTotalHits();
}
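
A hypothetical call, assuming an already-constructed LuceneHelper instance (construction is not shown in this source) and illustrative query and field values:

// usage sketch: "helper" is an already-constructed LuceneHelper
int freq = helper.getFreq("information retrieval", "content");
if (freq >= 0) { // -1 signals an IOException during the search
    System.out.println(freq + " documents contain the phrase");
}

Note that the collector is created with numHits = 1: only getTotalHits() is read, and the total count is tracked regardless of how many top documents the collector retains.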

From source file:it.cnr.isti.hpc.dexter.lucene.LuceneHelper.java

License:Apache License

/**
 * @param query
 *            entities containing the text of the query as a phrase
 *            (consecutive terms) will be returned.
 * @param field
 *            the field where the query must be performed (summary, content,
 *            title, ...).
 * @param n
 *            the max number of results to produce.
 * @return the top wiki-ids matching the query
 */
public List<Integer> query(String query, String field, int n) {
    searcher = getSearcher();
    TopScoreDocCollector collector = TopScoreDocCollector.create(n, true);
    List<Integer> results = new ArrayList<Integer>();
    Query q = null;

    try {
        q = new QueryParser(Version.LUCENE_41, field, new StandardAnalyzer(Version.LUCENE_41))
                .parse("\"" + query + "\"");
    } catch (ParseException e) {
        logger.error("querying the index: {} ", e.toString());
        return results;
    }

    try {
        searcher.search(q, collector);
    } catch (IOException e) {
        logger.error("querying the index: {} ", e.toString());
        return results;
    }

    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        results.add(getWikiId(docId));
    }

    logger.debug("query {} docs {}", query, results);
    return results;
}
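
A similarly hypothetical call against an already-constructed LuceneHelper; the method wraps the query in quotes itself, so the caller passes a plain phrase:

// usage sketch: "helper" is an already-constructed LuceneHelper
List<Integer> ids = helper.query("new york city", "title", 10);
for (Integer wikiId : ids) {
    System.out.println("matching wiki-id: " + wikiId);
}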

From source file:it.eng.spagobi.commons.utilities.indexing.LuceneSearcher.java

License:Mozilla Public License

public static HashMap<String, Object> searchIndex(IndexSearcher searcher, String queryString, String index,
        String[] fields, String metaDataToSearch) throws IOException, ParseException {
    logger.debug("IN");
    HashMap<String, Object> objectsToReturn = new HashMap<String, Object>();

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    BooleanQuery andQuery = new BooleanQuery();
    if (metaDataToSearch != null) {
        //search for query string on metadata name field and content
        //where metadata name = metaDataToSearch
        Query queryMetadata = new TermQuery(new Term(IndexingConstants.METADATA, metaDataToSearch));
        andQuery.add(queryMetadata, BooleanClause.Occur.MUST);
    }
    Query query = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer).parse(queryString);
    andQuery.add(query, BooleanClause.Occur.MUST);
    Query tenantQuery = new TermQuery(new Term(IndexingConstants.TENANT, getTenant()));
    andQuery.add(tenantQuery, BooleanClause.Occur.MUST);
    logger.debug("Searching for: " + andQuery.toString());
    int hitsPerPage = 50;

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);

    searcher.search(andQuery, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    // pass the raw hits back to the calling action
    objectsToReturn.put("hits", hits);

    //highlighter
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(andQuery));
    if (hits != null) {
        logger.debug("hits size: " + hits.length);
        for (int i = 0; i < hits.length; i++) {
            ScoreDoc hit = hits[i];
            Document doc = searcher.doc(hit.doc);
            String biobjId = doc.get(IndexingConstants.BIOBJ_ID);

            String[] subobjNames = doc.getValues(IndexingConstants.SUBOBJ_NAME);
            if (subobjNames != null && subobjNames.length != 0) {
                String views = "";
                for (int k = 0; k < subobjNames.length; k++) {
                    views += subobjNames[k] + " ";
                }
                objectsToReturn.put(biobjId + "-views", views);
            }
            String summary = "";
            if (highlighter != null) {
                String[] summaries;
                try {
                    Integer idobj = (Integer.valueOf(biobjId));

                    String contentToSearchOn = fillSummaryText(idobj);

                    summaries = highlighter.getBestFragments(new StandardAnalyzer(Version.LUCENE_CURRENT),
                            IndexingConstants.CONTENTS, contentToSearchOn, 3);
                    StringBuffer summaryBuffer = new StringBuffer();
                    if (summaries.length > 0) {
                        summaryBuffer.append(summaries[0]);
                    }
                    for (int j = 1; j < summaries.length; j++) {
                        summaryBuffer.append(" ... ");
                        summaryBuffer.append(summaries[j]);
                    }
                    summary = summaryBuffer.toString();
                    //get only a portion of summary
                    if (summary.length() > 101) {
                        summary = summary.substring(0, 100);
                        summary += "...";
                    }
                    objectsToReturn.put(biobjId, summary);
                } catch (InvalidTokenOffsetsException e) {
                    logger.error(e.getMessage(), e);
                } catch (NumberFormatException e) {
                    logger.error(e.getMessage(), e);
                } catch (Exception e) {
                    logger.error(e.getMessage(), e);
                }
            }
        }
    }
    int numTotalHits = collector.getTotalHits();
    logger.info(numTotalHits + " total matching documents");

    logger.debug("OUT");
    return objectsToReturn;

}
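
The collector above gathers 5 * hitsPerPage documents so the caller can page through results client-side. A sketch of pulling out one page with TopDocsCollector's topDocs(start, howMany) overload, assuming 0-based page numbering (the page index is illustrative):

// paging sketch: fetch only the hits for one page of results
int page = 2; // illustrative page index
TopDocs pageDocs = collector.topDocs(page * hitsPerPage, hitsPerPage);
for (ScoreDoc hit : pageDocs.scoreDocs) {
    System.out.println(hit.doc + " score=" + hit.score);
}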

From source file:it.eng.spagobi.commons.utilities.indexing.LuceneSearcher.java

License:Mozilla Public License

public static HashMap<String, Object> searchIndexFuzzy(IndexSearcher searcher, String queryString, String index,
        String[] fields, String metaDataToSearch) throws IOException, ParseException {
    logger.debug("IN");
    HashMap<String, Object> objectsToReturn = new HashMap<String, Object>();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    BooleanQuery orQuery = new BooleanQuery();
    BooleanQuery andQuery = new BooleanQuery();
    for (int i = 0; i < fields.length; i++) {
        Query query = new FuzzyQuery(new Term(fields[i], queryString));
        query = query.rewrite(searcher.getIndexReader());
        orQuery.add(query, BooleanClause.Occur.SHOULD);
    }
    andQuery.add(orQuery, BooleanClause.Occur.MUST);
    if (metaDataToSearch != null) {
        //search for query string on metadata name field and content
        //where metadata name = metaDataToSearch
        Query queryMetadata = new TermQuery(new Term(IndexingConstants.METADATA, metaDataToSearch));
        andQuery.add(queryMetadata, BooleanClause.Occur.MUST);
    }

    Query tenantQuery = new TermQuery(new Term(IndexingConstants.TENANT, getTenant()));
    andQuery.add(tenantQuery, BooleanClause.Occur.MUST);

    logger.debug("Searching for: " + andQuery.toString());
    int hitsPerPage = 50;

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
    searcher.search(andQuery, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    objectsToReturn.put("hits", hits);

    //highlighter
    //orQuery = orQuery.rewrite(searcher.getIndexReader());
    //andQuery = andQuery.rewrite(searcher.getIndexReader());
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(andQuery));

    if (hits != null) {
        for (int i = 0; i < hits.length; i++) {
            ScoreDoc hit = hits[i];
            Document doc = searcher.doc(hit.doc);
            String biobjId = doc.get(IndexingConstants.BIOBJ_ID);
            String summary = " ";
            if (highlighter != null) {
                String[] summaries;
                try {
                    Integer idobj = (Integer.valueOf(biobjId));

                    String contentToSearchOn = fillSummaryText(idobj);
                    summaries = highlighter.getBestFragments(new StandardAnalyzer(Version.LUCENE_CURRENT),
                            IndexingConstants.CONTENTS, contentToSearchOn, 3);

                    StringBuffer summaryBuffer = new StringBuffer();
                    if (summaries.length > 0) {
                        summaryBuffer.append(summaries[0]);
                    }
                    for (int j = 1; j < summaries.length; j++) {
                        summaryBuffer.append(" ... ");
                        summaryBuffer.append(summaries[j]);
                    }
                    summary = summaryBuffer.toString();
                    //get only a portion of summary
                    if (summary.length() > 101) {
                        summary = summary.substring(0, 100);
                        summary += "...";
                    }
                    objectsToReturn.put(biobjId, summary);
                } catch (InvalidTokenOffsetsException e) {
                    logger.error(e.getMessage(), e);
                } catch (Exception e) {
                    logger.error(e.getMessage(), e);
                }

            }
        }
    }

    int numTotalHits = collector.getTotalHits();
    logger.info(numTotalHits + " total matching documents");

    logger.debug("OUT");
    return objectsToReturn;

}

From source file:lucene.searchengine.LuceneSearchEngine.java

public static void main(String[] args) throws IOException {
    System.out.println(
            "Enter the FULL path where the index will be created: (e.g. /Usr/index or c:\\temp\\index)");

    String indexLocation = null;
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    String s = br.readLine();

    LuceneSearchEngine indexer = null;
    try {
        indexLocation = s;
        indexer = new LuceneSearchEngine(s);
    } catch (Exception ex) {
        System.out.println("Cannot create index..." + ex.getMessage());
        System.exit(-1);
    }

    // ===================================================
    // read input from the user until they enter q to quit
    // ===================================================
    while (!s.equalsIgnoreCase("q")) {
        try {
            System.out.println(
                    "Enter the FULL path to add into the index (q=quit): (e.g. /home/mydir/docs or c:\\Users\\mydir\\docs)");
            System.out.println("[Acceptable file types: .xml, .html, .html, .txt]");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }

            // try to add file into the index
            indexer.indexFileOrDirectory(s);
        } catch (Exception e) {
            System.out.println("Error indexing " + s + " : " + e.getMessage());
        }
    }

    // ===================================================
    // after adding, we always have to call closeIndex();
    // otherwise the index is not created
    // ===================================================
    indexer.closeIndex();

    // =========================================================
    // Now search
    // =========================================================
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));

    //===========================================================
    //  GET Term frequency
    //===========================================================
    // Create an output file to store the term,term-frequency pairs.
    PrintWriter tfwriter = new PrintWriter("..\\term-frequency.csv");

    Fields fields = MultiFields.getFields(reader);
    HashMap<String, Long> tfmap = new HashMap<String, Long>();
    Terms terms = fields.terms("contents");
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef bref = null;
    while ((bref = termsEnum.next()) != null) {
        // BytesRef stores UTF-8 bytes; decode them explicitly rather than
        // relying on the platform default charset
        String term_name = bref.utf8ToString();
        Term term_instance = new Term("contents", term_name);
        long termFrequency = reader.totalTermFreq(term_instance);
        tfmap.put(term_name, termFrequency);
    }
    System.out.println(tfmap.size());
    for (String key : tfmap.keySet()) {
        tfwriter.write(key + "," + tfmap.get(key));
        tfwriter.write("\n");
    }
    tfwriter.close();
    //====================================================================
    // End of term-frequency code
    //====================================================================
    IndexSearcher searcher = new IndexSearcher(reader);
    s = "";
    while (!s.equalsIgnoreCase("q")) {
        TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
        try {
            System.out.println("Enter the search query (q=quit):");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }

            Query q = new QueryParser(Version.LUCENE_47, "contents", sAnalyzer).parse(s);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            // 4. display results
            System.out.println("Found " + hits.length + " hits.");
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                System.out.println((i + 1) + ". " + d.get("filename") + " score=" + hits[i].score);
            }
            // 5. term stats: look up the raw query string as a single term
            // in the "contents" field (meaningful for one-word queries)
            Term termInstance = new Term("contents", s);
            long termFreq = reader.totalTermFreq(termInstance);
            long docCount = reader.docFreq(termInstance);
            System.out.println(s + " Term Frequency " + termFreq + " - Document Frequency " + docCount);
        } catch (Exception e) {
            System.out.println("Error searching " + s + " : " + e.getMessage());
            break;
        }
    }
}

From source file:lucenesearche.HW3.java

public static void main(String[] args) throws IOException {
    System.out.println(
            "Enter the FULL path where the index will be created: (e.g. /Usr/index or c:\\temp\\index)");

    String indexLocation = null;
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    String s = br.readLine();

    HW3 indexer = null;
    try {
        indexLocation = s;
        indexer = new HW3(s);
    } catch (Exception ex) {
        System.out.println("Cannot create index..." + ex.getMessage());
        System.exit(-1);
    }
    String query1, query2, query3, query4;
    query1 = "Lucene_Results_Stopped.txt";
    query2 = "Lucene_Q2_top100.txt";
    query3 = "Lucene_Q3_top100.txt";
    query4 = "Lucene_Q4_top100.txt";

    File luceneFile = new File(query1); // change filename for each query
    int query_id;

    // ===================================================
    // read input from the user until they enter q to quit
    // ===================================================
    while (!s.equalsIgnoreCase("q")) {
        try {
            System.out.println(
                    "Enter the FULL path to add into the index (q=quit): (e.g. /home/mydir/docs or c:\\Users\\mydir\\docs)");
            System.out.println("[Acceptable file types: .xml, .html, .html, .txt]");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }

            // try to add file into the index
            indexer.indexFileOrDirectory(s);
        } catch (Exception e) {
            System.out.println("Error indexing " + s + " : " + e.getMessage());
        }
    }

    // ===================================================
    // after adding, we always have to call closeIndex();
    // otherwise the index is not created
    // ===================================================
    indexer.closeIndex();

    // =========================================================
    // Now search
    // =========================================================
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Formatter f = new Formatter();

    s = "";
    File file1 = new File(
            "C:\\Users\\shantanu\\Downloads\\NetBeansProjects\\LuceneSearchE\\src\\lucenesearche\\query_stopped.txt");
    ScoreDoc[] hits;
    try {
        BufferedReader b = new BufferedReader(new FileReader(file1));
        query_id = 1;
        FileInputStream fis = new FileInputStream(
                "C:\\Users\\shantanu\\Downloads\\NetBeansProjects\\LuceneSearchE\\src\\lucenesearche\\query_stopped.txt");
        Scanner scanner = new Scanner(fis);

        luceneFile.createNewFile();
        FileWriter writer = new FileWriter(luceneFile);

        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();

            // b reads the same file in lockstep with the scanner,
            // so this echoes the current query line
            System.out.println(b.readLine());
            if (s.equalsIgnoreCase("q")) {
                break;
            }
            TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
            Query q = new QueryParser(Version.LUCENE_47, "contents", sAnalyzer).parse(line);
            searcher.search(q, collector);
            //System.out.println(searcher);

            hits = collector.topDocs().scoreDocs;

            System.out.println(hits.length);

            // 4. display results
            System.out.println("Found " + hits.length + " hits.");
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                String a = d.get("filename");
                String parts = a.substring(0, a.indexOf('.')); // filename without its extension

                writer.append(String.format("%-10s %-10s %-30s %-10s %-30s", query_id, "Q0", parts, (i + 1),
                        hits[i].score));
                writer.append('\n');
                writer.flush();
                //System.out.println(fmt.format("%-10s %-10s %-80s %-10s %-40s %-20s",""+query_id,"Q0",""+d.get("path"),""+(i + 1),""+hits[i].score,"Shantanu-SYS-001"));
            }

            // 5. term stats --> watch out for which "version" of the term
            // must be checked here instead!
            /*Term termInstance = new Term("contents", s);
            long termFreq = reader.totalTermFreq(termInstance);
            long docCount = reader.docFreq(termInstance);
            System.out.println(s + " Term Frequency " + termFreq
               + " - Document Frequency " + docCount);*/
            query_id += 1;
        }
        writer.close();
    } catch (Exception e) {
        System.out.println("Error searching " + s + " : " + e.toString());
    }

}

From source file:lucenesearche.HW3.java

public static void main(String[] args) throws IOException {
    System.out.println(
            "Enter the FULL path where the index will be created: (e.g. /Usr/index or c:\\temp\\index)");

    String indexLocation = null;
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    String s = br.readLine();

    HW3 indexer = null;
    try {
        indexLocation = s;
        indexer = new HW3(s);
    } catch (Exception ex) {
        System.out.println("Cannot create index..." + ex.getMessage());
        System.exit(-1);
    }

    // ===================================================
    // read input from the user until they enter q to quit
    // ===================================================
    while (!s.equalsIgnoreCase("q")) {
        try {
            System.out.println(
                    "Enter the FULL path to add into the index (q=quit): (e.g. /home/mydir/docs or c:\\Users\\mydir\\docs)");
            System.out.println("[Acceptable file types: .xml, .html, .html, .txt]");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }

            // try to add file into the index
            indexer.indexFileOrDirectory(s);
        } catch (Exception e) {
            System.out.println("Error indexing " + s + " : " + e.getMessage());
        }
    }

    // ===================================================
    // after adding, we always have to call closeIndex();
    // otherwise the index is not created
    // ===================================================
    indexer.closeIndex();

    // =========================================================
    // Now search
    // =========================================================
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Formatter f = new Formatter();

    s = "";
    while (!s.equalsIgnoreCase("q")) {
        try {
            System.out.println("Enter the search query (q=quit):");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }

            // a TopScoreDocCollector cannot be reused across searches,
            // so create a fresh one for every query
            TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
            Query q = new QueryParser(Version.LUCENE_47, "contents", sAnalyzer).parse(s);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            // 4. display results
            String query1, query2, query3, query4;
            query1 = "Lucene_Q1_top100.txt";
            query2 = "Lucene_Q2_top100.txt";
            query3 = "Lucene_Q3_top100.txt";
            query4 = "Lucene_Q4_top100.txt";
            File luceneFile = new File(query4); // change filename for each query
            int query_id;
            query_id = 4; // change this for new query 
            luceneFile.createNewFile();
            FileWriter writer = new FileWriter(luceneFile);
            writer.write(String.format("%-10s %-10s %-80s %-10s %-40s %-20s", "Query ID", "Q0", "Document Name",
                    "Rank", "Cosine Similarity Score", "System Name\n"));
            System.out.println("Found " + hits.length + " hits.");
            System.out.println(f.format("%-10s %-10s %-80s %-10s %-40s %-20s", "Query ID", "Q0",
                    "Document Name", "Rank", "Cosine Similarity Score", "System Name"));
            for (int i = 0; i < hits.length; ++i) {
                Formatter fmt = new Formatter();
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                //System.out.println((i+1) +". " + d.get("path")+" "+ hits[i].score);
                writer.write(String.format("%-10s %-10s %-80s %-10s %-40s %-20s", "" + query_id, "Q0",
                        "" + d.get("path"), "" + (i + 1), "" + hits[i].score, "Shantanu-SYS-001\n"));
                writer.flush();
                System.out.println(fmt.format("%-10s %-10s %-80s %-10s %-40s %-20s", "" + query_id, "Q0",
                        "" + d.get("path"), "" + (i + 1), "" + hits[i].score, "Shantanu-SYS-001"));
            }
            writer.close();

            // 5. term stats: look up the raw query string as a single term
            // in the "contents" field (meaningful for one-word queries)
            Term termInstance = new Term("contents", s);
            long termFreq = reader.totalTermFreq(termInstance);
            long docCount = reader.docFreq(termInstance);
            System.out.println(s + " Term Frequency " + termFreq + " - Document Frequency " + docCount);

        } catch (Exception e) {
            System.out.println("Error searching " + s + " : " + e.getMessage());
            break;
        }

    }

}

From source file:mhp_ph.DLelement.java

License:Open Source License

public LinkedList<phArt> generalSearch(String sTerm) throws Exception {
    LinkedList<phArt> lArt = new LinkedList<phArt>();
    Connection conn = null;

    try {
        Class.forName("org.sqlite.JDBC");
        conn = DriverManager.getConnection("jdbc:sqlite:ph.db");

        PreparedStatement p = conn.prepareStatement(
                "select id_e, tytul, tytul2, autor, opis_f, tom, numer, rok from elementy where id_e = ?;");

        Analyzer a = new StandardAnalyzer(Version.LUCENE_29);
        Directory d = FSDirectory.open(new File("./IND").getCanonicalFile());

        Query q = new QueryParser(Version.LUCENE_29, "all", a).parse(sTerm);

        IndexSearcher is = new IndexSearcher(d);
        TopScoreDocCollector sc = TopScoreDocCollector.create(10000, true);

        is.search(q, sc);

        ScoreDoc[] sd = sc.topDocs().scoreDocs;

        for (int i = 0; i < sd.length; ++i) {
            try {
                int docId = sd[i].doc;
                org.apache.lucene.document.Document d1 = is.doc(docId);
                //System.out.println(sd[i].score + ". " + d1.get("tytul"));
                String id = d1.get("id_e");
                p.setInt(1, Integer.parseInt(id.substring(0, id.length() - 6)));
                ResultSet rs = p.executeQuery();
                if (rs.next()) {
                    phArt phTmp = new phArt(rs.getString("tytul"), rs.getString("tytul2"),
                            rs.getString("autor"), rs.getString("opis_f"), rs.getString("numer"),
                            rs.getString("tom"), rs.getString("rok"), sd[i].score, rs.getInt("id_e"));
                    lArt.add(phTmp);
                }
                rs.close();
            } catch (Exception e) {
                // skip rows that fail to load; the remaining results are still returned
            }
        }
    } catch (Exception e) {
        // on any search or database failure, return what was collected so far
    } finally {
        try {
            conn.close();
        } catch (Exception e) {
            // the connection may be null or already closed
        }
    }

    return lArt;
}

From source file:mhp_ph.DLelement.java

License:Open Source License

/**
 * Detailed search
 * @param tytul Title
 * @param opisF Description - unused
 * @param autor Author
 * @param rok Year
 * @param tom Volume
 * @param numer Number
 * @param andTitle And/Or state for title
 * @param andAuthor And/Or state for author
 * @param andYear And/Or state for year
 * @param andVolume And/Or state for volume
 * @param andNumber And/Or state for number
 * @return Articles
 * @throws Exception Exception
 */
public LinkedList<phArt> detailedSearch(String tytul, String opisF, String autor, String rok, String tom,
        String numer, boolean andTitle, boolean andAuthor, boolean andYear, boolean andVolume,
        boolean andNumber) throws Exception {
    LinkedList<phArt> lArt = new LinkedList<phArt>();
    LinkedList<phArt> lArt2 = new LinkedList<phArt>();
    Connection conn = null;

    if (tytul.isEmpty() && autor.isEmpty() && (!rok.isEmpty() || !tom.isEmpty() || !numer.isEmpty())) {
        return yearVolumeNumberSearch(rok, tom, numer);
    }

    try {
        Class.forName("org.sqlite.JDBC");
        conn = DriverManager.getConnection("jdbc:sqlite:ph.db");

        PreparedStatement p = conn.prepareStatement(
                "select id_e, tytul, tytul2, autor, opis_f, tom, numer, rok from elementy where id_e = ?;");

        Analyzer a = new StandardAnalyzer(Version.LUCENE_29);
        Directory d = FSDirectory.open(new File("./IND").getCanonicalFile());

        LinkedList<String> lF = new LinkedList<String>();
        LinkedList<String> lK = new LinkedList<String>();
        LinkedList<BooleanClause.Occur> lC = new LinkedList<BooleanClause.Occur>();
        if (tytul != null && tytul.compareTo("") != 0) {
            for (String title : tytul.split(" ")) {
                if (title.compareTo("") != 0 && title.length() > 2) {
                    lF.add("tytul");
                    lK.add(title);
                    if (andTitle) {
                        lC.add(BooleanClause.Occur.MUST);
                    } else {
                        lC.add(BooleanClause.Occur.SHOULD);
                    }
                }
            }
        }
        if (autor != null && autor.compareTo("") != 0 && autor.length() > 2) {
            for (String author : autor.split(" ")) {
                if (author.compareTo("") != 0) {
                    lF.add("autor");
                    lK.add(author);
                    if (andAuthor) {
                        lC.add(BooleanClause.Occur.MUST);
                    } else {
                        lC.add(BooleanClause.Occur.SHOULD);
                    }
                }
            }
        }

        String[] year = rok.split(" ");
        String[] volume = tom.split(" ");
        String[] number = numer.split(" ");

        String[] searchF = new String[lF.size()];
        lF.toArray(searchF);
        String[] searchK = new String[lK.size()];
        lK.toArray(searchK);
        BooleanClause.Occur[] searchC = new BooleanClause.Occur[lC.size()];
        lC.toArray(searchC);

        Query q = MultiFieldQueryParser.parse(Version.LUCENE_29, searchK, searchF, searchC, a);

        IndexSearcher is = new IndexSearcher(d);
        TopScoreDocCollector sc = TopScoreDocCollector.create(10000, true);

        is.search(q, sc);

        ScoreDoc[] sd = sc.topDocs().scoreDocs;

        for (int i = 0; i < sd.length; ++i) {
            int docId = sd[i].doc;
            org.apache.lucene.document.Document d1 = is.doc(docId);
            //System.out.println(sd[i].score + ". " + d1.get("tytul"));
            try {
                String id = d1.get("id_e");
                p.setInt(1, Integer.parseInt(id.substring(0, id.length() - 6)));
                ResultSet rs = p.executeQuery();
                if (rs.next()) {
                    if (contains(volume, rs.getString("tom")) && contains(number, rs.getString("numer"))
                            && containsYear(year, rs.getString("rok"))) {
                        phArt phTmp = new phArt(rs.getString("tytul"), rs.getString("tytul2"),
                                rs.getString("autor"), rs.getString("opis_f"), rs.getString("numer"),
                                rs.getString("tom"), rs.getString("rok"), sd[i].score, rs.getInt("id_e"));
                        lArt.add(phTmp);
                    } else if ((contains(volume, rs.getString("tom"))
                            || contains(volume, rs.getString("tom")) == andVolume)
                            && (contains(number, rs.getString("numer"))
                                    || contains(number, rs.getString("numer")) == andNumber)
                            && (containsYear(year, rs.getString("rok"))
                                    || containsYear(year, rs.getString("rok")) == andYear)) {
                        phArt phTmp = new phArt(rs.getString("tytul"), rs.getString("tytul2"),
                                rs.getString("autor"), rs.getString("opis_f"), rs.getString("numer"),
                                rs.getString("tom"), rs.getString("rok"), sd[i].score, rs.getInt("id_e"));
                        lArt2.add(phTmp);
                    }

                }
                rs.close();
            } catch (Exception e) {
                // skip rows that fail to load
            }
        }
    } catch (Exception e) {
        // on failure, return whatever was collected so far
    } finally {
        try {
            conn.close();
        } catch (Exception e) {
            // the connection may be null or already closed
        }
    }
    // append the relaxed (OR) matches after the exact matches
    lArt.addAll(lArt2);

    return lArt;
}