Example usage for org.apache.lucene.search TopScoreDocCollector create

List of usage examples for org.apache.lucene.search TopScoreDocCollector create

Introduction

In this page you can find the example usage for org.apache.lucene.search TopScoreDocCollector create.

Prototype

public static TopScoreDocCollector create(int numHits, int totalHitsThreshold) 

Source Link

Document

Creates a new TopScoreDocCollector given the number of hits to collect and the number of hits to count accurately. (Note: the examples below target older Lucene releases and call the legacy two-argument overload `create(int numHits, boolean docsScoredInOrder)`, not this modern `(numHits, totalHitsThreshold)` signature.)

Usage

From source file:edu.ucdenver.ccp.nlp.index.Search.java

License:Apache License

/**
 * Simple command-line based search demo over a pre-built Lucene index.
 * Reads queries from stdin (UTF-8), searches the "title", "abs" and
 * "mentions" fields, and prints pmid/title for up to 100 hits per query.
 */
public static void main(String[] args) throws Exception {

    // Directory containing the index produced by the companion indexer.
    String index = "index";

    String queries = null;

    String queryString = null;
    int hitsPerPage = 100;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    // EnglishAnalyzer adds stemming/stopword removal on top of StandardAnalyzer.
    EnglishAnalyzer analyzer = new EnglishAnalyzer(Version.LUCENE_40);

    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    // Search title, abstract and entity-mention fields at once.
    MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_40,
            new String[] { "title", "abs", "mentions" }, analyzer);

    while (true) {
        if (queries == null && queryString == null) { // prompt the user
            System.out.println("Enter query: ");
        }

        String line = queryString != null ? queryString : in.readLine();

        // EOF on stdin terminates the loop. (The original also tested
        // line.length() == -1, which can never be true and was removed.)
        if (line == null) {
            break;
        }

        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        // Display results: rank, PubMed id, title.
        System.out.println("Found " + hits.length + " hits.");

        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            System.out.println((i + 1) + ". " + d.get("pmid") + "\t" + d.get("title"));
        }

        if (queryString != null) {
            break; // single-shot mode: one query was supplied programmatically
        }
    }
    reader.close();
}

From source file:edu.wayne.cs.severe.ir4se.lucene.SearchFiles.java

License:Apache License

/**
 * Searches the index at {@code indexDirectoryPath} for {@code queryString} and writes,
 * for every entry of {@code targetClasses}, the rank and score at which it was retrieved.
 *
 * @param system             label written into each result row
 * @param documentType       2 = class-level docs (matched on "path.docName"),
 *                           1 = file-level docs (reported by file name)
 * @param queryNumber        identifier of the query, written into each result row
 * @param indexDirectoryPath path of the Lucene index directory
 * @param queryString        the query; split on single spaces when
 *                           {@code runIndividualTerms} is true
 * @param fileOutput         output file for the per-target results
 * @param targetClasses      names of the relevant (target) documents
 * @param runIndividualTerms when true, first recurses once with {@code false} to run the
 *                           whole query, then runs each term as its own query
 * @param append             whether to append to {@code fileOutput}
 * @throws Exception on any index, parse or I/O failure
 */
@SuppressWarnings("deprecation")
public static void search(String system, int documentType, int queryNumber, String indexDirectoryPath,
        String queryString, String fileOutput, String[] targetClasses, boolean runIndividualTerms,
        boolean append) throws Exception {

    String index = indexDirectoryPath;
    // NOTE(review): if indexDirectoryPath does not end with a path separator this yields
    // e.g. "myIndex../NotFound.txt" instead of a file in the parent directory — confirm.
    FileWriter f = new FileWriter(index + "../NotFound.txt", true);

    // Log every target document that is not present in the index.
    for (int i = 0; i < targetClasses.length; i++) {
        String target = targetClasses[i];
        boolean found = Indexer.isFileDocInIndex(indexDirectoryPath, target);
        if (!found)
            f.append("Target doc " + i + " - " + target + " not found in index!\n");
    }
    f.close();
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true);

    int numDocs = reader.numDocs();
    System.out.println("The number of documents in the index is: " + numDocs);

    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);

    // Only the "contents" field is searched.
    String[] fields;
    fields = new String[1];
    fields[0] = "contents";

    if (!runIndividualTerms) {
        // Run the whole query at once; collect up to numDocs hits (i.e. all of them).
        MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
        int hitsPerPage = numDocs;
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        Query query = parser.parse(queryString);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        System.out.println("The number of hits is: " + hits.length);

        // file with the results (score and position) only for the relevant
        // documents
        // the file contains entries in the following format:
        // (queryNumber,relDoc1,posRelDoc1,scoreRelDoc1,relDoc2,posRelDoc2,scoreRelDoc2,...)
        FileWriter fwRelevant = new FileWriter(fileOutput, append);

        String path = "";
        String docName = "";
        String docPathAndName = "";
        // For each target, scan the ranked hit list for its position and score.
        for (String target : targetClasses) {
            boolean found = false;
            for (int i = 0; i < hits.length; i++) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                path = d.get("path");

                float score = hits[i].score;

                if (documentType == 2) {
                    // Class-level documents: match on "path.docName", case-insensitively.
                    docName = d.get("docName");

                    docPathAndName = path.toLowerCase() + "." + docName.toLowerCase();

                    if (target.equalsIgnoreCase(docPathAndName)) {
                        fwRelevant.write(system + ";" + queryNumber + ";" + target + ";" + (i + 1) + ";"
                                + hits.length + ";" + numDocs + ";" + score + "\n");
                        found = true;
                        break;
                    }
                } else if (documentType == 1) {
                    // File-level documents: report every hit by its file name.
                    File pathDir = new File(path.trim());
                    String fileName = pathDir.getName();
                    // NOTE(review): replaceAll treats ".txt" as a regex where '.' matches any
                    // character; String.replace(".txt", "") would be the literal variant — confirm.
                    docName = fileName.replaceAll(".txt", "");
                    fwRelevant.write((i + 1) + ". doc = " + docName + " score = " + score + "\n");
                }
            }
            if (found == false)
                fwRelevant.write(system + ";" + queryNumber + ";" + target + "; NOT_RETRIEVED" + "\n");

        }
        // fw.close();
        fwRelevant.close();
        reader.close();
    } else // runIndividualTerms = true
    {
        /**
         * each query will be divided in its constituent terms and each term
         * will be run as a separate query
         **/
        /**
         * this is useful to determine the similarity of each of the terms
         * in a query to a target document so that we determine which terms
         * in the query tend to lead to the best results, i.e., to finding
         * the targets sooner
         **/

        // First run the whole query once (recursion with runIndividualTerms = false).
        SearchFiles.search(system, documentType, queryNumber, indexDirectoryPath, queryString,
                fileOutput.replaceAll(".txt", "_wholeQuery.txt"), targetClasses, false, append);

        FileWriter fw = new FileWriter(fileOutput.replaceAll(".txt", "_terms.txt"));
        fw.write(
                "\n\n\n------------------------------------------------------------------------------------\n\n");
        fw.write("                               Results for query " + queryNumber + "\n");
        fw.write("------------------------------------------------------------------------------------\n\n");

        // file with the results (score and position) only for the relevant
        // documents
        // the file contains entries in the following format:
        // (queryNumber,term1,term1TF,term1DF,relDoc1,posRelDoc1Term1,scoreRelDoc1Term1,relDoc2,posRelDoc2Term1,scoreRelDoc2Term1,...)
        // (queryNumber,term2,term2TF,term2DF,relDoc1,posRelDoc1Term2,scoreRelDoc1Term2,relDoc2,posRelDoc2Term2,scoreRelDoc2Term2,...)
        // ...
        FileWriter fwRelevant = new FileWriter(
                fileOutput.replaceAll(".txt", "_terms_RelevantDocsPositions.txt"));

        // One query per whitespace-separated term of the original query.
        String[] queryTerms = queryString.split(" ");
        for (int l = 0; l < queryTerms.length; l++) {
            MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
            int hitsPerPage = numDocs;
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);

            String q = queryTerms[l];
            Query query = parser.parse(q);
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            fw.write("TERM " + (l + 1) + ": " + q + "\n\n");
            fwRelevant.write("\n" + queryNumber + "," + q);
            for (int i = 0; i < hits.length; i++) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                String path = d.get("path");
                float score = hits[i].score;
                if (documentType == 2) {
                    String docName = d.get("docName");
                    fw.write((i + 1) + ". doc = " + path + " " + docName + " - score = " + score + "\n");
                    // Record tf/df details only for hits that are target documents.
                    for (int k = 0; k < targetClasses.length; k++) {
                        if (docName.equalsIgnoreCase(targetClasses[k])) {
                            String contents = d.get("contents");
                            int frequency = countOccurrences(contents, q);// tf
                            fwRelevant.write("," + frequency);

                            fwRelevant.write("," + reader.docFreq(new Term("contents", q)));// df
                            fwRelevant.write("," + path + "." + docName + "," + (i + 1) + "," + score);
                            break;
                        }
                    }
                } else if (documentType == 1) {
                    File pathDir = new File(path);
                    String fileName = pathDir.getName();
                    String docName = fileName.replaceAll(".txt", "");
                    fw.write((i + 1) + ". doc = " + docName + " score = " + score + "\n");
                }
            }
            fw.write("\n\n\n");
        }
        fw.close();
        // NOTE(review): f was already closed above; this second close is a harmless no-op.
        f.close();
        fwRelevant.close();
        reader.close();
    }
}

From source file:es.eucm.ead.editor.indexes.Index.java

License:Open Source License

/**
 * Runs a fuzzy text query against the index.
 *
 * @param queryText textual query; the fuzzy suffix and factor are appended automatically
 * @return sorted matches whose documents had fields matching the query;
 *         empty on parse or lookup errors (the error is logged)
 */
public Array<Match> search(String queryText) {
    Array<Match> results = new Array<Match>();
    try {
        String fuzzyQuery = queryText + FUZZY_SEARCH_SYMBOL + fuzzyFactor;
        Query parsed = getQueryParser().parse(fuzzyQuery);

        TopScoreDocCollector collector = TopScoreDocCollector.create(maxSearchHits, true);
        indexSearcher.search(parsed, collector);

        ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            ScoreDoc hit = scoreDocs[i];
            Document doc = indexSearcher.doc(hit.doc);
            // Map the stored document id back to its search node.
            String rawId = doc.getFieldable(DOCUMENT_ID_FIELD_NAME).stringValue();
            Integer id = Integer.parseInt(rawId);
            SearchNode node = idsToNodes.get(id);
            results.add(new Match(node, hit.score));
        }
        results.sort();
    } catch (Exception e) {
        Gdx.app.error("Index", "Error parsing or looking up " + queryText, e);
    }
    return results;
}

From source file:es.eucm.ead.editor.model.ModelIndex.java

License:Open Source License

/**
 * Query the index. The fields "eid", "is" and "has" are interpreted as follows:
 * <ul>
 * <li>eid - exact editor-id match
 * <li>is - node class-name match
 * <li>has - node contents class-name match
 * </ul>
 * @param field field that is being searched; empty string searches all fields
 * @param queryText contents of the query
 * @param quick whether to build a quick (shallow) result
 * @return an object with the results of the search; an empty result on error
 */
public SearchResult search(String field, String queryText, boolean quick) {

    // Short-circuited queries resolved without touching the Lucene index.
    if (field.equals(isClassQueryField)) {
        return searchByClass(queryText);
    } else if (field.equals(editorIdQueryField)) {
        return searchById(queryText);
    } else if (field.equals(hasContentClassQueryField)) {
        return searchByContentClass(queryText);
    }

    // normal queries
    IndexReader reader = null;
    try {
        reader = IndexReader.open(searchIndex);
        Query query = (field.isEmpty()) ? getQueryAllParser().parse(queryText)
                : new QueryParser(Version.LUCENE_35, field, searchAnalyzer).parse(queryText);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_SEARCH_HITS, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        // The SearchResult keeps the searcher (and thus the reader) alive, so the
        // reader is intentionally NOT closed on the success path.
        SearchResult sr = new SearchResult(searcher, query, quick, hits, model.getNodesById());
        return sr;
    } catch (Exception e) {
        logger.warn("Error parsing or looking up query '{}' in index", queryText, e);
        // Fix: the original leaked the reader when parsing or searching failed.
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception closeError) {
                logger.warn("Error closing index reader", closeError);
            }
        }
    }
    return new SearchResult();
}

From source file:eu.eexcess.sourceselection.redde.dbsampling.DBSampler.java

License:Apache License

/**
 * Collects samples from the general database (index).
 *
 * @param queryString
 *            the search string
 * @param searcher
 *            searcher over the general index
 * @param maxHitsPerQuery
 *            maximum number of documents stored from the result for queryString
 * @throws ParseException
 * @throws IOException
 */
private void collectSamples(String queryString, IndexSearcher searcher, int maxHitsPerQuery)
        throws ParseException, IOException {

    // Parse with the same English analysis used for the index text field.
    QueryParser queryParser = new QueryParser(Settings.IndexFields.IndexTextField, new EnglishAnalyzer());
    Query parsedQuery = queryParser.parse(queryString);

    TopScoreDocCollector topHits = TopScoreDocCollector.create(maxHitsPerQuery, true);
    searcher.search(parsedQuery, topHits);
    writeToIndex(searcher, topHits, maxHitsPerQuery);
}

From source file:eu.eexcess.sourceselection.redde.indexer.topterm.DBDomainSampler.java

License:Apache License

/**
 * Draws a sample from the index for each requested set of domains.
 *
 * @param sampleArgs sub-sample specifications; each names the domains whose
 *                   aligned terms form the sampling query
 * @throws IllegalStateException if the domain-to-terms tree has not been built yet
 * @throws ParseException if the generated query string cannot be parsed
 * @throws IOException on index access failure
 */
public void sample(Set<SampleArguments> sampleArgs) throws IllegalStateException, ParseException, IOException {

    if (domainToTermsTree == null) {
        throw new IllegalStateException("no terms aligned");
    }

    for (SampleArguments subSample : sampleArgs) {

        // merge requested domain-terms
        Set<String> terms = distinctUnifyValues(subSample.sampleDomains);

        // sample with domain-term dependent query
        // NOTE(review): joining with "" concatenates all terms into one token;
        // a " " separator was probably intended — confirm before relying on this.
        String queryString = String.join("", terms);

        Query query = new QueryParser(DBDomainSampler.fieldOfInterest, new EnglishAnalyzer())
                .parse(queryString);
        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, false);
        new IndexSearcher(inIndexReader).search(query, collector);
        // ScoreDoc[] docs = collector.topDocs().scoreDocs;
        // TODO: create and store docs to new index called subSample.name
    }
    // Deliberately unconditional: the searches above run, but persisting the
    // sampled documents (see TODO) is not implemented yet.
    throw new UnsupportedOperationException("not implemented yet");
}

From source file:Example.lucene.HelloLucene.java

public static void main(String[] args) throws IOException, ParseException {
    // The same Thai analyzer must be used for both indexing and searching.
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);

    // 1. build (or rebuild) the on-disk index
    Directory index = FSDirectory.open(new File("indexing"));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
    try (IndexWriter writer = new IndexWriter(index, config)) {
        addDoc(writer, "Lucene in Action 123.456", "193398817");
        addDoc(writer, "Lucene for Dummies 123 456", "55320055Z");
        addDoc(writer, "Managing Gigabytes 123456", "55063554A");
        addDoc(writer, "", "9900333X");
        addDoc(writer, "?", "9900333X");
    }

    // 2. parse the query; "title" is the default field when none is specified
    String querystr = args.length > 0 ? args[0] : "";
    Query q = new QueryParser(Version.LUCENE_45, "title", analyzer).parse(querystr);

    // 3. search and 4. display the top hits
    int hitsPerPage = 10;
    try (IndexReader reader = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(q, collector);
        ScoreDoc[] topHits = collector.topDocs().scoreDocs;

        System.out.println("Found " + topHits.length + " hits.");
        int rank = 1;
        for (ScoreDoc hit : topHits) {
            Document d = searcher.doc(hit.doc);
            System.out.println(rank++ + ". " + d.get("isbn") + "\t" + d.get("title"));
        }
    }
}

From source file:Example.lucene.TestIndexer.java

public static void main(String[] args) throws IOException, ParseException {
    // The same Thai analyzer must be used for both indexing and searching.
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);
    File inputDir = new File("data/test_snipped");

    // 1. build the index from the archive files in the input directory
    Directory index = FSDirectory.open(new File("data/indexingonly"));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
    try (IndexWriter writer = new IndexWriter(index, config)) {
        int id = 1;
        for (File archiveFile : inputDir.listFiles()) {
            try (ArcReader archive = new ArcReader(archiveFile)) {
                System.out.println(archiveFile.getName());
                while (archive.Next()) {
                    String[] parts = archive.Record.ArchiveContent.split("\n");
                    if (parts.length == 2) {
                        addDoc(writer, id++, archive.Record.URL, parts[0], parts[1]);
                    } else if (parts.length == 1) {
                        addDoc(writer, id++, archive.Record.URL, parts[0], "");
                    }
                    // records with any other shape are skipped
                }
            }
        }
    }

    // 2. parse the query; "title" is the default field when none is specified
    String querystr = args.length > 0 ? args[0] : "";
    Query q = new QueryParser(Version.LUCENE_45, "title", analyzer).parse(querystr);

    // 3. search and 4. display the top hits
    int hitsPerPage = 10;
    try (IndexReader reader = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(q, collector);
        ScoreDoc[] topHits = collector.topDocs().scoreDocs;

        System.out.println("Found " + topHits.length + " hits.");
        for (int rank = 0; rank < topHits.length; rank++) {
            Document d = searcher.doc(topHits[rank].doc);
            System.out.println((rank + 1) + ". " + d.get("id") + "\t" + d.get("url") + "\t" + d.get("title") + "\t"
                    + d.get("content"));
        }
    }
}

From source file:Example.lucene.TestSearch.java

public static void main(String[] args) throws ParseException, IOException {

    // The same Thai analyzer must be used for indexing and searching.
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);
    Directory index = FSDirectory.open(new File("data/indexingonly"));

    // 2. query; defaults to "golf user" when no argument is given
    String querystr = args.length > 0 ? args[0] : "golf user";

    // Only the "content" field is searched.
    Query q = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer)
            .parse(querystr);

    // 3. search
    int hitsPerPage = 10;
    try (IndexReader reader = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(q, collector);

        // Retrieve only the first 5 of the collected documents.
        TopDocs topDocs = collector.topDocs(5);
        ScoreDoc[] results = topDocs.scoreDocs;

        // 4. display results
        System.out.println("Found " + results.length + " hits. from " + topDocs.totalHits + " docs.");
        for (int rank = 0; rank < results.length; rank++) {
            Document d = searcher.doc(results[rank].doc);
            System.out.println((rank + 1) + ". " + d.get("id") + "\t" + d.get("url") + "\t" + d.get("title") + "\t"
                    + d.get("content"));
        }
    }
}

From source file:game.TermFreq.java

/**
 * Looks up the document whose TREC id is {@code docIdToGuess} and caches its
 * Lucene doc id, stored document, and analyzed-content field on this instance.
 *
 * @throws IllegalStateException if the id is not present in the index
 * @throws Exception on index access failure
 */
void loadDoc() throws Exception {
    IndexReader reader = retriever.getReader();
    IndexSearcher searcher = retriever.getSearcher();

    // Exact-match lookup on the (unique) TREC id field.
    Term docIdTerm = new Term(TrecDocRetriever.FIELD_ID, this.docIdToGuess);
    TermQuery tq = new TermQuery(docIdTerm);

    TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
    searcher.search(tq, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    // Fix: fail with a clear message instead of an ArrayIndexOutOfBoundsException
    // when the id does not exist in the index.
    if (hits.length == 0) {
        throw new IllegalStateException("Document not found in index: " + this.docIdToGuess);
    }
    this.luceneDocIdToGuess = hits[0].doc;
    this.docToGuess = reader.document(luceneDocIdToGuess);
    this.contentOfDocToGuess = docToGuess.get(FIELD_ANALYZED_CONTENT);
}