invertedindex.LineNumberSearcher.java Source code

Java tutorial

Introduction

Here is the source code for invertedindex.LineNumberSearcher.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package invertedindex;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.jsoup.Jsoup;

/**
 *
 * @author kkgarg
 */
public class LineNumberSearcher {

    private static Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
    //    private static ConfigFile cf = new ConfigFile();
    private static int topDocs = 100;
    private ArrayList<String> lineNumbersList;

    private String getLineIndexLocation() {
        String lineIndexLocation = "";
        try {
            ConfigFile cf = new ConfigFile();
            lineIndexLocation = cf.getLineIndexPath();
        } catch (Exception e) {
            System.out.println(e);
        }
        return lineIndexLocation;
    }

    public ArrayList<String> search(String keyword, String filePath) throws IOException {

        String indexLocation = getLineIndexLocation();
        //    System.out.println("Inside LINE search method");

        try {
            IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true);

            String queryContent = keyword;

            queryContent = "\"" + queryContent + "\"";
            //      queryContent = "*" + queryContent + "*";

            String queryFilePath = filePath;
            //                System.out.println("FIELPATH "+queryFilePath);
            queryFilePath = "\"" + queryFilePath + "\"";
            //      queryFilePath = "*" + queryFilePath + "*";

            QueryParser queryParserContent = new QueryParser(Version.LUCENE_47, "contents", analyzer);
            QueryParser queryParserFilePath = new QueryParser(Version.LUCENE_47, "path", analyzer);

            queryParserContent.setAllowLeadingWildcard(true);
            //queryParserFileName.setAllowLeadingWildcard(true);
            //               Query q = queryParser.parse(query);
            Query qContent = queryParserContent.parse(queryContent);
            Query qFileName = queryParserFilePath.parse(queryFilePath);
            //                System.out.println("FIELPATH "+qFileName);
            BooleanQuery q = new BooleanQuery();
            q.add(qContent, Occur.MUST); // MUST implies that the keyword must occur.
            q.add(qFileName, Occur.MUST);

            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            //      System.out.println("Found " + hits.length + " hits.");
            lineNumbersList = new ArrayList<>();
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                //          System.out.println((i + 1) + ". " + d.get("filename")
                //             + " score=" + hits[i].score);
                //                    System.out.println("Line Number is "+d.get("lineNumber"));
                //                    System.out.println("Content is "+d.get("contents"));
                //                    String filePath = d.get("path");
                lineNumbersList.add(d.get("lineNumber"));
            }

            reader.close();
            return lineNumbersList;

        } catch (Exception e) {
            System.out.println("Error searching in line number search " + indexLocation + " : " + e.getMessage());

        }

        return lineNumbersList;

    }
}