com.mathworks.xzheng.tools.HighlightIt.java Source code

Java tutorial

Introduction

Here is the source code for com.mathworks.xzheng.tools.HighlightIt.java

Source

package com.mathworks.xzheng.tools;

/**
 * Copyright Manning Publications Co.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;

import java.io.FileWriter;
import java.io.StringReader;

// From chapter 8
/**
 * Command-line demo that highlights occurrences of a search term inside a
 * fixed sample passage and writes the highlighted fragments to an HTML file.
 *
 * <p>Usage: {@code HighlightIt <filename-out>}
 */
public class HighlightIt {
    /** Sample passage that is tokenized and highlighted. */
    private static final String TEXT = "In this section we'll show you how to make the simplest "
            + "programmatic query, searching for a single term, and then "
            + "we'll see how to use QueryParser to accept textual queries. "
            + "In the sections that follow, well take this simple example "
            + "further by detailing all the query types built into Lucene. "
            + "We begin with the simplest search of all: searching for all "
            + "documents that contain a single term.";

    /**
     * Parses the query {@code "term"}, highlights up to three best-scoring
     * fragments of the sample passage, and writes them wrapped in a minimal
     * HTML page to the given output file.
     *
     * @param args exactly one element: the path of the HTML file to write;
     *             otherwise a usage message is printed and the JVM exits
     *             with status -1
     * @throws Exception if query parsing, highlighting, or writing the
     *                   output file fails
     */
    public static void main(String[] args) throws Exception {

        if (args.length != 1) {
            System.err.println("Usage: HighlightIt <filename-out>");
            System.exit(-1);
        }

        String filename = args[0];

        String searchText = "term"; // #1
        QueryParser parser = new QueryParser(Version.LUCENE_46, // #1
                "f", // #1
                new StandardAnalyzer(Version.LUCENE_46));// #1
        Query query = parser.parse(searchText); // #1

        SimpleHTMLFormatter formatter = // #2
                new SimpleHTMLFormatter("<span class=\"highlight\">", // #2
                        "</span>"); // #2

        TokenStream tokens = new StandardAnalyzer(Version.LUCENE_46) // #3
                .tokenStream("f", new StringReader(TEXT)); // #3

        QueryScorer scorer = new QueryScorer(query, "f"); // #4

        Highlighter highlighter = new Highlighter(formatter, scorer); // #5
        highlighter.setTextFragmenter( // #6
                new SimpleSpanFragmenter(scorer)); // #6

        String result = // #7
                highlighter.getBestFragments(tokens, TEXT, 3, "..."); // #7

        // try-with-resources guarantees the file is flushed and closed even
        // if one of the writes throws.
        try (FileWriter writer = new FileWriter(filename)) { // #8
            writer.write("<html>"); // #8
            writer.write("<style>\n" + // #8
                    ".highlight {\n" + // #8
                    " background: yellow;\n" + // #8
                    "}\n" + // #8
                    "</style>"); // #8
            writer.write("<body>"); // #8
            writer.write(result); // #8
            writer.write("</body></html>"); // #8
        }
    }
}

/*
#1 Create the query
#2 Customize surrounding tags
#3 Tokenize text
#4 Create QueryScorer
#5 Create highlighter
#6 Use SimpleSpanFragmenter to fragment
#7 Highlight best 3 fragments
#8 Write highlighted HTML
*/