org.apache.lucene.search.SearchUtil.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.search.SearchUtil.java

Source

package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.*;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.search.utils.HTMLParser;

/**
 * Text helpers for page contents: stripping JSP scriptlets and marker-delimited
 * sections, extracting the marked title, and producing a short display snippet
 * around search terms with optional {@code <strong>} highlighting.
 *
 * <p>The static helpers keep no state. An instance additionally holds the text
 * produced by its last {@link #parse(File)} call.
 */
public class SearchUtil {
    /**
     * Tokens declared as stop words (JSP/Struts directive fragments and the marker
     * strings handled by this class). Not referenced by any method of this class.
     */
    public static final String[] SMART_STOP_WORDS = { "taglib", "page", "<%", "%>", "struts", ".jsp",
            "$TITLE_START$", "$TITLE_END$", "$SECURE_START$", "$SECURE_END$", "isELIgnored", "pageEncoding",
            "UTF-8" };

    private static final String TITLE_START = "$TITLE_START$";
    private static final String TITLE_END = "$TITLE_END$";
    private static final String SECURE_START = "$SECURE_START$";
    private static final String SECURE_END = "$SECURE_END$";
    private static final String JSP_OPEN = "<%";
    private static final String JSP_CLOSE = "%>";

    /** Title returned when the contents carry no complete title block. */
    private static final String DEFAULT_TITLE = "Title";

    /** Contents of at most this many characters are displayed unchanged. */
    private static final int SHORT_CONTENT_LIMIT = 150;

    /** Amount of text on either side of a match beyond which the snippet is cut. */
    private static final int CONTEXT_LIMIT = 100;

    /** Length of the leading excerpt used when no search term can be located. */
    private static final int FALLBACK_LENGTH = 99;

    /** Characters removed from long contents before a snippet is built. */
    private static final String[] STRIPPED_CHARACTERS = { "\"", "'", "(", ")", "{", "}", "[", "]" };

    /** Text produced by the last {@link #parse(File)} call; {@code null} until then. */
    private String contents;

    /**
     * Small demo: runs a fixed sample string through the helpers and prints each
     * intermediate result to standard output.
     *
     * @param argv ignored
     */
    public static void main(String[] argv) throws IOException, InterruptedException {
        String contents = "move your ideas";
        System.out.println("Original Contents:\t" + contents);
        contents = SearchUtil.removeJspTags(contents);
        System.out.println("Removed JSP_TAGS:\t" + contents);
        System.out.println(SearchUtil.getTitle(contents));
        contents = SearchUtil.removeSecureContents(contents);
        System.out.println("Removed Secure Text:\t" + contents);
        contents = SearchUtil.removeTitle(contents, "");
        System.out.println("Removed Title:\t" + contents);
        System.out.println("Display String:\t" + SearchUtil.createDisplayStr(contents, "Samsung"));
    }

    /**
     * Returns the text between the first {@code $TITLE_START$} marker and the
     * first {@code $TITLE_END$} marker that follows it.
     *
     * @param contents text to inspect; may be {@code null}
     * @return the marked title, or {@code "Title"} when {@code contents} is
     *         {@code null} or holds no complete title block
     */
    public static String getTitle(String contents) {
        if (contents == null) {
            return DEFAULT_TITLE;
        }
        int markerStart = contents.indexOf(TITLE_START);
        if (markerStart < 0) {
            return DEFAULT_TITLE;
        }
        int titleStart = markerStart + TITLE_START.length();
        // Look for the end marker only after the start marker: an end marker that
        // precedes it would otherwise produce an invalid substring range.
        int titleEnd = contents.indexOf(TITLE_END, titleStart);
        if (titleEnd < 0) {
            return DEFAULT_TITLE;
        }
        return contents.substring(titleStart, titleEnd);
    }

    /**
     * Removes the first title block (markers included) and then strips
     * {@code title} from the start of what remains.
     *
     * @param contents text to clean; may be {@code null}
     * @param title    prefix to drop after the block has been removed
     * @return the cleaned text; {@code contents} unchanged when it holds no
     *         complete title block; {@code null} when {@code contents} is {@code null}
     */
    public static String removeTitle(String contents, String title) {
        if (contents == null) {
            return null;
        }
        int blockStart = contents.indexOf(TITLE_START);
        int endMarker = blockStart < 0 ? -1 : contents.indexOf(TITLE_END, blockStart + TITLE_START.length());
        if (endMarker < 0) {
            return contents;
        }
        String withoutBlock = contents.substring(0, blockStart)
                + contents.substring(endMarker + TITLE_END.length());
        return StringUtils.removeStart(withoutBlock, title);
    }

    /**
     * Removes every {@code <% ... %>} section, delimiters included.
     *
     * @param contents text to clean; may be {@code null}
     * @return the text without JSP sections, or {@code null} for {@code null} input
     */
    public static String removeJspTags(String contents) {
        return stripDelimited(contents, JSP_OPEN, JSP_CLOSE, JSP_CLOSE.length());
    }

    /**
     * Removes every section that starts at {@code startStr} and ends
     * {@code strLen} characters past the start of the following {@code endStr}.
     *
     * @param contents text to clean; may be {@code null}
     * @param startStr opening delimiter
     * @param endStr   closing delimiter
     * @param strLen   number of characters to remove counting from the start of
     *                 {@code endStr}; pass {@code endStr.length()} to drop the
     *                 whole closing delimiter
     * @return the cleaned text, or {@code null} for {@code null} input
     */
    public static String removeDirty(String contents, String startStr, String endStr, int strLen) {
        return stripDelimited(contents, startStr, endStr, strLen);
    }

    /**
     * Removes every {@code $SECURE_START$ ... $SECURE_END$} section, markers included.
     *
     * @param contents text to clean; may be {@code null}
     * @return the text without secure sections, or {@code null} for {@code null} input
     */
    public static String removeSecureContents(String contents) {
        return stripDelimited(contents, SECURE_START, SECURE_END, SECURE_END.length());
    }

    /**
     * Removes the {@code $SECURE_START$} and {@code $SECURE_END$} markers while
     * keeping the text between them.
     *
     * @param contents text to clean; may be {@code null}
     * @return the text without the markers, or {@code null} for {@code null} input
     */
    public static String removeSecureTags(String contents) {
        if (contents == null) {
            return null;
        }
        return contents.replace(SECURE_START, "").replace(SECURE_END, "");
    }

    /**
     * Tells whether {@code contents} contains at least one of the space-separated
     * terms of {@code searchText}, ignoring case and double quotes.
     *
     * @param contents   text to inspect
     * @param searchText user query
     * @return {@code true} when any non-empty term occurs in {@code contents};
     *         {@code false} otherwise, including for {@code null} arguments
     */
    public static boolean allowToAddCotent(String contents, String searchText) {
        if (contents == null || searchText == null) {
            return false;
        }
        String haystack = lower(contents);
        String[] terms = lower(searchText.replace("\"", "")).split(" ");
        for (String term : terms) {
            // Consecutive or leading spaces yield empty terms; an empty term is
            // "found" in every string, so it must not count as a match.
            if (term.length() > 0 && haystack.indexOf(term) != -1) {
                return true;
            }
        }
        return false;
    }

    /**
     * Wraps occurrences of the query in {@code <strong>} tags.
     *
     * <p>A quoted query of several words is highlighted as one phrase (first
     * occurrence, case-insensitive). In every other case, and when the phrase
     * does not occur, each space-separated word of {@code displayString} that
     * equals a query term (ignoring case) is highlighted on its own.
     *
     * @param displayString text shown to the user; may be {@code null}
     * @param searchQuery   user query; may be {@code null}
     * @return the highlighted text; {@code displayString} unchanged when either
     *         argument is {@code null}
     */
    public static String highlightSearchWord(String displayString, String searchQuery) {
        if (displayString == null || searchQuery == null) {
            return displayString;
        }
        String query = StringUtils.replace(searchQuery, "  ", " ");

        if (query.indexOf('"') != -1) {
            query = query.replace("\"", "");
            if (query.indexOf(' ') != -1) {
                int phraseStart = indexOfIgnoreCase(displayString, query);
                if (phraseStart >= 0) {
                    int phraseEnd = phraseStart + query.length();
                    return displayString.substring(0, phraseStart) + "<strong>"
                            + displayString.substring(phraseStart, phraseEnd) + "</strong>"
                            + displayString.substring(phraseEnd);
                }
                // Phrase not present: fall through to word-by-word highlighting.
            }
        }

        String[] terms = query.split(" ");
        String[] words = displayString.split(" ");
        for (int i = 0; i < words.length; i++) {
            for (String term : terms) {
                if (term.length() > 0 && words[i].equalsIgnoreCase(term)) {
                    words[i] = "<strong>" + words[i] + "</strong>";
                    break;
                }
            }
        }
        return StringUtils.join(words, " ");
    }

    /**
     * Builds a short excerpt of {@code contents} for display.
     *
     * <p>Contents of at most 150 characters are returned unchanged. Longer
     * contents have quotes and brackets removed; the excerpt is then taken around
     * the first query term found followed by a space (the whole phrase for a
     * quoted query that occurs in the text). When no term can be located the
     * leading characters of the text, cut at a word boundary, are returned.
     *
     * @param contents   page text; may be {@code null}
     * @param searchText user query; may be {@code null}
     * @return the excerpt; an empty string when {@code contents} is {@code null}
     */
    public static String createDisplayStr(String contents, String searchText) {
        if (contents == null) {
            return "";
        }
        if (contents.length() <= SHORT_CONTENT_LIMIT) {
            return contents;
        }

        String original = contents;
        for (String unwanted : STRIPPED_CHARACTERS) {
            original = StringUtils.remove(original, unwanted);
        }
        String lowered = lower(original); // lower-cased copy for case-insensitive searching

        String query = searchText == null ? "" : lower(StringUtils.replace(searchText, "  ", " "));
        boolean quoted = query.indexOf('"') != -1; // e.g. "apply your ideas"
        query = query.replace("\"", "");

        String[] terms;
        if (quoted && lowered.trim().length() > 0 && lowered.indexOf(query) != -1) {
            terms = new String[] { query };
        } else {
            terms = query.split(" ");
        }

        for (String term : terms) {
            if (term.length() == 0) {
                continue; // produced by repeated or leading spaces in the query
            }
            String needle = term + " ";
            if (lowered.indexOf(needle) != -1) {
                String snippet = displayStr(original, lowered, needle);
                if (snippet.length() > 0) {
                    return snippet;
                }
            }
        }

        String head = original.length() > CONTEXT_LIMIT ? original.substring(0, FALLBACK_LENGTH) : original;
        return cutAtLastSpace(head);
    }

    /**
     * Extracts the excerpt of {@code originalContents} surrounding the first
     * literal occurrence of {@code searchStr} in {@code contents}.
     *
     * <p>The excerpt starts at the beginning of the text, or, when more than 100
     * characters precede the match, just after the last {@code '.'} (else the last
     * space) before it. It ends with the first {@code '.'} after the match; when
     * there is none it ends at the first space after the match if more than 100
     * characters follow, otherwise at the end of the text. The final word of the
     * excerpt is then dropped.
     *
     * @param originalContents text in its original case
     * @param contents         lower-cased copy of {@code originalContents}
     * @param searchStr        lower-cased text to locate, matched literally
     * @return the excerpt, or an empty string when {@code searchStr} is empty or
     *         does not occur
     */
    private static String displayStr(String originalContents, String contents, String searchStr) {
        int matchStart = searchStr.length() == 0 ? -1 : contents.indexOf(searchStr);
        if (matchStart < 0) {
            return "";
        }
        int matchEnd = matchStart + searchStr.length();
        int originalLength = originalContents.length();

        int snippetStart = 0;
        if (matchStart > CONTEXT_LIMIT) {
            int boundary = contents.lastIndexOf('.', matchStart - 1);
            if (boundary < 0) {
                boundary = contents.lastIndexOf(' ', matchStart - 1);
            }
            snippetStart = boundary + 1; // boundary == -1 keeps the start at 0
        }

        int snippetEnd;
        // Searching from the end of the match keeps the matched term inside the excerpt.
        int sentenceEnd = contents.indexOf('.', matchEnd);
        if (sentenceEnd >= 0) {
            snippetEnd = sentenceEnd + 1;
        } else if (contents.length() - matchEnd > CONTEXT_LIMIT) {
            int nextSpace = contents.indexOf(' ', matchEnd);
            snippetEnd = nextSpace >= 0 ? nextSpace : matchEnd;
        } else {
            snippetEnd = originalLength;
        }

        // Lower-casing can change the length of a string for a few characters, so
        // positions found in the lower-cased copy are clamped to the original.
        snippetStart = Math.min(snippetStart, originalLength);
        snippetEnd = Math.max(snippetStart, Math.min(snippetEnd, originalLength));

        return cutAtLastSpace(originalContents.substring(snippetStart, snippetEnd));
    }

    /**
     * Returns the text produced by the last {@link #parse(File)} call.
     *
     * @return the parsed text, or {@code null} if nothing has been parsed yet
     */
    public String getContents() {
        return contents;
    }

    /**
     * Reads {@code file} through {@code HTMLParser} and stores the text obtained
     * from the parser's reader, with every line followed by a single space, for
     * retrieval through {@link #getContents()}.
     *
     * @param file file to read
     * @throws IOException          if the file cannot be opened or read
     * @throws InterruptedException declared for callers; nothing in this method
     *                              raises it directly
     */
    public void parse(File file) throws IOException, InterruptedException {
        FileInputStream input = new FileInputStream(file);
        try {
            HTMLParser parser = new HTMLParser(input);
            // NOTE(review): only the file stream is closed below; the reader handed
            // out by the parser is left open - confirm HTMLParser holds nothing else.
            LineNumberReader reader = new LineNumberReader(parser.getReader());
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append(' ');
            }
            this.contents = text.toString();
        } finally {
            input.close();
        }
    }

    /**
     * Repeatedly deletes the span from the first {@code open} delimiter to
     * {@code closeLen} characters past the start of the next {@code close}
     * delimiter, until no such pair is left.
     */
    private static String stripDelimited(String contents, String open, String close, int closeLen) {
        if (contents == null) {
            return null;
        }
        StringBuilder buffer = new StringBuilder(contents);
        while (true) {
            int start = buffer.indexOf(open);
            if (start < 0) {
                break;
            }
            // Only a closing delimiter at or after the opening one forms a pair.
            int end = buffer.indexOf(close, start);
            if (end < 0) {
                break;
            }
            int stop = Math.min(buffer.length(), end + closeLen);
            if (stop <= start) {
                break; // nothing would be removed; stop instead of looping forever
            }
            buffer.delete(start, stop);
        }
        return buffer.toString();
    }

    /**
     * Trims {@code text} and drops its last word, keeping the space before it;
     * text without any inner space is returned trimmed but otherwise whole.
     */
    private static String cutAtLastSpace(String text) {
        String trimmed = text.trim();
        int lastSpace = trimmed.lastIndexOf(' ');
        return lastSpace < 0 ? trimmed : trimmed.substring(0, lastSpace + 1);
    }

    /** Returns the index of the first case-insensitive occurrence of {@code needle}, or -1. */
    private static int indexOfIgnoreCase(String text, String needle) {
        int lastCandidate = text.length() - needle.length();
        for (int i = 0; i <= lastCandidate; i++) {
            if (text.regionMatches(true, i, needle, 0, needle.length())) {
                return i;
            }
        }
        return -1;
    }

    /** Lower-cases with a fixed locale so matching does not depend on the JVM default. */
    private static String lower(String text) {
        return text.toLowerCase(java.util.Locale.ROOT);
    }
}