Example usage for org.apache.lucene.search.highlight Highlighter getBestFragments

Introduction

On this page you can find example usage for org.apache.lucene.search.highlight Highlighter getBestFragments.

Prototype

public final String[] getBestFragments(TokenStream tokenStream, String text, int maxNumFragments)
        throws IOException, InvalidTokenOffsetsException 

Document

Highlights chosen terms in a text, extracting the most relevant sections.
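
For orientation, here is a minimal, self-contained sketch of the typical call sequence. It is an illustrative example, not taken from any of the projects below; it assumes a Lucene 5.x-era classpath, and the field name, query, and text are made up:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;

public class GetBestFragmentsSketch {
    public static void main(String[] args) throws Exception {
        String text = "Lucene is a full-text search library. Lucene highlights matching terms.";

        StandardAnalyzer analyzer = new StandardAnalyzer();
        Query query = new QueryParser("content", analyzer).parse("lucene");

        // The scorer selects the terms to highlight; the fragmenter splits the text.
        QueryScorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 50));

        // Re-analyze the text to obtain a TokenStream whose offsets match it.
        TokenStream stream = analyzer.tokenStream("content", new StringReader(text));

        // getBestFragments may throw IOException or InvalidTokenOffsetsException (see prototype).
        String[] fragments = highlighter.getBestFragments(stream, text, 3);
        for (String fragment : fragments) {
            System.out.println(fragment);
        }
    }
}

The real-world examples below embed this same sequence in larger applications; they differ mainly in how the TokenStream is obtained (re-analysis, TokenSources with stored term vectors, or an application-specific factory) and in how the returned fragments are post-processed.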

Usage

From source file:com.rapidminer.search.GlobalSearchHandler.java

License:Open Source License

/**
 * Creates the search result for search methods.
 *
 * @param searchTerm
 *       the search string
 * @param parsedQuery
 *       the parsed query that was used to search
 * @param searcher
 *       the index searcher instance which was used to search
 * @param result
 *       the result of the search
 * @param highlightResult
 *       if {@code true}, the {@link GlobalSearchResult#getBestFragments()} will be created
 * @return the search result instance, never {@code null}
 * @throws IOException
 *       if something goes wrong
 */
private GlobalSearchResult createSearchResult(final String searchTerm, final Query parsedQuery,
        final IndexSearcher searcher, final TopDocs result, final boolean highlightResult) throws IOException {
    int resultNumber = result.scoreDocs.length;
    List<Document> resultList = new ArrayList<>(resultNumber);
    List<String[]> highlights = highlightResult ? new LinkedList<>() : null;
    ScoreDoc lastResult = resultNumber > 0 ? result.scoreDocs[result.scoreDocs.length - 1] : null;
    for (ScoreDoc scoreDoc : result.scoreDocs) {
        Document doc = searcher.doc(scoreDoc.doc);
        resultList.add(doc);

        if (highlightResult) {
            // search result highlighting best match on name field
            QueryScorer scorer = new QueryScorer(parsedQuery);
            Highlighter highlighter = new Highlighter(HIGHLIGHT_FORMATTER, scorer);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, FRAGMENT_SIZE);
            highlighter.setTextFragmenter(fragmenter);
            try {
                TokenStream stream = TokenSources.getTokenStream(GlobalSearchUtilities.FIELD_NAME,
                        searcher.getIndexReader().getTermVectors(scoreDoc.doc),
                        doc.get(GlobalSearchUtilities.FIELD_NAME), GlobalSearchUtilities.ANALYZER,
                        Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE - 1);
                if (stream != null) {
                    highlights.add(highlighter.getBestFragments(stream,
                            doc.get(GlobalSearchUtilities.FIELD_NAME), MAX_NUMBER_OF_FRAGMENTS));
                } else {
                    highlights.add(null);
                }
            } catch (InvalidTokenOffsetsException e) {
                highlights.add(null);
            }
        }
    }
    return new GlobalSearchResult(resultList, searchTerm, lastResult, result.totalHits, highlights);
}
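
Note how this variant obtains the TokenStream via TokenSources.getTokenStream from the stored term vectors of the hit document (passing the analyzer as fallback), guards against a null stream, and maps InvalidTokenOffsetsException to a null entry so that a single problematic document does not break the whole result.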

From source file:de.blizzy.documentr.search.GetSearchHitTask.java

License:Open Source License

@Override
public SearchHit call() throws IOException {
    Formatter formatter = new SimpleHTMLFormatter("<strong>", "</strong>"); //$NON-NLS-1$ //$NON-NLS-2$
    Scorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));
    highlighter.setEncoder(new SimpleHTMLEncoder());

    Document doc = reader.document(docId);
    String projectName = doc.get(PageIndex.PROJECT);
    String branchName = doc.get(PageIndex.BRANCH);
    String path = doc.get(PageIndex.PATH);
    String title = doc.get(PageIndex.TITLE);
    String text = doc.get(PageIndex.TEXT);
    String[] tagsArray = doc.getValues(PageIndex.TAG);
    List<String> tags = Lists.newArrayList(tagsArray);
    Collections.sort(tags);
    TokenStream tokenStream = null;
    String highlightedText = StringUtils.EMPTY;
    try {
        tokenStream = TokenSources.getAnyTokenStream(reader, docId, PageIndex.TEXT, doc, analyzer);
        String[] fragments = highlighter.getBestFragments(tokenStream, text, NUM_FRAGMENTS);
        cleanupFragments(fragments);
        highlightedText = Util.join(fragments, " <strong>...</strong> "); //$NON-NLS-1$
    } catch (InvalidTokenOffsetsException e) {
        // ignore
    } finally {
        Closeables.closeQuietly(tokenStream);
    }
    return new SearchHit(projectName, branchName, path, title, highlightedText, tags);
}

From source file:de.innovationgate.wga.server.api.Lucene.java

License:Open Source License

public List<String> bestFileFragments(int fragmentSize, int maxFragments, String prefix, String suffix,
        String encode) throws WGException {
    if (!_wga.getCore().isLuceneEnabled()) {
        _cx.addwarning("Unable to retrieve best file fragments - lucene is not enabled.");
        return Collections.emptyList();
    }

    if (_wga.database().db().getContentStoreVersion() < WGDatabase.CSVERSION_WGA5
            || (_wga.database().db().getContentStoreVersion() == WGDatabase.CSVERSION_WGA5
                    && _wga.database().db().getContentStorePatchLevel() < 5)) {
        _cx.addwarning("bestFileFragments() is not supported on this content store version.");
        return Collections.emptyList();
    }

    org.apache.lucene.search.Query query = (org.apache.lucene.search.Query) _cx.gethttpsession()
            .getAttribute(Query.SESSION_ATTRIBUTE_SIMPLIFIED_LUCENEQUERY);
    if (query == null) {
        // no query in session
        return Collections.emptyList();
    }

    String filename = null;
    SearchDetails sd = _cx.getcontent().getSearchDetails();
    if (sd != null && sd instanceof LuceneSearchDetails) {
        filename = ((LuceneSearchDetails) sd).getFilename();
    }

    if (filename == null) {
        return Collections.emptyList();
    }

    if (encode == null) {
        encode = _wga.design().getTmlDefaultEncoding();
    }

    String prefixPlaceholder = "$HIGHLIGHT_PREFIX$";
    String suffixPlaceholder = "$HIGHLIGHT_SUFFIX$";
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(prefixPlaceholder, suffixPlaceholder);

    // create highlighter
    Highlighter highlighter = _wga.getCore().getLuceneManager()
            .createHighlighter(LuceneManager.INDEXFIELD_ALLCONTENT, query, formatter);

    // retrieve attachment text
    WGFileMetaData md = _cx.content().getFileMetaData(filename);
    if (md == null) {
        return Collections.emptyList();
    }

    BinaryFieldData textData = md.getPlainText();
    if (textData == null) {
        return Collections.emptyList();
    }

    try {
        // TODO highlighter does not support streams - should we limit plaintext size here?
        Reader textReader = new InputStreamReader(textData.getInputStream());
        String text = IOUtils.toString(textReader);
        textReader.close();

        // create tokenstream
        TokenStream tokenStream = _wga.getCore().getLuceneManager().createTokenStream(text, _cx.content());

        // create fragmenter
        Fragmenter fragmenter = new SimpleFragmenter(fragmentSize);
        highlighter.setTextFragmenter(fragmenter);

        String[] highlighted = highlighter.getBestFragments(tokenStream, text, maxFragments);
        if (highlighted != null) {
            List<String> list = new ArrayList<String>();
            for (int i = 0; i < highlighted.length; i++) {
                String fragment = highlighted[i];
                if (encode != null) {
                    try {
                        fragment = _cx.multiencode(encode, fragment);
                    } catch (FormattingException e) {
                        _cx.addwarning("Unable to retrieve best fragments for file '" + filename
                                + "' bc. of formating exception '" + e.getMessage() + "'.");
                        return Collections.emptyList();
                    }
                }
                fragment = WGUtils.strReplace(fragment, prefixPlaceholder, prefix, true);
                fragment = WGUtils.strReplace(fragment, suffixPlaceholder, suffix, true);
                list.add(fragment);
            }
            return list;
        } else {
            return Collections.emptyList();
        }
    } catch (Exception e) {
        _cx.addwarning("Unable to retrieve best fragments for file '" + filename + "' bc. of exception '"
                + e.getMessage() + "'.");
        return Collections.emptyList();
    }
}

From source file:de.innovationgate.wgpublisher.webtml.utils.TMLContext.java

License:Open Source License

/**
 * retrieves a list of the best fragments from the given content item based upon the last lucene query with highlight attribute set to true
 * query hits are highlighted (surrounded by the given prefix and suffix) in the fragments
 * @param itemname the item the fragments should be retrieved from
 * @param fragmentSize the number of characters for each fragment
 * @param maxFragments the maximum number of fragments returned
 * @param prefix the prefix for highlighting the search term in fragments
 * @param suffix the suffix for highlighting the search term in fragments
 * @param encode encode each fragment in the given encoding - prefix and suffix will not be encoded
 * @return list of fragments (Strings) - if no lucene query is present, returns EMPTY_LIST
 * @throws WGAPIException
 */
@CodeCompletion
public List bestfragments(String itemname, int fragmentSize, int maxFragments, String prefix, String suffix,
        String encode) throws WGAPIException {
    // check preconditions for highlighting
    if (itemname == null) {
        throw new WGIllegalArgumentException("Unable to retrieve best fragments for item 'null'.");
    }
    if (!getwgacore().isLuceneEnabled()) {
        addwarning("Unable to highlight item '" + itemname + "' bc. lucene is not enabled.");
        return Collections.EMPTY_LIST;
    }
    // try to retrieve last lucene query for highlighting
    org.apache.lucene.search.Query query = (org.apache.lucene.search.Query) getrequest().getSession()
            .getAttribute(Query.SESSION_ATTRIBUTE_SIMPLIFIED_LUCENEQUERY);
    if (query == null) {
        // no query in session - highlighting not possible
        return Collections.EMPTY_LIST;
    }

    // lowercase name
    itemname = itemname.toLowerCase();

    // create htmlformatter to highlight fragments with "$HIGHLIGHT_PREFIX$", "$HIGHLIGHT_SUFFIX$"
    // these placeholders are later on replaced by the given prefix and suffix
    // this additional step is necessary to encode the fragment text properly
    // see B00004BBE
    String prefixPlaceholder = "$HIGHLIGHT_PREFIX$";
    String suffixPlaceholder = "$HIGHLIGHT_SUFFIX$";
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(prefixPlaceholder, suffixPlaceholder);

    // create highlighter
    Highlighter highlighter = getwgacore().getLuceneManager().createHighlighter(itemname, query, formatter);

    // retrieve itemtext
    String text = itemTextValue(itemname, "none");
    if (text == null) {
        return Collections.EMPTY_LIST;
    }

    // remove html/xml from text
    // fragments should not contain html/xml bc. of design issues
    try {
        text = WGUtils.toPlainText(text, " ", false);
        // B000049EA
        // if the item value contains encoded html entities, those entities have been converted to their characters,
        // so an html encode should be done here to be safe
        // text = WGUtils.encodeHTML(text); --> has side effects @see B00004BBE
    } catch (IOException e) {
        addwarning("Unable to highlight item '" + itemname + "' bc. of exception '" + e.getMessage() + "'.");
        return Collections.EMPTY_LIST;
    }

    // create tokenstream
    TokenStream tokenStream = getwgacore().getLuceneManager().createTokenStream(text, content());

    // create fragmenter
    Fragmenter fragmenter = new SimpleFragmenter(fragmentSize);
    highlighter.setTextFragmenter(fragmenter);

    try {
        String[] highlighted = highlighter.getBestFragments(tokenStream, text, maxFragments);
        if (highlighted != null) {
            ArrayList list = new ArrayList();
            for (int i = 0; i < highlighted.length; i++) {
                // B00004BBE
                // evtl. encode fragment
                String fragment = highlighted[i];
                if (encode != null) {
                    try {
                        fragment = multiencode(encode, fragment);
                    } catch (FormattingException e) {
                        addwarning("Unable to highlight item '" + itemname + "' bc. of formating exception '"
                                + e.getMessage() + "'.");
                        return Collections.EMPTY_LIST;
                    }
                }
                // B00004BBE
                // replace highlight placeholders with correct prefix and suffix
                fragment = WGUtils.strReplace(fragment, prefixPlaceholder, prefix, true);
                fragment = WGUtils.strReplace(fragment, suffixPlaceholder, suffix, true);
                list.add(fragment);
            }
            return list;
        } else {
            return Collections.EMPTY_LIST;
        }
    } catch (IOException e) {
        addwarning("Unable to highlight item '" + itemname + "' bc. of exception '" + e.getMessage() + "'.");
        return Collections.EMPTY_LIST;
    } catch (InvalidTokenOffsetsException e) {
        addwarning("Unable to highlight item '" + itemname + "' bc. of exception '" + e.getMessage() + "'.");
        return Collections.EMPTY_LIST;
    }
}
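
Note the two-step highlighting used in both WGA examples above: hits are first wrapped in the placeholders $HIGHLIGHT_PREFIX$ and $HIGHLIGHT_SUFFIX$, the fragment is then optionally encoded, and only afterwards are the placeholders replaced by the caller-supplied prefix and suffix. This keeps the encoder from escaping the highlight markup itself.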

From source file:net.chwise.documents.HighlightedFragmentsRetriever.java

License:Open Source License

public String[] getFragmentsWithHighlightedTerms(Analyzer analyzer, Query query, String fieldName,
        String fieldContents, int fragmentNumber, int fragmentSize)
        throws IOException, InvalidTokenOffsetsException {

    TokenStream stream = TokenSources.getTokenStream(fieldName, fieldContents, analyzer);
    QueryScorer scorer = new QueryScorer(query, fieldName);
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentSize);

    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(fragmenter);
    highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

    String[] fragments = highlighter.getBestFragments(stream, fieldContents, fragmentNumber);

    if (fragments.length == 0) {
        //Return starting piece of fieldContents fragment
        fragments = new String[1];
        fragments[0] = fieldContents.substring(0, Math.min(fragmentSize, fieldContents.length()));
    }

    return fragments;
}
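
For context, a hypothetical call to this helper might look like the following sketch (the analyzer choice, the field name "body", the query, and the text are illustrative assumptions, not part of the original source):

Analyzer analyzer = new StandardAnalyzer();
Query query = new QueryParser("body", analyzer).parse("benzene");
String text = "Benzene is an aromatic hydrocarbon with a six-membered ring.";

// Ask for up to 3 fragments of roughly 100 characters each.
String[] fragments = new HighlightedFragmentsRetriever()
        .getFragmentsWithHighlightedTerms(analyzer, query, "body", text, 3, 100);
// Even when the query does not match, the helper returns the leading
// piece of the field contents instead of an empty array.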

From source file:org.apache.nutch.summary.lucene.LuceneSummarizer.java

License:Apache License

public Summary getSummary(String text, Query query) {

    String[] terms = query.getTerms();
    WeightedTerm[] weighted = new WeightedTerm[terms.length];
    for (int i = 0; i < terms.length; i++) {
        weighted[i] = new WeightedTerm(1.0f, terms[i]);
    }
    Highlighter highlighter = new Highlighter(FORMATTER, new QueryScorer(weighted));
    TokenStream tokens = analyzer.tokenStream("content", new StringReader(text));
    Summary summary = new Summary();
    try {
        // TODO : The max number of fragments (3) should be configurable
        String[] result = highlighter.getBestFragments(tokens, text, 3);
        for (int i = 0; i < result.length; i++) {
            String[] parts = result[i].split(SEPARATOR);
            boolean highlight = false;
            for (int j = 0; j < parts.length; j++) {
                if (highlight) {
                    summary.add(new Highlight(parts[j]));
                } else {
                    summary.add(new Fragment(parts[j]));
                }
                highlight = !highlight;
            }
            summary.add(new Ellipsis());
        }

        /* TODO MC  BUG resolved 0000029 - if query terms do not occur on text, an empty summary is returned. Now it sends the first tokens. */
        if (result == null || result.length == 0) {
            tokens = analyzer.tokenStream("content", new StringReader(text));

            Token firstToken = null, lastToken = null;
            Token token = null;
            int maxLen = 100; // the same as defined in SimpleFragmenter but it is private

            /*
            ArrayList<Token> titleTokens=new ArrayList<Token>();
            ArrayList<Token> textTokens=new ArrayList<Token>();
            boolean titleMatched=false;
            boolean hasMatched=false; // exit match after match title the first time               
                    
            // remove title from text. compares pairs of text
            while ((titleMatched || !hasMatched) && (token=tokens.next())!=null) {
                       
               if (token.type().equals("<WORD>")) {
                       
                  if (titleTokens.size()==0) {
                     titleTokens.add(token);
                  }
                  else if (textTokens.size()<titleTokens.size()) {
                     textTokens.add(token);
                  }
                       
               if (textTokens.size()==titleTokens.size()) {
                  // compare
                  titleMatched=true;
                  for (int i=0;i<textTokens.size() && titleMatched;i++) {
                     if (!textTokens.get(i).termText().equals(titleTokens.get(i).termText())) {
                        titleMatched=false;
                     }
                  }
                  if (titleMatched) { // try to match a larger pattern
                     titleTokens.add(textTokens.get(0));
                     textTokens.remove(0);
                     hasMatched=true;
                  }
                  else { // remove rest of title from text
                     if (hasMatched) {
                        firstToken=textTokens.get(titleTokens.size()-2);
                     }
                     else { // add one more token to title
                        titleTokens.add(textTokens.get(0));
                        textTokens.remove(0);
                     }
                  }
               }
            }
         }
                    
            if (textTokens.size()==0) {
               return summary;
            }
                                  
            for (int i=0;i<textTokens.size() && textTokens.get(i).endOffset()-firstToken.startOffset()<maxLen;i++) {
               lastToken=textTokens.get(i);
            }
            */

            // read tokens until maxLen
            while ((token = tokens.next()) != null) {
                if (token.type().equals("<WORD>")) {
                    if (firstToken == null) {
                        firstToken = token;
                    } else if (token.endOffset() - firstToken.startOffset() < maxLen) {
                        lastToken = token;
                    } else {
                        break;
                    }
                }
            }
            if (lastToken == null) {
                lastToken = firstToken;
            }

            summary.add(new Fragment(text.substring(firstToken.startOffset(), lastToken.endOffset())));
            summary.add(new Ellipsis());
        }
        /* TODO MC */

    } catch (Exception e) {
        // Nothing to do...
    }
    return summary;
}

From source file:org.apache.wiki.search.LuceneSearchProvider.java

License:Apache License

/**
 *  Searches pages using a particular combination of flags.
 *
 *  @param query The query to perform in Lucene query language
 *  @param flags A set of flags
 *  @return A Collection of SearchResult instances
 *  @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags) throws ProviderException {
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
        String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
        QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_36, queryfields, getLuceneAnalyzer());

        //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);

        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                    new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
        }

        try {
            File dir = new File(m_luceneDirectory);
            Directory luceneDir = new SimpleFSDirectory(dir, null);
            IndexReader reader = IndexReader.open(luceneDir);
            searcher = new IndexSearcher(reader);
        } catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }

        ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;

        list = new ArrayList<SearchResult>(hits.length);
        for (int curr = 0; curr < hits.length; curr++) {
            int docID = hits[curr].doc;
            Document doc = searcher.doc(docID);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page
                    // When the search-results are cleaned up this can be enabled again.
                }

                int score = (int) (hits[curr].score * 100);

                // Get highlighted search contexts
                String text = doc.get(LUCENE_PAGE_CONTENTS);

                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS,
                            new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);

                }

                SearchResult result = new SearchResultImpl(page, score, fragments);
                list.add(result);
            } else {
                log.error("Lucene found a result page '" + pageName
                        + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse query ", e);

        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
        log.error("Tokens are incompatible with provided text ", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }

    return list;
}

From source file:org.compass.core.lucene.engine.LuceneSearchEngineHighlighter.java

License:Apache License

public String[] fragments(Resource resource, String propertyName, String text) throws SearchEngineException {
    Highlighter highlighter = createHighlighter(propertyName);
    TokenStream tokenStream = createTokenStream(resource, propertyName, text);
    try {
        return highlighter.getBestFragments(tokenStream, text, getMaxNumFragments());
    } catch (IOException e) {
        throw new SearchEngineException("Failed to highlight fragments for alias [" + resource.getAlias()
                + "] and property [" + propertyName + "]");
    }
}

From source file:org.haplo.app.SearchResultExcerptHighlighter.java

License:Mozilla Public License

static public String[] bestHighlightedExcerpts(String escapedText, String searchTerms, int maxExcerptLength) {
    try {
        // Scorer selects the terms which need highlighting. Created from a 'query' based on the extracted search terms.
        Scorer scorer;
        Fragmenter fragmenter;
        if (searchTerms != null && searchTerms.length() > 0) {
            QueryParser queryParser = new QueryParser("FIELD", new StandardAnalyzer());
            Query query = queryParser.parse(searchTerms);
            scorer = new QueryScorer(query);
            fragmenter = new SimpleSpanFragmenter((QueryScorer) scorer, maxExcerptLength);
        } else {
            scorer = new NoHighlightingScorer();
            fragmenter = new SimpleFragmenter(maxExcerptLength);
        }

        // Parse the escaped text into tokens, which retain the positions in the text
        StandardAnalyzer analyser = new StandardAnalyzer();
        TokenStream tokenStream = analyser.tokenStream("FIELD", new StringReader(escapedText));

        // Finally, do the highlighting!
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter.getBestFragments(tokenStream, escapedText, NUMBER_OF_FRAGMENTS);
    } catch (Exception e) {
        Logger.getLogger("org.haplo.app").info("Exception in SearchResultExcerptHighlighter: ", e);
        return null;
    }
}

From source file:servlet.Checkcopy.java

/**
 * Processes requests for both HTTP <code>GET</code> and <code>POST</code>
 * methods.
 *
 * @param request servlet request
 * @param response servlet response
 * @throws ServletException if a servlet-specific error occurs
 * @throws IOException if an I/O error occurs
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    HttpSession ss = request.getSession();
    Assignment a = (Assignment) ss.getAttribute("curAm");
    int safv_id = Integer.parseInt(request.getParameter("safv_id"));
    String studentAmPath = getServletContext().getRealPath("/") + "/file/student_assignment_file/";
    if (a.getAss_type().equalsIgnoreCase("file")) {
        StAssignmentFile sa = (StAssignmentFile) ss.getAttribute("sa");
        StAmFileList f = StAmFileList.getSafvByListIdSafv(safv_id, sa.getList_id());
        String filename = f.getPath_file();
        String fileExtension = filename.substring(filename.lastIndexOf(".") + 1);
        String keyword = "";
        if (fileExtension.equalsIgnoreCase("docx")) {
            keyword = DocumentFunction.readDocxFile(studentAmPath + filename);
        } else if (fileExtension.equalsIgnoreCase("doc")) {
            keyword = DocumentFunction.readDocFile(studentAmPath + filename);
        } else if (fileExtension.equalsIgnoreCase("xls")) {
            keyword = DocumentFunction.readXlsFile(studentAmPath + filename);
        } else if (fileExtension.equalsIgnoreCase("xlsx")) {
            keyword = DocumentFunction.readXlsxFile(studentAmPath + filename);
        } else if (fileExtension.equalsIgnoreCase("pdf")) {
            keyword = DocumentFunction.readPdfFile(studentAmPath + filename);
        }

        if (!keyword.equals("")) {
            System.out.println("----------------------search...");
            Directory directory = null;
            IndexReader indexReader;
            ArrayList<String[]> indexsetList = null;
            try {
                directory = FSDirectory.open(
                        new File(studentAmPath + "//" + a.getCourse().getCourse_id() + "//" + sa.getAm_id()));
                indexReader = DirectoryReader.open(directory);
                IndexSearcher searcher = new IndexSearcher(indexReader);
                BooleanQuery.setMaxClauseCount(20000);
                QueryParser parser = new QueryParser(Version.LUCENE_47, "student_assignment",
                        new ThaiAnalyzer(Version.LUCENE_47));
                Query query = parser.parse(QueryParser.escape(keyword));

                int hitsPerPage = 10;
                Sort sort = new Sort(new SortField[] { SortField.FIELD_SCORE,
                        new SortField("student_assignment", SortField.Type.STRING) });
                TopFieldCollector topField = TopFieldCollector.create(sort, hitsPerPage, true, true, true,
                        false);
                searcher.search(query, topField);
                TopDocs docs = topField.topDocs();
                SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<font color=red>", "</font>");
                Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));

                indexsetList = new ArrayList<>();
                for (int i = 0; i < docs.totalHits; i++) {
                    String[] indexset = new String[5];
                    int id = docs.scoreDocs[i].doc;
                    float score = docs.scoreDocs[i].score;
                    Document doc = searcher.doc(id);
                    String text = doc.get("student_assignment");
                    String st_am_id = doc.get("st_am_id");
                    String owner_safv_id = doc.get("safv_id");
                    //                    System.out.println(text);
                    //                    System.out.println(st_am_id);
                    //                    System.out.println(owner_safv_id);
                    //                    System.out.println("-----------");
                    TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id,
                            "student_assignment", new ThaiAnalyzer(Version.LUCENE_47));

                    String[] hltextArr = highlighter.getBestFragments(tokenStream, text, hitsPerPage);
                    String hltext = "";
                    for (String string : hltextArr) {
                        hltext += string.toString() + "<br/>";
                    }
                    indexset[0] = st_am_id;
                    indexset[1] = hltext;
                    //getting owner of
                    StAmFileList file = StAmFileList.getSafvBySafv(Integer.parseInt(owner_safv_id));
                    if (file != null) {
                        System.out.println((a.getAm_id() + " /" + file.getList_id()));
                        StAssignmentFile stam = StAssignmentFile.getStAmBbyAmIDAndList(a.getAm_id(),
                                file.getList_id());
                        String html = "";
                        // decide whether to add this hit (skip the submitter's own work)
                        boolean add = true;
                        if (stam.getG_id() == 0) {
                            //if no group that mean it's a individual work
                            if (sa.getAcc_id() != stam.getAcc_id()) {
                                Account owneracc = Account.getNameByID(stam.getAcc_id());
                                html = "<img style=\"width:30px\" src=\"" + owneracc.getProfile_pic()
                                        + "\" data-toggle=\"tooltip\" data-placement=\"top\" title=\"\" class=\"img-circle\" data-original-title=\""
                                        + owneracc.getFirstname() + "\">";
                            } else {
                                add = false;
                            }
                        } else {
                            if (sa.getG_id() != stam.getG_id()) {
                                List<Account> ownerlist = Account.getNameByGIDandAmID(stam.getG_id(),
                                        stam.getAm_id());
                                html = "<a class=\"showGroup\" data-toggle=\"popover\" data-html=\"true\" data-content=\""
                                        + Util.createPopoverGroup(ownerlist) + "\">Group no. "
                                        + Group_member.getGNOById(stam.getG_id()) + "</a>";
                            } else {
                                add = false;
                            }
                        }
                        indexset[2] = html;
                        indexset[3] = score + "";
                        indexset[4] = owner_safv_id;
                        if (add) {
                            indexsetList.add(indexset);
                        }
                    }
                }
            } catch (IOException ex) {
                Logger.getLogger(TestDriver.class.getName()).log(Level.SEVERE, null, ex);
            } catch (ParseException ex) {
                Logger.getLogger(TestDriver.class.getName()).log(Level.SEVERE, null, ex);
            } catch (InvalidTokenOffsetsException ex) {
                Logger.getLogger(TestDriver.class.getName()).log(Level.SEVERE, null, ex);
            }
            //            for (String[] strings : indexsetList) {
            //                System.out.println(strings[0] + " : "+ strings[2] +" : " + strings[1] );
            //            }
            request.setAttribute("nowUUid", f.getUuid());
            request.setAttribute("keyword", keyword);
            request.setAttribute("indexsetList", indexsetList);
        } else {
            request.setAttribute("error_msg", "This assignment cannot be used for copy checking.");
        }
        //            System.out.println(keyword);

        getServletContext().getRequestDispatcher("/Checkcopy.jsp?tab=AllAssignment").forward(request, response);
    }
}