Example usage for org.apache.lucene.search.highlight Highlighter getBestFragments

Introduction

On this page you can find example usage for org.apache.lucene.search.highlight Highlighter getBestFragments.

Prototype

public final String[] getBestFragments(TokenStream tokenStream, String text, int maxNumFragments)
        throws IOException, InvalidTokenOffsetsException 

Document

Highlights chosen terms in a text, extracting the most relevant sections.
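
For orientation, here is a minimal, self-contained sketch of the typical call sequence. It is an illustrative example, not taken from any of the projects below; it assumes a Lucene 5.x-era classpath, and the field name, query, and text are made up:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;

public class GetBestFragmentsSketch {
    public static void main(String[] args) throws Exception {
        String text = "Lucene is a full-text search library. Lucene highlights matching terms.";

        StandardAnalyzer analyzer = new StandardAnalyzer();
        Query query = new QueryParser("content", analyzer).parse("lucene");

        // The scorer selects the terms to highlight; the fragmenter splits the text.
        QueryScorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 50));

        // Re-analyze the text to obtain a TokenStream whose offsets match it.
        TokenStream stream = analyzer.tokenStream("content", new StringReader(text));

        // getBestFragments may throw IOException or InvalidTokenOffsetsException (see prototype).
        String[] fragments = highlighter.getBestFragments(stream, text, 3);
        for (String fragment : fragments) {
            System.out.println(fragment);
        }
    }
}

The real-world examples below embed this same sequence in larger applications; they differ mainly in how the TokenStream is obtained (re-analysis, TokenSources with stored term vectors, or an application-specific factory) and in how the returned fragments are post-processed.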

Usage

From source file:com.rapidminer.search.GlobalSearchHandler.java

License:Open Source License

/**
 * Creates the search result for search methods.
 *
 * @param searchTerm
 *       the search string
 * @param parsedQuery
 *       the parsed query that was used to search
 * @param searcher
 *       the index searcher instance which was used to search
 * @param result
 *       the result of the search
 * @param highlightResult
 *       if {@code true}, the {@link GlobalSearchResult#getBestFragments()} will be created
 * @return the search result instance, never {@code null}
 * @throws IOException
 *       if something goes wrong
 */
private GlobalSearchResult createSearchResult(final String searchTerm, final Query parsedQuery,
        final IndexSearcher searcher, final TopDocs result, final boolean highlightResult) throws IOException {
    int resultNumber = result.scoreDocs.length;
    List<Document> resultList = new ArrayList<>(resultNumber);
    List<String[]> highlights = highlightResult ? new LinkedList<>() : null;
    ScoreDoc lastResult = resultNumber > 0 ? result.scoreDocs[result.scoreDocs.length - 1] : null;
    for (ScoreDoc scoreDoc : result.scoreDocs) {
        Document doc = searcher.doc(scoreDoc.doc);
        resultList.add(doc);

        if (highlightResult) {
            // search result highlighting best match on name field
            QueryScorer scorer = new QueryScorer(parsedQuery);
            Highlighter highlighter = new Highlighter(HIGHLIGHT_FORMATTER, scorer);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, FRAGMENT_SIZE);
            highlighter.setTextFragmenter(fragmenter);
            try {
                TokenStream stream = TokenSources.getTokenStream(GlobalSearchUtilities.FIELD_NAME,
                        searcher.getIndexReader().getTermVectors(scoreDoc.doc),
                        doc.get(GlobalSearchUtilities.FIELD_NAME), GlobalSearchUtilities.ANALYZER,
                        Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE - 1);
                if (stream != null) {
                    highlights.add(highlighter.getBestFragments(stream,
                            doc.get(GlobalSearchUtilities.FIELD_NAME), MAX_NUMBER_OF_FRAGMENTS));
                } else {
                    highlights.add(null);
                }
            } catch (InvalidTokenOffsetsException e) {
                highlights.add(null);
            }
        }
    }
    return new GlobalSearchResult(resultList, searchTerm, lastResult, result.totalHits, highlights);
}
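
Note how this variant obtains the TokenStream via TokenSources.getTokenStream from the stored term vectors of the hit document (passing the analyzer as fallback), guards against a null stream, and maps InvalidTokenOffsetsException to a null entry so that a single problematic document does not break the whole result.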

From source file:de.blizzy.documentr.search.GetSearchHitTask.java

License:Open Source License

@Override
public SearchHit call() throws IOException {
    Formatter formatter = new SimpleHTMLFormatter("<strong>", "</strong>"); //$NON-NLS-1$ //$NON-NLS-2$
    Scorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));
    highlighter.setEncoder(new SimpleHTMLEncoder());

    Document doc = reader.document(docId);
    String projectName = doc.get(PageIndex.PROJECT);
    String branchName = doc.get(PageIndex.BRANCH);
    String path = doc.get(PageIndex.PATH);
    String title = doc.get(PageIndex.TITLE);
    String text = doc.get(PageIndex.TEXT);
    String[] tagsArray = doc.getValues(PageIndex.TAG);
    List<String> tags = Lists.newArrayList(tagsArray);
    Collections.sort(tags);
    TokenStream tokenStream = null;
    String highlightedText = StringUtils.EMPTY;
    try {
        tokenStream = TokenSources.getAnyTokenStream(reader, docId, PageIndex.TEXT, doc, analyzer);
        String[] fragments = highlighter.getBestFragments(tokenStream, text, NUM_FRAGMENTS);
        cleanupFragments(fragments);
        highlightedText = Util.join(fragments, " <strong>...</strong> "); //$NON-NLS-1$
    } catch (InvalidTokenOffsetsException e) {
        // ignore
    } finally {
        Closeables.closeQuietly(tokenStream);
    }
    return new SearchHit(projectName, branchName, path, title, highlightedText, tags);
}

From source file:de.innovationgate.wga.server.api.Lucene.java

License:Open Source License

public List<String> bestFileFragments(int fragmentSize, int maxFragments, String prefix, String suffix,
        String encode) throws WGException {
    if (!_wga.getCore().isLuceneEnabled()) {
        _cx.addwarning("Unable to retrieve best file fragments - lucene is not enabled.");
        return Collections.emptyList();
    }

    if (_wga.database().db().getContentStoreVersion() < WGDatabase.CSVERSION_WGA5
            || (_wga.database().db().getContentStoreVersion() == WGDatabase.CSVERSION_WGA5
                    && _wga.database().db().getContentStorePatchLevel() < 5)) {
        _cx.addwarning("bestFileFragments() is not supported on this content store version.");
        return Collections.emptyList();
    }

    org.apache.lucene.search.Query query = (org.apache.lucene.search.Query) _cx.gethttpsession()
            .getAttribute(Query.SESSION_ATTRIBUTE_SIMPLIFIED_LUCENEQUERY);
    if (query == null) {
        // no query in session
        return Collections.emptyList();
    }

    String filename = null;
    SearchDetails sd = _cx.getcontent().getSearchDetails();
    if (sd != null && sd instanceof LuceneSearchDetails) {
        filename = ((LuceneSearchDetails) sd).getFilename();
    }

    if (filename == null) {
        return Collections.emptyList();
    }

    if (encode == null) {
        encode = _wga.design().getTmlDefaultEncoding();
    }

    String prefixPlaceholder = "$HIGHLIGHT_PREFIX$";
    String suffixPlaceholder = "$HIGHLIGHT_SUFFIX$";
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(prefixPlaceholder, suffixPlaceholder);

    // create highlighter
    Highlighter highlighter = _wga.getCore().getLuceneManager()
            .createHighlighter(LuceneManager.INDEXFIELD_ALLCONTENT, query, formatter);

    // retrieve attachment text
    WGFileMetaData md = _cx.content().getFileMetaData(filename);
    if (md == null) {
        return Collections.emptyList();
    }

    BinaryFieldData textData = md.getPlainText();
    if (textData == null) {
        return Collections.emptyList();
    }

    try {
        // TODO highlighter does not support streams - should we limit plaintext size here?
        Reader textReader = new InputStreamReader(textData.getInputStream());
        String text = IOUtils.toString(textReader);
        textReader.close();

        // create tokenstream
        TokenStream tokenStream = _wga.getCore().getLuceneManager().createTokenStream(text, _cx.content());

        // create fragmenter
        Fragmenter fragmenter = new SimpleFragmenter(fragmentSize);
        highlighter.setTextFragmenter(fragmenter);

        String[] highlighted = highlighter.getBestFragments(tokenStream, text, maxFragments);
        if (highlighted != null) {
            List<String> list = new ArrayList<String>();
            for (int i = 0; i < highlighted.length; i++) {
                String fragment = highlighted[i];
                if (encode != null) {
                    try {
                        fragment = _cx.multiencode(encode, fragment);
                    } catch (FormattingException e) {
                        _cx.addwarning("Unable to retrieve best fragments for file '" + filename
                                + "' bc. of formating exception '" + e.getMessage() + "'.");
                        return Collections.emptyList();
                    }
                }
                fragment = WGUtils.strReplace(fragment, prefixPlaceholder, prefix, true);
                fragment = WGUtils.strReplace(fragment, suffixPlaceholder, suffix, true);
                list.add(fragment);
            }
            return list;
        } else {
            return Collections.emptyList();
        }
    } catch (Exception e) {
        _cx.addwarning("Unable to retrieve best fragments for file '" + filename + "' bc. of exception '"
                + e.getMessage() + "'.");
        return Collections.emptyList();
    }
}

From source file:de.innovationgate.wgpublisher.webtml.utils.TMLContext.java

License:Open Source License

/**
 * retrieves a list of the best fragments from the given content item based upon the last lucene query with highlight attribute set to true
 * query hits are highlighted (surrounded by the given prefix and suffix) in the fragments
 * @param itemname the item the fragments should be retrieved from
 * @param fragmentSize the number of characters for each fragment
 * @param maxFragments the maximum number of fragments returned
 * @param prefix the prefix for highlighting the search term in fragments
 * @param suffix the suffix for highlighting the search term in fragments
 * @param encode encode each fragment in the given encoding - prefix and suffix will not be encoded
 * @return list of fragments (Strings) - if no lucene query is present, returns EMPTY_LIST
 * @throws WGAPIException
 */
@CodeCompletion
public List bestfragments(String itemname, int fragmentSize, int maxFragments, String prefix, String suffix,
        String encode) throws WGAPIException {
    // check preconditions for highlighting
    if (itemname == null) {
        throw new WGIllegalArgumentException("Unable to retrieve best fragments for item 'null'.");
    }
    if (!getwgacore().isLuceneEnabled()) {
        addwarning("Unable to highlight item '" + itemname + "' bc. lucene is not enabled.");
        return Collections.EMPTY_LIST;
    }
    // try to retrieve last lucene query for highlighting
    org.apache.lucene.search.Query query = (org.apache.lucene.search.Query) getrequest().getSession()
            .getAttribute(Query.SESSION_ATTRIBUTE_SIMPLIFIED_LUCENEQUERY);
    if (query == null) {
        // no query in session - highlighting not possible
        return Collections.EMPTY_LIST;
    }

    // lowercase name
    itemname = itemname.toLowerCase();

    // create htmlformatter to highlight fragments with "$HIGHLIGHT_PREFIX$", "$HIGHLIGHT_SUFFIX$"
    // these placeholders are later on replaced by the given prefix and suffix
    // this additional step is necessary to encode the fragment text properly
    // see B00004BBE
    String prefixPlaceholder = "$HIGHLIGHT_PREFIX$";
    String suffixPlaceholder = "$HIGHLIGHT_SUFFIX$";
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(prefixPlaceholder, suffixPlaceholder);

    // create highlighter
    Highlighter highlighter = getwgacore().getLuceneManager().createHighlighter(itemname, query, formatter);

    // retrieve itemtext
    String text = itemTextValue(itemname, "none");
    if (text == null) {
        return Collections.EMPTY_LIST;
    }

    // remove html/xml from text
    // fragments should not contain html/xml bc. of design issues
    try {
        text = WGUtils.toPlainText(text, " ", false);
        // B000049EA
        // if the item value contains encoded html entities, those entities have been converted to their characters,
        // so an html encode should be done here to be safe
        // text = WGUtils.encodeHTML(text); --> has side effects @see B00004BBE
    } catch (IOException e) {
        addwarning("Unable to highlight item '" + itemname + "' bc. of exception '" + e.getMessage() + "'.");
        return Collections.EMPTY_LIST;
    }

    // create tokenstream
    TokenStream tokenStream = getwgacore().getLuceneManager().createTokenStream(text, content());

    // create fragmenter
    Fragmenter fragmenter = new SimpleFragmenter(fragmentSize);
    highlighter.setTextFragmenter(fragmenter);

    try {
        String[] highlighted = highlighter.getBestFragments(tokenStream, text, maxFragments);
        if (highlighted != null) {
            ArrayList list = new ArrayList();
            for (int i = 0; i < highlighted.length; i++) {
                // B00004BBE
                // evtl. encode fragment
                String fragment = highlighted[i];
                if (encode != null) {
                    try {
                        fragment = multiencode(encode, fragment);
                    } catch (FormattingException e) {
                        addwarning("Unable to highlight item '" + itemname + "' bc. of formating exception '"
                                + e.getMessage() + "'.");
                        return Collections.EMPTY_LIST;
                    }
                }
                // B00004BBE
                // replace highlight placeholders with correct prefix and suffix
                fragment = WGUtils.strReplace(fragment, prefixPlaceholder, prefix, true);
                fragment = WGUtils.strReplace(fragment, suffixPlaceholder, suffix, true);
                list.add(fragment);
            }
            return list;
        } else {
            return Collections.EMPTY_LIST;
        }
    } catch (IOException e) {
        addwarning("Unable to highlight item '" + itemname + "' bc. of exception '" + e.getMessage() + "'.");
        return Collections.EMPTY_LIST;
    } catch (InvalidTokenOffsetsException e) {
        addwarning("Unable to highlight item '" + itemname + "' bc. of exception '" + e.getMessage() + "'.");
        return Collections.EMPTY_LIST;
    }
}
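
Note the two-step highlighting used in both WGA examples above: hits are first wrapped in the placeholders $HIGHLIGHT_PREFIX$ and $HIGHLIGHT_SUFFIX$, the fragment is then optionally encoded, and only afterwards are the placeholders replaced by the caller-supplied prefix and suffix. This keeps the encoder from escaping the highlight markup itself.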

From source file:net.chwise.documents.HighlightedFragmentsRetriever.java

License:Open Source License

public String[] getFragmentsWithHighlightedTerms(Analyzer analyzer, Query query, String fieldName,
        String fieldContents, int fragmentNumber, int fragmentSize)
        throws IOException, InvalidTokenOffsetsException {

    TokenStream stream = TokenSources.getTokenStream(fieldName, fieldContents, analyzer);
    QueryScorer scorer = new QueryScorer(query, fieldName);
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentSize);

    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(fragmenter);
    highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

    String[] fragments = highlighter.getBestFragments(stream, fieldContents, fragmentNumber);

    if (fragments.length == 0) {
        //Return starting piece of fieldContents fragment
        fragments = new String[1];
        fragments[0] = fieldContents.substring(0, Math.min(fragmentSize, fieldContents.length()));
    }

    return fragments;
}
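
For context, a hypothetical call to this helper might look like the following sketch (the analyzer choice, the field name "body", the query, and the text are illustrative assumptions, not part of the original source):

Analyzer analyzer = new StandardAnalyzer();
Query query = new QueryParser("body", analyzer).parse("benzene");
String text = "Benzene is an aromatic hydrocarbon with a six-membered ring.";

// Ask for up to 3 fragments of roughly 100 characters each.
String[] fragments = new HighlightedFragmentsRetriever()
        .getFragmentsWithHighlightedTerms(analyzer, query, "body", text, 3, 100);
// Even when the query does not match, the helper returns the leading
// piece of the field contents instead of an empty array.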

From source file:org.apache.nutch.summary.lucene.LuceneSummarizer.java

License:Apache License

public Summary getSummary(String text, Query query) {

    String[] terms = query.getTerms();
    WeightedTerm[] weighted = new WeightedTerm[terms.length];
    for (int i = 0; i < terms.length; i++) {
        weighted[i] = new WeightedTerm(1.0f, terms[i]);
    }
    Highlighter highlighter = new Highlighter(FORMATTER, new QueryScorer(weighted));
    TokenStream tokens = analyzer.tokenStream("content", new StringReader(text));
    Summary summary = new Summary();
    try {
        // TODO : The max number of fragments (3) should be configurable
        String[] result = highlighter.getBestFragments(tokens, text, 3);
        for (int i = 0; i < result.length; i++) {
            String[] parts = result[i].split(SEPARATOR);
            boolean highlight = false;
            for (int j = 0; j < parts.length; j++) {
                if (highlight) {
                    summary.add(new Highlight(parts[j]));
                } else {
                    summary.add(new Fragment(parts[j]));
                }
                highlight = !highlight;
            }
            summary.add(new Ellipsis());
        }

        /* TODO MC  BUG resolved 0000029 - if query terms do not occur on text, an empty summary is returned. Now it sends the first tokens. */
        if (result == null || result.length == 0) {
            tokens = analyzer.tokenStream("content", new StringReader(text));

            Token firstToken = null, lastToken = null;
            Token token = null;
            int maxLen = 100; // the same as defined in SimpleFragmenter but it is private

            /*
            ArrayList<Token> titleTokens=new ArrayList<Token>();
            ArrayList<Token> textTokens=new ArrayList<Token>();
            boolean titleMatched=false;
            boolean hasMatched=false; // exit match after match title the first time               
                    
            // remove title from text. compares pairs of text
            while ((titleMatched || !hasMatched) && (token=tokens.next())!=null) {
                       
               if (token.type().equals("<WORD>")) {
                       
                  if (titleTokens.size()==0) {
                     titleTokens.add(token);
                  }
                  else if (textTokens.size()<titleTokens.size()) {
                     textTokens.add(token);
                  }
                       
               if (textTokens.size()==titleTokens.size()) {
                  // compare
                  titleMatched=true;
                  for (int i=0;i<textTokens.size() && titleMatched;i++) {
                     if (!textTokens.get(i).termText().equals(titleTokens.get(i).termText())) {
                        titleMatched=false;
                     }
                  }
                  if (titleMatched) { // try to match a larger pattern
                     titleTokens.add(textTokens.get(0));
                     textTokens.remove(0);
                     hasMatched=true;
                  }
                  else { // remove rest of title from text
                     if (hasMatched) {
                        firstToken=textTokens.get(titleTokens.size()-2);
                     }
                     else { // add one more token to title
                        titleTokens.add(textTokens.get(0));
                        textTokens.remove(0);
                     }
                  }
               }
            }
         }
                    
            if (textTokens.size()==0) {
               return summary;
            }
                                  
            for (int i=0;i<textTokens.size() && textTokens.get(i).endOffset()-firstToken.startOffset()<maxLen;i++) {
               lastToken=textTokens.get(i);
            }
            */

            // read tokens until maxLen
            while ((token = tokens.next()) != null) {
                if (token.type().equals("<WORD>")) {
                    if (firstToken == null) {
                        firstToken = token;
                    } else if (token.endOffset() - firstToken.startOffset() < maxLen) {
                        lastToken = token;
                    } else {
                        break;
                    }
                }
            }
            if (lastToken == null) {
                lastToken = firstToken;
            }

            summary.add(new Fragment(text.substring(firstToken.startOffset(), lastToken.endOffset())));
            summary.add(new Ellipsis());
        }
        /* TODO MC */

    } catch (Exception e) {
        // Nothing to do...
    }
    return summary;
}

From source file:org.apache.wiki.search.LuceneSearchProvider.java

License:Apache License

/**
 *  Searches pages using a particular combination of flags.
 *
 *  @param query The query to perform in Lucene query language
 *  @param flags A set of flags
 *  @return A Collection of SearchResult instances
 *  @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags) throws ProviderException {
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
        String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
        QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_36, queryfields, getLuceneAnalyzer());

        //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);

        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                    new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
        }

        try {
            File dir = new File(m_luceneDirectory);
            Directory luceneDir = new SimpleFSDirectory(dir, null);
            IndexReader reader = IndexReader.open(luceneDir);
            searcher = new IndexSearcher(reader);
        } catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }

        ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;

        list = new ArrayList<SearchResult>(hits.length);
        for (int curr = 0; curr < hits.length; curr++) {
            int docID = hits[curr].doc;
            Document doc = searcher.doc(docID);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page
                    // When the search-results are cleaned up this can be enabled again.
                }

                int score = (int) (hits[curr].score * 100);

                // Get highlighted search contexts
                String text = doc.get(LUCENE_PAGE_CONTENTS);

                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS,
                            new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);

                }

                SearchResult result = new SearchResultImpl(page, score, fragments);
                list.add(result);
            } else {
                log.error("Lucene found a result page '" + pageName
                        + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse query ", e);

        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
        log.error("Tokens are incompatible with provided text ", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }

    return list;
}

From source file:org.compass.core.lucene.engine.LuceneSearchEngineHighlighter.java

License:Apache License

public String[] fragments(Resource resource, String propertyName, String text) throws SearchEngineException {
    Highlighter highlighter = createHighlighter(propertyName);
    TokenStream tokenStream = createTokenStream(resource, propertyName, text);
    try {
        return highlighter.getBestFragments(tokenStream, text, getMaxNumFragments());
    } catch (IOException e) {
        throw new SearchEngineException("Failed to highlight fragments for alias [" + resource.getAlias()
                + "] and property [" + propertyName + "]");
    }
}

From source file:org.haplo.app.SearchResultExcerptHighlighter.java

License:Mozilla Public License

static public String[] bestHighlightedExcerpts(String escapedText, String searchTerms, int maxExcerptLength) {
    try {
        // Scorer selects the terms which need highlighting. Created from a 'query' based on the extracted search terms.
        Scorer scorer;
        Fragmenter fragmenter;
        if (searchTerms != null && searchTerms.length() > 0) {
            QueryParser queryParser = new QueryParser("FIELD", new StandardAnalyzer());
            Query query = queryParser.parse(searchTerms);
            scorer = new QueryScorer(query);
            fragmenter = new SimpleSpanFragmenter((QueryScorer) scorer, maxExcerptLength);
        } else {
            scorer = new NoHighlightingScorer();
            fragmenter = new SimpleFragmenter(maxExcerptLength);
        }

        // Parse the escaped text into tokens, which retain the positions in the text
        StandardAnalyzer analyser = new StandardAnalyzer();
        TokenStream tokenStream = analyser.tokenStream("FIELD", new StringReader(escapedText));

        // Finally, do the highlighting!
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter.getBestFragments(tokenStream, escapedText, NUMBER_OF_FRAGMENTS);
    } catch (Exception e) {
        Logger.getLogger("org.haplo.app").info("Exception in SearchResultExcerptHighlighter: ", e);
        return null;
    }
}

From source file:servlet.Checkcopy.java

/**
 * Processes requests for both HTTP <code>GET</code> and <code>POST</code>
 * methods.
 *
 * @param request servlet request
 * @param response servlet response
 * @throws ServletException if a servlet-specific error occurs
 * @throws IOException if an I/O error occurs
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    HttpSession ss = request.getSession();
    Assignment a = (Assignment) ss.getAttribute("curAm");
    int safv_id = Integer.parseInt(request.getParameter("safv_id"));
    String studentAmPath = getServletContext().getRealPath("/") + "/file/student_assignment_file/";
    if (a.getAss_type().equalsIgnoreCase("file")) {
        StAssignmentFile sa = (StAssignmentFile) ss.getAttribute("sa");
        StAmFileList f = StAmFileList.getSafvByListIdSafv(safv_id, sa.getList_id());
        String filename = f.getPath_file();
        String fileExtension = filename.substring(filename.lastIndexOf(".") + 1);
        String keyword = "";
        if (fileExtension.equalsIgnoreCase("docx")) {
            keyword = DocumentFunction.readDocxFile(studentAmPath + filename);
        } else if (fileExtension.equalsIgnoreCase("doc")) {
            keyword = DocumentFunction.readDocFile(studentAmPath + filename);
        } else if (fileExtension.equalsIgnoreCase("xls")) {
            keyword = DocumentFunction.readXlsFile(studentAmPath + filename);
        } else if (fileExtension.equalsIgnoreCase("xlsx")) {
            keyword = DocumentFunction.readXlsxFile(studentAmPath + filename);
        } else if (fileExtension.equalsIgnoreCase("pdf")) {
            keyword = DocumentFunction.readPdfFile(studentAmPath + filename);
        }

        if (!keyword.equals("")) {
            System.out.println("----------------------search...");
            Directory directory = null;
            IndexReader indexReader;
            ArrayList<String[]> indexsetList = null;
            try {
                directory = FSDirectory.open(
                        new File(studentAmPath + "//" + a.getCourse().getCourse_id() + "//" + sa.getAm_id()));
                indexReader = DirectoryReader.open(directory);
                IndexSearcher searcher = new IndexSearcher(indexReader);
                BooleanQuery.setMaxClauseCount(20000);
                QueryParser parser = new QueryParser(Version.LUCENE_47, "student_assignment",
                        new ThaiAnalyzer(Version.LUCENE_47));
                Query query = parser.parse(QueryParser.escape(keyword));

                int hitsPerPage = 10;
                Sort sort = new Sort(new SortField[] { SortField.FIELD_SCORE,
                        new SortField("student_assignment", SortField.Type.STRING) });
                TopFieldCollector topField = TopFieldCollector.create(sort, hitsPerPage, true, true, true,
                        false);
                searcher.search(query, topField);
                TopDocs docs = topField.topDocs();
                SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<font color=red>", "</font>");
                Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));

                indexsetList = new ArrayList<>();
                for (int i = 0; i < docs.totalHits; i++) {
                    String[] indexset = new String[5];
                    int id = docs.scoreDocs[i].doc;
                    float score = docs.scoreDocs[i].score;
                    Document doc = searcher.doc(id);
                    String text = doc.get("student_assignment");
                    String st_am_id = doc.get("st_am_id");
                    String owner_safv_id = doc.get("safv_id");
                    //                    System.out.println(text);
                    //                    System.out.println(st_am_id);
                    //                    System.out.println(owner_safv_id);
                    //                    System.out.println("-----------");
                    TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id,
                            "student_assignment", new ThaiAnalyzer(Version.LUCENE_47));

                    String[] hltextArr = highlighter.getBestFragments(tokenStream, text, hitsPerPage);
                    String hltext = "";
                    for (String string : hltextArr) {
                        hltext += string.toString() + "<br/>";
                    }
                    indexset[0] = st_am_id;
                    indexset[1] = hltext;
                    //getting owner of
                    StAmFileList file = StAmFileList.getSafvBySafv(Integer.parseInt(owner_safv_id));
                    if (file != null) {
                        System.out.println((a.getAm_id() + " /" + file.getList_id()));
                        StAssignmentFile stam = StAssignmentFile.getStAmBbyAmIDAndList(a.getAm_id(),
                                file.getList_id());
                        String html = "";
                        // decide whether to add this hit (skip the submitter's own work)
                        boolean add = true;
                        if (stam.getG_id() == 0) {
                            //if no group that mean it's a individual work
                            if (sa.getAcc_id() != stam.getAcc_id()) {
                                Account owneracc = Account.getNameByID(stam.getAcc_id());
                                html = "<img style=\"width:30px\" src=\"" + owneracc.getProfile_pic()
                                        + "\" data-toggle=\"tooltip\" data-placement=\"top\" title=\"\" class=\"img-circle\" data-original-title=\""
                                        + owneracc.getFirstname() + "\">";
                            } else {
                                add = false;
                            }
                        } else {
                            if (sa.getG_id() != stam.getG_id()) {
                                List<Account> ownerlist = Account.getNameByGIDandAmID(stam.getG_id(),
                                        stam.getAm_id());
                                html = "<a class=\"showGroup\" data-toggle=\"popover\" data-html=\"true\" data-content=\""
                                        + Util.createPopoverGroup(ownerlist) + "\">Group no. "
                                        + Group_member.getGNOById(stam.getG_id()) + "</a>";
                            } else {
                                add = false;
                            }
                        }
                        indexset[2] = html;
                        indexset[3] = score + "";
                        indexset[4] = owner_safv_id;
                        if (add) {
                            indexsetList.add(indexset);
                        }
                    }
                }
            } catch (IOException ex) {
                Logger.getLogger(TestDriver.class.getName()).log(Level.SEVERE, null, ex);
            } catch (ParseException ex) {
                Logger.getLogger(TestDriver.class.getName()).log(Level.SEVERE, null, ex);
            } catch (InvalidTokenOffsetsException ex) {
                Logger.getLogger(TestDriver.class.getName()).log(Level.SEVERE, null, ex);
            }
            //            for (String[] strings : indexsetList) {
            //                System.out.println(strings[0] + " : "+ strings[2] +" : " + strings[1] );
            //            }
            request.setAttribute("nowUUid", f.getUuid());
            request.setAttribute("keyword", keyword);
            request.setAttribute("indexsetList", indexsetList);
        } else {
            request.setAttribute("error_msg", "This assignment cannot be used for copy checking.");
        }
        //            System.out.println(keyword);

        getServletContext().getRequestDispatcher("/Checkcopy.jsp?tab=AllAssignment").forward(request, response);
    }
}