Example usage for org.apache.lucene.index.memory MemoryIndex createSearcher

List of usage examples for org.apache.lucene.index.memory MemoryIndex createSearcher

Introduction

In this page you can find the example usage for org.apache.lucene.index.memory MemoryIndex createSearcher.

Prototype

public IndexSearcher createSearcher() 

Source Link

Document

Creates and returns a searcher that can be used to execute arbitrary Lucene queries and to collect the resulting query results as hits.

Usage

From source file:ch.sentric.hbase.prospective.Percolator.java

License:Apache License

/**
 * Tries to find a set of queries that match the given document.
 *
 * @param doc
 *            the Lucene document
 * @return the matching queries
 * @throws IOException
 *             if an I/O error occurs
 */
public Response<T> percolate(final Document doc, final Map<T, Query> queries) throws IOException {
    // Build a single-document, in-memory index from the source document.
    final MemoryIndex index = new MemoryIndex();
    for (final Fieldable field : doc.getFields()) {
        if (field.isIndexed()) {
            indexField(index, field);
        }
    }

    // Run each registered query against the in-memory index; keep the hits.
    final IndexSearcher searcher = index.createSearcher();
    final Map<T, Query> matches = new HashMap<T, Query>(0);

    if (queries != null && !queries.isEmpty()) {
        final ExistsCollector collector = new ExistsCollector();
        for (final Map.Entry<T, Query> entry : queries.entrySet()) {
            collector.reset();
            searcher.search(entry.getValue(), collector);
            if (collector.exists()) {
                matches.put(entry.getKey(), entry.getValue());
            }
        }
    }

    return new Response<T>(matches);
}

/**
 * Adds one indexed field to the in-memory index, preferring the field's
 * pre-analyzed token stream, then its reader value, then its string value.
 * Fields carrying none of the three are silently skipped.
 */
private void indexField(final MemoryIndex index, final Fieldable field) throws IOException {
    final String name = field.name();
    final float boost = field.getBoost();

    final TokenStream tokenStream = field.tokenStreamValue();
    if (tokenStream != null) {
        index.addField(name, tokenStream, boost);
        return;
    }

    final Reader reader = field.readerValue();
    if (reader != null) {
        index.addField(name, analyzer.reusableTokenStream(name, reader), boost);
        return;
    }

    final String value = field.stringValue();
    if (value != null) {
        index.addField(name, analyzer.reusableTokenStream(name, new CharSequenceReader(value)), boost);
    }
}

From source file:com.jaeksoft.searchlib.parser.HtmlParser.java

License:Open Source License

/**
 * Parses an HTML document: resolves the charset, extracts title/meta/links/body
 * text and language, and populates a {@link ParserResultItem}. May return early
 * (producing no result) for untitled or non-canonical documents, or when no
 * HTML provider / root node is available.
 *
 * @param streamLimiter provides (repeatable) access to the raw document bytes
 * @param forcedLang    forced language hint (unused directly here; language is
 *                      re-detected from the document itself)
 * @throws IOException        on read errors or unparsable URLs
 * @throws SearchLibException on search-library-level failures
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum forcedLang)
        throws IOException, SearchLibException {

    // Load per-parser configuration: heading boosts and robots/canonical policies.
    titleBoost = getFloatProperty(ClassPropertyEnum.TITLE_BOOST);
    boostTagMap = new TreeMap<String, BoostTag>();
    boostTagMap.put("h1", new BoostTag(ClassPropertyEnum.H1_BOOST));
    boostTagMap.put("h2", new BoostTag(ClassPropertyEnum.H2_BOOST));
    boostTagMap.put("h3", new BoostTag(ClassPropertyEnum.H3_BOOST));
    boostTagMap.put("h4", new BoostTag(ClassPropertyEnum.H4_BOOST));
    boostTagMap.put("h5", new BoostTag(ClassPropertyEnum.H5_BOOST));
    boostTagMap.put("h6", new BoostTag(ClassPropertyEnum.H6_BOOST));
    ignoreMetaNoIndex = getBooleanProperty(ClassPropertyEnum.IGNORE_META_NOINDEX);
    ignoreMetaNoFollow = getBooleanProperty(ClassPropertyEnum.IGNORE_META_NOFOLLOW);
    ignoreLinkNoFollow = getBooleanProperty(ClassPropertyEnum.IGNORE_LINK_NOFOLLOW);
    ignoreUntitledDocuments = getBooleanProperty(ClassPropertyEnum.IGNORE_UNTITLED_DOCUMENTS);
    ignoreNonCanonical = getBooleanProperty(ClassPropertyEnum.IGNORE_NON_CANONICAL);

    // Charset resolution, in priority order:
    // 1) HTTP header charset from the source document, 2) byte-level detection,
    // 3) configured default.
    String currentCharset = null;
    String headerCharset = null;
    String detectedCharset = null;

    IndexDocument sourceDocument = getSourceDocument();

    if (sourceDocument != null) {
        FieldValueItem fieldValueItem = sourceDocument
                .getFieldValue(UrlItemFieldEnum.INSTANCE.contentTypeCharset.getName(), 0);
        if (fieldValueItem != null)
            headerCharset = fieldValueItem.getValue();
        if (headerCharset == null) {
            // Fall back to the Content-Encoding field when no charset was in Content-Type.
            fieldValueItem = sourceDocument.getFieldValue(UrlItemFieldEnum.INSTANCE.contentEncoding.getName(),
                    0);
            if (fieldValueItem != null)
                headerCharset = fieldValueItem.getValue();
        }
        currentCharset = headerCharset;
    }

    if (currentCharset == null) {
        detectedCharset = streamLimiter.getDetectedCharset();
        currentCharset = detectedCharset;
    }

    if (currentCharset == null) {
        currentCharset = getProperty(ClassPropertyEnum.DEFAULT_CHARSET).getValue();
    }

    // Optional XPath-based exclusion of subtrees from body-text extraction.
    String xPathExclusions = getProperty(ClassPropertyEnum.XPATH_EXCLUSION).getValue();
    Set<Object> xPathExclusionsSet = null;
    if (!StringUtils.isEmpty(xPathExclusions))
        xPathExclusionsSet = new HashSet<Object>();

    HtmlParserEnum htmlParserEnum = HtmlParserEnum.find(getProperty(ClassPropertyEnum.HTML_PARSER).getValue());

    HtmlDocumentProvider htmlProvider = getHtmlDocumentProvider(htmlParserEnum, currentCharset, streamLimiter,
            xPathExclusions, xPathExclusionsSet);
    if (htmlProvider == null)
        return;

    // Resolve the document's base URL: <base href>, then the stream's origin
    // URL, then the source document's indexed URL field.
    URL currentURL = htmlProvider.getBaseHref();
    IndexDocument srcDoc = getSourceDocument();
    String streamOriginalUrl = streamLimiter.getOriginURL();
    try {
        if (currentURL == null && !StringUtils.isEmpty(streamOriginalUrl))
            currentURL = LinkUtils.newEncodedURL(streamOriginalUrl);
        if (currentURL == null && srcDoc != null) {
            FieldValueItem fvi = srcDoc.getFieldValue(UrlItemFieldEnum.INSTANCE.url.getName(), 0);
            if (fvi != null)
                currentURL = LinkUtils.newEncodedURL(fvi.getValue());
        }
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }

    // Canonical-link handling: always record the canonical URL as a detected
    // link; optionally skip indexing when this page is not the canonical one.
    URL canonicalURL = htmlProvider.getCanonicalLink(currentURL);
    if (canonicalURL != null) {
        String canUrl = canonicalURL.toExternalForm();
        addDetectedLink(canUrl);
        if (ignoreNonCanonical) {
            // NOTE(review): currentURL may still be null here (no base href, no
            // origin URL, no indexed URL) — toExternalForm() would then NPE.
            // Confirm upstream guarantees a URL when ignoreNonCanonical is set.
            String curUrl = currentURL.toExternalForm();
            if (!canUrl.equals(curUrl)) {
                isCanonical = false;
                return;
            }
        }
    }
    isCanonical = true;

    String title = htmlProvider.getTitle();
    if (ignoreUntitledDocuments)
        if (title == null || title.length() == 0)
            return;

    ParserResultItem result = getNewParserResultItem();

    addFieldTitle(result, title);

    result.addField(ParserFieldEnum.htmlProvider, htmlProvider.getName());

    // Check ContentType charset in meta http-equiv; if the final charset choice
    // differs from the one used so far, re-parse the document with it.
    String metaCharset = htmlProvider.getMetaCharset();

    String selectedCharset = selectCharset(headerCharset, metaCharset, detectedCharset);

    if (selectedCharset != null) {
        if (!selectedCharset.equals(currentCharset)) {
            currentCharset = selectedCharset;
            htmlProvider = getHtmlDocumentProvider(htmlParserEnum, currentCharset, streamLimiter,
                    xPathExclusions, xPathExclusionsSet);
        }
    }

    // Store the raw HTML source (decoded with the final charset).
    StringWriter writer = new StringWriter();
    IOUtils.copy(streamLimiter.getNewInputStream(), writer, currentCharset);
    result.addField(ParserFieldEnum.htmlSource, writer.toString());
    writer.close();

    HtmlNodeAbstract<?> rootNode = htmlProvider.getRootNode();
    if (rootNode == null)
        return;

    // Map <meta name="opensearchserver.field.X.Y"> entries directly onto
    // result fields (dot-separated field path).
    for (HtmlNodeAbstract<?> metaNode : htmlProvider.getMetas()) {
        String metaName = metaNode.getAttributeText("name");
        if (metaName != null && metaName.startsWith(OPENSEARCHSERVER_FIELD)) {
            String field = metaName.substring(OPENSEARCHSERVER_FIELD_LENGTH);
            String[] fields = field.split("\\.");
            if (fields != null) {
                String content = metaNode.getAttributeText("content");
                result.addDirectFields(fields, content);
            }
        }
    }

    result.addField(ParserFieldEnum.charset, currentCharset);

    // Collect the standard meta tags: keywords, description, robots, and the
    // two language hints (dc.language and http-equiv content-language).
    String metaRobots = null;

    String metaDcLanguage = null;

    String metaContentLanguage = null;

    for (HtmlNodeAbstract<?> node : htmlProvider.getMetas()) {
        String attr_name = node.getAttributeText("name");
        String attr_http_equiv = node.getAttributeText("http-equiv");
        if ("keywords".equalsIgnoreCase(attr_name))
            result.addField(ParserFieldEnum.meta_keywords, HtmlDocumentProvider.getMetaContent(node));
        else if ("description".equalsIgnoreCase(attr_name))
            result.addField(ParserFieldEnum.meta_description, HtmlDocumentProvider.getMetaContent(node));
        else if ("robots".equalsIgnoreCase(attr_name))
            metaRobots = HtmlDocumentProvider.getMetaContent(node);
        else if ("dc.language".equalsIgnoreCase(attr_name))
            metaDcLanguage = HtmlDocumentProvider.getMetaContent(node);
        else if ("content-language".equalsIgnoreCase(attr_http_equiv))
            metaContentLanguage = HtmlDocumentProvider.getMetaContent(node);
    }

    // Honor meta-robots noindex/nofollow unless configured to ignore them.
    boolean metaRobotsFollow = true;
    boolean metaRobotsNoIndex = false;
    if (metaRobots != null) {
        metaRobots = metaRobots.toLowerCase();
        if (metaRobots.contains("noindex") && !ignoreMetaNoIndex) {
            metaRobotsNoIndex = true;
            result.addField(ParserFieldEnum.meta_robots, "noindex");
        }
        if (metaRobots.contains("nofollow") && !ignoreMetaNoFollow) {
            metaRobotsFollow = false;
            result.addField(ParserFieldEnum.meta_robots, "nofollow");
        }
    }

    UrlFilterItem[] urlFilterList = getUrlFilterList();

    boolean removeFragment = ClassPropertyEnum.KEEP_REMOVE_LIST[1]
            .equalsIgnoreCase(getProperty(ClassPropertyEnum.URL_FRAGMENT).getValue());

    // Link extraction from <a href>, <frame src> and <img src>, classified as
    // internal/external and follow/nofollow.
    List<HtmlNodeAbstract<?>> nodes = rootNode.getAllNodes("a", "frame", "img");
    if (srcDoc != null && nodes != null && metaRobotsFollow) {
        for (HtmlNodeAbstract<?> node : nodes) {
            String href = null;
            String rel = null;
            String nodeName = node.getNodeName();
            if ("a".equals(nodeName)) {
                href = node.getAttributeText("href");
                rel = node.getAttributeText("rel");
            } else if ("frame".equals(nodeName) || "img".equals(nodeName)) {
                href = node.getAttributeText("src");
            }
            boolean follow = true;
            if (rel != null)
                if (rel.contains("nofollow") && !ignoreLinkNoFollow)
                    follow = false;
            URL newUrl = null;
            if (href != null)
                if (!href.startsWith("javascript:"))
                    if (currentURL != null) {
                        href = StringEscapeUtils.unescapeXml(href);
                        newUrl = LinkUtils.getLink(currentURL, href, urlFilterList, removeFragment);
                    }
            if (newUrl != null) {
                ParserFieldEnum field = null;
                // Same host as the current page => internal link.
                if (newUrl.getHost().equalsIgnoreCase(currentURL.getHost())) {
                    if (follow)
                        field = ParserFieldEnum.internal_link;
                    else
                        field = ParserFieldEnum.internal_link_nofollow;
                } else {
                    if (follow)
                        field = ParserFieldEnum.external_link;
                    else
                        field = ParserFieldEnum.external_link_nofollow;
                }
                String link = newUrl.toExternalForm();
                result.addField(field, link);
                if (follow)
                    addDetectedLink(link);
            }
        }
    }

    // Body text extraction (skipped when meta-robots says noindex).
    if (!metaRobotsNoIndex) {
        nodes = rootNode.getNodes("html", "body");
        if (nodes == null || nodes.size() == 0)
            nodes = rootNode.getNodes("html");
        if (nodes != null && nodes.size() > 0) {
            StringBuilder sb = new StringBuilder();
            getBodyTextContent(result, sb, nodes.get(0), true, null, 1024, xPathExclusionsSet);
            result.addField(ParserFieldEnum.body, sb);
        }
    }

    // Language identification: <html lang>, then http-equiv content-language,
    // then dc.language, finally statistical detection on the body text.
    Locale lang = null;
    String langMethod = null;
    String[] pathHtml = { "html" };
    nodes = rootNode.getNodes(pathHtml);
    if (nodes != null && nodes.size() > 0) {
        // NOTE(review): langMethod is set even when the lang attribute is
        // missing or unresolvable; a later source may then report this method
        // with its own locale. Confirm this is intended.
        langMethod = "html lang attribute";
        String l = nodes.get(0).getAttributeText("lang");
        if (l != null)
            lang = Lang.findLocaleISO639(l);
    }
    if (lang == null && metaContentLanguage != null) {
        langMethod = "meta http-equiv content-language";
        lang = Lang.findLocaleISO639(metaContentLanguage);
    }
    if (lang == null && metaDcLanguage != null) {
        langMethod = "meta dc.language";
        lang = Lang.findLocaleISO639(metaDcLanguage);
    }

    if (lang != null) {
        result.addField(ParserFieldEnum.lang, lang.getLanguage());
        result.addField(ParserFieldEnum.lang_method, langMethod);
    } else if (!metaRobotsNoIndex)
        lang = result.langDetection(10000, ParserFieldEnum.body);

    // Optionally synthesize a title: "<host> - <best heading or MLT term>",
    // truncated to roughly 67 characters at a word boundary.
    if (getFieldMap().isMapped(ParserFieldEnum.generated_title)) {

        StringBuilder sb = new StringBuilder();
        try {
            if (!StringUtils.isEmpty(streamOriginalUrl))
                sb.append(new URI(streamOriginalUrl).getHost());
        } catch (URISyntaxException e) {
            Logging.error(e);
        }

        // Prefer the first heading content found, in h1..h6 order (TreeMap keys).
        String generatedTitle = null;
        for (Map.Entry<String, BoostTag> entry : boostTagMap.entrySet()) {
            BoostTag boostTag = entry.getValue();
            if (boostTag.firstContent != null) {
                generatedTitle = boostTag.firstContent;
                break;
            }
        }

        if (generatedTitle == null) {
            // No heading available: index the body text into a throwaway
            // in-memory index and pick the top MoreLikeThis term as the title.
            final String FIELD_TITLE = "contents";

            MemoryIndex bodyMemoryIndex = new MemoryIndex();
            Analyzer bodyAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_36);
            String bodyText = result.getMergedBodyText(100000, " ", ParserFieldEnum.body);
            bodyMemoryIndex.addField(FIELD_TITLE, bodyText, bodyAnalyzer);

            IndexSearcher indexSearcher = bodyMemoryIndex.createSearcher();
            IndexReader indexReader = indexSearcher.getIndexReader();
            MoreLikeThis mlt = new MoreLikeThis(indexReader);
            mlt.setAnalyzer(bodyAnalyzer);
            mlt.setFieldNames(new String[] { FIELD_TITLE });
            mlt.setMinWordLen(3);
            mlt.setMinTermFreq(1);
            mlt.setMinDocFreq(1);

            String[] words = mlt.retrieveInterestingTerms(0);
            if (words != null && words.length > 0)
                generatedTitle = words[0];
        }

        if (generatedTitle != null) {
            if (sb.length() > 0)
                sb.append(" - ");
            sb.append(generatedTitle);
        }

        if (sb.length() > 67) {
            int pos = sb.indexOf(" ", 60);
            if (pos == -1)
                pos = 67;
            sb.setLength(pos);
            sb.append("...");
        }
        result.addField(ParserFieldEnum.generated_title, sb.toString());
    }

}

From source file:edu.mit.ll.vizlinc.highlight.WeightedSpanTermExtractor.java

License:Apache License

/**
 * Returns (building and caching on first use) an IndexReader over the shared
 * token stream for the given field, backed by a single-field MemoryIndex.
 */
private IndexReader getReaderForField(String field) throws IOException {
    // Wrap the shared stream in a caching filter exactly once, so it can be
    // consumed by several per-field readers.
    if (wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
        tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
        cachedTokenStream = true;
    }

    final IndexReader cached = readers.get(field);
    if (cached != null) {
        return cached;
    }

    // No reader for this field yet: index the (length-limited) token stream
    // into a fresh in-memory index and cache the resulting reader.
    final MemoryIndex indexer = new MemoryIndex();
    indexer.addField(field, new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
    tokenStream.reset();
    final IndexReader reader = indexer.createSearcher().getIndexReader();
    readers.put(field, reader);
    return reader;
}

From source file:org.elasticsearch.index.percolator.ExtractQueryTermsServiceTests.java

License:Apache License

// Verifies that createQueryTermsQuery builds a TermsQuery whose clauses cover
// every indexed term (encoded as "field\0term") plus one unknown-query clause.
public void testCreateQueryMetadataQuery() throws Exception {
    MemoryIndex memoryIndex = new MemoryIndex(false);
    memoryIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer());
    memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer());
    memoryIndex.addField("field4", "123", new WhitespaceAnalyzer());

    IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
    Query query = ExtractQueryTermsService.createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD,
            UNKNOWN_QUERY_FIELD);
    assertThat(query, instanceOf(TermsQuery.class));

    // no easy way to get to the terms in TermsQuery;
    // if there are fewer than 16 terms it gets rewritten to a boolean query,
    // and then we can easily check the terms
    BooleanQuery booleanQuery = (BooleanQuery) ((ConstantScoreQuery) query.rewrite(indexReader)).getQuery();
    assertThat(booleanQuery.clauses().size(), equalTo(15));
    // Clauses are ordered by field name, then term; "\u0000" separates the two.
    assertClause(booleanQuery, 0, QUERY_TERMS_FIELD, "_field3\u0000me");
    assertClause(booleanQuery, 1, QUERY_TERMS_FIELD, "_field3\u0000unhide");
    assertClause(booleanQuery, 2, QUERY_TERMS_FIELD, "field1\u0000brown");
    assertClause(booleanQuery, 3, QUERY_TERMS_FIELD, "field1\u0000dog");
    assertClause(booleanQuery, 4, QUERY_TERMS_FIELD, "field1\u0000fox");
    assertClause(booleanQuery, 5, QUERY_TERMS_FIELD, "field1\u0000jumps");
    assertClause(booleanQuery, 6, QUERY_TERMS_FIELD, "field1\u0000lazy");
    assertClause(booleanQuery, 7, QUERY_TERMS_FIELD, "field1\u0000over");
    assertClause(booleanQuery, 8, QUERY_TERMS_FIELD, "field1\u0000quick");
    assertClause(booleanQuery, 9, QUERY_TERMS_FIELD, "field1\u0000the");
    assertClause(booleanQuery, 10, QUERY_TERMS_FIELD, "field2\u0000more");
    assertClause(booleanQuery, 11, QUERY_TERMS_FIELD, "field2\u0000some");
    assertClause(booleanQuery, 12, QUERY_TERMS_FIELD, "field2\u0000text");
    assertClause(booleanQuery, 13, QUERY_TERMS_FIELD, "field4\u0000123");
    assertClause(booleanQuery, 14, UNKNOWN_QUERY_FIELD, "");
}

From source file:org.elasticsearch.index.query.MoreLikeThisQueryBuilderTests.java

License:Apache License

/**
 * Here we could go overboard and use a pre-generated indexed random document
 * for a given Item, but for now we'd prefer to simply return the id as the
 * content of the document, and that for every field.
 */
private static Fields generateFields(String[] fieldNames, String text) throws IOException {
    // Index the same text under every requested field name.
    final MemoryIndex index = new MemoryIndex();
    for (final String fieldName : fieldNames) {
        index.addField(fieldName, text, new WhitespaceAnalyzer());
    }
    final IndexReader reader = index.createSearcher().getIndexReader();
    return MultiFields.getFields(reader);
}

From source file:org.elasticsearch.index.query.PercolateQueryBuilder.java

License:Apache License

/**
 * Builds the Lucene query for a percolate request: parses the provided
 * document into an in-memory index, then wraps the registered percolator
 * queries in a PercolateQuery that runs against it.
 *
 * @throws IllegalStateException if the builder still references an indexed
 *                               document (it must be rewritten first) or no
 *                               document was provided
 */
@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
    if (indexedDocumentIndex != null || indexedDocumentType != null || indexedDocumentId != null) {
        throw new IllegalStateException("query builder must be rewritten first");
    }

    if (document == null) {
        throw new IllegalStateException("nothing to percolator");
    }

    // Parse the raw document source with the type's mapper (temporary id).
    MapperService mapperService = context.getMapperService();
    DocumentMapperForType docMapperForType = mapperService.documentMapperWithAutoCreate(documentType);
    DocumentMapper docMapper = docMapperForType.getDocumentMapper();

    ParsedDocument doc = docMapper.parse(source(context.index().getName(), documentType, "_temp_id", document));

    FieldNameAnalyzer fieldNameAnalyzer = (FieldNameAnalyzer) docMapper.mappers().indexAnalyzer();
    // Need to this custom impl because FieldNameAnalyzer is strict and the percolator sometimes isn't when
    // 'index.percolator.map_unmapped_fields_as_string' is enabled:
    Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
        @Override
        protected Analyzer getWrappedAnalyzer(String fieldName) {
            Analyzer analyzer = fieldNameAnalyzer.analyzers().get(fieldName);
            if (analyzer != null) {
                return analyzer;
            } else {
                // Unmapped field: fall back to the index's default analyzer.
                return context.getAnalysisService().defaultIndexAnalyzer();
            }
        }
    };
    // Multiple docs means nested objects: they need a multi-document searcher.
    final IndexSearcher docSearcher;
    if (doc.docs().size() > 1) {
        assert docMapper.hasNestedObjects();
        docSearcher = createMultiDocumentSearcher(analyzer, doc);
    } else {
        MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc.rootDoc(), analyzer, true, false);
        docSearcher = memoryIndex.createSearcher();
        docSearcher.setQueryCache(null);
    }

    PercolatorQueryCache registry = context.getPercolatorQueryCache();
    if (registry == null) {
        throw new QueryShardException(context, "no percolator query registry");
    }

    PercolateQuery.Builder builder = new PercolateQuery.Builder(documentType, registry, document, docSearcher);
    Settings indexSettings = registry.getIndexSettings().getSettings();
    // 5.0+ indices store extracted query terms in a percolator field, enabling
    // the term-based pre-filter; older indices use the legacy type query.
    if (indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null)
            .onOrAfter(Version.V_5_0_0_alpha1)) {
        MappedFieldType fieldType = context.fieldMapper(field);
        if (fieldType == null) {
            throw new QueryShardException(context, "field [" + field + "] does not exist");
        }

        if (!(fieldType instanceof PercolatorFieldMapper.PercolatorFieldType)) {
            throw new QueryShardException(context, "expected field [" + field
                    + "] to be of type [percolator], but is of type [" + fieldType.typeName() + "]");
        }
        PercolatorFieldMapper.PercolatorFieldType pft = (PercolatorFieldMapper.PercolatorFieldType) fieldType;
        builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getUnknownQueryFieldName());
    } else {
        Query percolateTypeQuery = new TermQuery(
                new Term(TypeFieldMapper.NAME, PercolatorFieldMapper.LEGACY_TYPE_NAME));
        builder.setPercolateTypeQuery(percolateTypeQuery);
    }
    return builder.build();
}

From source file:org.elasticsearch.index.query.PercolateQueryBuilder.java

License:Apache License

/**
 * Builds a searcher over a nested (multi-document) parsed document: each
 * sub-document gets its own MemoryIndex, all are combined via a MultiReader,
 * and queries are rewritten to exclude the nested (non-root) documents.
 */
private IndexSearcher createMultiDocumentSearcher(Analyzer analyzer, ParsedDocument doc) {
    IndexReader[] memoryIndices = new IndexReader[doc.docs().size()];
    List<ParseContext.Document> docs = doc.docs();
    // The root document is last; nested documents precede it.
    int rootDocIndex = docs.size() - 1;
    assert rootDocIndex > 0;
    for (int i = 0; i < docs.size(); i++) {
        ParseContext.Document d = docs.get(i);
        MemoryIndex memoryIndex = MemoryIndex.fromDocument(d, analyzer, true, false);
        memoryIndices[i] = memoryIndex.createSearcher().getIndexReader();
    }
    try {
        // MultiReader takes ownership (closeSubReaders = true) of the readers.
        MultiReader mReader = new MultiReader(memoryIndices, true);
        LeafReader slowReader = SlowCompositeReaderWrapper.wrap(mReader);
        final IndexSearcher slowSearcher = new IndexSearcher(slowReader) {

            // Exclude nested documents from matching: only the root document
            // of each block may be returned by percolation.
            @Override
            public Weight createNormalizedWeight(Query query, boolean needsScores) throws IOException {
                BooleanQuery.Builder bq = new BooleanQuery.Builder();
                bq.add(query, BooleanClause.Occur.MUST);
                bq.add(Queries.newNestedFilter(), BooleanClause.Occur.MUST_NOT);
                return super.createNormalizedWeight(bq.build(), needsScores);
            }

        };
        slowSearcher.setQueryCache(null);
        return slowSearcher;
    } catch (IOException e) {
        throw new ElasticsearchException("Failed to create index for percolator with nested document ", e);
    }
}

From source file:org.elasticsearch.index.query.PercolateQueryTests.java

License:Apache License

// Registers eight percolator queries (terms, boolean combinations, a phrase),
// percolates one document against them, and verifies exactly which queries
// match — both via TopDocs ordering and via per-document explain().
public void testVariousQueries() throws Exception {
    addPercolatorQuery("1", new TermQuery(new Term("field", "brown")));
    addPercolatorQuery("2", new TermQuery(new Term("field", "monkey")));
    addPercolatorQuery("3", new TermQuery(new Term("field", "fox")));
    BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
    bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.SHOULD);
    bq1.add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.SHOULD);
    addPercolatorQuery("4", bq1.build());
    BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
    bq2.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
    bq2.add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.MUST);
    addPercolatorQuery("5", bq2.build());
    BooleanQuery.Builder bq3 = new BooleanQuery.Builder();
    bq3.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
    bq3.add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST_NOT);
    addPercolatorQuery("6", bq3.build());
    BooleanQuery.Builder bq4 = new BooleanQuery.Builder();
    bq4.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST_NOT);
    bq4.add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST);
    addPercolatorQuery("7", bq4.build());
    PhraseQuery.Builder pq1 = new PhraseQuery.Builder();
    pq1.add(new Term("field", "lazy"));
    pq1.add(new Term("field", "dog"));
    addPercolatorQuery("8", pq1.build());

    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);

    // The document to percolate, held in an in-memory index.
    MemoryIndex memoryIndex = new MemoryIndex();
    memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    IndexSearcher percolateSearcher = memoryIndex.createSearcher();

    PercolateQuery.Builder builder = new PercolateQuery.Builder("docType", queryRegistry, new BytesArray("{}"),
            percolateSearcher);
    builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
    // no scoring, wrapping it in a constant score query:
    Query query = new ConstantScoreQuery(builder.build());
    TopDocs topDocs = shardSearcher.search(query, 10);
    // Expect queries 1, 3, 4, 6 and 8 (docs 0, 2, 3, 5 and 7) to match.
    assertThat(topDocs.totalHits, equalTo(5));
    assertThat(topDocs.scoreDocs.length, equalTo(5));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(0));
    Explanation explanation = shardSearcher.explain(query, 0);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));

    // Doc 1 ("monkey") must not match.
    explanation = shardSearcher.explain(query, 1);
    assertThat(explanation.isMatch(), is(false));

    assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
    explanation = shardSearcher.explain(query, 2);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));

    assertThat(topDocs.scoreDocs[2].doc, equalTo(3));
    explanation = shardSearcher.explain(query, 3);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));

    // Doc 4 (fox AND monkey) must not match.
    explanation = shardSearcher.explain(query, 4);
    assertThat(explanation.isMatch(), is(false));

    assertThat(topDocs.scoreDocs[3].doc, equalTo(5));
    explanation = shardSearcher.explain(query, 5);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[3].score));

    // Doc 6 (NOT fox, MUST apes) must not match.
    explanation = shardSearcher.explain(query, 6);
    assertThat(explanation.isMatch(), is(false));

    assertThat(topDocs.scoreDocs[4].doc, equalTo(7));
    explanation = shardSearcher.explain(query, 7);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[4].score));
}

From source file:org.elasticsearch.index.query.PercolateQueryTests.java

License:Apache License

// Registers three scoring queries (span-near, sloppy phrase, boolean MUST),
// percolates one document, and verifies hit order, scores and explanations
// when the percolate query is NOT wrapped in a constant-score query.
public void testVariousQueries_withScoring() throws Exception {
    SpanNearQuery.Builder snp = new SpanNearQuery.Builder("field", true);
    snp.addClause(new SpanTermQuery(new Term("field", "jumps")));
    snp.addClause(new SpanTermQuery(new Term("field", "lazy")));
    snp.addClause(new SpanTermQuery(new Term("field", "dog")));
    snp.setSlop(2);
    addPercolatorQuery("1", snp.build());
    PhraseQuery.Builder pq1 = new PhraseQuery.Builder();
    pq1.add(new Term("field", "quick"));
    pq1.add(new Term("field", "brown"));
    pq1.add(new Term("field", "jumps"));
    pq1.setSlop(1);
    addPercolatorQuery("2", pq1.build());
    BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
    bq1.add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST);
    bq1.add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST);
    bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
    addPercolatorQuery("3", bq1.build());

    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);

    // The document to percolate, held in an in-memory index.
    MemoryIndex memoryIndex = new MemoryIndex();
    memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    IndexSearcher percolateSearcher = memoryIndex.createSearcher();

    PercolateQuery.Builder builder = new PercolateQuery.Builder("docType", queryRegistry, new BytesArray("{}"),
            percolateSearcher);
    builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
    Query query = builder.build();
    TopDocs topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(3));

    // Expected score order: phrase query (doc 2), span-near (doc 1), boolean (doc 0).
    assertThat(topDocs.scoreDocs[0].doc, equalTo(2));
    Explanation explanation = shardSearcher.explain(query, 2);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));

    assertThat(topDocs.scoreDocs[1].doc, equalTo(1));
    explanation = shardSearcher.explain(query, 1);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));

    assertThat(topDocs.scoreDocs[2].doc, equalTo(0));
    explanation = shardSearcher.explain(query, 0);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
}

From source file:org.elasticsearch.index.query.PercolateQueryTests.java

License:Apache License

/**
 * Runs the same percolation twice — once with the extracted-terms pre-filter
 * enabled, once as a plain match-all percolate-type query — and asserts that
 * both produce identical hits, scores and explanations.
 */
private void duelRun(MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
    final IndexSearcher percolateSearcher = memoryIndex.createSearcher();

    // Optimized variant: term extraction prunes queries that cannot match.
    final PercolateQuery.Builder optimized = new PercolateQuery.Builder("docType", queryRegistry,
            new BytesArray("{}"), percolateSearcher);
    optimized.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
    final TopDocs optimizedDocs = shardSearcher.search(optimized.build(), 10);

    // Baseline variant: every percolator query is evaluated.
    final PercolateQuery.Builder baseline = new PercolateQuery.Builder("docType", queryRegistry,
            new BytesArray("{}"), percolateSearcher);
    baseline.setPercolateTypeQuery(new MatchAllDocsQuery());
    final TopDocs baselineDocs = shardSearcher.search(baseline.build(), 10);

    assertThat(optimizedDocs.totalHits, equalTo(baselineDocs.totalHits));
    assertThat(optimizedDocs.scoreDocs.length, equalTo(baselineDocs.scoreDocs.length));
    for (int i = 0; i < optimizedDocs.scoreDocs.length; i++) {
        assertThat(optimizedDocs.scoreDocs[i].doc, equalTo(baselineDocs.scoreDocs[i].doc));
        assertThat(optimizedDocs.scoreDocs[i].score, equalTo(baselineDocs.scoreDocs[i].score));
        Explanation explain1 = shardSearcher.explain(optimized.build(), optimizedDocs.scoreDocs[i].doc);
        Explanation explain2 = shardSearcher.explain(baseline.build(), baselineDocs.scoreDocs[i].doc);
        assertThat(explain1.toHtml(), equalTo(explain2.toHtml()));
    }
}