Example usage for org.apache.lucene.index.memory MemoryIndex addField

List of usage examples for org.apache.lucene.index.memory MemoryIndex addField

Introduction

On this page you can find example usages of org.apache.lucene.index.memory.MemoryIndex.addField.

Prototype

public void addField(String fieldName, TokenStream stream, int positionIncrementGap) 

Source Link

Document

Iterates over the given token stream and adds the resulting terms to the index; equivalent to adding a tokenized, indexed, termVectorStored, unstored Lucene org.apache.lucene.document.Field.

Usage

From source file:org.elasticsearch.index.query.PercolateQueryTests.java

License:Apache License

/**
 * Registers eight percolator queries (term, boolean and phrase queries on {@code field}), percolates the
 * document "the quick brown fox jumps over the lazy dog" through a constant-score wrapper, and verifies
 * both the returned hits and the explanation of every document in the shard index.
 *
 * The test expects the documents with ids 0, 2, 3, 5 and 7 to match and the remaining ones not to.
 */
public void testVariousQueries() throws Exception {
    addPercolatorQuery("1", new TermQuery(new Term("field", "brown")));
    addPercolatorQuery("2", new TermQuery(new Term("field", "monkey")));
    addPercolatorQuery("3", new TermQuery(new Term("field", "fox")));
    addPercolatorQuery("4", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.SHOULD)
            .build());
    addPercolatorQuery("5", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST)
            .add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.MUST)
            .build());
    addPercolatorQuery("6", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST)
            .add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST_NOT)
            .build());
    addPercolatorQuery("7", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST_NOT)
            .add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST)
            .build());
    addPercolatorQuery("8", new PhraseQuery.Builder()
            .add(new Term("field", "lazy"))
            .add(new Term("field", "dog"))
            .build());

    // Seal the query index and open a searcher over the registered queries.
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);

    // The document being percolated lives in its own in-memory index.
    MemoryIndex documentIndex = new MemoryIndex();
    documentIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    IndexSearcher percolateSearcher = documentIndex.createSearcher();

    PercolateQuery.Builder percolateBuilder = new PercolateQuery.Builder("docType", queryRegistry,
            new BytesArray("{}"), percolateSearcher);
    percolateBuilder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
    // Scores are irrelevant here, hence the constant score wrapper.
    Query query = new ConstantScoreQuery(percolateBuilder.build());
    TopDocs topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(5));
    assertThat(topDocs.scoreDocs.length, equalTo(5));

    // Walk every document id in order; the ids listed below must be hits (in this order),
    // all other ids must be explained as non-matching.
    int[] expectedHits = { 0, 2, 3, 5, 7 };
    int lastDocId = 7;
    int hitIndex = 0;
    for (int docId = 0; docId <= lastDocId; docId++) {
        boolean shouldMatch = hitIndex < expectedHits.length && expectedHits[hitIndex] == docId;
        if (shouldMatch) {
            assertThat(topDocs.scoreDocs[hitIndex].doc, equalTo(docId));
        }
        Explanation explanation = shardSearcher.explain(query, docId);
        assertThat(explanation.isMatch(), is(shouldMatch));
        if (shouldMatch) {
            assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[hitIndex].score));
            hitIndex++;
        }
    }
}

From source file:org.elasticsearch.index.query.PercolateQueryTests.java

License:Apache License

/**
 * Registers a span-near, a sloppy phrase and a boolean query, percolates the document
 * "the quick brown fox jumps over the lazy dog" with scoring enabled (no constant-score wrapper), and
 * verifies the hit order as well as the explanation of each hit.
 *
 * The test expects all three queries to match, returned in the order doc 2, doc 1, doc 0, and each
 * explanation to carry exactly one detail.
 */
public void testVariousQueries_withScoring() throws Exception {
    addPercolatorQuery("1", new SpanNearQuery.Builder("field", true)
            .addClause(new SpanTermQuery(new Term("field", "jumps")))
            .addClause(new SpanTermQuery(new Term("field", "lazy")))
            .addClause(new SpanTermQuery(new Term("field", "dog")))
            .setSlop(2)
            .build());
    addPercolatorQuery("2", new PhraseQuery.Builder()
            .add(new Term("field", "quick"))
            .add(new Term("field", "brown"))
            .add(new Term("field", "jumps"))
            .setSlop(1)
            .build());
    addPercolatorQuery("3", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST)
            .add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST)
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST)
            .build());

    // Seal the query index and open a searcher over the registered queries.
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);

    // The document being percolated lives in its own in-memory index.
    MemoryIndex documentIndex = new MemoryIndex();
    documentIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    IndexSearcher percolateSearcher = documentIndex.createSearcher();

    PercolateQuery.Builder percolateBuilder = new PercolateQuery.Builder("docType", queryRegistry,
            new BytesArray("{}"), percolateSearcher);
    percolateBuilder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
    Query query = percolateBuilder.build();
    TopDocs topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(3));

    // Expected document id at each rank of the result list.
    int[] expectedDocByRank = { 2, 1, 0 };
    for (int rank = 0; rank < expectedDocByRank.length; rank++) {
        int docId = expectedDocByRank[rank];
        assertThat(topDocs.scoreDocs[rank].doc, equalTo(docId));
        Explanation explanation = shardSearcher.explain(query, docId);
        assertThat(explanation.isMatch(), is(true));
        assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[rank].score));
        assertThat(explanation.getDetails(), arrayWithSize(1));
    }
}

From source file:org.elasticsearch.index.query.PercolateQueryTests.java

License:Apache License

/**
 * Registers a random number of percolator queries, each of a randomly chosen type built from its own
 * numeric id, and then runs {@code duelRun} once per id against a single-term in-memory document
 * containing that id.
 */
public void testDuel() throws Exception {
    final int queryCount = scaledRandomIntBetween(32, 256);
    for (int n = 0; n < queryCount; n++) {
        final String id = String.valueOf(n);
        final Query randomQuery;
        // The order of the randomBoolean() calls is significant for seed reproducibility.
        if (randomBoolean()) {
            randomQuery = new PrefixQuery(new Term("field", id));
        } else if (randomBoolean()) {
            randomQuery = new WildcardQuery(new Term("field", id + "*"));
        } else if (randomBoolean()) {
            randomQuery = new CustomQuery(new Term("field", id + "*"));
        } else if (randomBoolean()) {
            randomQuery = new SpanTermQuery(new Term("field", id));
        } else {
            randomQuery = new TermQuery(new Term("field", id));
        }
        addPercolatorQuery(id, randomQuery);
    }

    // Seal the query index and open a searcher over the registered queries.
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);

    for (int n = 0; n < queryCount; n++) {
        MemoryIndex singleTermDocument = new MemoryIndex();
        singleTermDocument.addField("field", String.valueOf(n), new WhitespaceAnalyzer());
        duelRun(singleTermDocument, shardSearcher);
    }
}

From source file:org.elasticsearch.index.query.PercolateQueryTests.java

License:Apache License

/**
 * Registers a fixed set of less common query types (common terms, blended term, span near, span or and
 * span not) and runs {@code duelRun} for the document "the quick brown fox jumps over the lazy dog".
 */
public void testDuelSpecificQueries() throws Exception {
    CommonTermsQuery commonTerms = new CommonTermsQuery(BooleanClause.Occur.SHOULD,
            BooleanClause.Occur.SHOULD, 128);
    commonTerms.add(new Term("field", "quick"));
    commonTerms.add(new Term("field", "brown"));
    commonTerms.add(new Term("field", "fox"));
    addPercolatorQuery("_id1", commonTerms);

    Term[] blendedTerms = { new Term("field", "quick"), new Term("field", "brown"), new Term("field", "fox") };
    BlendedTermQuery blended = BlendedTermQuery.booleanBlendedQuery(blendedTerms, false);
    addPercolatorQuery("_id2", blended);

    SpanNearQuery.Builder quickBrownFoxBuilder = new SpanNearQuery.Builder("field", true);
    quickBrownFoxBuilder.addClause(new SpanTermQuery(new Term("field", "quick")));
    quickBrownFoxBuilder.addClause(new SpanTermQuery(new Term("field", "brown")));
    quickBrownFoxBuilder.addClause(new SpanTermQuery(new Term("field", "fox")));
    SpanNearQuery quickBrownFox = quickBrownFoxBuilder.build();
    addPercolatorQuery("_id3", quickBrownFox);

    // NOTE(review): the last term is "doc", which is not a token of the percolated document below
    // ("... lazy dog"). Possibly a typo for "dog"; confirm before changing, since it alters what is tested.
    SpanNearQuery.Builder theLazyDocBuilder = new SpanNearQuery.Builder("field", true);
    theLazyDocBuilder.addClause(new SpanTermQuery(new Term("field", "the")));
    theLazyDocBuilder.addClause(new SpanTermQuery(new Term("field", "lazy")));
    theLazyDocBuilder.addClause(new SpanTermQuery(new Term("field", "doc")));
    SpanNearQuery theLazyDoc = theLazyDocBuilder.build();
    SpanOrQuery eitherSpan = new SpanOrQuery(quickBrownFox, theLazyDoc);
    addPercolatorQuery("_id4", eitherSpan);

    // Include and exclude clauses are deliberately the same span query instance.
    SpanNotQuery selfExcludingSpan = new SpanNotQuery(quickBrownFox, quickBrownFox);
    addPercolatorQuery("_id5", selfExcludingSpan);

    // Seal the query index and open a searcher over the registered queries.
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);

    MemoryIndex documentIndex = new MemoryIndex();
    documentIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    duelRun(documentIndex, shardSearcher);
}

From source file:org.elasticsearch.index.query.PercolatorQueryBuilder.java

License:Apache License

/**
 * Adds every field of the parsed document to the given memory index.
 *
 * Fields named {@code UidFieldMapper.NAME} that are not indexed are skipped. Fields for which the
 * document mapper knows a mapper are analyzed with the mapper's index analyzer; all others use
 * {@code defaultAnalyzer}. Fields that yield no token stream are ignored.
 *
 * @param documentMapper  mapper used to look up field mappers; may be {@code null}
 * @param defaultAnalyzer analyzer used for fields without a mapper
 * @param document        the parsed document whose fields are indexed
 * @param memoryIndex     the in-memory index receiving the fields
 * @throws ElasticsearchException if creating or consuming a token stream fails with an I/O error
 */
private void indexDoc(DocumentMapper documentMapper, Analyzer defaultAnalyzer, ParseContext.Document document,
        MemoryIndex memoryIndex) {
    for (IndexableField field : document.getFields()) {
        boolean unindexedUid = field.fieldType().indexOptions() == IndexOptions.NONE
                && field.name().equals(UidFieldMapper.NAME);
        if (unindexedUid) {
            continue;
        }

        boolean hasMapper = documentMapper != null && documentMapper.mappers().getMapper(field.name()) != null;
        Analyzer analyzer = hasMapper ? documentMapper.mappers().indexAnalyzer() : defaultAnalyzer;

        // The token stream is closed automatically; a null stream means there is nothing to index.
        try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
            if (tokenStream != null) {
                memoryIndex.addField(field.name(), tokenStream, field.boost());
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to create token stream", e);
        }
    }
}

From source file:org.elasticsearch.index.query.PercolatorQueryTests.java

License:Apache License

/**
 * Registers eight percolator queries (term, boolean and phrase queries on {@code field}), percolates the
 * document "the quick brown fox jumps over the lazy dog", and verifies which documents are returned.
 *
 * The test expects exactly the documents with ids 0, 2, 3, 5 and 7 as hits, in that order.
 */
public void testVariousQueries() throws Exception {
    addPercolatorQuery("1", new TermQuery(new Term("field", "brown")));
    addPercolatorQuery("2", new TermQuery(new Term("field", "monkey")));
    addPercolatorQuery("3", new TermQuery(new Term("field", "fox")));
    addPercolatorQuery("4", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.SHOULD)
            .build());
    addPercolatorQuery("5", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST)
            .add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.MUST)
            .build());
    addPercolatorQuery("6", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST)
            .add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST_NOT)
            .build());
    addPercolatorQuery("7", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST_NOT)
            .add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST)
            .build());
    addPercolatorQuery("8", new PhraseQuery.Builder()
            .add(new Term("field", "lazy"))
            .add(new Term("field", "dog"))
            .build());

    // Seal the query index and open a searcher over the registered queries.
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);

    // The document being percolated lives in its own in-memory index.
    MemoryIndex documentIndex = new MemoryIndex();
    documentIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    IndexSearcher percolateSearcher = documentIndex.createSearcher();

    PercolatorQuery.Builder percolatorBuilder = new PercolatorQuery.Builder("docType", queryRegistry,
            new BytesArray("{}"), percolateSearcher, new MatchAllDocsQuery());
    percolatorBuilder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
    TopDocs topDocs = shardSearcher.search(percolatorBuilder.build(), 10);
    assertThat(topDocs.totalHits, equalTo(5));
    assertThat(topDocs.scoreDocs.length, equalTo(5));

    // Expected document id at each position of the result list.
    int[] expectedDocs = { 0, 2, 3, 5, 7 };
    for (int position = 0; position < expectedDocs.length; position++) {
        assertThat(topDocs.scoreDocs[position].doc, equalTo(expectedDocs[position]));
    }
}

From source file:org.elasticsearch.index.query.PercolatorQueryTests.java

License:Apache License

/**
 * Registers a random number of percolator queries of randomly chosen types and then, for each id,
 * percolates a single-term document twice: once with the extracted-terms pre-filter enabled and once
 * without it. Both runs must report the same total hits and the same documents in the same order.
 */
public void testDuel() throws Exception {
    final int queryCount = scaledRandomIntBetween(32, 256);
    for (int n = 0; n < queryCount; n++) {
        final String id = String.valueOf(n);
        final Query randomQuery;
        // The order of the randomBoolean() calls is significant for seed reproducibility.
        if (randomBoolean()) {
            randomQuery = new PrefixQuery(new Term("field", id));
        } else if (randomBoolean()) {
            randomQuery = new WildcardQuery(new Term("field", id + "*"));
        } else if (randomBoolean()) {
            randomQuery = new CustomQuery(new Term("field", id + "*"));
        } else {
            randomQuery = new TermQuery(new Term("field", id));
        }
        addPercolatorQuery(id, randomQuery);
    }

    // Seal the query index and open a searcher over the registered queries.
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);

    for (int n = 0; n < queryCount; n++) {
        MemoryIndex singleTermDocument = new MemoryIndex();
        singleTermDocument.addField("field", String.valueOf(n), new WhitespaceAnalyzer());
        IndexSearcher percolateSearcher = singleTermDocument.createSearcher();

        // First run: with the optimization that avoids evaluating queries that cannot match.
        PercolatorQuery.Builder filteredBuilder = new PercolatorQuery.Builder("docType", queryRegistry,
                new BytesArray("{}"), percolateSearcher, new MatchAllDocsQuery());
        filteredBuilder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
        TopDocs filteredHits = shardSearcher.search(filteredBuilder.build(), 10);

        // Second run: same percolation without the extracted-terms query.
        PercolatorQuery.Builder unfilteredBuilder = new PercolatorQuery.Builder("docType", queryRegistry,
                new BytesArray("{}"), percolateSearcher, new MatchAllDocsQuery());
        TopDocs unfilteredHits = shardSearcher.search(unfilteredBuilder.build(), 10);

        assertThat(filteredHits.totalHits, equalTo(unfilteredHits.totalHits));
        assertThat(filteredHits.scoreDocs.length, equalTo(unfilteredHits.scoreDocs.length));
        int position = 0;
        while (position < filteredHits.scoreDocs.length) {
            assertThat(filteredHits.scoreDocs[position].doc, equalTo(unfilteredHits.scoreDocs[position].doc));
            position++;
        }
    }
}

From source file:org.elasticsearch.index.termvectors.ShardTermVectorsService.java

License:Apache License

/**
 * Builds term vectors on the fly by indexing the requested stored-field values into a
 * {@link MemoryIndex} and reading the fields back from it.
 *
 * @param getFields        the fetched fields and their values
 * @param withOffsets      passed to the {@link MemoryIndex} constructor
 * @param perFieldAnalyzer optional per-field analyzer overrides, forwarded to {@code getAnalyzerAtField}
 * @param fields           names of the fields term vectors were requested for
 * @return the fields of the in-memory index
 * @throws IOException declared by the method; propagated from the calls it makes
 */
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets,
        @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields) throws IOException {
    // Step 1: store the document in a memory index.
    MemoryIndex index = new MemoryIndex(withOffsets);
    for (GetField getField : getFields) {
        String name = getField.getName();
        // Some fields are returned even when not asked for (e.g. _timestamp); only index requested ones.
        if (fields.contains(name)) {
            Analyzer analyzer = getAnalyzerAtField(name, perFieldAnalyzer);
            for (Object value : getField.getValues()) {
                index.addField(name, value.toString(), analyzer);
            }
        }
    }
    // Step 2: read the vectors back from it.
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}

From source file:org.elasticsearch.index.termvectors.TermVectorsService.java

License:Apache License

/**
 * Builds term vectors on the fly: collects the values of the requested fields (from the fetched fields
 * first, falling back to the document source), indexes them into a {@link MemoryIndex} and reads the
 * fields back from it.
 *
 * @param indexShard       shard forwarded to {@code getAnalyzerAtField}
 * @param source           the document source, or {@code null} if unavailable
 * @param getFields        the fetched fields and their values
 * @param withOffsets      passed to the {@link MemoryIndex} constructor
 * @param perFieldAnalyzer optional per-field analyzer overrides, forwarded to {@code getAnalyzerAtField}
 * @param fields           names of the fields term vectors were requested for
 * @return the fields of the in-memory index
 * @throws IOException declared by the method; propagated from the calls it makes
 */
private static Fields generateTermVectors(IndexShard indexShard, Map<String, Object> source,
        Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer,
        Set<String> fields) throws IOException {
    Map<String, Collection<Object>> valuesByField = new HashMap<>();

    // Values coming from fetched fields take precedence.
    for (GetField getField : getFields) {
        String name = getField.getName();
        // Some fields are returned even when not asked for (e.g. _timestamp); skip those.
        if (fields.contains(name) == false) {
            continue;
        }
        valuesByField.put(name, getField.getValues());
    }

    // Fill in requested fields that are still missing from the source, when a source is available.
    if (source != null) {
        for (String name : fields) {
            if (valuesByField.containsKey(name)) {
                continue;
            }
            List<Object> extracted = XContentMapValues.extractRawValues(name, source);
            if (extracted.isEmpty()) {
                continue;
            }
            valuesByField.put(name, extracted);
        }
    }

    // Step 1: store the document in a memory index.
    MemoryIndex index = new MemoryIndex(withOffsets);
    for (Map.Entry<String, Collection<Object>> fieldValues : valuesByField.entrySet()) {
        String name = fieldValues.getKey();
        Analyzer analyzer = getAnalyzerAtField(indexShard, name, perFieldAnalyzer);
        for (Object value : fieldValues.getValue()) {
            index.addField(name, value.toString(), analyzer);
        }
    }
    // Step 2: read the vectors back from it.
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}

From source file:org.elasticsearch.percolator.ExtractQueryTermsServiceTests.java

License:Apache License

/**
 * Indexes four fields into a {@link MemoryIndex}, builds the query-terms query from its reader, and
 * verifies the resulting term data.
 *
 * The test expects 15 terms in total: one {@code <field>\u0000<token>} entry per distinct token under
 * {@code QUERY_TERMS_FIELD}, in the order listed below, followed by the {@code EXTRACTION_FAILED} term
 * under {@code EXTRACTION_RESULT_FIELD}.
 */
public void testCreateQueryMetadataQuery() throws Exception {
    MemoryIndex documentIndex = new MemoryIndex(false);
    documentIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    documentIndex.addField("field2", "some more text", new WhitespaceAnalyzer());
    documentIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer());
    documentIndex.addField("field4", "123", new WhitespaceAnalyzer());

    IndexReader indexReader = documentIndex.createSearcher().getIndexReader();
    TermsQuery query = (TermsQuery) createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD,
            new Term(EXTRACTION_RESULT_FIELD, EXTRACTION_FAILED));

    PrefixCodedTerms terms = query.getTermData();
    assertThat(terms.size(), equalTo(15L));

    // Terms expected under QUERY_TERMS_FIELD, in iteration order.
    String[] expectedQueryTerms = {
        "_field3\u0000me",
        "_field3\u0000unhide",
        "field1\u0000brown",
        "field1\u0000dog",
        "field1\u0000fox",
        "field1\u0000jumps",
        "field1\u0000lazy",
        "field1\u0000over",
        "field1\u0000quick",
        "field1\u0000the",
        "field2\u0000more",
        "field2\u0000some",
        "field2\u0000text",
        "field4\u0000123",
    };
    PrefixCodedTerms.TermIterator termIterator = terms.iterator();
    for (String expectedTerm : expectedQueryTerms) {
        assertTermIterator(termIterator, expectedTerm, QUERY_TERMS_FIELD);
    }
    // The last term is the marker passed in as the extra term above.
    assertTermIterator(termIterator, EXTRACTION_FAILED, EXTRACTION_RESULT_FIELD);
}