Example usage for org.apache.lucene.index.memory MemoryIndex MemoryIndex

List of usage examples for org.apache.lucene.index.memory MemoryIndex MemoryIndex

Introduction

On this page you can find an example of how to use org.apache.lucene.index.memory MemoryIndex MemoryIndex.

Prototype

public MemoryIndex(boolean storeOffsets) 

Source Link

Document

Constructs an empty instance that can optionally store the start and end character offset of each token term in the text.

Usage

From source file:org.elasticsearch.index.percolator.ExtractQueryTermsServiceTests.java

License:Apache License

public void testCreateQueryMetadataQuery() throws Exception {
    // Build a small single-document in-memory index (no offsets needed).
    MemoryIndex index = new MemoryIndex(false);
    index.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    index.addField("field2", "some more text", new WhitespaceAnalyzer());
    index.addField("_field3", "unhide me", new WhitespaceAnalyzer());
    index.addField("field4", "123", new WhitespaceAnalyzer());

    IndexReader reader = index.createSearcher().getIndexReader();
    Query query = ExtractQueryTermsService.createQueryTermsQuery(reader, QUERY_TERMS_FIELD,
            UNKNOWN_QUERY_FIELD);
    assertThat(query, instanceOf(TermsQuery.class));

    // There is no easy way to get at the terms inside a TermsQuery; with fewer
    // than 16 terms it rewrites to a boolean query whose clauses we can inspect.
    BooleanQuery rewritten = (BooleanQuery) ((ConstantScoreQuery) query.rewrite(reader)).getQuery();
    assertThat(rewritten.clauses().size(), equalTo(15));

    // Expected encoded terms, in clause order: field name, NUL separator, term text.
    String[] expectedTerms = { "_field3\u0000me", "_field3\u0000unhide", "field1\u0000brown",
            "field1\u0000dog", "field1\u0000fox", "field1\u0000jumps", "field1\u0000lazy",
            "field1\u0000over", "field1\u0000quick", "field1\u0000the", "field2\u0000more",
            "field2\u0000some", "field2\u0000text", "field4\u0000123" };
    for (int i = 0; i < expectedTerms.length; i++) {
        assertClause(rewritten, i, QUERY_TERMS_FIELD, expectedTerms[i]);
    }
    // Final clause is the "unknown query" marker term.
    assertClause(rewritten, expectedTerms.length, UNKNOWN_QUERY_FIELD, "");
}

From source file:org.elasticsearch.index.query.PercolatorQueryBuilder.java

License:Apache License

@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
    // Indexed-document coordinates must have been resolved by rewrite() before
    // this builder is converted into a query.
    if (indexedDocumentIndex != null || indexedDocumentType != null || indexedDocumentId != null) {
        throw new IllegalStateException("query builder must be rewritten first");
    }

    if (document == null) {
        throw new IllegalStateException("nothing to percolator");
    }

    MapperService mapperService = context.getMapperService();
    DocumentMapperForType docMapperForType = mapperService.documentMapperWithAutoCreate(documentType);
    DocumentMapper docMapper = docMapperForType.getDocumentMapper();

    // Parse the raw document source with a temporary id; it is never persisted.
    ParsedDocument doc = docMapper
            .parse(source(document).index(context.index().getName()).id("_temp_id").type(documentType));

    Analyzer defaultAnalyzer = context.getAnalysisService().defaultIndexAnalyzer();
    final IndexSearcher docSearcher;
    if (doc.docs().size() > 1) {
        // Nested mappings produce multiple Lucene documents per source document.
        assert docMapper.hasNestedObjects();
        docSearcher = createMultiDocumentSearcher(docMapper, defaultAnalyzer, doc);
    } else {
        // TODO: we may want to bring to MemoryIndex thread local cache back...
        // but I'm unsure about the real benefits.
        MemoryIndex memoryIndex = new MemoryIndex(true);
        indexDoc(docMapper, defaultAnalyzer, doc.rootDoc(), memoryIndex);
        docSearcher = memoryIndex.createSearcher();
        docSearcher.setQueryCache(null);
    }

    PercolatorQueryCache registry = context.getPercolatorQueryCache();
    if (registry == null) {
        throw new QueryShardException(context, "no percolator query registry");
    }

    Query percolateTypeQuery = new TermQuery(new Term(TypeFieldMapper.NAME, PercolatorFieldMapper.TYPE_NAME));
    PercolatorQuery.Builder builder = new PercolatorQuery.Builder(documentType, registry, document, docSearcher,
            percolateTypeQuery);
    Settings indexSettings = registry.getIndexSettings().getSettings();
    // Guard against a missing "index.version.created" setting: getAsVersion
    // returns the supplied null default in that case, and calling onOrAfter on
    // it would throw a NullPointerException. Only enable the query-terms
    // optimization when the index is known to be created on or after 5.0.0.
    Version indexVersionCreated = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null);
    if (indexVersionCreated != null && indexVersionCreated.onOrAfter(Version.V_5_0_0)) {
        builder.extractQueryTermsQuery(PercolatorFieldMapper.EXTRACTED_TERMS_FULL_FIELD_NAME,
                PercolatorFieldMapper.UNKNOWN_QUERY_FULL_FIELD_NAME);
    }
    return builder.build();
}

From source file:org.elasticsearch.index.query.PercolatorQueryBuilder.java

License:Apache License

private IndexSearcher createMultiDocumentSearcher(DocumentMapper docMapper, Analyzer defaultAnalyzer,
        ParsedDocument doc) {
    // One in-memory reader per (nested) Lucene document; the root document is last.
    List<ParseContext.Document> documents = doc.docs();
    int lastDocIndex = documents.size() - 1;
    assert lastDocIndex > 0;
    IndexReader[] readers = new IndexReader[documents.size()];
    for (int i = 0; i < readers.length; i++) {
        MemoryIndex singleDocIndex = new MemoryIndex(true);
        indexDoc(docMapper, defaultAnalyzer, documents.get(i), singleDocIndex);
        readers[i] = singleDocIndex.createSearcher().getIndexReader();
    }
    try {
        // Stitch the per-document readers into one composite leaf reader;
        // closing the MultiReader (second arg true) also closes the sub-readers.
        LeafReader composite = SlowCompositeReaderWrapper.wrap(new MultiReader(readers, true));
        final IndexSearcher searcher = new IndexSearcher(composite) {

            @Override
            public Weight createNormalizedWeight(Query query, boolean needsScores) throws IOException {
                // Keep nested (non-root) documents from matching the percolated query.
                BooleanQuery.Builder filtered = new BooleanQuery.Builder();
                filtered.add(query, BooleanClause.Occur.MUST);
                filtered.add(Queries.newNestedFilter(), BooleanClause.Occur.MUST_NOT);
                return super.createNormalizedWeight(filtered.build(), needsScores);
            }

        };
        searcher.setQueryCache(null);
        return searcher;
    } catch (IOException e) {
        throw new ElasticsearchException("Failed to create index for percolator with nested document ", e);
    }
}

From source file:org.elasticsearch.index.termvectors.ShardTermVectorsService.java

License:Apache License

private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets,
        @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields) throws IOException {
    // Index the requested field values into a throw-away in-memory index.
    MemoryIndex memoryIndex = new MemoryIndex(withOffsets);
    for (GetField getField : getFields) {
        String fieldName = getField.getName();
        if (!fields.contains(fieldName)) {
            // Some fields are returned even when not asked for, e.g. _timestamp.
            continue;
        }
        Analyzer analyzer = getAnalyzerAtField(fieldName, perFieldAnalyzer);
        for (Object value : getField.getValues()) {
            memoryIndex.addField(fieldName, value.toString(), analyzer);
        }
    }
    // Read the term vectors back out of the in-memory index.
    return MultiFields.getFields(memoryIndex.createSearcher().getIndexReader());
}

From source file:org.elasticsearch.index.termvectors.TermVectorsService.java

License:Apache License

private static Fields generateTermVectors(IndexShard indexShard, Map<String, Object> source,
        Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer,
        Set<String> fields) throws IOException {
    // Gather values per field: stored/returned fields win; fall back to the
    // raw _source for any requested field not covered by them.
    Map<String, Collection<Object>> valuesByField = new HashMap<>();
    for (GetField getField : getFields) {
        // Some fields are returned even when not asked for, e.g. _timestamp; skip those.
        if (fields.contains(getField.getName())) {
            valuesByField.put(getField.getName(), getField.getValues());
        }
    }
    if (source != null) {
        for (String fieldName : fields) {
            if (!valuesByField.containsKey(fieldName)) {
                List<Object> extracted = XContentMapValues.extractRawValues(fieldName, source);
                if (!extracted.isEmpty()) {
                    valuesByField.put(fieldName, extracted);
                }
            }
        }
    }

    /* store document in memory index */
    MemoryIndex memoryIndex = new MemoryIndex(withOffsets);
    for (Map.Entry<String, Collection<Object>> entry : valuesByField.entrySet()) {
        String fieldName = entry.getKey();
        Analyzer analyzer = getAnalyzerAtField(indexShard, fieldName, perFieldAnalyzer);
        for (Object value : entry.getValue()) {
            memoryIndex.addField(fieldName, value.toString(), analyzer);
        }
    }
    /* and read vectors from it */
    return MultiFields.getFields(memoryIndex.createSearcher().getIndexReader());
}

From source file:org.elasticsearch.percolator.ExtractQueryTermsServiceTests.java

License:Apache License

public void testCreateQueryMetadataQuery() throws Exception {
    // Build a small single-document in-memory index (no offsets needed).
    MemoryIndex index = new MemoryIndex(false);
    index.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    index.addField("field2", "some more text", new WhitespaceAnalyzer());
    index.addField("_field3", "unhide me", new WhitespaceAnalyzer());
    index.addField("field4", "123", new WhitespaceAnalyzer());

    IndexReader reader = index.createSearcher().getIndexReader();
    TermsQuery query = (TermsQuery) createQueryTermsQuery(reader, QUERY_TERMS_FIELD,
            new Term(EXTRACTION_RESULT_FIELD, EXTRACTION_FAILED));

    PrefixCodedTerms terms = query.getTermData();
    assertThat(terms.size(), equalTo(15L));

    // Expected encoded terms, in sorted order: field name, NUL separator, term text.
    String[] expectedTerms = { "_field3\u0000me", "_field3\u0000unhide", "field1\u0000brown",
            "field1\u0000dog", "field1\u0000fox", "field1\u0000jumps", "field1\u0000lazy",
            "field1\u0000over", "field1\u0000quick", "field1\u0000the", "field2\u0000more",
            "field2\u0000some", "field2\u0000text", "field4\u0000123" };
    PrefixCodedTerms.TermIterator termIterator = terms.iterator();
    for (String expected : expectedTerms) {
        assertTermIterator(termIterator, expected, QUERY_TERMS_FIELD);
    }
    // The extraction-failed marker term comes last.
    assertTermIterator(termIterator, EXTRACTION_FAILED, EXTRACTION_RESULT_FIELD);
}

From source file:org.elasticsearch.percolator.MultiDocumentPercolatorIndex.java

License:Apache License

MemoryIndex indexDoc(ParseContext.Document d, Analyzer analyzer) {
    // Copy every analyzable field of the parsed document into a fresh
    // in-memory index (offsets stored), returning the populated index.
    MemoryIndex memoryIndex = new MemoryIndex(true);
    for (IndexableField field : d.getFields()) {
        // Skip the uid field when it is not indexed.
        boolean skippableUidField = !field.fieldType().indexed()
                && field.name().equals(UidFieldMapper.NAME);
        if (skippableUidField) {
            continue;
        }
        try {
            TokenStream stream = field.tokenStream(analyzer);
            if (stream != null) {
                memoryIndex.addField(field.name(), stream, field.boost());
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to create token stream", e);
        }
    }
    return memoryIndex;
}

From source file:org.elasticsearch.percolator.PercolatorFieldMapperTests.java

License:Apache License

public void testCreateCandidateQuery() throws Exception {
    addQueryMapping();

    // Build a small single-document in-memory index (no offsets needed),
    // including one point field to exercise non-text extraction.
    MemoryIndex index = new MemoryIndex(false);
    index.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    index.addField("field2", "some more text", new WhitespaceAnalyzer());
    index.addField("_field3", "unhide me", new WhitespaceAnalyzer());
    index.addField("field4", "123", new WhitespaceAnalyzer());
    index.addField(new LongPoint("number_field", 10L), new WhitespaceAnalyzer());

    IndexReader reader = index.createSearcher().getIndexReader();

    TermsQuery candidateQuery = (TermsQuery) fieldType.createCandidateQuery(reader);

    PrefixCodedTerms terms = candidateQuery.getTermData();
    assertThat(terms.size(), equalTo(15L));

    // Expected encoded terms, in sorted order: field name, NUL separator, term text.
    String[] expectedTerms = { "_field3\u0000me", "_field3\u0000unhide", "field1\u0000brown",
            "field1\u0000dog", "field1\u0000fox", "field1\u0000jumps", "field1\u0000lazy",
            "field1\u0000over", "field1\u0000quick", "field1\u0000the", "field2\u0000more",
            "field2\u0000some", "field2\u0000text", "field4\u0000123" };
    PrefixCodedTerms.TermIterator termIterator = terms.iterator();
    for (String expected : expectedTerms) {
        assertTermIterator(termIterator, expected, fieldType.queryTermsField.name());
    }
    // The extraction-failed marker term comes last.
    assertTermIterator(termIterator, EXTRACTION_FAILED, fieldType.extractionResultField.name());
}