List of usage examples for the MemoryIndex constructor of org.apache.lucene.index.memory.MemoryIndex
public MemoryIndex(boolean storeOffsets)
From source file:org.elasticsearch.index.percolator.ExtractQueryTermsServiceTests.java
License:Apache License
public void testCreateQueryMetadataQuery() throws Exception { MemoryIndex memoryIndex = new MemoryIndex(false); memoryIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer()); memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer()); memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer()); memoryIndex.addField("field4", "123", new WhitespaceAnalyzer()); IndexReader indexReader = memoryIndex.createSearcher().getIndexReader(); Query query = ExtractQueryTermsService.createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD); assertThat(query, instanceOf(TermsQuery.class)); // no easy way to get to the terms in TermsQuery, // if there a less then 16 terms then it gets rewritten to bq and then we can easily check the terms BooleanQuery booleanQuery = (BooleanQuery) ((ConstantScoreQuery) query.rewrite(indexReader)).getQuery(); assertThat(booleanQuery.clauses().size(), equalTo(15)); assertClause(booleanQuery, 0, QUERY_TERMS_FIELD, "_field3\u0000me"); assertClause(booleanQuery, 1, QUERY_TERMS_FIELD, "_field3\u0000unhide"); assertClause(booleanQuery, 2, QUERY_TERMS_FIELD, "field1\u0000brown"); assertClause(booleanQuery, 3, QUERY_TERMS_FIELD, "field1\u0000dog"); assertClause(booleanQuery, 4, QUERY_TERMS_FIELD, "field1\u0000fox"); assertClause(booleanQuery, 5, QUERY_TERMS_FIELD, "field1\u0000jumps"); assertClause(booleanQuery, 6, QUERY_TERMS_FIELD, "field1\u0000lazy"); assertClause(booleanQuery, 7, QUERY_TERMS_FIELD, "field1\u0000over"); assertClause(booleanQuery, 8, QUERY_TERMS_FIELD, "field1\u0000quick"); assertClause(booleanQuery, 9, QUERY_TERMS_FIELD, "field1\u0000the"); assertClause(booleanQuery, 10, QUERY_TERMS_FIELD, "field2\u0000more"); assertClause(booleanQuery, 11, QUERY_TERMS_FIELD, "field2\u0000some"); assertClause(booleanQuery, 12, QUERY_TERMS_FIELD, "field2\u0000text"); assertClause(booleanQuery, 13, QUERY_TERMS_FIELD, "field4\u0000123"); assertClause(booleanQuery, 14, 
UNKNOWN_QUERY_FIELD, ""); }
From source file:org.elasticsearch.index.query.PercolatorQueryBuilder.java
License:Apache License
/**
 * Builds the Lucene query for this percolator query builder.
 *
 * <p>The inline {@code document} is parsed with the mapper for {@code documentType}, indexed
 * into one or more in-memory indices, and handed to a {@code PercolatorQuery.Builder}
 * together with the shard's percolator query cache.
 *
 * @param context shard-level context supplying the mapper service, analysis service, index
 *                name and percolator query cache
 * @return the query produced by {@code PercolatorQuery.Builder#build()}
 * @throws IllegalStateException if any of the indexed-document reference fields is still set,
 *                               or if there is no document
 * @throws QueryShardException   if the context has no percolator query cache
 * @throws IOException           declared by the overridden method
 */
@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
    final boolean stillReferencesIndexedDocument =
            indexedDocumentIndex != null || indexedDocumentType != null || indexedDocumentId != null;
    if (stillReferencesIndexedDocument) {
        throw new IllegalStateException("query builder must be rewritten first");
    }
    if (document == null) {
        throw new IllegalStateException("nothing to percolator");
    }

    DocumentMapper docMapper = context.getMapperService()
            .documentMapperWithAutoCreate(documentType)
            .getDocumentMapper();
    // The document is parsed under a placeholder id.
    ParsedDocument parsed = docMapper.parse(
            source(document).index(context.index().getName()).id("_temp_id").type(documentType));
    Analyzer defaultAnalyzer = context.getAnalysisService().defaultIndexAnalyzer();

    final IndexSearcher docSearcher;
    if (parsed.docs().size() <= 1) {
        // TODO: we may want to bring to MemoryIndex thread local cache back...
        // but I'm unsure about the real benefits.
        MemoryIndex singleDocIndex = new MemoryIndex(true);
        indexDoc(docMapper, defaultAnalyzer, parsed.rootDoc(), singleDocIndex);
        docSearcher = singleDocIndex.createSearcher();
        docSearcher.setQueryCache(null);
    } else {
        // More than one Lucene document is only expected when the mapping has nested objects.
        assert docMapper.hasNestedObjects();
        docSearcher = createMultiDocumentSearcher(docMapper, defaultAnalyzer, parsed);
    }

    PercolatorQueryCache registry = context.getPercolatorQueryCache();
    if (registry == null) {
        throw new QueryShardException(context, "no percolator query registry");
    }

    Term percolatorTypeTerm = new Term(TypeFieldMapper.NAME, PercolatorFieldMapper.TYPE_NAME);
    Query percolateTypeQuery = new TermQuery(percolatorTypeTerm);
    PercolatorQuery.Builder builder = new PercolatorQuery.Builder(documentType, registry, document,
            docSearcher, percolateTypeQuery);

    // The extracted-terms query is only enabled for indices created on or after 5.0.0.
    Version indexCreated = registry.getIndexSettings().getSettings()
            .getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null);
    if (indexCreated.onOrAfter(Version.V_5_0_0)) {
        builder.extractQueryTermsQuery(PercolatorFieldMapper.EXTRACTED_TERMS_FULL_FIELD_NAME,
                PercolatorFieldMapper.UNKNOWN_QUERY_FULL_FIELD_NAME);
    }
    return builder.build();
}
From source file:org.elasticsearch.index.query.PercolatorQueryBuilder.java
License:Apache License
private IndexSearcher createMultiDocumentSearcher(DocumentMapper docMapper, Analyzer defaultAnalyzer, ParsedDocument doc) {//from ww w. ja v a2 s . co m IndexReader[] memoryIndices = new IndexReader[doc.docs().size()]; List<ParseContext.Document> docs = doc.docs(); int rootDocIndex = docs.size() - 1; assert rootDocIndex > 0; for (int i = 0; i < docs.size(); i++) { ParseContext.Document d = docs.get(i); MemoryIndex memoryIndex = new MemoryIndex(true); indexDoc(docMapper, defaultAnalyzer, d, memoryIndex); memoryIndices[i] = memoryIndex.createSearcher().getIndexReader(); } try { MultiReader mReader = new MultiReader(memoryIndices, true); LeafReader slowReader = SlowCompositeReaderWrapper.wrap(mReader); final IndexSearcher slowSearcher = new IndexSearcher(slowReader) { @Override public Weight createNormalizedWeight(Query query, boolean needsScores) throws IOException { BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(query, BooleanClause.Occur.MUST); bq.add(Queries.newNestedFilter(), BooleanClause.Occur.MUST_NOT); return super.createNormalizedWeight(bq.build(), needsScores); } }; slowSearcher.setQueryCache(null); return slowSearcher; } catch (IOException e) { throw new ElasticsearchException("Failed to create index for percolator with nested document ", e); } }
From source file:org.elasticsearch.index.termvectors.ShardTermVectorsService.java
License:Apache License
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields) throws IOException { /* store document in memory index */ MemoryIndex index = new MemoryIndex(withOffsets); for (GetField getField : getFields) { String field = getField.getName(); if (fields.contains(field) == false) { // some fields are returned even when not asked for, eg. _timestamp continue; }/*from www . j av a 2s. c om*/ Analyzer analyzer = getAnalyzerAtField(field, perFieldAnalyzer); for (Object text : getField.getValues()) { index.addField(field, text.toString(), analyzer); } } /* and read vectors from it */ return MultiFields.getFields(index.createSearcher().getIndexReader()); }
From source file:org.elasticsearch.index.termvectors.TermVectorsService.java
License:Apache License
private static Fields generateTermVectors(IndexShard indexShard, Map<String, Object> source, Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields) throws IOException { Map<String, Collection<Object>> values = new HashMap<>(); for (GetField getField : getFields) { String field = getField.getName(); if (fields.contains(field)) { // some fields are returned even when not asked for, eg. _timestamp values.put(field, getField.getValues()); }//from ww w . j a v a2s.com } if (source != null) { for (String field : fields) { if (values.containsKey(field) == false) { List<Object> v = XContentMapValues.extractRawValues(field, source); if (v.isEmpty() == false) { values.put(field, v); } } } } /* store document in memory index */ MemoryIndex index = new MemoryIndex(withOffsets); for (Map.Entry<String, Collection<Object>> entry : values.entrySet()) { String field = entry.getKey(); Analyzer analyzer = getAnalyzerAtField(indexShard, field, perFieldAnalyzer); for (Object text : entry.getValue()) { index.addField(field, text.toString(), analyzer); } } /* and read vectors from it */ return MultiFields.getFields(index.createSearcher().getIndexReader()); }
From source file:org.elasticsearch.percolator.ExtractQueryTermsServiceTests.java
License:Apache License
/**
 * Indexes four small text fields into a {@link MemoryIndex}, builds the query-terms query
 * from that reader, and walks the term data of the resulting {@code TermsQuery}.
 *
 * <p>Fifteen terms are expected: fourteen on {@code QUERY_TERMS_FIELD} (one per distinct
 * field/token pair, in the order listed below) followed by {@code EXTRACTION_FAILED} on
 * {@code EXTRACTION_RESULT_FIELD}.
 */
public void testCreateQueryMetadataQuery() throws Exception {
    // {field name, field text} pairs, added in this order.
    final String[][] fieldsAndText = {
        {"field1", "the quick brown fox jumps over the lazy dog"},
        {"field2", "some more text"},
        {"_field3", "unhide me"},
        {"field4", "123"},
    };
    MemoryIndex memoryIndex = new MemoryIndex(false);
    for (String[] pair : fieldsAndText) {
        memoryIndex.addField(pair[0], pair[1], new WhitespaceAnalyzer());
    }
    IndexReader reader = memoryIndex.createSearcher().getIndexReader();

    Term extractionFailedTerm = new Term(EXTRACTION_RESULT_FIELD, EXTRACTION_FAILED);
    TermsQuery termsQuery = (TermsQuery) createQueryTermsQuery(reader, QUERY_TERMS_FIELD, extractionFailedTerm);

    PrefixCodedTerms termData = termsQuery.getTermData();
    assertThat(termData.size(), equalTo(15L));

    // Expected terms on QUERY_TERMS_FIELD, in iteration order.
    final String[] expectedTerms = {
        "_field3\u0000me",
        "_field3\u0000unhide",
        "field1\u0000brown",
        "field1\u0000dog",
        "field1\u0000fox",
        "field1\u0000jumps",
        "field1\u0000lazy",
        "field1\u0000over",
        "field1\u0000quick",
        "field1\u0000the",
        "field2\u0000more",
        "field2\u0000some",
        "field2\u0000text",
        "field4\u0000123",
    };
    PrefixCodedTerms.TermIterator cursor = termData.iterator();
    for (String expected : expectedTerms) {
        assertTermIterator(cursor, expected, QUERY_TERMS_FIELD);
    }
    // The last term is the extraction-failed marker on the extraction-result field.
    assertTermIterator(cursor, EXTRACTION_FAILED, EXTRACTION_RESULT_FIELD);
}
From source file:org.elasticsearch.percolator.MultiDocumentPercolatorIndex.java
License:Apache License
/**
 * Indexes the fields of one Lucene document into a fresh {@link MemoryIndex}.
 *
 * <p>A field is skipped when it is both not indexed and named {@code UidFieldMapper.NAME},
 * or when it yields no token stream for the given analyzer.
 *
 * @param d        document whose fields are indexed
 * @param analyzer analyzer used to obtain each field's token stream
 * @return the populated in-memory index
 * @throws ElasticsearchException if obtaining or adding a token stream fails with an I/O error
 */
MemoryIndex indexDoc(ParseContext.Document d, Analyzer analyzer) {
    final MemoryIndex target = new MemoryIndex(true);
    for (IndexableField docField : d.getFields()) {
        final boolean unindexedUid =
                !docField.fieldType().indexed() && docField.name().equals(UidFieldMapper.NAME);
        if (unindexedUid) {
            continue;
        }
        try {
            final TokenStream tokens = docField.tokenStream(analyzer);
            if (tokens == null) {
                continue;
            }
            target.addField(docField.name(), tokens, docField.boost());
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to create token stream", e);
        }
    }
    return target;
}
From source file:org.elasticsearch.percolator.PercolatorFieldMapperTests.java
License:Apache License
/**
 * Registers the query mapping, indexes four text fields plus one {@code LongPoint} field
 * into a {@link MemoryIndex}, and checks the term data of the candidate query created by
 * {@code fieldType.createCandidateQuery}.
 *
 * <p>Fifteen terms are expected: fourteen on the query-terms field (one per distinct text
 * field/token pair, in the order listed below) followed by {@code EXTRACTION_FAILED} on the
 * extraction-result field. None of the expected terms refers to {@code number_field}.
 */
public void testCreateCandidateQuery() throws Exception {
    addQueryMapping();

    // {field name, field text} pairs, added in this order.
    final String[][] fieldsAndText = {
        {"field1", "the quick brown fox jumps over the lazy dog"},
        {"field2", "some more text"},
        {"_field3", "unhide me"},
        {"field4", "123"},
    };
    MemoryIndex memoryIndex = new MemoryIndex(false);
    for (String[] pair : fieldsAndText) {
        memoryIndex.addField(pair[0], pair[1], new WhitespaceAnalyzer());
    }
    // The numeric point field is added after the text fields.
    memoryIndex.addField(new LongPoint("number_field", 10L), new WhitespaceAnalyzer());
    IndexReader reader = memoryIndex.createSearcher().getIndexReader();

    TermsQuery candidateQuery = (TermsQuery) fieldType.createCandidateQuery(reader);
    PrefixCodedTerms termData = candidateQuery.getTermData();
    assertThat(termData.size(), equalTo(15L));

    // Expected terms on the query-terms field, in iteration order.
    final String[] expectedTerms = {
        "_field3\u0000me",
        "_field3\u0000unhide",
        "field1\u0000brown",
        "field1\u0000dog",
        "field1\u0000fox",
        "field1\u0000jumps",
        "field1\u0000lazy",
        "field1\u0000over",
        "field1\u0000quick",
        "field1\u0000the",
        "field2\u0000more",
        "field2\u0000some",
        "field2\u0000text",
        "field4\u0000123",
    };
    PrefixCodedTerms.TermIterator cursor = termData.iterator();
    for (String expected : expectedTerms) {
        assertTermIterator(cursor, expected, fieldType.queryTermsField.name());
    }
    // The last term is the extraction-failed marker on the extraction-result field.
    assertTermIterator(cursor, EXTRACTION_FAILED, fieldType.extractionResultField.name());
}