Usage examples for the no-argument constructor of org.apache.lucene.index.memory.MemoryIndex
public MemoryIndex()
From source file:org.apache.uima.lucas.ProspectiveSearchAE.java
License:Apache License
@Override public void process(CAS aCAS) throws AnalysisEngineProcessException { // First create the index of the document text MemoryIndex index = new MemoryIndex(); List fields = createDocument(aCAS).getFields(); for (Iterator it = fields.iterator(); it.hasNext();) { Field field = (Field) it.next(); if (field.isIndexed() && field.tokenStreamValue() != null) { index.addField(field.name(), field.tokenStreamValue()); }// w ww . ja v a2 s.c o m } // Search all queries against the one document index for (SearchQuery query : searchQueryProvider.getSearchQueries(aCAS)) { float score = index.search(query.query()); if (score > matchingThreshold) { // Add a FS to the CAS with the search result FeatureStructure searchResult = aCAS.createFS(searchResultType); searchResult.setLongValue(searchResultIdFeature, query.id()); aCAS.addFsToIndexes(searchResult); // Find matching tokens and link their annotations // in case the user wants search term highlighting if (searchResultMatchingTextFeature != null) { fields = createDocument(aCAS).getFields(); for (Iterator it = fields.iterator(); it.hasNext();) { Field field = (Field) it.next(); if (field.isIndexed() && field.tokenStreamValue() != null) { TokenStream tokenStream = field.tokenStreamValue(); Collection<AnnotationFS> matchingTextAnnotations = new LinkedList<AnnotationFS>(); QueryScorer scorer = new QueryScorer(query.query(), field.name()); scorer.startFragment(new TextFragment(new StringBuffer(aCAS.getDocumentText()), 0, 0)); try { scorer.init(tokenStream); OffsetAttribute offsetAttr = null; while (tokenStream.incrementToken()) { offsetAttr = (OffsetAttribute) tokenStream.getAttribute(OffsetAttribute.class); float tokenScore = scorer.getTokenScore(); if (tokenScore > 0) { AnnotationFS annotation = aCAS.createAnnotation(matchingTextType, offsetAttr.startOffset(), offsetAttr.endOffset()); matchingTextAnnotations.add(annotation); } } } catch (IOException e) { throw new AnalysisEngineProcessException(e); } ArrayFS matchtingTextArray 
= aCAS.createArrayFS(matchingTextAnnotations.size()); int matchtingTextArrayIndex = 0; for (AnnotationFS matchingTextAnnotation : matchingTextAnnotations) { matchtingTextArray.set(matchtingTextArrayIndex++, matchingTextAnnotation); } searchResult.setFeatureValue(searchResultMatchingTextFeature, matchtingTextArray); } } } } } }
From source file:org.elasticsearch.index.query.MoreLikeThisQueryBuilderTests.java
License:Apache License
/**
 * Builds the fields of a synthetic single document in which every requested field holds the
 * same text.
 *
 * <p>Here we could go overboard and use a pre-generated indexed random document for a given Item,
 * but for now we'd prefer to simply return the id as the content of the document and that for
 * every field.
 *
 * @param fieldNames names of the fields to add to the in-memory index
 * @param text content added under each of the field names, analyzed with a
 *             {@code WhitespaceAnalyzer}
 * @return the fields exposed by the index reader of the in-memory index
 * @throws IOException if the fields cannot be read from the in-memory index
 */
private static Fields generateFields(String[] fieldNames, String text) throws IOException {
    MemoryIndex index = new MemoryIndex();
    // A single analyzer serves all fields and is closed afterwards instead of leaking one
    // instance per field; addField tokenizes the text eagerly, so the analyzer is no longer
    // needed once the loop has finished.
    try (WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer()) {
        for (String fieldName : fieldNames) {
            index.addField(fieldName, text, analyzer);
        }
    }
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}
From source file:org.elasticsearch.index.query.PercolateQueryTests.java
License:Apache License
public void testVariousQueries() throws Exception { addPercolatorQuery("1", new TermQuery(new Term("field", "brown"))); addPercolatorQuery("2", new TermQuery(new Term("field", "monkey"))); addPercolatorQuery("3", new TermQuery(new Term("field", "fox"))); BooleanQuery.Builder bq1 = new BooleanQuery.Builder(); bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.SHOULD); bq1.add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.SHOULD); addPercolatorQuery("4", bq1.build()); BooleanQuery.Builder bq2 = new BooleanQuery.Builder(); bq2.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST); bq2.add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.MUST); addPercolatorQuery("5", bq2.build()); BooleanQuery.Builder bq3 = new BooleanQuery.Builder(); bq3.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST); bq3.add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST_NOT); addPercolatorQuery("6", bq3.build()); BooleanQuery.Builder bq4 = new BooleanQuery.Builder(); bq4.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST_NOT); bq4.add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST); addPercolatorQuery("7", bq4.build()); PhraseQuery.Builder pq1 = new PhraseQuery.Builder(); pq1.add(new Term("field", "lazy")); pq1.add(new Term("field", "dog")); addPercolatorQuery("8", pq1.build()); indexWriter.close();// w w w . 
j a v a 2s .c om directoryReader = DirectoryReader.open(directory); IndexSearcher shardSearcher = newSearcher(directoryReader); MemoryIndex memoryIndex = new MemoryIndex(); memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer()); IndexSearcher percolateSearcher = memoryIndex.createSearcher(); PercolateQuery.Builder builder = new PercolateQuery.Builder("docType", queryRegistry, new BytesArray("{}"), percolateSearcher); builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME); // no scoring, wrapping it in a constant score query: Query query = new ConstantScoreQuery(builder.build()); TopDocs topDocs = shardSearcher.search(query, 10); assertThat(topDocs.totalHits, equalTo(5)); assertThat(topDocs.scoreDocs.length, equalTo(5)); assertThat(topDocs.scoreDocs[0].doc, equalTo(0)); Explanation explanation = shardSearcher.explain(query, 0); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score)); explanation = shardSearcher.explain(query, 1); assertThat(explanation.isMatch(), is(false)); assertThat(topDocs.scoreDocs[1].doc, equalTo(2)); explanation = shardSearcher.explain(query, 2); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score)); assertThat(topDocs.scoreDocs[2].doc, equalTo(3)); explanation = shardSearcher.explain(query, 3); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score)); explanation = shardSearcher.explain(query, 4); assertThat(explanation.isMatch(), is(false)); assertThat(topDocs.scoreDocs[3].doc, equalTo(5)); explanation = shardSearcher.explain(query, 5); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[3].score)); explanation = shardSearcher.explain(query, 6); assertThat(explanation.isMatch(), is(false)); assertThat(topDocs.scoreDocs[4].doc, 
equalTo(7)); explanation = shardSearcher.explain(query, 7); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[4].score)); }
From source file:org.elasticsearch.index.query.PercolateQueryTests.java
License:Apache License
public void testVariousQueries_withScoring() throws Exception { SpanNearQuery.Builder snp = new SpanNearQuery.Builder("field", true); snp.addClause(new SpanTermQuery(new Term("field", "jumps"))); snp.addClause(new SpanTermQuery(new Term("field", "lazy"))); snp.addClause(new SpanTermQuery(new Term("field", "dog"))); snp.setSlop(2);// w w w .j ava2 s. co m addPercolatorQuery("1", snp.build()); PhraseQuery.Builder pq1 = new PhraseQuery.Builder(); pq1.add(new Term("field", "quick")); pq1.add(new Term("field", "brown")); pq1.add(new Term("field", "jumps")); pq1.setSlop(1); addPercolatorQuery("2", pq1.build()); BooleanQuery.Builder bq1 = new BooleanQuery.Builder(); bq1.add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST); bq1.add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST); bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST); addPercolatorQuery("3", bq1.build()); indexWriter.close(); directoryReader = DirectoryReader.open(directory); IndexSearcher shardSearcher = newSearcher(directoryReader); MemoryIndex memoryIndex = new MemoryIndex(); memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer()); IndexSearcher percolateSearcher = memoryIndex.createSearcher(); PercolateQuery.Builder builder = new PercolateQuery.Builder("docType", queryRegistry, new BytesArray("{}"), percolateSearcher); builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME); Query query = builder.build(); TopDocs topDocs = shardSearcher.search(query, 10); assertThat(topDocs.totalHits, equalTo(3)); assertThat(topDocs.scoreDocs[0].doc, equalTo(2)); Explanation explanation = shardSearcher.explain(query, 2); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score)); assertThat(explanation.getDetails(), arrayWithSize(1)); assertThat(topDocs.scoreDocs[1].doc, equalTo(1)); explanation = shardSearcher.explain(query, 
1); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score)); assertThat(explanation.getDetails(), arrayWithSize(1)); assertThat(topDocs.scoreDocs[2].doc, equalTo(0)); explanation = shardSearcher.explain(query, 0); assertThat(explanation.isMatch(), is(true)); assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score)); assertThat(explanation.getDetails(), arrayWithSize(1)); }
From source file:org.elasticsearch.index.query.PercolateQueryTests.java
License:Apache License
public void testDuel() throws Exception { int numQueries = scaledRandomIntBetween(32, 256); for (int i = 0; i < numQueries; i++) { String id = Integer.toString(i); Query query;//from www.j a v a 2 s.c o m if (randomBoolean()) { query = new PrefixQuery(new Term("field", id)); } else if (randomBoolean()) { query = new WildcardQuery(new Term("field", id + "*")); } else if (randomBoolean()) { query = new CustomQuery(new Term("field", id + "*")); } else if (randomBoolean()) { query = new SpanTermQuery(new Term("field", id)); } else { query = new TermQuery(new Term("field", id)); } addPercolatorQuery(id, query); } indexWriter.close(); directoryReader = DirectoryReader.open(directory); IndexSearcher shardSearcher = newSearcher(directoryReader); for (int i = 0; i < numQueries; i++) { MemoryIndex memoryIndex = new MemoryIndex(); String id = Integer.toString(i); memoryIndex.addField("field", id, new WhitespaceAnalyzer()); duelRun(memoryIndex, shardSearcher); } }
From source file:org.elasticsearch.index.query.PercolateQueryTests.java
License:Apache License
public void testDuelSpecificQueries() throws Exception { CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128); commonTermsQuery.add(new Term("field", "quick")); commonTermsQuery.add(new Term("field", "brown")); commonTermsQuery.add(new Term("field", "fox")); addPercolatorQuery("_id1", commonTermsQuery); BlendedTermQuery blendedTermQuery = BlendedTermQuery.booleanBlendedQuery( new Term[] { new Term("field", "quick"), new Term("field", "brown"), new Term("field", "fox") }, false);// w ww .j ava2 s . c o m addPercolatorQuery("_id2", blendedTermQuery); SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("field", true) .addClause(new SpanTermQuery(new Term("field", "quick"))) .addClause(new SpanTermQuery(new Term("field", "brown"))) .addClause(new SpanTermQuery(new Term("field", "fox"))).build(); addPercolatorQuery("_id3", spanNearQuery); SpanNearQuery spanNearQuery2 = new SpanNearQuery.Builder("field", true) .addClause(new SpanTermQuery(new Term("field", "the"))) .addClause(new SpanTermQuery(new Term("field", "lazy"))) .addClause(new SpanTermQuery(new Term("field", "doc"))).build(); SpanOrQuery spanOrQuery = new SpanOrQuery(spanNearQuery, spanNearQuery2); addPercolatorQuery("_id4", spanOrQuery); SpanNotQuery spanNotQuery = new SpanNotQuery(spanNearQuery, spanNearQuery); addPercolatorQuery("_id5", spanNotQuery); indexWriter.close(); directoryReader = DirectoryReader.open(directory); IndexSearcher shardSearcher = newSearcher(directoryReader); MemoryIndex memoryIndex = new MemoryIndex(); memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer()); duelRun(memoryIndex, shardSearcher); }
From source file:org.elasticsearch.index.query.PercolatorQueryTests.java
License:Apache License
/**
 * Registers eight percolator queries (term, boolean and phrase), percolates the sentence
 * "the quick brown fox jumps over the lazy dog" against them, and checks which documents are
 * returned and in which order.
 */
public void testVariousQueries() throws Exception {
    // Single-term queries.
    addPercolatorQuery("1", new TermQuery(new Term("field", "brown")));
    addPercolatorQuery("2", new TermQuery(new Term("field", "monkey")));
    addPercolatorQuery("3", new TermQuery(new Term("field", "fox")));

    // Boolean combinations of SHOULD / MUST / MUST_NOT clauses.
    addPercolatorQuery("4", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.SHOULD)
            .build());
    addPercolatorQuery("5", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST)
            .add(new TermQuery(new Term("field", "monkey")), BooleanClause.Occur.MUST)
            .build());
    addPercolatorQuery("6", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST)
            .add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST_NOT)
            .build());
    addPercolatorQuery("7", new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST_NOT)
            .add(new TermQuery(new Term("field", "apes")), BooleanClause.Occur.MUST)
            .build());

    // Phrase query.
    addPercolatorQuery("8", new PhraseQuery.Builder()
            .add(new Term("field", "lazy"))
            .add(new Term("field", "dog"))
            .build());

    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);

    // The document being percolated lives in its own in-memory index.
    MemoryIndex document = new MemoryIndex();
    document.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    IndexSearcher percolateSearcher = document.createSearcher();

    PercolatorQuery.Builder percolatorBuilder = new PercolatorQuery.Builder("docType", queryRegistry,
            new BytesArray("{}"), percolateSearcher, new MatchAllDocsQuery());
    percolatorBuilder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);

    TopDocs topDocs = shardSearcher.search(percolatorBuilder.build(), 10);
    assertThat(topDocs.totalHits, equalTo(5));
    assertThat(topDocs.scoreDocs.length, equalTo(5));

    // Doc ids expected as hits, in result order.
    int[] expectedHits = {0, 2, 3, 5, 7};
    for (int rank = 0; rank < expectedHits.length; rank++) {
        assertThat(topDocs.scoreDocs[rank].doc, equalTo(expectedHits[rank]));
    }
}
From source file:org.elasticsearch.index.query.PercolatorQueryTests.java
License:Apache License
public void testDuel() throws Exception { int numQueries = scaledRandomIntBetween(32, 256); for (int i = 0; i < numQueries; i++) { String id = Integer.toString(i); Query query;/*from w ww . j ava2 s . co m*/ if (randomBoolean()) { query = new PrefixQuery(new Term("field", id)); } else if (randomBoolean()) { query = new WildcardQuery(new Term("field", id + "*")); } else if (randomBoolean()) { query = new CustomQuery(new Term("field", id + "*")); } else { query = new TermQuery(new Term("field", id)); } addPercolatorQuery(id, query); } indexWriter.close(); directoryReader = DirectoryReader.open(directory); IndexSearcher shardSearcher = newSearcher(directoryReader); for (int i = 0; i < numQueries; i++) { MemoryIndex memoryIndex = new MemoryIndex(); String id = Integer.toString(i); memoryIndex.addField("field", id, new WhitespaceAnalyzer()); IndexSearcher percolateSearcher = memoryIndex.createSearcher(); PercolatorQuery.Builder builder1 = new PercolatorQuery.Builder("docType", queryRegistry, new BytesArray("{}"), percolateSearcher, new MatchAllDocsQuery()); // enables the optimization that prevents queries from being evaluated that don't match builder1.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME); TopDocs topDocs1 = shardSearcher.search(builder1.build(), 10); PercolatorQuery.Builder builder2 = new PercolatorQuery.Builder("docType", queryRegistry, new BytesArray("{}"), percolateSearcher, new MatchAllDocsQuery()); TopDocs topDocs2 = shardSearcher.search(builder2.build(), 10); assertThat(topDocs1.totalHits, equalTo(topDocs2.totalHits)); assertThat(topDocs1.scoreDocs.length, equalTo(topDocs2.scoreDocs.length)); for (int j = 0; j < topDocs1.scoreDocs.length; j++) { assertThat(topDocs1.scoreDocs[j].doc, equalTo(topDocs2.scoreDocs[j].doc)); } } }
From source file:org.elasticsearch.index.query.WrapperQueryBuilderTests.java
License:Apache License
@Override protected Query rewrite(Query query) throws IOException { // WrapperQueryBuilder adds some optimization if the wrapper and query builder have boosts / query names that wraps // the actual QueryBuilder that comes from the binary blob into a BooleanQueryBuilder to give it an outer boost / name // this causes some queries to be not exactly equal but equivalent such that we need to rewrite them before comparing. if (query != null) { MemoryIndex idx = new MemoryIndex(); return idx.createSearcher().rewrite(query); }/*www . ja va 2 s . co m*/ return new MatchAllDocsQuery(); // null == *:* }
From source file:org.elasticsearch.percolator.CandidateQueryTests.java
License:Apache License
public void testDuel() throws Exception { List<Function<String, Query>> queryFunctions = new ArrayList<>(); queryFunctions.add((id) -> new PrefixQuery(new Term("field", id))); queryFunctions.add((id) -> new WildcardQuery(new Term("field", id + "*"))); queryFunctions.add((id) -> new CustomQuery(new Term("field", id))); queryFunctions.add((id) -> new SpanTermQuery(new Term("field", id))); queryFunctions.add((id) -> new TermQuery(new Term("field", id))); queryFunctions.add((id) -> {/*from w w w . jav a 2 s . co m*/ BooleanQuery.Builder builder = new BooleanQuery.Builder(); return builder.build(); }); queryFunctions.add((id) -> { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.MUST); if (randomBoolean()) { builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); } if (randomBoolean()) { builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.MUST); } return builder.build(); }); queryFunctions.add((id) -> { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD); if (randomBoolean()) { builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); } if (randomBoolean()) { builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD); } return builder.build(); }); queryFunctions.add((id) -> { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); if (randomBoolean()) { builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); } return builder.build(); }); queryFunctions.add((id) -> { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); if (randomBoolean()) { 
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); } return builder.build(); }); queryFunctions.add((id) -> { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(randomIntBetween(0, 4)); builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD); builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD); return builder.build(); }); queryFunctions.add((id) -> new MatchAllDocsQuery()); queryFunctions.add((id) -> new MatchNoDocsQuery("no reason at all")); int numDocs = randomIntBetween(queryFunctions.size(), queryFunctions.size() * 3); List<ParseContext.Document> documents = new ArrayList<>(); for (int i = 0; i < numDocs; i++) { String id = Integer.toString(i); Query query = queryFunctions.get(i % queryFunctions.size()).apply(id); addQuery(query, documents); } indexWriter.addDocuments(documents); indexWriter.close(); directoryReader = DirectoryReader.open(directory); IndexSearcher shardSearcher = newSearcher(directoryReader); // Disable query cache, because ControlQuery cannot be cached... shardSearcher.setQueryCache(null); for (int i = 0; i < numDocs; i++) { String id = Integer.toString(i); Iterable<? extends IndexableField> doc = Collections .singleton(new StringField("field", id, Field.Store.NO)); MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); duelRun(queryStore, memoryIndex, shardSearcher); } Iterable<? extends IndexableField> doc = Collections .singleton(new StringField("field", "value", Field.Store.NO)); MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer()); duelRun(queryStore, memoryIndex, shardSearcher); // Empty percolator doc: memoryIndex = new MemoryIndex(); duelRun(queryStore, memoryIndex, shardSearcher); }