Java tutorial: Elasticsearch's TermsStringFacetCollector
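The listing below is TermsStringFacetCollector from Elasticsearch's pre-aggregation terms facet module: for every document matched by a search, it walks the string values of one field, counts each term, and finally reports the most frequent terms. Before reading the collector itself, it helps to see the kind of request that drives it. The sketch that follows is only for orientation and rests on assumptions: the index "articles", the field "tags", the facet name "top_tags", and an already-built Client are all illustrative, and the entry accessors (getTerm()/getCount()) follow the 0.90-era Java API and may differ slightly in other 0.x releases.

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.facet.FacetBuilders;
import org.elasticsearch.search.facet.terms.TermsFacet;

public class TermsFacetRequestSketch {

    // Illustrative only: index, field and facet names are assumptions, not part of the listing below.
    public static void printTopTags(Client client) {
        SearchResponse response = client.prepareSearch("articles")
                .setQuery(QueryBuilders.matchAllQuery())
                // Ask for the ten most frequent values of the "tags" field; this is the kind of
                // request that ends up being served by a collector like the one in the listing.
                .addFacet(FacetBuilders.termsFacet("top_tags").field("tags").size(10))
                .execute().actionGet();

        TermsFacet facet = response.getFacets().facet(TermsFacet.class, "top_tags");
        for (TermsFacet.Entry entry : facet) {
            // Accessor names assume the 0.90-era API.
            System.out.println(entry.getTerm() + " -> " + entry.getCount());
        }
    }
}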
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.facet.terms.strings;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import gnu.trove.iterator.TObjectIntIterator;
import gnu.trove.map.hash.TObjectIntHashMap;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.util.concurrent.ThreadLocals;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Collects a terms facet over a string field: it counts how often each term occurs in the
 * matching documents (optionally filtered by an exclude set, a regex pattern, or a script)
 * and returns the most frequent entries.
 */
public class TermsStringFacetCollector extends AbstractFacetCollector {

    // Thread-local deque of reusable term -> count maps.
    static ThreadLocal<ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>>> cache =
            new ThreadLocal<ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>>>() {
                @Override
                protected ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>> initialValue() {
                    return new ThreadLocals.CleanableValue<Deque<TObjectIntHashMap<String>>>(
                            new ArrayDeque<TObjectIntHashMap<String>>());
                }
            };

    private final FieldDataCache fieldDataCache;

    private final String indexFieldName;

    private final TermsFacet.ComparatorType comparatorType;

    private final int size;

    private final int numberOfShards;

    private final FieldDataType fieldDataType;

    private FieldData fieldData;

    private final StaticAggregatorValueProc aggregator;

    private final SearchScript script;

    public TermsStringFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType,
                                     boolean allTerms, SearchContext context, ImmutableSet<BytesRef> excluded, Pattern pattern,
                                     String scriptLang, String script, Map<String, Object> params) {
        super(facetName);
        this.fieldDataCache = context.fieldDataCache();
        this.size = size;
        this.comparatorType = comparatorType;
        this.numberOfShards = context.numberOfShards();

        MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
        if (smartMappers == null || !smartMappers.hasMapper()) {
            this.indexFieldName = fieldName;
            this.fieldDataType = FieldDataType.DefaultTypes.STRING;
        } else {
            // add type filter if there is exact doc mapper associated with it
            if (smartMappers.hasDocMapper()) {
                setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
            }
            this.indexFieldName = smartMappers.mapper().names().indexName();
            this.fieldDataType = smartMappers.mapper().fieldDataType();
        }

        if (script != null) {
            this.script = context.scriptService().search(context.lookup(), scriptLang, script, params);
        } else {
            this.script = null;
        }

        // The plain counting aggregator is enough unless terms have to be excluded,
        // matched against a pattern, or post-processed by a script.
        if (excluded.isEmpty() && pattern == null && this.script == null) {
            aggregator = new StaticAggregatorValueProc(CacheRecycler.<BytesRef>popObjectIntMap());
        } else {
            aggregator = new AggregatorValueProc(CacheRecycler.<BytesRef>popObjectIntMap(), excluded, pattern, this.script);
        }

        if (allTerms) {
            // Pre-register every term of the field so that terms with a zero count are reported as well.
            try {
                for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
                    FieldData fieldData = fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName);
                    fieldData.forEachValue(aggregator);
                }
            } catch (Exception e) {
                throw new FacetPhaseExecutionException(facetName, "failed to load all terms", e);
            }
        }
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
        if (script != null) {
            script.setScorer(scorer);
        }
    }

    @Override
    protected void doSetNextReader(AtomicReaderContext context) throws IOException {
        fieldData = fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
        if (script != null) {
            script.setNextReader(context);
        }
    }

    @Override
    protected void doCollect(int doc) throws IOException {
        fieldData.forEachValueInDoc(doc, aggregator);
    }

    @Override
    public Facet facet() {
        TObjectIntHashMap<BytesRef> facets = aggregator.facets();
        if (facets.isEmpty()) {
            CacheRecycler.pushObjectIntMap(facets);
            return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.<InternalStringTermsFacet.TermEntry>of(), aggregator.missing(), aggregator.total());
        } else {
            // Select the top "size" entries: a bounded priority queue for small sizes,
            // a bounded tree set otherwise. The recycled count map is returned to the cache either way.
            if (size < EntryPriorityQueue.LIMIT) {
                EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
                for (TObjectIntIterator<BytesRef> it = facets.iterator(); it.hasNext(); ) {
                    it.advance();
                    ordered.insertWithOverflow(new InternalStringTermsFacet.TermEntry(it.key(), it.value()));
                }
                InternalStringTermsFacet.TermEntry[] list = new InternalStringTermsFacet.TermEntry[ordered.size()];
                for (int i = ordered.size() - 1; i >= 0; i--) {
                    list[i] = ((InternalStringTermsFacet.TermEntry) ordered.pop());
                }
                CacheRecycler.pushObjectIntMap(facets);
                return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing(), aggregator.total());
            } else {
                BoundedTreeSet<InternalStringTermsFacet.TermEntry> ordered = new BoundedTreeSet<InternalStringTermsFacet.TermEntry>(comparatorType.comparator(), size);
                for (TObjectIntIterator<BytesRef> it = facets.iterator(); it.hasNext(); ) {
                    it.advance();
                    ordered.add(new InternalStringTermsFacet.TermEntry(it.key(), it.value()));
                }
                CacheRecycler.pushObjectIntMap(facets);
                return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing(), aggregator.total());
            }
        }
    }

    public static class AggregatorValueProc extends StaticAggregatorValueProc {

        private final ImmutableSet<BytesRef> excluded;

        private final Matcher matcher;

        private final SearchScript script;

        public AggregatorValueProc(TObjectIntHashMap<BytesRef> facets, ImmutableSet<BytesRef> excluded, Pattern pattern, SearchScript script) {
            super(facets);
            this.excluded = excluded;
            this.matcher = pattern != null ? pattern.matcher("") : null;
            this.script = script;
        }

        @Override
        public void onValue(int docId, BytesRef value) {
            // Skip excluded terms and terms that do not match the pattern; optionally let a
            // script filter or rewrite the term before it is counted.
            if (excluded != null && excluded.contains(value)) {
                return;
            }
            // LUCENE 4 UPGRADE: use Lucene's RegexCapabilities
            if (matcher != null && !matcher.reset(value.utf8ToString()).matches()) {
                return;
            }
            if (script != null) {
                script.setNextDocId(docId);
                // LUCENE 4 UPGRADE: needs optimization
                script.setNextVar("term", value.utf8ToString());
                Object scriptValue = script.run();
                if (scriptValue == null) {
                    return;
                }
                if (scriptValue instanceof Boolean) {
                    if (!((Boolean) scriptValue)) {
                        return;
                    }
                } else {
                    // LUCENE 4 UPGRADE: should be possible to convert directly to BR
                    value = new BytesRef(scriptValue.toString());
                }
            }
            super.onValue(docId, value);
        }
    }

    public static class StaticAggregatorValueProc implements FieldData.StringValueInDocProc, FieldData.StringValueProc {

        // LUCENE 4 UPGRADE: check if hashcode is not too expensive
        private final TObjectIntHashMap<BytesRef> facets;

        private int missing = 0;

        private int total = 0;

        public StaticAggregatorValueProc(TObjectIntHashMap<BytesRef> facets) {
            this.facets = facets;
        }

        @Override
        public void onValue(BytesRef value) {
            // Used by the allTerms pass: register the term with a zero count.
            facets.putIfAbsent(value, 0);
        }

        @Override
        public void onValue(int docId, BytesRef value) {
            // One increment per occurrence of the term in a matching document.
            facets.adjustOrPutValue(value, 1, 1);
            total++;
        }

        @Override
        public void onMissing(int docId) {
            missing++;
        }

        public final TObjectIntHashMap<BytesRef> facets() {
            return facets;
        }

        public final int missing() {
            return this.missing;
        }

        public int total() {
            return this.total;
        }
    }
}
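Stripped of the Elasticsearch and Trove plumbing, the collector boils down to two steps: build a term -> count map while documents are collected (StaticAggregatorValueProc), then keep only the top "size" entries when facet() is called (EntryPriorityQueue or BoundedTreeSet). The following self-contained sketch shows the same count-then-select-top-N pattern with plain JDK collections; the class and method names are made up for illustration and are not part of the Elasticsearch API.

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;

public class TopTermsSketch {

    // Counts term occurrences across all documents and returns the "size" most frequent terms,
    // mirroring what the collector above does in doCollect() and facet().
    public static List<Map.Entry<String, Integer>> topTerms(Iterable<String[]> docs, int size) {
        // Step 1: the counting pass, equivalent to StaticAggregatorValueProc.onValue(docId, value):
        // one increment per occurrence of a term in a matching document.
        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (String[] docTerms : docs) {
            for (String term : docTerms) {
                Integer current = counts.get(term);
                counts.put(term, current == null ? 1 : current + 1);
            }
        }

        // Step 2: top-N selection, equivalent to the EntryPriorityQueue branch of facet().
        // A bounded min-heap evicts the smallest entry whenever it grows past "size".
        PriorityQueue<Map.Entry<String, Integer>> heap = new PriorityQueue<Map.Entry<String, Integer>>(
                Math.max(1, size), new Comparator<Map.Entry<String, Integer>>() {
                    public int compare(Map.Entry<String, Integer> a, Map.Entry<String, Integer> b) {
                        return a.getValue().compareTo(b.getValue());
                    }
                });
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            heap.offer(entry);
            if (heap.size() > size) {
                heap.poll(); // drop the entry with the currently smallest count
            }
        }

        // Drain the heap (smallest first) and reverse, so the most frequent term comes first.
        List<Map.Entry<String, Integer>> result = new ArrayList<Map.Entry<String, Integer>>();
        while (!heap.isEmpty()) {
            result.add(heap.poll());
        }
        Collections.reverse(result);
        return result;
    }
}

For example, topTerms over the two documents {"a", "b", "a"} and {"a", "c"} with size 2 returns "a" with a count of 3 first, followed by either "b" or "c" (both have a count of 1), which mirrors how the terms facet reports its top entries.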