Java tutorial: Elasticsearch's TermsIntOrdinalsFacetCollector

This tutorial walks through TermsIntOrdinalsFacetCollector from Elasticsearch's legacy facet module (package org.elasticsearch.search.facet.terms.ints, Apache License 2.0). The collector computes a terms facet over an int field: for each index segment it counts how often each distinct value occurs, then merges the per-segment counts with a priority queue to produce the final entries. The source is shown in three parts, with notes between them.
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.facet.terms.ints;

import com.google.common.collect.ImmutableSet;
import gnu.trove.set.hash.TIntHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldData;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.ints.IntFieldData;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Collects a terms facet over an int field: counts value occurrences
 * per segment, then merges the per-segment counts into the final entries.
 */
public class TermsIntOrdinalsFacetCollector extends AbstractFacetCollector {

    private final FieldDataCache fieldDataCache;
    private final String indexFieldName;
    private final TermsFacet.ComparatorType comparatorType;
    private final int size;
    private final int numberOfShards;
    private final int minCount;
    private final FieldDataType fieldDataType;

    private IntFieldData fieldData;
    private final List<ReaderAggregator> aggregators;
    private ReaderAggregator current;

    long missing;
    long total;

    private final TIntHashSet excluded;

    public TermsIntOrdinalsFacetCollector(String facetName, String fieldName, int size,
                                          TermsFacet.ComparatorType comparatorType, boolean allTerms,
                                          SearchContext context, ImmutableSet<BytesRef> excluded) {
        super(facetName);
        this.fieldDataCache = context.fieldDataCache();
        this.size = size;
        this.comparatorType = comparatorType;
        this.numberOfShards = context.numberOfShards();

        MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(fieldName);
        if (smartMappers == null || !smartMappers.hasMapper()) {
            throw new ElasticSearchIllegalArgumentException(
                    "Field [" + fieldName + "] doesn't have a type, can't run terms int facet collector on it");
        }

        // add type filter if there is an exact doc mapper associated with it
        if (smartMappers.explicitTypeInNameWithDocMapper()) {
            setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter()));
        }

        if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.INT) {
            throw new ElasticSearchIllegalArgumentException(
                    "Field [" + fieldName + "] is not of int type, can't run terms int facet collector on it");
        }

        this.indexFieldName = smartMappers.mapper().names().indexName();
        this.fieldDataType = smartMappers.mapper().fieldDataType();

        if (excluded == null || excluded.isEmpty()) {
            this.excluded = null;
        } else {
            this.excluded = new TIntHashSet(excluded.size());
            for (BytesRef s : excluded) {
                this.excluded.add(Integer.parseInt(s.utf8ToString()));
            }
        }

        // minCount is offset to -1 so that, with allTerms, even zero-count
        // terms pass the later "count > minCount" check
        if (allTerms) {
            minCount = -1;
        } else {
            minCount = 0;
        }

        this.aggregators = new ArrayList<ReaderAggregator>(context.searcher().getIndexReader().leaves().size());
    }
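Collection then proceeds segment by segment. For each new Lucene segment, doSetNextReader() loads that segment's int field data and starts a fresh ReaderAggregator; doCollect() asks the field data to report, for the current doc, the ordinal of each of its values. An ordinal is an index into the segment's sorted array of distinct values, with slot 0 reserved for documents that have no value; that convention is not spelled out in this file but is inferred from how the collector reads counts[0] as the missing count. Here is a minimal standalone sketch of the counting scheme, with invented data:

// Minimal sketch (not Elasticsearch code): per-ordinal counting as
// ReaderAggregator.onOrdinal does it. Data and names are invented.
public class OrdinalCountingSketch {
    public static void main(String[] args) {
        // One segment: sorted distinct values, index 0 reserved for
        // "no value" (inferred from the collector's use of counts[0]).
        int[] values = {0, 5, 9, 12};
        int[] counts = new int[values.length];

        // Each collected doc reports the ordinal of its value (0 if none),
        // mirroring what onOrdinal(docId, ordinal) receives.
        int[] docOrdinals = {1, 3, 0, 1, 2};
        for (int ord : docOrdinals) {
            counts[ord]++;
        }

        System.out.println("missing docs: " + counts[0]);   // 1
        for (int i = 1; i < values.length; i++) {
            System.out.println("value " + values[i] + ": " + counts[i]);
        }
        // value 5: 2, value 9: 1, value 12: 1
    }
}

The collector part of the class follows.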
    @Override
    protected void doSetNextReader(AtomicReaderContext context) throws IOException {
        if (current != null) {
            // flush the previous segment's aggregator: slot 0 holds the "no value" count
            missing += current.counts[0];
            total += current.total - current.counts[0];
            if (current.values.length > 1) {
                aggregators.add(current);
            }
        }
        fieldData = (IntFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
        current = new ReaderAggregator(fieldData);
    }

    @Override
    protected void doCollect(int doc) throws IOException {
        fieldData.forEachOrdinalInDoc(doc, current);
    }

    @Override
    public Facet facet() {
        if (current != null) {
            missing += current.counts[0];
            total += current.total - current.counts[0];
            // if we have values for this one, add it
            if (current.values.length > 1) {
                aggregators.add(current);
            }
        }

        AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());

        for (ReaderAggregator aggregator : aggregators) {
            if (aggregator.nextPosition()) {
                queue.add(aggregator);
            }
        }

        // YACK, we repeat the same logic, but once with an optimized priority queue for smaller sizes
        if (size < EntryPriorityQueue.LIMIT) {
            // optimize: use a bounded priority queue of the requested size
            EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());

            while (queue.size() > 0) {
                ReaderAggregator agg = queue.top();
                int value = agg.current;
                int count = 0;
                do {
                    count += agg.counts[agg.position];
                    if (agg.nextPosition()) {
                        agg = queue.updateTop();
                    } else {
                        // we are done with this reader
                        queue.pop();
                        agg = queue.top();
                    }
                } while (agg != null && value == agg.current);

                if (count > minCount) {
                    if (excluded == null || !excluded.contains(value)) {
                        InternalIntTermsFacet.IntEntry entry = new InternalIntTermsFacet.IntEntry(value, count);
                        ordered.insertWithOverflow(entry);
                    }
                }
            }

            InternalIntTermsFacet.IntEntry[] list = new InternalIntTermsFacet.IntEntry[ordered.size()];
            for (int i = ordered.size() - 1; i >= 0; i--) {
                list[i] = (InternalIntTermsFacet.IntEntry) ordered.pop();
            }

            for (ReaderAggregator aggregator : aggregators) {
                CacheRecycler.pushIntArray(aggregator.counts);
            }

            return new InternalIntTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total);
        }

        BoundedTreeSet<InternalIntTermsFacet.IntEntry> ordered =
                new BoundedTreeSet<InternalIntTermsFacet.IntEntry>(comparatorType.comparator(), size);

        while (queue.size() > 0) {
            ReaderAggregator agg = queue.top();
            int value = agg.current;
            int count = 0;
            do {
                count += agg.counts[agg.position];
                if (agg.nextPosition()) {
                    agg = queue.updateTop();
                } else {
                    // we are done with this reader
                    queue.pop();
                    agg = queue.top();
                }
            } while (agg != null && value == agg.current);

            if (count > minCount) {
                if (excluded == null || !excluded.contains(value)) {
                    InternalIntTermsFacet.IntEntry entry = new InternalIntTermsFacet.IntEntry(value, count);
                    ordered.add(entry);
                }
            }
        }

        for (ReaderAggregator aggregator : aggregators) {
            CacheRecycler.pushIntArray(aggregator.counts);
        }

        return new InternalIntTermsFacet(facetName, comparatorType, size, ordered, missing, total);
    }
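facet() performs a k-way merge: every per-segment aggregator is a cursor over that segment's sorted distinct values, and the priority queue always exposes the cursor with the smallest current value. Popping that value and summing its count across all cursors positioned on it yields global counts in ascending value order. Below is a minimal, self-contained sketch of the same technique; java.util.PriorityQueue stands in for Lucene's, and all names (KWayMergeSketch, Cursor) and data are invented for illustration:

import java.util.PriorityQueue;

// Sketch (not Elasticsearch code): k-way merge of per-segment sorted
// (value, count) arrays, the way facet() drains AggregatorPriorityQueue.
public class KWayMergeSketch {

    // A cursor over one segment's sorted distinct values and their counts,
    // playing the role of ReaderAggregator.
    static final class Cursor {
        final int[] values;  // sorted ascending
        final int[] counts;  // counts[i] is the hit count of values[i]
        int position = -1;

        Cursor(int[] values, int[] counts) {
            this.values = values;
            this.counts = counts;
        }

        boolean advance() {
            return ++position < values.length;
        }

        int current() {
            return values[position];
        }
    }

    public static void main(String[] args) {
        // Two "segments" with overlapping values.
        Cursor[] segments = {
                new Cursor(new int[]{1, 3, 7}, new int[]{2, 1, 5}),
                new Cursor(new int[]{3, 7, 9}, new int[]{4, 1, 2}),
        };

        // Order cursors by their current value, as AggregatorPriorityQueue does.
        PriorityQueue<Cursor> queue =
                new PriorityQueue<>((a, b) -> Integer.compare(a.current(), b.current()));
        for (Cursor c : segments) {
            if (c.advance()) {
                queue.add(c);
            }
        }

        // Repeatedly take the smallest current value and sum its count
        // across every cursor positioned on that value.
        while (!queue.isEmpty()) {
            Cursor top = queue.peek();
            int value = top.current();
            int count = 0;
            while (top != null && top.current() == value) {
                top = queue.poll();
                count += top.counts[top.position];
                if (top.advance()) {
                    queue.add(top);  // re-insert under its new current value
                }
                top = queue.peek();
            }
            System.out.println("value=" + value + " count=" + count);
            // Prints: value=1 count=2, value=3 count=5, value=7 count=6, value=9 count=2
        }
    }
}

The real code avoids the poll/re-add pair by calling Lucene's updateTop(), which re-sinks the top element in place after nextPosition() changes its current value. Finally, the two helper classes: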
    public static class ReaderAggregator implements FieldData.OrdinalInDocProc {

        final int[] values;
        final int[] counts;

        int position = 0;
        int current;
        int total = 0;

        public ReaderAggregator(IntFieldData fieldData) {
            this.values = fieldData.values();
            this.counts = CacheRecycler.popIntArray(fieldData.values().length);
        }

        @Override
        public void onOrdinal(int docId, int ordinal) {
            counts[ordinal]++;
            total++;
        }

        public boolean nextPosition() {
            // pre-increment: the first call moves past slot 0, the "no value" ordinal
            if (++position >= values.length) {
                return false;
            }
            current = values[position];
            return true;
        }
    }

    public static class AggregatorPriorityQueue extends PriorityQueue<ReaderAggregator> {

        public AggregatorPriorityQueue(int size) {
            super(size);
        }

        @Override
        protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) {
            return a.current < b.current;
        }
    }
}
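Two closing details. First, the counts arrays come from and go back to CacheRecycler (popIntArray/pushIntArray), a pool that avoids reallocating large arrays on every facet request; dropping the pushIntArray calls in facet() would leak pooled arrays. Second, Lucene's org.apache.lucene.util.PriorityQueue asks subclasses for only a lessThan() method, so AggregatorPriorityQueue is all it takes to order the per-segment cursors by their current value during the merge.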