// Java tutorial
/*
 * Copyright 2013 Robert Gacki <robert.gacki@cgi.com>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.otto.mongodb.profiler.collection;

import com.mongodb.*;

import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;

import static com.google.common.base.Preconditions.checkNotNull;

/**
 * Utility for computing how the sizes of an array / list attribute are distributed
 * across the documents of a collection.
 *
 * @author Robert Gacki
 */
public class ListSizeDistribution {

    /**
     * Template of the Javascript map function. The single {@code %s} placeholder is
     * substituted with the attribute name; the function emits the attribute's
     * {@code length} as key together with a count of one.
     */
    public static final String MAP_FUNCTION = "function () { emit(this.%s.length, {count: 1}); }";

    /**
     * The Javascript reduce function. Sums the {@code count} fields of all values
     * emitted for a key.
     */
    public static final String REDUCE_FUNCTION = "function (key, values) { var total = 0; for(var i = 0; i < values.length; i++) { total += values[i].count; } return {count: total}; }";

    /**
     * An empty query object, passed as the query of the map-reduce operation.
     */
    public static final DBObject QUERY_ALL = new BasicDBObject();

    /**
     * Orders keys ascending by their numeric value; a {@code null} key sorts before
     * every non-null key, and two {@code null} keys compare as equal.
     */
    private static final Comparator<Long> KEYS_BY_VALUE = new Comparator<Long>() {
        @Override
        public int compare(Long left, Long right) {
            if (left == null) {
                return right == null ? 0 : -1;
            }
            if (right == null) {
                return 1;
            }
            // Long.compareTo yields exactly -1, 0 or 1.
            return left.compareTo(right);
        }
    };

    /**
     * Computes the size distribution of an array / list attribute by running a
     * map-reduce operation on the given collection.
     * <p>
     * When {@code resultCollectionName} is non-null the map-reduce output type is
     * {@code REPLACE}; otherwise it is {@code INLINE}. Each result entry is read as
     * {@code _id} (the length) and {@code value.count} (the number of documents),
     * both converted via {@link Number#longValue()}.
     * <p>
     * NOTE(review): a result entry whose {@code _id} is {@code null} would raise a
     * {@link NullPointerException} when it is converted - confirm whether the map
     * function can emit such a key for documents lacking a usable {@code length}.
     *
     * @param collection           the collection to scan; must not be {@code null}
     * @param attribute            the name of the attribute that is an array inside documents
     *                             of the collection; must not be {@code null}
     * @param resultCollectionName the name of the result collection of the map-reduce,
     *                             or {@code null} for inline output
     * @return a map, sorted ascending by key, from array length to the amount of
     *         documents having that length
     */
    public static Map<Long, Long> calculate(final DBCollection collection,
                                            final String attribute,
                                            final String resultCollectionName) {
        checkNotNull(collection);
        checkNotNull(attribute);

        final String map = String.format(MAP_FUNCTION, attribute);

        final MapReduceCommand.OutputType type;
        if (resultCollectionName == null) {
            type = MapReduceCommand.OutputType.INLINE;
        } else {
            type = MapReduceCommand.OutputType.REPLACE;
        }

        final MapReduceOutput mapReduceOutput =
                collection.mapReduce(map, REDUCE_FUNCTION, resultCollectionName, type, QUERY_ALL);

        final Map<Long, Long> distribution = new TreeMap<>(KEYS_BY_VALUE);
        for (DBObject entry : mapReduceOutput.results()) {
            final Number length = (Number) entry.get("_id");
            final DBObject value = (DBObject) entry.get("value");
            final Number documents = (Number) value.get("count");
            distribution.put(Long.valueOf(length.longValue()), Long.valueOf(documents.longValue()));
        }
        return distribution;
    }
}