de.otto.mongodb.profiler.collection.ListSizeDistribution.java Source code

Java tutorial

Introduction

Here is the source code for de.otto.mongodb.profiler.collection.ListSizeDistribution.java

Source

/*
 *  Copyright 2013 Robert Gacki <robert.gacki@cgi.com>
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package de.otto.mongodb.profiler.collection;

import com.mongodb.*;

import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;

import static com.google.common.base.Preconditions.checkNotNull;

/**
 * A utility type to calculate the size distribution of arrays / lists stored under one attribute
 * of the documents of a collection.
 *
 * <p>The aggregation is done server-side by a map-reduce whose map step emits
 * {@code this.<attribute>.length} for every scanned document.</p>
 *
 * @author Robert Gacki
 */
public class ListSizeDistribution {

    /**
     * The Javascript map function. It is a {@link String#format(String, Object...)} template whose single
     * {@code %s} placeholder is replaced by the attribute name; it emits the attribute's {@code length}
     * as key and {@code {count: 1}} as value.
     */
    public static final String MAP_FUNCTION = "function () { emit(this.%s.length, {count: 1}); }";

    /**
     * The Javascript reduce function. Sums up the {@code count} fields of all values emitted for a key and
     * returns the total in the same {@code {count: n}} shape.
     */
    public static final String REDUCE_FUNCTION = "function (key, values) { var total = 0; for(var i = 0; i < values.length; i++) { total += values[i].count; } return {count: total}; }";

    /**
     * The query that is used to scan the documents. It is an empty query object, i.e. it carries no
     * filter criteria.
     */
    public static final DBObject QUERY_ALL = new BasicDBObject();

    /**
     * Orders the keys of the result map ascending by their numeric value. A {@code null} key is sorted
     * before every non-null key, so the result map is able to hold one.
     */
    private static final Comparator<Long> KEYS_BY_VALUE = new Comparator<Long>() {

        @Override
        public int compare(Long left, Long right) {

            if (left == null) {
                return right == null ? 0 : -1;
            }

            if (right == null) {
                return 1;
            }

            // Compare the primitive values; never rely on the identity of boxed instances.
            return Long.compare(left.longValue(), right.longValue());
        }
    };

    /**
     * Calculates the distribution sizes of arrays / lists inside documents of a collection. Executes a map-reduce
     * operation to aggregate the values. Returns a map where the key determines the length of the array / list
     * and the value the amount of documents with that length.
     *
     * <p>The attribute name is inserted verbatim into the Javascript map function, so it must be a trusted
     * value that forms a valid property path.</p>
     *
     * <p>NOTE(review): the map function dereferences {@code this.<attribute>} unconditionally; documents that
     * lack the attribute are not treated specially here and presumably make the command fail - confirm
     * against the server.</p>
     *
     * @param collection           the collection, must not be {@code null}
     * @param attribute            the name of the attribute that is an array inside documents of the collection,
     *                             must not be {@code null}
     * @param resultCollectionName the name of the result collection of the map-reduce; if {@code null}, the
     *                             results are requested inline instead of replacing a collection
     * @return the map of array lengths to the amount of documents, sorted ascending by length; a result row
     *         without a numeric {@code _id} (presumably emitted for attribute values that have no
     *         {@code length} property) is reported under the {@code null} key, which sorts first
     * @throws NullPointerException if {@code collection} or {@code attribute} is {@code null}
     */
    public static Map<Long, Long> calculate(final DBCollection collection, final String attribute,
            final String resultCollectionName) {

        checkNotNull(collection, "collection must not be null");
        checkNotNull(attribute, "attribute must not be null");

        final String mapFunction = String.format(MAP_FUNCTION, attribute);

        // Without a target collection name the results can only be returned inline.
        final MapReduceCommand.OutputType outputType = resultCollectionName != null
                ? MapReduceCommand.OutputType.REPLACE
                : MapReduceCommand.OutputType.INLINE;

        final MapReduceOutput output = collection.mapReduce(mapFunction, REDUCE_FUNCTION, resultCollectionName,
                outputType, QUERY_ALL);

        final Map<Long, Long> result = new TreeMap<>(KEYS_BY_VALUE);
        for (DBObject entry : output.results()) {
            final Number length = (Number) entry.get("_id");
            final Number count = (Number) ((DBObject) entry.get("value")).get("count");

            // A missing / null "_id" must not abort the whole calculation with a NullPointerException;
            // it is kept as the null key, which KEYS_BY_VALUE is able to order.
            final Long key = length != null ? Long.valueOf(length.longValue()) : null;
            result.put(key, Long.valueOf(count.longValue()));
        }

        return result;
    }

}