org.carrot2.output.metrics.IdealPartitioningBasedMetric.java Source code

Java tutorial

Introduction

Here is the source code for org.carrot2.output.metrics.IdealPartitioningBasedMetric.java

Source

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2015, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.output.metrics;

import java.util.*;

import org.carrot2.core.Document;
import org.carrot2.core.attribute.Processing;
import org.carrot2.util.attribute.*;

import com.google.common.base.Function;
import com.google.common.collect.*;

/**
 * A base class for metrics based on some reference partitioning.
 */
@Bindable
abstract class IdealPartitioningBasedMetric implements IClusteringMetric {
    /**
     * Partition id field name.
     */
    @Input
    @Processing
    @Attribute
    public String partitionIdFieldName = Document.PARTITIONS;

    Set<Object> getPartitions(List<Document> documents) {
        final HashSet<Object> partitions = Sets.newHashSet();
        for (Document document : documents) {
            final Collection<Object> documentPartitions = document
                    .<Collection<Object>>getField(partitionIdFieldName);
            if (documentPartitions != null) {
                partitions.addAll(documentPartitions);
            }
        }
        return partitions;
    }

    /**
     * Returns the number of distinct {@link Document#PARTITIONS}s in a collection of
     * documents. Note if that at least one of the document has a <code>null</code>
     * partition, 0 will be returned.
     */
    int getPartitionsCount(List<Document> documents) {
        return getPartitions(documents).size();
    }

    /**
     * Returns documents grouped by partitions.
     */
    SetMultimap<Object, Document> getDocumentsByPartition(List<Document> documents) {
        final SetMultimap<Object, Document> index = HashMultimap.create();
        for (Document document : documents) {
            final Collection<Object> partitions = document.getField(partitionIdFieldName);
            for (Object partition : partitions) {
                index.put(partition, document);
            }
        }

        return ImmutableSetMultimap.copyOf(index);
    }

    /**
     * Returns document counts for each partition.
     */
    Map<Object, Integer> getDocumentCountByPartition(List<Document> documents) {
        return ImmutableMap.copyOf(Maps.transformValues(getDocumentsByPartition(documents).asMap(),
                new Function<Collection<Document>, Integer>() {
                    public Integer apply(Collection<Document> documents) {
                        return documents.size();
                    }
                }));
    }
}