org.apache.flink.graph.library.clustering.directed.TriadicCensus.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.flink.graph.library.clustering.directed.TriadicCensus.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.graph.library.clustering.directed;

import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.graph.AbstractGraphAnalytic;
import org.apache.flink.graph.AnalyticHelper;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.asm.degree.annotate.directed.VertexDegrees;
import org.apache.flink.graph.asm.degree.annotate.directed.VertexDegrees.Degrees;
import org.apache.flink.graph.asm.result.PrintableResult;
import org.apache.flink.graph.library.clustering.directed.TriadicCensus.Result;
import org.apache.flink.types.CopyableValue;
import org.apache.flink.util.Preconditions;

import java.io.IOException;
import java.math.BigInteger;
import java.text.NumberFormat;

import static org.apache.flink.api.common.ExecutionConfig.PARALLELISM_DEFAULT;

/**
 * A triad is formed by three connected or unconnected vertices in a graph.
 * The triadic census counts the occurrences of each type of triad.
 * <p>
 * http://vlado.fmf.uni-lj.si/pub/networks/doc/triads/triads.pdf
 *
 * @param <K> graph ID type
 * @param <VV> vertex value type
 * @param <EV> edge value type
 */
public class TriadicCensus<K extends Comparable<K> & CopyableValue<K>, VV, EV>
        extends AbstractGraphAnalytic<K, VV, EV, Result> {

    private TriangleListingHelper<K> triangleListingHelper;

    private VertexDegreesHelper<K> vertexDegreesHelper;

    // Optional configuration
    private int littleParallelism = PARALLELISM_DEFAULT;

    /**
     * Sets the parallelism for operators which only process small amounts of data.
     * The stored value is handed to the algorithms launched by {@link #run(Graph)}.
     *
     * @param littleParallelism operator parallelism
     * @return this instance, so that calls can be chained
     */
    public TriadicCensus<K, VV, EV> setLittleParallelism(int littleParallelism) {
        this.littleParallelism = littleParallelism;
        return this;
    }

    /**
     * Attaches the two metric collectors to the input graph.
     * <p>
     * The output of {@link TriangleListing} is routed into a fresh
     * {@code TriangleListingHelper} and the output of {@link VertexDegrees} into a
     * fresh {@code VertexDegreesHelper}. Both helpers publish their tallies as
     * accumulators, which {@link #getResult()} later reads back.
     * <p>
     * NOTE(review): presumably this only builds the plan and the job must be
     * executed before {@link #getResult()} is called - confirm against
     * {@code AbstractGraphAnalytic}.
     *
     * @param input the graph to analyze
     * @return this
     * @throws Exception propagated from the superclass or from the launched algorithms
     */
    @Override
    public TriadicCensus<K, VV, EV> run(Graph<K, VV, EV> input) throws Exception {
        super.run(input);

        // a new helper per run; the previous one (if any) is discarded
        triangleListingHelper = new TriangleListingHelper<>();

        // triangles supply the counts of the seven fully connected triad types
        input.run(new TriangleListing<K, VV, EV>().setLittleParallelism(littleParallelism))
                .output(triangleListingHelper).name("Triangle counts");

        vertexDegreesHelper = new VertexDegreesHelper<>();

        // vertex degrees supply the vertex count, edge counts, and triplet counts
        input.run(new VertexDegrees<K, VV, EV>().setParallelism(littleParallelism)).output(vertexDegreesHelper)
                .name("Edge and triplet counts");

        return this;
    }

    /**
     * Derives the sixteen triad counts from the accumulators published by the
     * vertex-degree and triangle-listing helpers.
     * <p>
     * The computation proceeds from most to least connected: triangle counts are
     * taken as reported; triplet counts are reduced by the triplets which are
     * part of a triangle; single-edge counts are reduced by the edges which are
     * part of a triplet or triangle; and the empty triad count is whatever
     * remains of "vertex count choose 3".
     *
     * @return the triadic census
     */
    @Override
    public Result getResult() {
        final BigInteger two = BigInteger.valueOf(2);
        final BigInteger three = BigInteger.valueOf(3);

        // per-vertex tallies; each edge was tallied at both of its endpoints,
        // hence the two edge totals are halved
        BigInteger vertices = fetchCount(vertexDegreesHelper, "vc");
        BigInteger unidirectionalEdges = fetchCount(vertexDegreesHelper, "uec").divide(two);
        BigInteger bidirectionalEdges = fetchCount(vertexDegreesHelper, "bec").divide(two);
        BigInteger raw021d = fetchCount(vertexDegreesHelper, "021d");
        BigInteger raw021u = fetchCount(vertexDegreesHelper, "021u");
        BigInteger raw021c = fetchCount(vertexDegreesHelper, "021c");
        BigInteger raw111d = fetchCount(vertexDegreesHelper, "111d");
        BigInteger raw111u = fetchCount(vertexDegreesHelper, "111u");
        BigInteger raw201 = fetchCount(vertexDegreesHelper, "201");

        // triads with three connecting edges = closed triplet = triangle
        BigInteger closed030t = fetchCount(triangleListingHelper, "030t");
        BigInteger closed030c = fetchCount(triangleListingHelper, "030c");
        BigInteger closed120d = fetchCount(triangleListingHelper, "120d");
        BigInteger closed120u = fetchCount(triangleListingHelper, "120u");
        BigInteger closed120c = fetchCount(triangleListingHelper, "120c");
        BigInteger closed210 = fetchCount(triangleListingHelper, "210");
        BigInteger closed300 = fetchCount(triangleListingHelper, "300");

        // triads with two connecting edges = open triplet; every triangle was
        // also tallied as three triplets (one per center vertex), which are
        // removed here from the matching triplet types
        BigInteger open021d = raw021d
                .subtract(closed120d)
                .subtract(closed030t);
        BigInteger open021u = raw021u
                .subtract(closed120u)
                .subtract(closed030t);
        BigInteger open021c = raw021c
                .subtract(closed120c)
                .subtract(closed030c.multiply(three))
                .subtract(closed030t);
        BigInteger open111d = raw111d
                .subtract(closed210)
                .subtract(closed120c)
                .subtract(closed120d.multiply(two));
        BigInteger open111u = raw111u
                .subtract(closed210)
                .subtract(closed120c)
                .subtract(closed120u.multiply(two));
        BigInteger open201 = raw201
                .subtract(closed300.multiply(three))
                .subtract(closed210);

        // triads with one connecting edge; each edge pairs with `vertex count - 2`
        // vertices, less one for each triplet or triangle the edge takes part in
        BigInteger otherVertices = vertices.subtract(two);

        BigInteger bidirectionalInLargerTriads = open111d
                .add(open111u)
                .add(open201.multiply(two))
                .add(closed120d)
                .add(closed120u)
                .add(closed120c)
                .add(closed210.multiply(two))
                .add(closed300.multiply(three));
        BigInteger single102 = bidirectionalEdges.multiply(otherVertices)
                .subtract(bidirectionalInLargerTriads);

        BigInteger unidirectionalInLargerTriads = open021d.add(open021u).add(open021c).multiply(two)
                .add(open111d)
                .add(open111u)
                .add(closed030t.add(closed030c).multiply(three))
                .add(closed120d.add(closed120u).add(closed120c).multiply(two))
                .add(closed210);
        BigInteger single012 = unidirectionalEdges.multiply(otherVertices)
                .subtract(unidirectionalInLargerTriads);

        // triads with zero connecting edges;
        // (vertex count choose 3) minus all of the counts above
        BigInteger allTriads = vertices
                .multiply(vertices.subtract(BigInteger.ONE))
                .multiply(otherVertices)
                .divide(BigInteger.valueOf(6));
        BigInteger connectedTriads = single012
                .add(single102)
                .add(open021d)
                .add(open021u)
                .add(open021c)
                .add(open111d)
                .add(open111u)
                .add(closed030t)
                .add(closed030c)
                .add(open201)
                .add(closed120d)
                .add(closed120u)
                .add(closed120c)
                .add(closed210)
                .add(closed300);
        BigInteger empty003 = allTriads.subtract(connectedTriads);

        return new Result(
                empty003, single012, single102,
                open021d, open021u, open021c,
                open111d, open111u,
                closed030t, closed030c,
                open201,
                closed120d, closed120u, closed120c,
                closed210, closed300);
    }

    /**
     * Reads the named accumulator from the given helper and converts it to a
     * {@link BigInteger}.
     *
     * @param helper helper which registered the accumulator
     * @param name accumulator name as registered by the helper
     * @return the accumulator value
     */
    private BigInteger fetchCount(AnalyticHelper<?> helper, String name) {
        return BigInteger.valueOf((Long) helper.getAccumulator(env, name));
    }

    /**
     * Collects triadic census metrics from the triangle listing: triangles are
     * tallied by the value of their {@code f3} field and, on close, folded into
     * one accumulator per closed triad type.
     *
     * @param <T> ID type
     */
    private static class TriangleListingHelper<T> extends AnalyticHelper<TriangleListing.Result<T>> {
        // see table from Batagelj and Mrvar, "A subquadratic triad census algorithm for large
        // sparse networks with small maximum degree" (this Flink algorithm does not use their
        // algorithm as we do not assume a small maximum degree)
        private static final int[] TYPE_TABLE = {
                1, 2, 2, 3, 2, 4, 6, 8,
                2, 6, 5, 7, 3, 8, 7, 11,
                2, 6, 4, 8, 5, 9, 9, 13,
                6, 10, 9, 14, 7, 14, 12, 15,
                2, 5, 6, 7, 6, 9, 10, 14,
                4, 9, 9, 12, 8, 13, 14, 15,
                3, 7, 8, 11, 7, 12, 14, 15,
                8, 14, 13, 15, 11, 15, 15, 16 };

        // one tally per possible value of the record's f3 field
        private long[] triangleCount = new long[64];

        @Override
        public void writeRecord(TriangleListing.Result<T> record) throws IOException {
            triangleCount[record.f3.getValue()]++;
        }

        @Override
        public void close() throws IOException {
            long count030t = 0;
            long count030c = 0;
            long count120d = 0;
            long count120u = 0;
            long count120c = 0;
            long count210 = 0;
            long count300 = 0;

            for (int index = 0; index < TYPE_TABLE.length; index++) {
                long tally = triangleCount[index];

                switch (TYPE_TABLE[index]) {
                    case 9:
                        count030t += tally;
                        break;
                    case 10:
                        count030c += tally;
                        break;
                    case 12:
                        count120d += tally;
                        break;
                    case 13:
                        count120u += tally;
                        break;
                    case 14:
                        count120c += tally;
                        break;
                    case 15:
                        count210 += tally;
                        break;
                    case 16:
                        count300 += tally;
                        break;
                    default:
                        // the remaining type codes are not reported as triangles
                        assert tally == 0;
                        break;
                }
            }

            addAccumulator("030t", new LongCounter(count030t));
            addAccumulator("030c", new LongCounter(count030c));
            addAccumulator("120d", new LongCounter(count120d));
            addAccumulator("120u", new LongCounter(count120u));
            addAccumulator("120c", new LongCounter(count120c));
            addAccumulator("210", new LongCounter(count210));
            addAccumulator("300", new LongCounter(count300));
        }
    }

    /**
     * Collects triadic census metrics from vertex degrees: the vertex count, the
     * edge tallies, and the number of triplets centered on each vertex, grouped
     * by the kinds of the two edges forming the triplet.
     *
     * @param <T> ID type
     */
    private static class VertexDegreesHelper<T> extends AnalyticHelper<Vertex<T, Degrees>> {
        private long vertexCount;
        private long unidirectionalEdgeCount;
        private long bidirectionalEdgeCount;
        private long triplet021dCount;
        private long triplet021uCount;
        private long triplet021cCount;
        private long triplet111dCount;
        private long triplet111uCount;
        private long triplet201Count;

        /**
         * Number of unordered pairs which can be chosen from {@code n} items.
         */
        private static long pairs(long n) {
            return n * (n - 1) / 2;
        }

        @Override
        public void writeRecord(Vertex<T, Degrees> record) throws IOException {
            Degrees degrees = record.f1;

            long total = degrees.getDegree().getValue();
            long out = degrees.getOutDegree().getValue();
            long in = degrees.getInDegree().getValue();

            // split this vertex's edges by kind
            long outOnly = total - in;
            long inOnly = total - out;
            long mutual = in + out - total;

            vertexCount++;

            // both edge tallies are per endpoint, so totals over all vertices
            // count every edge twice
            unidirectionalEdgeCount += outOnly + inOnly;
            bidirectionalEdgeCount += mutual;

            // triplets centered on this vertex: pick two edges of the given kinds
            triplet021dCount += pairs(outOnly);
            triplet021uCount += pairs(inOnly);
            triplet021cCount += outOnly * inOnly;
            triplet111dCount += inOnly * mutual;
            triplet111uCount += outOnly * mutual;
            triplet201Count += pairs(mutual);
        }

        @Override
        public void close() throws IOException {
            addAccumulator("vc", new LongCounter(vertexCount));
            addAccumulator("uec", new LongCounter(unidirectionalEdgeCount));
            addAccumulator("bec", new LongCounter(bidirectionalEdgeCount));
            addAccumulator("021d", new LongCounter(triplet021dCount));
            addAccumulator("021u", new LongCounter(triplet021uCount));
            addAccumulator("021c", new LongCounter(triplet021cCount));
            addAccumulator("111d", new LongCounter(triplet111dCount));
            addAccumulator("111u", new LongCounter(triplet111uCount));
            addAccumulator("201", new LongCounter(triplet201Count));
        }
    }

    /**
     * Wraps triadic census metrics.
     * <p>
     * Holds exactly sixteen counts, ordered from least to most connected triad
     * type. The counts are copied on the way in and on the way out so that an
     * instance cannot be modified through an array shared with a caller.
     */
    public static class Result implements PrintableResult {
        private final BigInteger[] counts;

        /**
         * Creates a result from sixteen counts.
         *
         * @param counts the counts in the order documented on {@link #getCounts()}
         * @throws IllegalArgumentException if the number of counts is not sixteen
         */
        public Result(BigInteger... counts) {
            Preconditions.checkArgument(counts.length == 16, "Expected 16 counts but received " + counts.length);

            // defensive copy: the varargs array may be owned and later modified by the caller
            this.counts = counts.clone();
        }

        /**
         * Creates a result from sixteen counts given as primitive longs.
         *
         * @param counts the counts in the order documented on {@link #getCounts()}
         * @throws IllegalArgumentException if the number of counts is not sixteen
         */
        public Result(long... counts) {
            Preconditions.checkArgument(counts.length == 16, "Expected 16 counts but received " + counts.length);

            this.counts = new BigInteger[counts.length];

            for (int i = 0; i < counts.length; i++) {
                this.counts[i] = BigInteger.valueOf(counts[i]);
            }
        }

        /**
         * Get the count of "003" triads which have zero connecting edges.
         *
         * @return count of "003" triads
         */
        public BigInteger getCount003() {
            return counts[0];
        }

        /**
         * Get the count of "012" triads which have one unidirectional edge among the vertices.
         *
         * @return count of "012" triads
         */
        public BigInteger getCount012() {
            return counts[1];
        }

        /**
         * Get the count of "102" triads which have one bidirectional edge among the vertices.
         *
         * @return count of "102" triads
         */
        public BigInteger getCount102() {
            return counts[2];
        }

        /**
         * Get the count of "021d" triads which have two unidirectional edges among the vertices,
         * forming an open triplet; both edges source the center vertex.
         *
         * @return count of "021d" triads
         */
        public BigInteger getCount021d() {
            return counts[3];
        }

        /**
         * Get the count of "021u" triads which have two unidirectional edges among the vertices,
         * forming an open triplet; both edges target the center vertex.
         *
         * @return count of "021u" triads
         */
        public BigInteger getCount021u() {
            return counts[4];
        }

        /**
         * Get the count of "021c" triads which have two unidirectional edges among the vertices,
         * forming an open triplet; one edge sources and one edge targets the center vertex.
         *
         * @return count of "021c" triads
         */
        public BigInteger getCount021c() {
            return counts[5];
        }

        /**
         * Get the count of "111d" triads which have one unidirectional and one bidirectional edge
         * among the vertices, forming an open triplet; the unidirectional edge targets the center vertex.
         *
         * @return count of "111d" triads
         */
        public BigInteger getCount111d() {
            return counts[6];
        }

        /**
         * Get the count of "111u" triads which have one unidirectional and one bidirectional edge
         * among the vertices, forming an open triplet; the unidirectional edge sources the center vertex.
         *
         * @return count of "111u" triads
         */
        public BigInteger getCount111u() {
            return counts[7];
        }

        /**
         * Get the count of "030t" triads which have three unidirectional edges among the vertices,
         * forming a closed triplet, a triangle; two of the unidirectional edges source/target the
         * same vertex.
         *
         * @return count of "030t" triads
         */
        public BigInteger getCount030t() {
            return counts[8];
        }

        /**
         * Get the count of "030c" triads which have three unidirectional edges among the vertices,
         * forming a closed triplet, a triangle; the three unidirectional edges both source and target
         * different vertices.
         *
         * @return count of "030c" triads
         */
        public BigInteger getCount030c() {
            return counts[9];
        }

        /**
         * Get the count of "201" triads which have two bidirectional edges among the vertices,
         * forming an open triplet.
         *
         * @return count of "201" triads
         */
        public BigInteger getCount201() {
            return counts[10];
        }

        /**
         * Get the count of "120d" triads which have two unidirectional edges and one bidirectional edge
         * among the vertices, forming a closed triplet, a triangle; both unidirectional edges source
         * the same vertex.
         *
         * @return count of "120d" triads
         */
        public BigInteger getCount120d() {
            return counts[11];
        }

        /**
         * Get the count of "120u" triads which have two unidirectional edges and one bidirectional edge
         * among the vertices, forming a closed triplet, a triangle; both unidirectional edges target
         * the same vertex.
         *
         * @return count of "120u" triads
         */
        public BigInteger getCount120u() {
            return counts[12];
        }

        /**
         * Get the count of "120c" triads which have two unidirectional edges and one bidirectional edge
         * among the vertices, forming a closed triplet, a triangle; one vertex is sourced by and targeted
         * by the unidirectional edges.
         *
         * @return count of "120c" triads
         */
        public BigInteger getCount120c() {
            return counts[13];
        }

        /**
         * Get the count of "210" triads which have one unidirectional edge and two bidirectional edges
         * among the vertices, forming a closed triplet, a triangle.
         *
         * @return count of "210" triads
         */
        public BigInteger getCount210() {
            return counts[14];
        }

        /**
         * Get the count of "300" triads which have three bidirectional edges among the vertices,
         * forming a closed triplet, a triangle.
         *
         * @return count of "300" triads
         */
        public BigInteger getCount300() {
            return counts[15];
        }

        /**
         * Get a copy of the array of counts; modifying the returned array does not
         * affect this result.
         *
         * The order of the counts is from least to most connected:
         *   003, 012, 102, 021d, 021u, 021c, 111d, 111u,
         *   030t, 030c, 201, 120d, 120u, 120c, 210, 300
         *
         * @return array of counts
         */
        public BigInteger[] getCounts() {
            return counts.clone();
        }

        /**
         * Formats all sixteen counts as "type: count" pairs separated by "; ",
         * using the number format of the default locale.
         *
         * @return printable summary of the census
         */
        @Override
        public String toPrintableString() {
            NumberFormat nf = NumberFormat.getInstance();

            return "003: " + nf.format(getCount003()) + "; 012: " + nf.format(getCount012()) + "; 102: "
                    + nf.format(getCount102()) + "; 021d: " + nf.format(getCount021d()) + "; 021u: "
                    + nf.format(getCount021u()) + "; 021c: " + nf.format(getCount021c()) + "; 111d: "
                    + nf.format(getCount111d()) + "; 111u: " + nf.format(getCount111u()) + "; 030t: "
                    + nf.format(getCount030t()) + "; 030c: " + nf.format(getCount030c()) + "; 201: "
                    + nf.format(getCount201()) + "; 120d: " + nf.format(getCount120d()) + "; 120u: "
                    + nf.format(getCount120u()) + "; 120c: " + nf.format(getCount120c()) + "; 210: "
                    + nf.format(getCount210()) + "; 300: " + nf.format(getCount300());
        }

        @Override
        public int hashCode() {
            return new HashCodeBuilder().append(counts).hashCode();
        }

        /**
         * Two results are equal when they are of the same class and all sixteen
         * counts are equal.
         */
        @Override
        public boolean equals(Object obj) {
            if (obj == null) {
                return false;
            }
            if (obj == this) {
                return true;
            }
            if (obj.getClass() != getClass()) {
                return false;
            }

            Result rhs = (Result) obj;

            return new EqualsBuilder().append(counts, rhs.counts).isEquals();
        }
    }
}