// Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.graph.generator;

import org.apache.commons.math3.random.RandomGenerator;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.generator.random.BlockInfo;
import org.apache.flink.graph.generator.random.RandomGenerableFactory;
import org.apache.flink.types.LongValue;
import org.apache.flink.types.NullValue;
import org.apache.flink.util.Collector;
import org.apache.flink.util.Preconditions;

import java.util.List;

/**
 * Graph generator based on the Recursive Matrix (R-Mat) model.
 *
 * <p>Each edge is produced by choosing, bit by bit, one of the four quadrants
 * of the adjacency matrix with probabilities {@code A}, {@code B}, {@code C}
 * and {@code D = 1 - A - B - C}. Every generated edge is emitted once, from
 * source to target; duplicate edges and self-loops are not filtered here.
 *
 * @param <T> the type of random generator provided by the factory
 *
 * @see <a href="http://www.cs.cmu.edu/~christos/PUBLICATIONS/siam04.pdf">R-MAT: A Recursive Model for Graph Mining</a>
 */
public class RMatGraph<T extends RandomGenerator>
        extends AbstractGraphGenerator<LongValue, NullValue, NullValue> {

    public static final int MINIMUM_VERTEX_COUNT = 1;

    public static final int MINIMUM_EDGE_COUNT = 1;

    // Default RMat constants
    public static final float DEFAULT_A = 0.57f;

    public static final float DEFAULT_B = 0.19f;

    public static final float DEFAULT_C = 0.19f;

    public static final float DEFAULT_NOISE = 0.10f;

    // Required to create the DataSource
    private final ExecutionEnvironment env;

    // Required configuration
    private final RandomGenerableFactory<T> randomGenerableFactory;

    private final long vertexCount;

    private final long edgeCount;

    // Optional configuration
    //
    // NOTE: A, B, C and noise are public and mutable, so they can be assigned
    // directly without the range checks performed by setConstants / setNoise.
    // They are kept public to preserve the existing interface; prefer the setters.
    public float A = DEFAULT_A;

    public float B = DEFAULT_B;

    public float C = DEFAULT_C;

    private boolean noiseEnabled = false;

    public float noise = DEFAULT_NOISE;

    /**
     * Configure a generator for a power-law {@link Graph} using the
     * Recursive Matrix (R-Mat) model.
     *
     * @param env the Flink execution environment
     * @param randomGeneratorFactory source of randomness
     * @param vertexCount number of vertices; must be at least {@link #MINIMUM_VERTEX_COUNT}
     * @param edgeCount number of edges; must be at least {@link #MINIMUM_EDGE_COUNT}
     * @throws IllegalArgumentException presumably thrown by
     *         {@code Preconditions.checkArgument} when a count is below its minimum
     */
    public RMatGraph(ExecutionEnvironment env, RandomGenerableFactory<T> randomGeneratorFactory,
            long vertexCount, long edgeCount) {
        Preconditions.checkArgument(vertexCount >= MINIMUM_VERTEX_COUNT,
            "Vertex count must be at least " + MINIMUM_VERTEX_COUNT);

        Preconditions.checkArgument(edgeCount >= MINIMUM_EDGE_COUNT,
            "Edge count must be at least " + MINIMUM_EDGE_COUNT);

        this.env = env;
        this.randomGenerableFactory = randomGeneratorFactory;
        this.vertexCount = vertexCount;
        this.edgeCount = edgeCount;
    }

    /**
     * The parameters for recursively subdividing the adjacency matrix.
     *
     * <p>Setting A = B = C = 0.25 emulates the Erd&#337;s&ndash;R&eacute;nyi model.
     *
     * <p>Graph500 uses A = 0.57, B = C = 0.19.
     *
     * <p>The fourth quadrant probability is derived as {@code 1 - A - B - C}.
     *
     * @param A likelihood of source bit = 0, target bit = 0
     * @param B likelihood of source bit = 0, target bit = 1
     * @param C likelihood of source bit = 1, target bit = 0
     * @return this
     */
    public RMatGraph<T> setConstants(float A, float B, float C) {
        Preconditions.checkArgument(A >= 0.0f && B >= 0.0f && C >= 0.0f && A + B + C <= 1.0f,
            "RMat parameters A, B, and C must be non-negative and sum to less than or equal to one");

        this.A = A;
        this.B = B;
        this.C = C;

        return this;
    }

    /**
     * Enable and configure noise. Each edge is generated independently, but
     * when noise is enabled the parameters A, B, and C are randomly increased
     * or decreased, then normalized, by a fraction of the noise factor during
     * the computation of each bit.
     *
     * <p>The noise value is range-checked even when {@code noiseEnabled} is false.
     *
     * @param noiseEnabled whether to enable noise perturbation
     * @param noise strength of noise perturbation, in the range [0.0, 2.0]
     * @return this
     */
    public RMatGraph<T> setNoise(boolean noiseEnabled, float noise) {
        Preconditions.checkArgument(noise >= 0.0f && noise <= 2.0f,
            "RMat parameter noise must be non-negative and less than or equal to 2.0");

        this.noiseEnabled = noiseEnabled;
        this.noise = noise;

        return this;
    }

    /**
     * Build the graph: random generator blocks are distributed across the
     * parallel instances, each block is expanded into edges, and the vertex
     * set is then derived from the generated edges.
     *
     * @return the generated graph
     */
    @Override
    public Graph<LongValue, NullValue, NullValue> generate() {
        // number of bits needed to represent the largest vertex ID (vertexCount - 1)
        int scale = Long.SIZE - Long.numberOfLeadingZeros(vertexCount - 1);

        // Edges

        // one random draw per bit selects the quadrant; with noise enabled four
        // additional draws per bit perturb a, b, c, and d
        int cyclesPerEdge = noiseEnabled ? 5 * scale : scale;

        List<BlockInfo<T>> generatorBlocks = randomGenerableFactory
            .getRandomGenerables(edgeCount, cyclesPerEdge);

        DataSet<Edge<LongValue, NullValue>> edges = env
            .fromCollection(generatorBlocks)
                .name("Random generators")
            .rebalance()
                .setParallelism(parallelism)
                .name("Rebalance")
            .flatMap(new GenerateEdges<T>(vertexCount, scale, A, B, C, noiseEnabled, noise))
                .setParallelism(parallelism)
                .name("RMat graph edges");

        // Vertices
        DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSet(edges, parallelism);

        // Graph
        return Graph.fromDataSet(vertices, edges, env);
    }

    /**
     * Expands one block of random generator state into the number of edges
     * assigned to that block.
     *
     * @param <T> the type of random generator carried by the block
     */
    private static final class GenerateEdges<T extends RandomGenerator>
            implements FlatMapFunction<BlockInfo<T>, Edge<LongValue, NullValue>> {

        // Configuration
        private final long vertexCount;

        private final int scale;

        private final float A;

        private final float B;

        private final float C;

        private final float D;

        private final boolean noiseEnabled;

        private final float noise;

        // Output: the same mutable objects are re-used for every emitted edge
        private final LongValue source = new LongValue();

        private final LongValue target = new LongValue();

        private final Edge<LongValue, NullValue> sourceToTarget =
            new Edge<>(source, target, NullValue.getInstance());

        /**
         * @param vertexCount number of vertices; edges touching IDs at or above this value are discarded
         * @param scale number of bits generated for each of the source and target IDs
         * @param A likelihood of source bit = 0, target bit = 0
         * @param B likelihood of source bit = 0, target bit = 1
         * @param C likelihood of source bit = 1, target bit = 0
         * @param noiseEnabled whether to perturb the matrix constants after each bit
         * @param noise strength of the perturbation
         */
        public GenerateEdges(long vertexCount, int scale, float A, float B, float C,
                boolean noiseEnabled, float noise) {
            this.vertexCount = vertexCount;
            this.scale = scale;
            this.A = A;
            this.B = B;
            this.C = C;
            this.D = 1.0f - A - B - C;
            this.noiseEnabled = noiseEnabled;
            this.noise = noise;
        }

        /**
         * Emit {@code blockInfo.getElementCount()} edges drawn from the block's
         * random generator.
         *
         * <p>Candidate edges with an endpoint outside {@code [0, vertexCount)} are
         * re-drawn rather than counted, so the loop only terminates if in-range
         * edges have non-zero probability under the configured constants.
         *
         * @param blockInfo the random generator and the number of edges to produce from it
         * @param out collector receiving the generated edges
         */
        @Override
        public void flatMap(BlockInfo<T> blockInfo, Collector<Edge<LongValue, NullValue>> out)
                throws Exception {
            RandomGenerator rng = blockInfo.getRandomGenerable().generator();
            long edgesToGenerate = blockInfo.getElementCount();

            while (edgesToGenerate > 0) {
                long x = 0;
                long y = 0;

                // matrix constants are reset for each edge
                float a = A;
                float b = B;
                float c = C;
                float d = D;

                for (int bit = 0; bit < scale; bit++) {
                    // generated next bit for source and target
                    x <<= 1;
                    y <<= 1;

                    float random = rng.nextFloat();

                    if (random <= a) {
                        // quadrant A: source bit = 0, target bit = 0
                    } else if (random <= a + b) {
                        y += 1;
                    } else if (random <= a + b + c) {
                        x += 1;
                    } else {
                        x += 1;
                        y += 1;
                    }

                    if (noiseEnabled) {
                        // noise is bounded such that all parameters remain non-negative;
                        // the draw order a, b, c, d must be kept for reproducible output
                        a *= 1.0 - noise / 2 + rng.nextFloat() * noise;
                        b *= 1.0 - noise / 2 + rng.nextFloat() * noise;
                        c *= 1.0 - noise / 2 + rng.nextFloat() * noise;
                        d *= 1.0 - noise / 2 + rng.nextFloat() * noise;

                        // normalize back to a + b + c + d = 1.0
                        float norm = 1.0f / (a + b + c + d);

                        a *= norm;
                        b *= norm;
                        c *= norm;

                        // could multiply by norm, but subtract to minimize rounding error
                        d = 1.0f - a - b - c;
                    }
                }

                // if vertexCount is not a power-of-2 then discard edges outside the vertex range
                if (x < vertexCount && y < vertexCount) {
                    source.setValue(x);
                    target.setValue(y);

                    out.collect(sourceToTarget);

                    edgesToGenerate--;
                }
            }
        }
    }
}