org.janusgraph.graphdb.olap.computer.FulgoraGraphComputer.java Source code

Java tutorial

Introduction

Here is the source code for org.janusgraph.graphdb.olap.computer.FulgoraGraphComputer.java

Source

// Copyright 2017 JanusGraph Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.janusgraph.graphdb.olap.computer;

import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import org.janusgraph.core.JanusGraphException;
import org.janusgraph.core.JanusGraphComputer;
import org.janusgraph.core.JanusGraphTransaction;
import org.janusgraph.core.schema.JanusGraphManagement;
import org.janusgraph.diskstorage.configuration.Configuration;
import org.janusgraph.diskstorage.keycolumnvalue.scan.ScanMetrics;
import org.janusgraph.diskstorage.keycolumnvalue.scan.StandardScanner;
import org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration;
import org.janusgraph.graphdb.database.StandardJanusGraph;
import org.janusgraph.graphdb.util.WorkerPool;
import org.apache.tinkerpop.gremlin.process.computer.ComputerResult;
import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
import org.apache.tinkerpop.gremlin.process.computer.GraphFilter;
import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
import org.apache.tinkerpop.gremlin.process.computer.VertexComputeKey;
import org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult;
import org.apache.tinkerpop.gremlin.process.computer.util.GraphComputerHelper;
import org.apache.tinkerpop.gremlin.process.computer.util.VertexProgramHelper;
import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.structure.Edge;
import org.apache.tinkerpop.gremlin.structure.VertexProperty;
import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
import org.apache.tinkerpop.gremlin.structure.util.empty.EmptyGraph;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * @author Matthias Broecheler (me@matthiasb.com)
 */
public class FulgoraGraphComputer implements JanusGraphComputer {

    private static final Logger log = LoggerFactory.getLogger(FulgoraGraphComputer.class);

    private VertexProgram<?> vertexProgram;
    private final Set<MapReduce> mapReduces = new HashSet<>();

    private final StandardJanusGraph graph;
    private int expectedNumVertices = 10000;
    private FulgoraMemory memory;
    private FulgoraVertexMemory vertexMemory;
    private boolean executed = false;

    private int numThreads = 1;//Math.max(1,Runtime.getRuntime().availableProcessors());
    private int readBatchSize = 10000;
    private int writeBatchSize;

    private ResultGraph resultGraphMode = null;
    private Persist persistMode = null;

    private static final AtomicInteger computerCounter = new AtomicInteger(0);
    private String name;
    private String jobId;

    private final GraphFilter graphFilter = new GraphFilter();

    public FulgoraGraphComputer(final StandardJanusGraph graph, final Configuration configuration) {
        this.graph = graph;
        this.writeBatchSize = configuration.get(GraphDatabaseConfiguration.BUFFER_SIZE);
        this.readBatchSize = this.writeBatchSize * 10;
        this.name = "compute" + computerCounter.incrementAndGet();
    }

    @Override
    public GraphComputer vertices(final Traversal<Vertex, Vertex> vertexFilter) {
        this.graphFilter.setVertexFilter(vertexFilter);
        return this;
    }

    @Override
    public GraphComputer edges(final Traversal<Vertex, Edge> edgeFilter) {
        this.graphFilter.setEdgeFilter(edgeFilter);
        return this;
    }

    @Override
    public GraphComputer result(ResultGraph resultGraph) {
        Preconditions.checkArgument(resultGraph != null, "Need to specify mode");
        this.resultGraphMode = resultGraph;
        return this;
    }

    @Override
    public GraphComputer persist(Persist persist) {
        Preconditions.checkArgument(persist != null, "Need to specify mode");
        this.persistMode = persist;
        return this;
    }

    @Override
    public JanusGraphComputer workers(int threads) {
        Preconditions.checkArgument(threads > 0, "Invalid number of threads: %s", threads);
        numThreads = threads;
        return this;
    }

    @Override
    public GraphComputer program(final VertexProgram vertexProgram) {
        Preconditions.checkState(this.vertexProgram == null, "A vertex program has already been set");
        this.vertexProgram = vertexProgram;
        return this;
    }

    @Override
    public GraphComputer mapReduce(final MapReduce mapReduce) {
        this.mapReduces.add(mapReduce);
        return this;
    }

    @Override
    public Future<ComputerResult> submit() {
        if (executed)
            throw Exceptions.computerHasAlreadyBeenSubmittedAVertexProgram();
        else
            executed = true;

        // it is not possible execute a computer if it has no vertex program nor mapreducers
        if (null == vertexProgram && mapReduces.isEmpty())
            throw GraphComputer.Exceptions.computerHasNoVertexProgramNorMapReducers();
        // it is possible to run mapreducers without a vertex program
        if (null != vertexProgram) {
            GraphComputerHelper.validateProgramOnComputer(this, vertexProgram);
            this.mapReduces.addAll(this.vertexProgram.getMapReducers());
        }

        // if the user didn't set desired persistence/resultgraph, then get from vertex program or else, no persistence
        this.persistMode = GraphComputerHelper.getPersistState(Optional.ofNullable(this.vertexProgram),
                Optional.ofNullable(this.persistMode));
        this.resultGraphMode = GraphComputerHelper.getResultGraphState(Optional.ofNullable(this.vertexProgram),
                Optional.ofNullable(this.resultGraphMode));
        // determine the legality persistence and result graph options
        if (!this.features().supportsResultGraphPersistCombination(this.resultGraphMode, this.persistMode))
            throw GraphComputer.Exceptions.resultGraphPersistCombinationNotSupported(this.resultGraphMode,
                    this.persistMode);
        // ensure requested workers are not larger than supported workers
        if (this.numThreads > this.features().getMaxWorkers())
            throw GraphComputer.Exceptions.computerRequiresMoreWorkersThanSupported(this.numThreads,
                    this.features().getMaxWorkers());

        memory = new FulgoraMemory(vertexProgram, mapReduces);

        return CompletableFuture.<ComputerResult>supplyAsync(() -> {
            final long time = System.currentTimeMillis();
            if (null != vertexProgram) {
                // ##### Execute vertex program
                vertexMemory = new FulgoraVertexMemory(expectedNumVertices, graph.getIDManager(), vertexProgram);
                // execute the vertex program
                vertexProgram.setup(memory);

                try (VertexProgramScanJob.Executor job = VertexProgramScanJob.getVertexProgramScanJob(graph, memory,
                        vertexMemory, vertexProgram)) {
                    for (int iteration = 1;; iteration++) {
                        memory.completeSubRound();
                        vertexMemory.nextIteration(vertexProgram.getMessageScopes(memory));

                        jobId = name + "#" + iteration;
                        StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
                        scanBuilder.setJobId(jobId);
                        scanBuilder.setNumProcessingThreads(numThreads);
                        scanBuilder.setWorkBlockSize(readBatchSize);
                        scanBuilder.setJob(job);
                        PartitionedVertexProgramExecutor pvpe = new PartitionedVertexProgramExecutor(graph, memory,
                                vertexMemory, vertexProgram);
                        try {
                            //Iterates over all vertices and computes the vertex program on all non-partitioned vertices. For partitioned ones, the data is aggregated
                            ScanMetrics jobResult = scanBuilder.execute().get();
                            long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
                            if (failures > 0) {
                                throw new JanusGraphException("Failed to process [" + failures
                                        + "] vertices in vertex program iteration [" + iteration
                                        + "]. Computer is aborting.");
                            }
                            //Runs the vertex program on all aggregated, partitioned vertices.
                            pvpe.run(numThreads, jobResult);
                            failures = jobResult
                                    .getCustom(PartitionedVertexProgramExecutor.PARTITION_VERTEX_POSTFAIL);
                            if (failures > 0) {
                                throw new JanusGraphException("Failed to process [" + failures
                                        + "] partitioned vertices in vertex program iteration [" + iteration
                                        + "]. Computer is aborting.");
                            }
                        } catch (Exception e) {
                            throw new JanusGraphException(e);
                        }

                        vertexMemory.completeIteration();
                        memory.completeSubRound();
                        try {
                            if (this.vertexProgram.terminate(this.memory)) {
                                break;
                            }
                        } finally {
                            memory.incrIteration();
                        }
                    }
                }
            }

            // ##### Execute mapreduce jobs
            // Collect map jobs
            Map<MapReduce, FulgoraMapEmitter> mapJobs = new HashMap<>(mapReduces.size());
            for (MapReduce mapReduce : mapReduces) {
                if (mapReduce.doStage(MapReduce.Stage.MAP)) {
                    FulgoraMapEmitter mapEmitter = new FulgoraMapEmitter<>(
                            mapReduce.doStage(MapReduce.Stage.REDUCE));
                    mapJobs.put(mapReduce, mapEmitter);
                }
            }
            // Execute map jobs
            jobId = name + "#map";
            try (VertexMapJob.Executor job = VertexMapJob.getVertexMapJob(graph, vertexMemory, mapJobs)) {
                StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
                scanBuilder.setJobId(jobId);
                scanBuilder.setNumProcessingThreads(numThreads);
                scanBuilder.setWorkBlockSize(readBatchSize);
                scanBuilder.setJob(job);
                try {
                    ScanMetrics jobResult = scanBuilder.execute().get();
                    long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
                    if (failures > 0) {
                        throw new JanusGraphException("Failed to process [" + failures
                                + "] vertices in map phase. Computer is aborting.");
                    }
                    failures = jobResult.getCustom(VertexMapJob.MAP_JOB_FAILURE);
                    if (failures > 0) {
                        throw new JanusGraphException(
                                "Failed to process [" + failures + "] individual map jobs. Computer is aborting.");
                    }
                } catch (Exception e) {
                    throw new JanusGraphException(e);
                }
                // Execute reduce phase and add to memory
                for (Map.Entry<MapReduce, FulgoraMapEmitter> mapJob : mapJobs.entrySet()) {
                    FulgoraMapEmitter<?, ?> mapEmitter = mapJob.getValue();
                    MapReduce mapReduce = mapJob.getKey();
                    mapEmitter.complete(mapReduce); // sort results if a map output sort is defined
                    if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
                        final FulgoraReduceEmitter<?, ?> reduceEmitter = new FulgoraReduceEmitter<>();
                        try (WorkerPool workers = new WorkerPool(numThreads)) {
                            workers.submit(() -> mapReduce.workerStart(MapReduce.Stage.REDUCE));
                            for (final Map.Entry queueEntry : mapEmitter.reduceMap.entrySet()) {
                                if (null == queueEntry)
                                    break;
                                workers.submit(() -> mapReduce.reduce(queueEntry.getKey(),
                                        ((Iterable) queueEntry.getValue()).iterator(), reduceEmitter));
                            }
                            workers.submit(() -> mapReduce.workerEnd(MapReduce.Stage.REDUCE));
                        } catch (Exception e) {
                            throw new JanusGraphException("Exception while executing reduce phase", e);
                        }
                        //                    mapEmitter.reduceMap.entrySet().parallelStream().forEach(entry -> mapReduce.reduce(entry.getKey(), entry.getValue().iterator(), reduceEmitter));

                        reduceEmitter.complete(mapReduce); // sort results if a reduce output sort is defined
                        mapReduce.addResultToMemory(this.memory, reduceEmitter.reduceQueue.iterator());
                    } else {
                        mapReduce.addResultToMemory(this.memory, mapEmitter.mapQueue.iterator());
                    }
                }
            }
            memory.attachReferenceElements(graph);

            // #### Write mutated properties back into graph
            Graph resultgraph = graph;
            if (persistMode == Persist.NOTHING && resultGraphMode == ResultGraph.NEW) {
                resultgraph = EmptyGraph.instance();
            } else if (persistMode != Persist.NOTHING && vertexProgram != null
                    && !vertexProgram.getVertexComputeKeys().isEmpty()) {
                //First, create property keys in graph if they don't already exist
                JanusGraphManagement mgmt = graph.openManagement();
                try {
                    for (VertexComputeKey key : vertexProgram.getVertexComputeKeys()) {
                        if (!mgmt.containsPropertyKey(key.getKey()))
                            log.warn(
                                    "Property key [{}] is not part of the schema and will be created. It is advised to initialize all keys.",
                                    key.getKey());
                        mgmt.getOrCreatePropertyKey(key.getKey());
                    }
                    mgmt.commit();
                } finally {
                    if (mgmt != null && mgmt.isOpen())
                        mgmt.rollback();
                }

                //TODO: Filter based on VertexProgram
                Map<Long, Map<String, Object>> mutatedProperties = Maps.transformValues(
                        vertexMemory.getMutableVertexProperties(),
                        new Function<Map<String, Object>, Map<String, Object>>() {
                            @Nullable
                            @Override
                            public Map<String, Object> apply(@Nullable Map<String, Object> o) {
                                return Maps.filterKeys(o, s -> !VertexProgramHelper.isTransientVertexComputeKey(s,
                                        vertexProgram.getVertexComputeKeys()));
                            }
                        });

                if (resultGraphMode == ResultGraph.ORIGINAL) {
                    AtomicInteger failures = new AtomicInteger(0);
                    try (WorkerPool workers = new WorkerPool(numThreads)) {
                        List<Map.Entry<Long, Map<String, Object>>> subset = new ArrayList<>(
                                writeBatchSize / vertexProgram.getVertexComputeKeys().size());
                        int currentSize = 0;
                        for (Map.Entry<Long, Map<String, Object>> entry : mutatedProperties.entrySet()) {
                            subset.add(entry);
                            currentSize += entry.getValue().size();
                            if (currentSize >= writeBatchSize) {
                                workers.submit(new VertexPropertyWriter(subset, failures));
                                subset = new ArrayList<>(subset.size());
                                currentSize = 0;
                            }
                        }
                        if (!subset.isEmpty())
                            workers.submit(new VertexPropertyWriter(subset, failures));
                    } catch (Exception e) {
                        throw new JanusGraphException("Exception while attempting to persist result into graph", e);
                    }
                    if (failures.get() > 0)
                        throw new JanusGraphException(
                                "Could not persist program results to graph. Check log for details.");
                } else if (resultGraphMode == ResultGraph.NEW) {
                    resultgraph = graph.newTransaction();
                    for (Map.Entry<Long, Map<String, Object>> vprop : mutatedProperties.entrySet()) {
                        Vertex v = resultgraph.vertices(vprop.getKey()).next();
                        for (Map.Entry<String, Object> prop : vprop.getValue().entrySet()) {
                            v.property(VertexProperty.Cardinality.single, prop.getKey(), prop.getValue());
                        }
                    }
                }
            }
            // update runtime and return the newly computed graph
            this.memory.setRuntime(System.currentTimeMillis() - time);
            this.memory.complete();
            return new DefaultComputerResult(resultgraph, this.memory);
        });
    }

    private class VertexPropertyWriter implements Runnable {

        private final List<Map.Entry<Long, Map<String, Object>>> properties;
        private final AtomicInteger failures;

        private VertexPropertyWriter(List<Map.Entry<Long, Map<String, Object>>> properties,
                AtomicInteger failures) {
            assert properties != null && !properties.isEmpty() && failures != null;
            this.properties = properties;
            this.failures = failures;
        }

        @Override
        public void run() {
            JanusGraphTransaction tx = graph.buildTransaction().enableBatchLoading().start();
            try {
                for (Map.Entry<Long, Map<String, Object>> vprop : properties) {
                    Vertex v = tx.getVertex(vprop.getKey());
                    for (Map.Entry<String, Object> prop : vprop.getValue().entrySet()) {
                        v.property(VertexProperty.Cardinality.single, prop.getKey(), prop.getValue());
                    }
                }
                tx.commit();
            } catch (Throwable e) {
                failures.incrementAndGet();
                log.error("Encountered exception while trying to write properties: ", e);
            } finally {
                if (tx != null && tx.isOpen())
                    tx.rollback();
            }
        }
    }

    @Override
    public String toString() {
        return StringFactory.graphComputerString(this);
    }

    @Override
    public Features features() {
        return new Features() {
            @Override
            public boolean supportsVertexAddition() {
                return false;
            }

            @Override
            public boolean supportsVertexRemoval() {
                return false;
            }

            @Override
            public boolean supportsVertexPropertyAddition() {
                return true;
            }

            @Override
            public boolean supportsVertexPropertyRemoval() {
                return false;
            }

            @Override
            public boolean supportsEdgeAddition() {
                return false;
            }

            @Override
            public boolean supportsEdgeRemoval() {
                return false;
            }

            @Override
            public boolean supportsEdgePropertyAddition() {
                return false;
            }

            @Override
            public boolean supportsEdgePropertyRemoval() {
                return false;
            }

            @Override
            public boolean supportsGraphFilter() {
                return false;
            }

        };
    }
}