org.data2semantics.giraph.pagerank.numerical.RandomWalkWorkerContext.java Source code

Java tutorial

Introduction

Here is the source code for org.data2semantics.giraph.pagerank.numerical.RandomWalkWorkerContext.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.data2semantics.giraph.pagerank.numerical;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Set;

import org.apache.giraph.worker.WorkerContext;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

import com.google.common.collect.ImmutableSet;

/**
 * Worker context for random walks.
 */
public class RandomWalkWorkerContext extends WorkerContext {
    /** Default maximum number of iterations */
    private static final int DEFAULT_MAX_SUPERSTEPS = 30;
    /** Default teleportation probability */
    private static final float DEFAULT_TELEPORTATION_PROBABILITY = 0.15f;
    /** Maximum number of iterations */
    private static int MAX_SUPERSTEPS;
    /** Teleportation probability */
    private static double TELEPORTATION_PROBABILITY;
    /** Preference vector */
    private static Set<String> SOURCES;

    /** Configuration parameter for the source vertex */
    private static final String SOURCE_VERTEX = RandomWalkWithRestartComputation.class.getName() + ".sourceVertex";

    /** Logger */
    private static final Logger LOG = Logger.getLogger(RandomWalkWorkerContext.class);

    /**
     * @return The maximum number of iterations to perform.
     */
    public int getMaxSupersteps() {
        if (MAX_SUPERSTEPS == 0) {
            throw new IllegalStateException(RandomWalkWorkerContext.class.getSimpleName()
                    + " was not initialized. Realunch your job " + "by setting the appropriate WorkerContext");
        }
        return MAX_SUPERSTEPS;
    }

    /**
     * @return The teleportation probability.
     */
    public double getTeleportationProbability() {
        if (TELEPORTATION_PROBABILITY == 0) {
            throw new IllegalStateException(RandomWalkWorkerContext.class.getSimpleName()
                    + " was not initialized. Realunch your job " + "by setting the appropriate WorkerContext");
        }
        return TELEPORTATION_PROBABILITY;
    }

    /**
     * Checks if a vertex is a source.
     * 
     * @param id
     *            The vertex ID to check.
     * @return True if the vertex is a source in the preference vector.
     */
    public boolean isSource(String id) {
        return SOURCES.contains(id);
    }

    /**
     * @return The number of sources in the preference vector.
     */
    public int numSources() {
        return SOURCES.size();
    }

    /**
     * Initialize sources for Random Walk with Restart. First option
     * (preferential) is single source given from the command line as a
     * parameter. Second option is a file with a list of vertex IDs, one per
     * line. In this second case the preference vector is a uniform distribution
     * over these vertexes.
     * 
     * @param configuration
     *            The configuration.
     * @return a (possibly empty) set of source vertices
     */
    private ImmutableSet<String> initializeSources(Configuration configuration) {
        ImmutableSet.Builder<String> builder = ImmutableSet.builder();
        String sourceVertex = configuration.get(SOURCE_VERTEX);
        if (sourceVertex != null) {
            return ImmutableSet.of(sourceVertex);
        } else {
            Path sourceFile = null;
            try {

                Path[] cacheFiles = DistributedCache.getLocalCacheFiles(configuration);
                if (cacheFiles == null || cacheFiles.length == 0) {
                    // empty set if no source vertices configured
                    return ImmutableSet.of();
                }

                sourceFile = cacheFiles[0];
                FileSystem fs = FileSystem.getLocal(configuration);
                BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(sourceFile)));
                String line;
                while ((line = in.readLine()) != null) {
                    builder.add(line);
                }
                in.close();
            } catch (IOException e) {
                getContext().setStatus("Could not load local cache files: " + sourceFile);
                LOG.error("Could not load local cache files: " + sourceFile, e);
            }
        }
        return builder.build();
    }

    @Override
    public void preApplication() throws InstantiationException, IllegalAccessException {
        Configuration configuration = getContext().getConfiguration();
        MAX_SUPERSTEPS = configuration.getInt(RandomWalkComputation.MAX_SUPERSTEPS, DEFAULT_MAX_SUPERSTEPS);
        TELEPORTATION_PROBABILITY = configuration.getFloat(RandomWalkComputation.TELEPORTATION_PROBABILITY,
                DEFAULT_TELEPORTATION_PROBABILITY);
        SOURCES = initializeSources(configuration);
    }

    @Override
    public void preSuperstep() {
    }

    @Override
    public void postSuperstep() {
    }

    @Override
    public void postApplication() {
    }
}