// GraphLab platform driver for the Graphalytics benchmark suite.
/**
 * Copyright 2015 Delft University of Technology
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package nl.tudelft.graphalytics.graphlab;

import nl.tudelft.graphalytics.Platform;
import nl.tudelft.graphalytics.PlatformExecutionException;
import nl.tudelft.graphalytics.configuration.ConfigurationUtil;
import nl.tudelft.graphalytics.configuration.InvalidConfigurationException;
import nl.tudelft.graphalytics.domain.*;
import nl.tudelft.graphalytics.graphlab.bfs.BreadthFirstSearchJob;
import nl.tudelft.graphalytics.graphlab.cd.CommunityDetectionJob;
import nl.tudelft.graphalytics.graphlab.conn.ConnectedComponentsJob;
import nl.tudelft.graphalytics.graphlab.stats.LocalClusteringCoefficientJob;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteException;
import org.apache.commons.exec.PumpStreamHandler;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.*;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;

/**
 * Entry point of the Graphalytics benchmark for GraphLab. Provides the platform
 * API required by the Graphalytics core to perform operations such as uploading
 * graphs and executing specific algorithms on specific graphs.
 *
 * @author Jorai Rijsdijk
 */
public class GraphLabPlatform implements Platform {
    private static final Logger LOG = LogManager.getLogger();

    /** Property key for setting whether to use hadoop or local execution/file storage. */
    private static final String TARGET_KEY = "graphlab.target";
    /** Property key for setting the amount of virtual cores to use in the Hadoop environment. */
    private static final String JOB_VIRTUALCORES_KEY = "graphlab.job.virtual-cores";
    /** Property key for setting the heap size for the Hadoop environment. */
    private static final String JOB_HEAPSIZE_KEY = "graphlab.job.heap-size";
    /** Property key for the directory on HDFS in which to store all input and output. */
    private static final String HDFS_DIRECTORY_KEY = "hadoop.hdfs.directory";
    /** Default value for the directory on HDFS in which to store all input and output. */
    private static final String HDFS_DIRECTORY = "graphalytics";

    // Path from the working directory to the build output (or jar) containing the python scripts
    private final String RELATIVE_PATH_TO_TARGET;
    private final String VIRTUAL_CORES;
    private final String HEAP_SIZE;
    private final String TARGET;
    private final boolean USE_HADOOP;

    // When true, jobs are asked to persist their result graphs (used for automated testing)
    private boolean saveGraphResult = false;
    // Maps graph name -> path (local path or HDFS URI) where the uploaded graph lives
    private Map<String, String> pathsOfGraphs = new HashMap<>();
    private org.apache.commons.configuration.Configuration graphlabConfig;
    private String hdfsDirectory;

    /**
     * Constructor that opens the GraphLab-specific properties file for the public
     * API implementation to use.
     */
    public GraphLabPlatform() {
        // Fill in the relative path from the working directory to the target directory containing
        // the graphlab build output files, or in the case of a distributed jar file, the location
        // of the jar file itself.
        String absolutePath = GraphLabPlatform.class.getProtectionDomain().getCodeSource().getLocation().getPath();
        absolutePath = absolutePath.substring(0, absolutePath.lastIndexOf('/')).replace("%20", " ");
        // The relative path is the path from the current working directory to the graphlab build output directory
        RELATIVE_PATH_TO_TARGET = new File(System.getProperty("user.dir")).toURI()
                .relativize(new File(absolutePath).toURI()).getPath();

        loadConfiguration();

        // Read the GraphLab specific configuration options that are the same for all algorithms
        TARGET = this.<String, String>getOption(TARGET_KEY, "local").toLowerCase();
        USE_HADOOP = TARGET.equals("hadoop");
        if (USE_HADOOP) {
            VIRTUAL_CORES = String.valueOf(getOption(JOB_VIRTUALCORES_KEY, 2));
            HEAP_SIZE = String.valueOf(getOption(JOB_HEAPSIZE_KEY, 4096));
        } else {
            VIRTUAL_CORES = "";
            HEAP_SIZE = "";
        }
    }

    /**
     * Loads the GraphLab-specific configuration from {@code graphlab.properties},
     * falling back to an empty configuration if the file cannot be read.
     */
    private void loadConfiguration() {
        // Load GraphLab-specific configuration
        try {
            graphlabConfig = new PropertiesConfiguration("graphlab.properties");
        } catch (ConfigurationException e) {
            // Fall-back to an empty properties file
            LOG.info("Could not find or load graphlab.properties.");
            graphlabConfig = new PropertiesConfiguration();
        }
        hdfsDirectory = graphlabConfig.getString(HDFS_DIRECTORY_KEY, HDFS_DIRECTORY);
    }

    /**
     * Uploads the given graph either to HDFS (when the configured target is "hadoop")
     * or registers its local path, and records the resulting location for later jobs.
     *
     * @param graph         the graph being uploaded
     * @param graphFilePath local path of the graph data file
     * @throws Exception if the HDFS upload fails
     */
    @Override
    public void uploadGraph(Graph graph, String graphFilePath) throws Exception {
        LOG.entry(graph, graphFilePath);

        if (USE_HADOOP) {
            String uploadPath = Paths.get(hdfsDirectory, getName(), "input", graph.getName()).toString();

            // Upload the graph to HDFS. try-with-resources guarantees the file system handle
            // is released even if the copy fails.
            String homeDirectoryUri;
            try (FileSystem fs = FileSystem.get(new Configuration())) {
                fs.copyFromLocalFile(new Path(graphFilePath), new Path(uploadPath));
                // Read the home directory while the file system is still open. The original
                // code called getHomeDirectory() after close(), a use-after-close defect.
                homeDirectoryUri = fs.getHomeDirectory().toUri().toString();
            }

            // Track available datasets in a map
            pathsOfGraphs.put(graph.getName(), homeDirectoryUri + "/" + uploadPath);
        } else {
            // Use local files, so just put the local file path in the map
            pathsOfGraphs.put(graph.getName(), graphFilePath);
        }

        LOG.exit();
    }

    /**
     * Executes the given algorithm on the given (previously uploaded) graph by
     * dispatching to the matching GraphLab python job.
     *
     * @param algorithmType the algorithm to run
     * @param graph         the graph to run it on
     * @param parameters    algorithm-specific parameters
     * @return an (empty) platform benchmark result on success
     * @throws PlatformExecutionException if the job throws or exits with a non-zero code
     */
    @Override
    public PlatformBenchmarkResult executeAlgorithmOnGraph(Algorithm algorithmType, Graph graph, Object parameters)
            throws PlatformExecutionException {
        LOG.entry(algorithmType, graph, parameters);
        int result;
        try {
            GraphLabJob job;
            String graphPath = pathsOfGraphs.get(graph.getName());
            GraphFormat graphFormat = graph.getGraphFormat();

            // Select the GraphLab job implementation for the requested algorithm
            switch (algorithmType) {
                case BFS:
                    job = new BreadthFirstSearchJob(parameters, graphPath, graphFormat);
                    break;
                case CD:
                    job = new CommunityDetectionJob(parameters, graphPath, graphFormat);
                    break;
                case CONN:
                    job = new ConnectedComponentsJob(graphPath, graphFormat);
                    break;
                // TODO: Implement ForestFireModel
                //case EVO:
                //    job = new ForestFireModelJob(parameters, graphPath, graphFormat);
                //    break;
                case STATS:
                    job = new LocalClusteringCoefficientJob(graphPath, graphFormat);
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported algorithm: " + algorithmType);
            }

            result = executePythonJob(job);
            // TODO: Clean up intermediate and output data, depending on some configuration.
        } catch (Exception e) {
            throw new PlatformExecutionException("GraphLab job failed with exception:", e);
        }

        if (result != 0) {
            throw new PlatformExecutionException("GraphLab job completed with exit code = " + result);
        }
        return LOG.exit(new PlatformBenchmarkResult(NestedConfiguration.empty()));
    }

    /**
     * Get a property from the GraphLab config.
     * The required type of the option is decided based on the type of the defaultValue.
     * The type of the property should be assignable from the type of the defaultValue,
     * as defined by: {@link Class#isAssignableFrom}
     *
     * @param sourceProperty The key of the property
     * @param defaultValue   The default value if the property is not set or of the wrong type
     * @param <T>            The type of the property
     * @param <S>            The type of the default value
     * @return The value of the property, if valid, or the default value
     */
    private <T, S extends T> T getOption(String sourceProperty, S defaultValue) {
        try {
            ConfigurationUtil.ensureConfigurationKeyExists(graphlabConfig, sourceProperty);
            Object value = graphlabConfig.getProperty(sourceProperty);
            // Check if the value object is the same class or a superclass of the default value
            if (value.getClass().isAssignableFrom(defaultValue.getClass())) {
                // Cast is guarded by the isAssignableFrom check above
                @SuppressWarnings("unchecked")
                T typedValue = (T) value;
                return typedValue;
            } else {
                // If not, throw an exception
                throw new InvalidConfigurationException(
                        "Invalid property type. Expected (superclass/instance of): \"" + defaultValue.getClass() +
                                "\", but got: \"" + value.getClass() + "\".");
            }
        } catch (InvalidConfigurationException e) {
            LOG.warn(e.getMessage() + " Defaulting to " + defaultValue + ".");
        }
        return defaultValue;
    }

    /**
     * Execute the python script belonging to a given AlgorithmType with the given graph location
     * and extra arguments and return the exit code of the created subprocess.
     *
     * @param job The GraphLab job to execute
     * @return The exit code of the python subprocess, or -1 if the job or its script is missing
     * @throws IOException When an I/O error occurs
     */
    private int executePythonJob(GraphLabJob job) throws IOException {
        LOG.entry(job);

        if (job == null) {
            LOG.warn("GraphLab job set to execute is null, skipping execution.");
            return LOG.exit(-1);
        }

        // Extract the script resource file
        File scriptFile = extractFile(job.getPythonFile());
        if (scriptFile == null) {
            return LOG.exit(-1);
        }

        // Construct the commandline execution pattern starting with the python executable
        CommandLine commandLine = new CommandLine("python2");

        // Add the arguments that are the same for all jobs
        commandLine.addArgument(scriptFile.getAbsolutePath());
        commandLine.addArgument("--target");
        commandLine.addArgument(TARGET);
        if (USE_HADOOP) {
            commandLine.addArgument("--virtual-cores");
            commandLine.addArgument(VIRTUAL_CORES, false);
            commandLine.addArgument("--heap-size");
            commandLine.addArgument(HEAP_SIZE, false);
        }

        // Add the save-result flag if requested (default false, but can be enabled for automated testing)
        if (saveGraphResult) {
            commandLine.addArgument("--save-result");
        }

        // Let the job format its arguments and add them to the commandline
        commandLine.addArguments(job.formatParametersAsStrings(), false);

        // Set the executor of the command; if desired this can be changed to a custom implementation
        DefaultExecutor executor = new DefaultExecutor();

        // Set the OutputStream to enable printing the output of the algorithm
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        executor.setStreamHandler(new PumpStreamHandler(outputStream));

        int result;
        try {
            // Execute the actual command and store the return code
            result = executor.execute(commandLine);
            // Print the command output
            System.out.println(outputStream.toString());
        } catch (ExecuteException e) {
            // Catch the exception thrown when the process exits with result != 0
            System.out.println(outputStream.toString());
            LOG.catching(Level.ERROR, e);
            return LOG.exit(e.getExitValue());
        }
        return LOG.exit(result);
    }

    /**
     * Extract a python file from the application resources to the build output directory.
     *
     * @param pythonFile The relative path to the python file
     * @return The File object of the extracted file, or null if extraction is impossible
     * @throws IOException Whenever an IOException occurs in one of the file operations
     */
    private File extractFile(String pythonFile) throws IOException {
        // Extract the script resource file
        File scriptFile = new File(RELATIVE_PATH_TO_TARGET, pythonFile);
        if (scriptFile.exists() && !scriptFile.canWrite()) {
            LOG.error("Cannot extract GraphLab " + pythonFile + " script to " + System.getProperty("user.dir") +
                    RELATIVE_PATH_TO_TARGET + ", no write access on an already existing file.");
            return null;
        } else if (!scriptFile.exists() && !scriptFile.getParentFile().mkdirs() && !scriptFile.createNewFile()) {
            // mkdirs() returning false means the directories already existed, in which case
            // createNewFile() is attempted; if that also fails, extraction is impossible.
            LOG.error("Cannot extract GraphLab " + pythonFile + " script to " + System.getProperty("user.dir") +
                    RELATIVE_PATH_TO_TARGET + ", failed to create the appropriate files/directories.");
            return null;
        }

        // Actually extract the algorithm script. Guard against a missing resource: the original
        // code would pass a null stream to writeFileFromStream and fail with an NPE.
        InputStream pythonFileInputStream = GraphLabPlatform.class.getResourceAsStream(pythonFile);
        if (pythonFileInputStream == null) {
            LOG.error("Cannot extract GraphLab " + pythonFile + " script, resource not found.");
            return null;
        }
        writeFileFromStream(pythonFileInputStream, scriptFile);
        return scriptFile;
    }

    /**
     * Extract a given resourceInputStream to the given outputFile, overwriting any preexisting file.
     * Both streams are closed when this method returns.
     *
     * @param resourceInputStream The InputStream to copy from
     * @param outputFile          The File to copy to
     * @throws IOException When an I/O error occurs
     */
    private void writeFileFromStream(InputStream resourceInputStream, File outputFile) throws IOException {
        // try-with-resources closes both streams; the original leaked resourceInputStream
        try (InputStream inputStream = resourceInputStream;
             OutputStream outputStream = new FileOutputStream(outputFile)) {
            int read;
            byte[] bytes = new byte[1024];
            while ((read = inputStream.read(bytes)) != -1) {
                outputStream.write(bytes, 0, read);
            }
        }
    }

    /**
     * Set the parameter to enable/disable the algorithm argument to save the
     * processed graphs so they can be retrieved later.
     *
     * @param saveGraphResult Whether or not to save the resulting graphs
     */
    public void setSaveGraphResult(boolean saveGraphResult) {
        this.saveGraphResult = saveGraphResult;
    }

    @Override
    public void deleteGraph(String graphName) {
        if (USE_HADOOP) {
            // TODO: Clean up uploaded graph
        }
    }

    @Override
    public String getName() {
        return "graphlab";
    }

    @Override
    public NestedConfiguration getPlatformConfiguration() {
        try {
            org.apache.commons.configuration.Configuration configuration =
                    new PropertiesConfiguration("graphlab.properties");
            return NestedConfiguration.fromExternalConfiguration(configuration, "graphlab.properties");
        } catch (ConfigurationException ex) {
            return NestedConfiguration.empty();
        }
    }
}