Example usage for org.apache.hadoop.conf Configuration getBoolean

List of usage examples for org.apache.hadoop.conf Configuration getBoolean

Introduction

On this page you can find example usages of the org.apache.hadoop.conf.Configuration method getBoolean.

Prototype

public boolean getBoolean(String name, boolean defaultValue) 

Document

Get the value of the name property as a boolean. If no such property is specified, or if the specified value is not a valid boolean, then defaultValue is returned.
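
For orientation, a minimal sketch of the method's behavior; the property names are invented for illustration. The default is returned when the property is unset or not a parseable boolean; otherwise the stored string is matched case-insensitively against "true"/"false".

import org.apache.hadoop.conf.Configuration;

public class GetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Property not set: the supplied default is returned.
        boolean cleanup = conf.getBoolean("example.cleanup", true); // true

        // Property set explicitly: the stored value wins over the default.
        conf.setBoolean("example.paired", false);
        boolean paired = conf.getBoolean("example.paired", true); // false

        System.out.println(cleanup + " " + paired);
    }
}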

Usage

From source file:gov.jgi.meta.exec.BlatCommand.java

License:Open Source License

/**
 * new blat command based on values stored in the configuration.
 * <p/>
 * Looks for the following config values: blat.commandline,
 * blat.commandpath, and blat.tmpdir
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 */
public BlatCommand(Configuration config) throws IOException {

    String c;

    if ((c = config.get("blat.commandline")) != null) {
        commandLine = c;
    } else {
        commandLine = DEFAULTCOMMANDLINE;
    }
    if ((c = config.get("blat.commandpath")) != null) {
        commandPath = c;
    } else {
        commandPath = DEFAULTCOMMANDPATH;
    }
    if ((c = config.get("blat.tmpdir")) != null) {
        tmpDir = c;
    } else {
        tmpDir = DEFAULTTMPDIR;
    }

    doCleanup = config.getBoolean("blat.cleanup", true);
    paired = config.getBoolean("blat.paired", true);

    /*
    do sanity check to make sure all paths exist
    */
    //checkFileExists(commandLine);
    //checkFileExists(commandPath);
    //checkDirExists(tmpDir);

    /*
    if all is good, create a working space inside tmpDir
     */

    tmpDirFile = MetaUtils.createTempDir("blat_", tmpDir);

}
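
A hedged usage sketch for the constructor above: a driver can override both booleans it reads by setting them on the Configuration first. Only keys shown in the source are used; the surrounding job setup is assumed. The same pattern applies to the cap3, minimus, newbler, and velvet commands below, which read assembler.cleanup instead.

Configuration conf = new Configuration();
conf.setBoolean("blat.cleanup", false); // keep the temp working directory for inspection
conf.setBoolean("blat.paired", true);   // explicit, same as the constructor's default
BlatCommand blat = new BlatCommand(conf);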

From source file:gov.jgi.meta.exec.CapCommand.java

License:Open Source License

/**
 * new cap3 command based on values stored in the configuration.
 * <p/>
 * Looks for the following config values: cap3.commandline,
 * cap3.commandpath, and assembler.tmpdir
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 * @throws IOException if command can't be configured
 */
public CapCommand(Configuration config) throws IOException {
    String c;

    if ((c = config.get("cap3.commandline")) != null) {
        commandLine = c;
    } else {
        commandLine = DEFAULTCOMMANDLINE;
    }
    if ((c = config.get("cap3.commandpath")) != null) {
        commandPath = c;
    } else {
        commandPath = DEFAULTCOMMANDPATH;
    }
    if ((c = config.get("assembler.tmpdir")) != null) {
        tmpDir = c;
    } else {
        tmpDir = DEFAULTTMPDIR;
    }

    if ((c = config.get("recoverypath")) != null) {
        recoveryPath = c;
    }

    doCleanup = config.getBoolean("assembler.cleanup", true);

    /*
     * do sanity check to make sure all paths exist
     */
    //checkFileExists(commandLine);
    //checkFileExists(commandPath);
    //checkDirExists(tmpDir);

    /*
     * if all is good, create a working space inside tmpDir
     */

    tmpDirFile = MetaUtils.createTempDir("cap3_", tmpDir);
}

From source file:gov.jgi.meta.exec.MinimusCommand.java

License:Open Source License

/**
 * new minimus command based on values stored in the configuration.
 * <p/>
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 * @throws java.io.IOException if command can't be configured
 */
public MinimusCommand(Configuration config) throws IOException {
    String c;

    if ((c = config.get("minimus.commandline")) != null) {
        commandLine = c;
    } else {
        commandLine = DEFAULTCOMMANDLINE;
    }
    if ((c = config.get("minimus.commandpath")) != null) {
        commandPath = c;
    } else {
        commandPath = DEFAULTCOMMANDPATH;
    }
    if ((c = config.get("assembler.tmpdir")) != null) {
        tmpDir = c;
    } else {
        tmpDir = DEFAULTTMPDIR;
    }

    doCleanup = config.getBoolean("assembler.cleanup", true);

    /*
     * do sanity check to make sure all paths exist
     */
    //checkFileExists(commandLine);
    //checkFileExists(commandPath);
    //checkDirExists(tmpDir);

    /*
     * if all is good, create a working space inside tmpDir
     */

    tmpDirFile = MetaUtils.createTempDir("minimus_", tmpDir);
}

From source file:gov.jgi.meta.exec.NewblerCommand.java

License:Open Source License

/**
 * new newbler command based on values stored in the configuration.
 * <p/>
 * Looks for the following config values:
 * <ul>
 * <li> newbler.commandline </li>
 * <li> newbler.commandpath</li>
 * <li> assembler.tmpdir</li>
 * <li> assembler.cleanup</li>
 * </ul>
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 * @throws IOException if tmp working directory can't be created
 */
public NewblerCommand(Configuration config) throws IOException {
    log.info("initializing");
    String c;

    if ((c = config.get("newbler.commandline")) != null) {
        newbler_commandLine = c;
    } else {
        newbler_commandLine = DEFAULT_NEWBLER_COMMANDLINE;
    }
    if ((c = config.get("newbler.commandpath")) != null) {
        newbler_commandPath = c;
    } else {
        newbler_commandPath = DEFAULT_NEWBLER_COMMANDPATH;
    }

    if ((c = config.get("assembler.tmpdir")) != null) {
        tmpDir = c;
    } else {
        tmpDir = DEFAULTTMPDIR;
    }

    doCleanup = config.getBoolean("assembler.cleanup", true);

    /*
     * do sanity check to make sure all paths exist
     */
    //checkFileExists(commandLine);
    //checkFileExists(commandPath);
    //checkDirExists(tmpDir);

    /*
     * if all is good, create a working space inside tmpDir
     */

    tmpDirFile = MetaUtils.createTempDir("newbler_", tmpDir);
}

From source file:gov.jgi.meta.exec.TempCleanerCommand.java

License:Open Source License

public TempCleanerCommand(Configuration config) throws IOException {
    log.info("initializing");
    String c;

    if ((c = config.get("cleaner.commandline")) != null) {
        cleanup_commandLine = c;
    } else {
        cleanup_commandLine = "/bin/sh rm -rf ";
    }

    if ((c = config.get("assembler.tmpdir")) != null) {
        tmpDir = c;
    } else {
        tmpDir = DEFAULTTMPDIR;
    }

    doCleanup = config.getBoolean("assembler.cleanup", true);

}

From source file:gov.jgi.meta.exec.VelvetCommand.java

License:Open Source License

/**
 * new velvet command based on values stored in the configuration.
 * <p/>
 * Looks for the following config values:
 * <ul>
 * <li> velveth.commandline </li>
 * <li> velveth.commandpath</li>
 * <li> velvetg.commandline</li>
 * <li> velvetg.commandpath</li>
 * <li> assembler.tmpdir</li>
 * <li> assembler.cleanup</li>
 * </ul>
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 * @throws IOException if tmp working directory can't be created
 */
public VelvetCommand(Configuration config) throws IOException {
    log.info("initializing");
    String c;

    if ((c = config.get("velveth.commandline")) != null) {
        velveth_commandLine = c;
    } else {
        velveth_commandLine = DEFAULT_VELVETH_COMMANDLINE;
    }
    if ((c = config.get("velveth.commandpath")) != null) {
        velveth_commandPath = c;
    } else {
        velveth_commandPath = DEFAULT_VELVETH_COMMANDPATH;
    }

    if ((c = config.get("velvetg.commandline")) != null) {
        velvetg_commandLine = c;
    } else {
        velvetg_commandLine = DEFAULT_VELVETG_COMMANDLINE;
    }
    if ((c = config.get("velvetg.commandpath")) != null) {
        velvetg_commandPath = c;
    } else {
        velvetg_commandPath = DEFAULT_VELVETG_COMMANDPATH;
    }

    if ((c = config.get("assembler.tmpdir")) != null) {
        tmpDir = c;
    } else {
        tmpDir = DEFAULTTMPDIR;
    }

    doCleanup = config.getBoolean("assembler.cleanup", true);

    /*
     * do sanity check to make sure all paths exist
     */
    //checkFileExists(commandLine);
    //checkFileExists(commandPath);
    //checkDirExists(tmpDir);

    /*
     * if all is good, create a working space inside tmpDir
     */

    tmpDirFile = MetaUtils.createTempDir("velvet_", tmpDir);
}

From source file:gr.ntua.h2rdf.loadTriples.TotalOrderPartitioner.java

License:Apache License

/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache
                : partFile.getFileSystem(conf);

        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length > job.getNumReduceTasks() - 1) {
            System.out.println(job.getNumReduceTasks());
            System.out.println(splitPoints.length);
            System.out.println("Wrong number of partitions in keyset:");
            throw new IOException("Wrong number of partitions in keyset:" + splitPoints.length);
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    // Now that blocks of identical splitless trie nodes are 
                    // represented reentrantly, and we develop a leaf for any trie
                    // node with only one split point, the only reason for a depth
                    // limit is to refute stack overflow or bloat in the pathological
                    // case where the split points are long and mostly look like bytes 
                    // iii...iixii...iii   .  Therefore, we make the default depth
                    // limit large but not huge.
                    conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
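
The getBoolean call above selects between the two lookup structures. A hedged sketch of forcing the binary-search path, assuming NATURAL_ORDER and MAX_TRIE_DEPTH resolve to the total.order.partitioner.* keys named in the javadoc:

Configuration conf = job.getConfiguration();
// Locate keys with the job's RawComparator instead of building the byte trie.
conf.setBoolean("total.order.partitioner.natural.order", false);
// When the trie stays enabled, its depth can also be bounded explicitly.
conf.setInt("total.order.partitioner.max.trie.depth", 3);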

From source file:grakn.core.server.session.computer.GraknSparkComputer.java

License:Open Source License

@SuppressWarnings("PMD.UnusedFormalParameter")
private Future<ComputerResult> submitWithExecutor() {
    jobGroupId = Integer.toString(ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE));
    String jobDescription = this.vertexProgram == null ? this.mapReducers.toString()
            : this.vertexProgram + "+" + this.mapReducers;

    // Use different output locations
    this.sparkConfiguration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION,
            this.sparkConfiguration.getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION) + "/" + jobGroupId);

    updateConfigKeys(sparkConfiguration);

    final Future<ComputerResult> result = computerService.submit(() -> {
        final long startTime = System.currentTimeMillis();

        //////////////////////////////////////////////////
        /////// PROCESS SHIM AND SYSTEM PROPERTIES ///////
        //////////////////////////////////////////////////
        final String shimService = KryoSerializer.class.getCanonicalName()
                .equals(this.sparkConfiguration.getString(Constants.SPARK_SERIALIZER, null))
                        ? UnshadedKryoShimService.class.getCanonicalName()
                        : HadoopPoolShimService.class.getCanonicalName();
        this.sparkConfiguration.setProperty(KryoShimServiceLoader.KRYO_SHIM_SERVICE, shimService);
        ///////////
        final StringBuilder params = new StringBuilder();
        this.sparkConfiguration.getKeys().forEachRemaining(key -> {
            if (KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES.contains(key)) {
                params.append(" -D").append("tinkerpop.").append(key).append("=")
                        .append(this.sparkConfiguration.getProperty(key));
                System.setProperty("tinkerpop." + key, this.sparkConfiguration.getProperty(key).toString());
            }
        });
        if (params.length() > 0) {
            this.sparkConfiguration.setProperty(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS,
                    (this.sparkConfiguration.getString(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS, "")
                            + params.toString()).trim());
            this.sparkConfiguration.setProperty(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS,
                    (this.sparkConfiguration.getString(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, "")
                            + params.toString()).trim());
        }
        KryoShimServiceLoader.applyConfiguration(this.sparkConfiguration);
        //////////////////////////////////////////////////
        //////////////////////////////////////////////////
        //////////////////////////////////////////////////
        // apache and hadoop configurations that are used throughout the graph computer computation
        final org.apache.commons.configuration.Configuration graphComputerConfiguration = new HadoopConfiguration(
                this.sparkConfiguration);
        if (!graphComputerConfiguration.containsKey(Constants.SPARK_SERIALIZER)) {
            graphComputerConfiguration.setProperty(Constants.SPARK_SERIALIZER,
                    KryoSerializer.class.getCanonicalName());
            if (!graphComputerConfiguration.containsKey(Constants.SPARK_KRYO_REGISTRATOR)) {
                graphComputerConfiguration.setProperty(Constants.SPARK_KRYO_REGISTRATOR,
                        GryoRegistrator.class.getCanonicalName());
            }
        }
        graphComputerConfiguration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES,
                this.persist.equals(GraphComputer.Persist.EDGES));

        final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(graphComputerConfiguration);

        final Storage fileSystemStorage = FileSystemStorage.open(hadoopConfiguration);
        final boolean inputFromHDFS = FileInputFormat.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class));
        final boolean inputFromSpark = PersistedInputRDD.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class));
        final boolean outputToHDFS = FileOutputFormat.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class));
        final boolean outputToSpark = PersistedOutputRDD.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class));
        final boolean skipPartitioner = graphComputerConfiguration
                .getBoolean(Constants.GREMLIN_SPARK_SKIP_PARTITIONER, false);
        final boolean skipPersist = graphComputerConfiguration
                .getBoolean(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE, false);

        if (inputFromHDFS) {
            String inputLocation = Constants
                    .getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION),
                            fileSystemStorage)
                    .orElse(null);
            if (null != inputLocation) {
                try {
                    graphComputerConfiguration.setProperty(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
                            FileSystem.get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath()
                                    .toString());
                    hadoopConfiguration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
                            FileSystem.get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath()
                                    .toString());
                } catch (final IOException e) {
                    throw new IllegalStateException(e.getMessage(), e);
                }
            }
        }

        final InputRDD inputRDD;
        final OutputRDD outputRDD;
        final boolean filtered;
        try {
            inputRDD = InputRDD.class.isAssignableFrom(
                    hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class))
                            ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER,
                                    InputRDD.class, InputRDD.class).newInstance()
                            : InputFormatRDD.class.newInstance();
            outputRDD = OutputRDD.class.isAssignableFrom(
                    hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class))
                            ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER,
                                    OutputRDD.class, OutputRDD.class).newInstance()
                            : OutputFormatRDD.class.newInstance();

            // if the input class can filter on load, then set the filters
            if (inputRDD instanceof InputFormatRDD
                    && GraphFilterAware.class.isAssignableFrom(hadoopConfiguration.getClass(
                            Constants.GREMLIN_HADOOP_GRAPH_READER, InputFormat.class, InputFormat.class))) {
                GraphFilterAware.storeGraphFilter(graphComputerConfiguration, hadoopConfiguration,
                        this.graphFilter);
                filtered = false;
            } else if (inputRDD instanceof GraphFilterAware) {
                ((GraphFilterAware) inputRDD).setGraphFilter(this.graphFilter);
                filtered = false;
            } else
                filtered = this.graphFilter.hasFilter();
        } catch (final InstantiationException | IllegalAccessException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }

        // create the spark context from the graph computer configuration
        final JavaSparkContext sparkContext = new JavaSparkContext(Spark.create(hadoopConfiguration));
        final Storage sparkContextStorage = SparkContextStorage.open();

        sparkContext.setJobGroup(jobGroupId, jobDescription);

        GraknSparkMemory memory = null;
        // delete output location
        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
        if (null != outputLocation) {
            if (outputToHDFS && fileSystemStorage.exists(outputLocation)) {
                fileSystemStorage.rm(outputLocation);
            }
            if (outputToSpark && sparkContextStorage.exists(outputLocation)) {
                sparkContextStorage.rm(outputLocation);
            }
        }

        // the Spark application name will always be set by SparkContextStorage,
        // thus, INFO the name to make it easier to debug
        logger.debug(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX
                + (null == this.vertexProgram ? "No VertexProgram" : this.vertexProgram) + "["
                + this.mapReducers + "]");

        // add the project jars to the cluster
        this.loadJars(hadoopConfiguration, sparkContext);
        updateLocalConfiguration(sparkContext, hadoopConfiguration);

        // create a message-passing friendly rdd from the input rdd
        boolean partitioned = false;
        JavaPairRDD<Object, VertexWritable> loadedGraphRDD = inputRDD.readGraphRDD(graphComputerConfiguration,
                sparkContext);

        // if there are vertex or edge filters, filter the loaded graph rdd prior to partitioning and persisting
        if (filtered) {
            this.logger.debug("Filtering the loaded graphRDD: " + this.graphFilter);
            loadedGraphRDD = GraknSparkExecutor.applyGraphFilter(loadedGraphRDD, this.graphFilter);
        }
        // if the loaded graph RDD is already partitioned use that partitioner,
        // else partition it with HashPartitioner
        if (loadedGraphRDD.partitioner().isPresent()) {
            this.logger.debug("Using the existing partitioner associated with the loaded graphRDD: "
                    + loadedGraphRDD.partitioner().get());
        } else {
            if (!skipPartitioner) {
                final Partitioner partitioner = new HashPartitioner(
                        this.workersSet ? this.workers : loadedGraphRDD.partitions().size());
                this.logger.debug("Partitioning the loaded graphRDD: " + partitioner);
                loadedGraphRDD = loadedGraphRDD.partitionBy(partitioner);
                partitioned = true;
                assert loadedGraphRDD.partitioner().isPresent();
            } else {
                // no easy way to test this with a test case
                assert skipPartitioner == !loadedGraphRDD.partitioner().isPresent();

                this.logger.debug("Partitioning has been skipped for the loaded graphRDD via "
                        + Constants.GREMLIN_SPARK_SKIP_PARTITIONER);
            }
        }
        // if the loaded graphRDD was already partitioned previously,
        // then this coalesce/repartition will not take place
        if (this.workersSet) {
            // ensures that the loaded graphRDD does not have more partitions than workers
            if (loadedGraphRDD.partitions().size() > this.workers) {
                loadedGraphRDD = loadedGraphRDD.coalesce(this.workers);
            } else {
                // ensures that the loaded graphRDD does not have less partitions than workers
                if (loadedGraphRDD.partitions().size() < this.workers) {
                    loadedGraphRDD = loadedGraphRDD.repartition(this.workers);
                }
            }
        }
        // persist the vertex program loaded graph as specified by configuration
        // or else use default cache() which is MEMORY_ONLY
        if (!skipPersist && (!inputFromSpark || partitioned || filtered)) {
            loadedGraphRDD = loadedGraphRDD.persist(StorageLevel.fromString(
                    hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
        }
        // final graph with view
        // (for persisting and/or mapReducing -- may be null and thus, possible to save space/time)
        JavaPairRDD<Object, VertexWritable> computedGraphRDD = null;
        try {
            ////////////////////////////////
            // process the vertex program //
            ////////////////////////////////
            if (null != this.vertexProgram) {
                memory = new GraknSparkMemory(this.vertexProgram, this.mapReducers, sparkContext);
                /////////////////
                // if there is a registered VertexProgramInterceptor, use it to bypass the GraphComputer semantics
                if (graphComputerConfiguration
                        .containsKey(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR)) {
                    try {
                        final GraknSparkVertexProgramInterceptor<VertexProgram> interceptor = (GraknSparkVertexProgramInterceptor) Class
                                .forName(graphComputerConfiguration
                                        .getString(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR))
                                .newInstance();
                        computedGraphRDD = interceptor.apply(this.vertexProgram, loadedGraphRDD, memory);
                    } catch (final ClassNotFoundException | IllegalAccessException | InstantiationException e) {
                        throw new IllegalStateException(e.getMessage());
                    }
                } else {
                    // standard GraphComputer semantics
                    // get a configuration that will be propagated to all workers
                    final HadoopConfiguration vertexProgramConfiguration = new HadoopConfiguration();
                    this.vertexProgram.storeState(vertexProgramConfiguration);
                    // set up the vertex program and wire up configurations
                    this.vertexProgram.setup(memory);
                    JavaPairRDD<Object, ViewIncomingPayload<Object>> viewIncomingRDD = null;
                    memory.broadcastMemory(sparkContext);
                    // execute the vertex program
                    while (true) {
                        if (Thread.interrupted()) {
                            sparkContext.cancelAllJobs();
                            throw new TraversalInterruptedException();
                        }
                        memory.setInExecute(true);
                        viewIncomingRDD = GraknSparkExecutor.executeVertexProgramIteration(loadedGraphRDD,
                                viewIncomingRDD, memory, graphComputerConfiguration,
                                vertexProgramConfiguration);
                        memory.setInExecute(false);
                        if (this.vertexProgram.terminate(memory)) {
                            break;
                        } else {
                            memory.incrIteration();
                            memory.broadcastMemory(sparkContext);
                        }
                    }
                    // if the graph will be continued to be used (persisted or mapreduced),
                    // then generate a view+graph
                    if ((null != outputRDD && !this.persist.equals(Persist.NOTHING))
                            || !this.mapReducers.isEmpty()) {
                        computedGraphRDD = GraknSparkExecutor.prepareFinalGraphRDD(loadedGraphRDD,
                                viewIncomingRDD, this.vertexProgram.getVertexComputeKeys());
                        assert null != computedGraphRDD && computedGraphRDD != loadedGraphRDD;
                    } else {
                        // ensure that the computedGraphRDD was not created
                        assert null == computedGraphRDD;
                    }
                }
                /////////////////
                memory.complete(); // drop all transient memory keys
                // write the computed graph to the respective output (rdd or output format)
                if (null != outputRDD && !this.persist.equals(Persist.NOTHING)) {
                    // the logic holds that a computeGraphRDD must be created at this point
                    assert null != computedGraphRDD;

                    outputRDD.writeGraphRDD(graphComputerConfiguration, computedGraphRDD);
                }
            }

            final boolean computedGraphCreated = computedGraphRDD != null && computedGraphRDD != loadedGraphRDD;
            if (!computedGraphCreated) {
                computedGraphRDD = loadedGraphRDD;
            }

            final Memory.Admin finalMemory = null == memory ? new MapMemory() : new MapMemory(memory);

            //////////////////////////////
            // process the map reducers //
            //////////////////////////////
            if (!this.mapReducers.isEmpty()) {
                // create a mapReduceRDD for executing the map reduce jobs on
                JavaPairRDD<Object, VertexWritable> mapReduceRDD = computedGraphRDD;
                if (computedGraphCreated && !outputToSpark) {
                    // drop all the edges of the graph as they are not used in mapReduce processing
                    mapReduceRDD = computedGraphRDD.mapValues(vertexWritable -> {
                        vertexWritable.get().dropEdges(Direction.BOTH);
                        return vertexWritable;
                    });
                    // if there is only one MapReduce to execute, don't bother wasting the clock cycles.
                    if (this.mapReducers.size() > 1) {
                        mapReduceRDD = mapReduceRDD.persist(StorageLevel.fromString(hadoopConfiguration
                                .get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
                    }
                }

                for (final MapReduce mapReduce : this.mapReducers) {
                    // execute the map reduce job
                    final HadoopConfiguration newApacheConfiguration = new HadoopConfiguration(
                            graphComputerConfiguration);
                    mapReduce.storeState(newApacheConfiguration);
                    // map
                    final JavaPairRDD mapRDD = GraknSparkExecutor.executeMap(mapReduceRDD, mapReduce,
                            newApacheConfiguration);
                    // combine
                    final JavaPairRDD combineRDD = mapReduce.doStage(MapReduce.Stage.COMBINE)
                            ? GraknSparkExecutor.executeCombine(mapRDD, newApacheConfiguration)
                            : mapRDD;
                    // reduce
                    final JavaPairRDD reduceRDD = mapReduce.doStage(MapReduce.Stage.REDUCE)
                            ? GraknSparkExecutor.executeReduce(combineRDD, mapReduce, newApacheConfiguration)
                            : combineRDD;
                    // write the map reduce output back to disk and computer result memory
                    if (null != outputRDD) {
                        mapReduce.addResultToMemory(finalMemory, outputRDD.writeMemoryRDD(
                                graphComputerConfiguration, mapReduce.getMemoryKey(), reduceRDD));
                    }
                }
                // if the mapReduceRDD is not simply the computed graph, unpersist the mapReduceRDD
                if (computedGraphCreated && !outputToSpark) {
                    assert loadedGraphRDD != computedGraphRDD;
                    assert mapReduceRDD != computedGraphRDD;
                    mapReduceRDD.unpersist();
                } else {
                    assert mapReduceRDD == computedGraphRDD;
                }
            }

            // unpersist the loaded graph if it will not be used again (no PersistedInputRDD)
            // if the graphRDD was loaded from Spark, but then partitioned or filtered, it's a different RDD
            if (!inputFromSpark || partitioned || filtered) {
                loadedGraphRDD.unpersist();
            }
            // unpersist the computed graph if it will not be used again (no PersistedOutputRDD)
            // if the computed graph is the loadedGraphRDD because it was not mutated and not-unpersisted,
            // then don't unpersist the computedGraphRDD/loadedGraphRDD
            if ((!outputToSpark || this.persist.equals(GraphComputer.Persist.NOTHING))
                    && computedGraphCreated) {
                computedGraphRDD.unpersist();
            }
            // delete any file system or rdd data if persist nothing
            if (null != outputLocation && this.persist.equals(GraphComputer.Persist.NOTHING)) {
                if (outputToHDFS) {
                    fileSystemStorage.rm(outputLocation);
                }
                if (outputToSpark) {
                    sparkContextStorage.rm(outputLocation);
                }
            }
            // update runtime and return the newly computed graph
            finalMemory.setRuntime(System.currentTimeMillis() - startTime);
            // clear properties that should not be propagated in an OLAP chain
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_GRAPH_FILTER);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_PARTITIONER);
            return new DefaultComputerResult(InputOutputHelper.getOutputGraph(graphComputerConfiguration,
                    this.resultGraph, this.persist), finalMemory.asImmutable());
        } catch (Exception e) {
            // So it throws the same exception as tinker does
            throw new RuntimeException(e);
        }
    });
    computerService.shutdown();
    return result;
}
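
The two getBoolean reads near the top of the submit logic (GREMLIN_SPARK_SKIP_PARTITIONER and GREMLIN_SPARK_SKIP_GRAPH_CACHE, both defaulting to false) gate the partitioning and persistence branches later on. A hedged sketch of enabling them before submission; Constants is the TinkerPop class referenced in the snippet:

// Skip the HashPartitioner step and the graph cache for a one-shot job.
sparkConfiguration.setProperty(Constants.GREMLIN_SPARK_SKIP_PARTITIONER, true);
sparkConfiguration.setProperty(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE, true);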

From source file:grakn.core.server.session.reader.GraknBinaryInputFormat.java

License:Open Source License

@Override
public void setConf(final Configuration config) {
    super.setConf(config);

    // Copy some JanusGraph configuration keys to the Hadoop Configuration keys used by Cassandra's ColumnFamilyInputFormat
    ConfigHelper.setInputInitialAddress(config,
            janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
    if (janusgraphConf.has(GraphDatabaseConfiguration.STORAGE_PORT)) {
        ConfigHelper.setInputRpcPort(config,
                String.valueOf(janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    }
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_USERNAME)) {
        ConfigHelper.setInputKeyspaceUserName(config,
                janusgraphConf.get(GraphDatabaseConfiguration.AUTH_USERNAME));
    }
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD)) {
        ConfigHelper.setInputKeyspacePassword(config,
                janusgraphConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD));
    }
    // Copy keyspace, force the CF setting to edgestore, honor widerows when set
    final boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    // Use the setInputColumnFamily overload that includes a widerows argument; using the overload without this argument forces it false
    ConfigHelper.setInputColumnFamily(config,
            janusgraphConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE),
            mrConf.get(JanusGraphHadoopConfiguration.COLUMN_FAMILY_NAME), wideRows);
    log.debug("Set keyspace: {}", janusgraphConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE));

    // Set the column slice bounds via Faunus' vertex query filter
    final SlicePredicate predicate = new SlicePredicate();
    final int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    predicate.setSlice_range(getSliceRange(rangeBatchSize)); // TODO stop slicing the whole row
    ConfigHelper.setInputSlicePredicate(config, predicate);
}
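
The wide-rows flag is the only getBoolean read here, defaulting to false. A hedged sketch of enabling it on the Configuration handed to setConf; the constant appears unqualified in the source, so the class qualifier below is an assumption:

Configuration config = new Configuration();
config.setBoolean(GraknBinaryInputFormat.INPUT_WIDEROWS_CONFIG, true); // honor wide rows
inputFormat.setConf(config);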

From source file:hadoop.api.AggregateAndRecommendReducer.java

License:Apache License

@Override
protected void setup(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    recommendationsPerUser = conf.getInt(NUM_RECOMMENDATIONS, DEFAULT_NUM_RECOMMENDATIONS);
    booleanData = conf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
    indexItemIDMap = TasteHadoopUtils.readIDIndexMap(conf.get(ITEMID_INDEX_PATH), conf);

    String itemFilePathString = conf.get(ITEMS_FILE);
    if (itemFilePathString != null) {
        itemsToRecommendFor = new FastIDSet();
        for (String line : new FileLineIterable(HadoopUtil.openStream(new Path(itemFilePathString), conf))) {
            try {
                itemsToRecommendFor.add(Long.parseLong(line));
            } catch (NumberFormatException nfe) {
                log.warn("itemsFile line ignored: {}", line);
            }
        }
    }
}
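
A hedged driver-side sketch for the flags this setup() reads: RecommenderJob.BOOLEAN_DATA marks the input as preference-free (boolean) data, and NUM_RECOMMENDATIONS caps results per user; the constants' visibility to the driver is assumed.

Configuration conf = new Configuration();
conf.setBoolean(RecommenderJob.BOOLEAN_DATA, true); // input carries no preference values
conf.setInt(AggregateAndRecommendReducer.NUM_RECOMMENDATIONS, 20);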