List of usage examples for org.apache.hadoop.conf.Configuration getBoolean

public boolean getBoolean(String name, boolean defaultValue)

Gets the value of the name property as a boolean; if no such property exists, or the stored value is not a valid boolean, defaultValue is returned.
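Before the examples, a minimal self-contained sketch of the call itself (the key my.feature.enabled and the wrapper class are made up for illustration): it reads a flag from a Configuration and falls back to the supplied default when the key is absent.

import org.apache.hadoop.conf.Configuration;

public class GetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Key is not set yet, so the supplied default (false) is returned.
        boolean before = conf.getBoolean("my.feature.enabled", false);

        // Set the flag explicitly and read it back.
        conf.setBoolean("my.feature.enabled", true);
        boolean after = conf.getBoolean("my.feature.enabled", false);

        System.out.println(before + " -> " + after);   // prints: false -> true
    }
}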
From source file:gov.jgi.meta.exec.BlatCommand.java
License:Open Source License
/**
 * new blat command based on values stored in the configuration.
 * <p/>
 * Looks for the following config values: blat.commandline,
 * blat.commandpath, and blat.tmpdir
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 */
public BlatCommand(Configuration config) throws IOException {
    String c;

    if ((c = config.get("blat.commandline")) != null) {
        commandLine = c;
    } else {
        commandLine = DEFAULTCOMMANDLINE;
    }
    if ((c = config.get("blat.commandpath")) != null) {
        commandPath = c;
    } else {
        commandPath = DEFAULTCOMMANDPATH;
    }
    if ((c = config.get("blat.tmpdir")) != null) {
        tmpDir = c;
    } else {
        tmpDir = DEFAULTTMPDIR;
    }

    doCleanup = config.getBoolean("blat.cleanup", true);
    paired = config.getBoolean("blat.paired", true);

    /* do sanity check to make sure all paths exist */
    //checkFileExists(commandLine);
    //checkFileExists(commandPath);
    //checkDirExists(tmpDir);

    /* if all is good, create a working space inside tmpDir */
    tmpDirFile = MetaUtils.createTempDir("blat_", tmpDir);
}
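As an aside, the repeated if ((c = config.get(...)) != null) blocks in these constructors can usually be collapsed with Configuration.get(String, String), which accepts a default value directly. A minimal sketch under that assumption; the default constants below are placeholder values, not the real DEFAULT* fields from BlatCommand:

import org.apache.hadoop.conf.Configuration;

public class BlatDefaultsSketch {
    // Hypothetical placeholders standing in for BlatCommand's DEFAULT* fields.
    static final String DEFAULTCOMMANDLINE = "-fastMap";
    static final String DEFAULTCOMMANDPATH = "/usr/bin/blat";
    static final String DEFAULTTMPDIR = "/tmp";

    public static void main(String[] args) {
        Configuration config = new Configuration();

        // get(name, defaultValue) returns the stored value, or the default when the key is absent.
        String commandLine = config.get("blat.commandline", DEFAULTCOMMANDLINE);
        String commandPath = config.get("blat.commandpath", DEFAULTCOMMANDPATH);
        String tmpDir = config.get("blat.tmpdir", DEFAULTTMPDIR);
        boolean doCleanup = config.getBoolean("blat.cleanup", true);
        boolean paired = config.getBoolean("blat.paired", true);

        System.out.println(commandLine + " " + commandPath + " " + tmpDir
                + " cleanup=" + doCleanup + " paired=" + paired);
    }
}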
From source file:gov.jgi.meta.exec.CapCommand.java
License:Open Source License
/**
 * new cap3 command based on values stored in the configuration.
 * <p/>
 * Looks for the following config values: cap3.commandline,
 * cap3.commandpath, and assembler.tmpdir
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 * @throws IOException if command can't be configured
 */
public CapCommand(Configuration config) throws IOException {
    String c;

    if ((c = config.get("cap3.commandline")) != null) {
        commandLine = c;
    } else {
        commandLine = DEFAULTCOMMANDLINE;
    }
    if ((c = config.get("cap3.commandpath")) != null) {
        commandPath = c;
    } else {
        commandPath = DEFAULTCOMMANDPATH;
    }
    if ((c = config.get("assembler.tmpdir")) != null) {
        tmpDir = c;
    } else {
        tmpDir = DEFAULTTMPDIR;
    }
    if ((c = config.get("recoverypath")) != null) {
        recoveryPath = c;
    }

    doCleanup = config.getBoolean("assembler.cleanup", true);

    /*
     * do sanity check to make sure all paths exist
     */
    //checkFileExists(commandLine);
    //checkFileExists(commandPath);
    //checkDirExists(tmpDir);

    /*
     * if all is good, create a working space inside tmpDir
     */
    tmpDirFile = MetaUtils.createTempDir("cap3_", tmpDir);
}
From source file:gov.jgi.meta.exec.MinimusCommand.java
License:Open Source License
/**
 * new minimus command based on values stored in the configuration.
 * <p/>
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 * @throws java.io.IOException if command can't be configured
 */
public MinimusCommand(Configuration config) throws IOException {
    String c;

    if ((c = config.get("minimus.commandline")) != null) {
        commandLine = c;
    } else {
        commandLine = DEFAULTCOMMANDLINE;
    }
    if ((c = config.get("minimus.commandpath")) != null) {
        commandPath = c;
    } else {
        commandPath = DEFAULTCOMMANDPATH;
    }
    if ((c = config.get("assembler.tmpdir")) != null) {
        tmpDir = c;
    } else {
        tmpDir = DEFAULTTMPDIR;
    }

    doCleanup = config.getBoolean("assembler.cleanup", true);

    /*
     * do sanity check to make sure all paths exist
     */
    //checkFileExists(commandLine);
    //checkFileExists(commandPath);
    //checkDirExists(tmpDir);

    /*
     * if all is good, create a working space inside tmpDir
     */
    tmpDirFile = MetaUtils.createTempDir("minimus_", tmpDir);
}
From source file:gov.jgi.meta.exec.NewblerCommand.java
License:Open Source License
/**
 * new newbler command based on values stored in the configuration.
 * <p/>
 * Looks for the following config values:
 * <ul>
 * <li> newbler.commandline </li>
 * <li> newbler.commandpath</li>
 * <li> assembler.tmpdir</li>
 * <li> assembler.cleanup</li>
 * </ul>
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 * @throws IOException if tmp working directory can't be created
 */
public NewblerCommand(Configuration config) throws IOException {
    log.info("initializing");
    String c;

    if ((c = config.get("newbler.commandline")) != null) {
        newbler_commandLine = c;
    } else {
        newbler_commandLine = DEFAULT_NEWBLER_COMMANDLINE;
    }
    if ((c = config.get("newbler.commandpath")) != null) {
        newbler_commandPath = c;
    } else {
        newbler_commandPath = DEFAULT_NEWBLER_COMMANDPATH;
    }
    if ((c = config.get("assembler.tmpdir")) != null) {
        tmpDir = c;
    } else {
        tmpDir = DEFAULTTMPDIR;
    }

    doCleanup = config.getBoolean("assembler.cleanup", true);

    /*
     * do sanity check to make sure all paths exist
     */
    //checkFileExists(commandLine);
    //checkFileExists(commandPath);
    //checkDirExists(tmpDir);

    /*
     * if all is good, create a working space inside tmpDir
     */
    tmpDirFile = MetaUtils.createTempDir("newbler_", tmpDir);
}
From source file:gov.jgi.meta.exec.TempCleanerCommand.java
License:Open Source License
public TempCleanerCommand(Configuration config) throws IOException {
    log.info("initializing");
    String c;

    if ((c = config.get("cleaner.commandline")) != null) {
        cleanup_commandLine = c;
    } else {
        cleanup_commandLine = "/bin/sh rm -rf ";
    }
    if ((c = config.get("assembler.tmpdir")) != null) {
        tmpDir = c;
    } else {
        tmpDir = DEFAULTTMPDIR;
    }

    doCleanup = config.getBoolean("assembler.cleanup", true);
}
From source file:gov.jgi.meta.exec.VelvetCommand.java
License:Open Source License
/**
 * new velvet command based on values stored in the configuration.
 * <p/>
 * Looks for the following config values:
 * <ul>
 * <li> velveth.commandline </li>
 * <li> velveth.commandpath</li>
 * <li> velvetg.commandline</li>
 * <li> velvetg.commandpath</li>
 * <li> assembler.tmpdir</li>
 * <li> assembler.cleanup</li>
 * </ul>
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 * @throws IOException if tmp working directory can't be created
 */
public VelvetCommand(Configuration config) throws IOException {
    log.info("initializing");
    String c;

    if ((c = config.get("velveth.commandline")) != null) {
        velveth_commandLine = c;
    } else {
        velveth_commandLine = DEFAULT_VELVETH_COMMANDLINE;
    }
    if ((c = config.get("velveth.commandpath")) != null) {
        velveth_commandPath = c;
    } else {
        velveth_commandPath = DEFAULT_VELVETH_COMMANDPATH;
    }
    if ((c = config.get("velvetg.commandline")) != null) {
        velvetg_commandLine = c;
    } else {
        velvetg_commandLine = DEFAULT_VELVETG_COMMANDLINE;
    }
    if ((c = config.get("velvetg.commandpath")) != null) {
        velvetg_commandPath = c;
    } else {
        velvetg_commandPath = DEFAULT_VELVETG_COMMANDPATH;
    }
    if ((c = config.get("assembler.tmpdir")) != null) {
        tmpDir = c;
    } else {
        tmpDir = DEFAULTTMPDIR;
    }

    doCleanup = config.getBoolean("assembler.cleanup", true);

    /*
     * do sanity check to make sure all paths exist
     */
    //checkFileExists(commandLine);
    //checkFileExists(commandPath);
    //checkDirExists(tmpDir);

    /*
     * if all is good, create a working space inside tmpDir
     */
    tmpDirFile = MetaUtils.createTempDir("velvet_", tmpDir);
}
From source file:gr.ntua.h2rdf.loadTriples.TotalOrderPartitioner.java
License:Apache License
/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts))
                ? FileSystem.getLocal(conf) // assume in DistributedCache
                : partFile.getFileSystem(conf);

        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length > job.getNumReduceTasks() - 1) {
            System.out.println(job.getNumReduceTasks());
            System.out.println(splitPoints.length);
            System.out.println("Wrong number of partitions in keyset:");
            throw new IOException("Wrong number of partitions in keyset:" + splitPoints.length);
        }

        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }

        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    // Now that blocks of identical splitless trie nodes are
                    // represented reentrantly, and we develop a leaf for any trie
                    // node with only one split point, the only reason for a depth
                    // limit is to refute stack overflow or bloat in the pathological
                    // case where the split points are long and mostly look like bytes
                    // iii...iixii...iii . Therefore, we make the default depth
                    // limit large but not huge.
                    conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
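The two knobs named in the javadoc above, total.order.partitioner.natural.order and total.order.partitioner.max.trie.depth, are ordinary Configuration entries, so a driver can set them before submitting the job. A minimal sketch, assuming the literal key strings from the javadoc rather than the class's NATURAL_ORDER and MAX_TRIE_DEPTH constants:

import org.apache.hadoop.conf.Configuration;

public class PartitionerTuningSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Force binary search over the split points instead of the byte trie.
        conf.setBoolean("total.order.partitioner.natural.order", false);

        // Depth limit only matters when the trie is used (natural order = true).
        conf.setInt("total.order.partitioner.max.trie.depth", 200);

        // setConf() above reads these keys back via its NATURAL_ORDER and MAX_TRIE_DEPTH constants.
        boolean natOrder = conf.getBoolean("total.order.partitioner.natural.order", true);
        int maxDepth = conf.getInt("total.order.partitioner.max.trie.depth", 200);
        System.out.println("natural.order=" + natOrder + ", max.trie.depth=" + maxDepth);
    }
}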
From source file:grakn.core.server.session.computer.GraknSparkComputer.java
License:Open Source License
@SuppressWarnings("PMD.UnusedFormalParameter") private Future<ComputerResult> submitWithExecutor() { jobGroupId = Integer.toString(ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); String jobDescription = this.vertexProgram == null ? this.mapReducers.toString() : this.vertexProgram + "+" + this.mapReducers; // Use different output locations this.sparkConfiguration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, this.sparkConfiguration.getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION) + "/" + jobGroupId); updateConfigKeys(sparkConfiguration); final Future<ComputerResult> result = computerService.submit(() -> { final long startTime = System.currentTimeMillis(); ////////////////////////////////////////////////// /////// PROCESS SHIM AND SYSTEM PROPERTIES /////// ////////////////////////////////////////////////// final String shimService = KryoSerializer.class.getCanonicalName() .equals(this.sparkConfiguration.getString(Constants.SPARK_SERIALIZER, null)) ? UnshadedKryoShimService.class.getCanonicalName() : HadoopPoolShimService.class.getCanonicalName(); this.sparkConfiguration.setProperty(KryoShimServiceLoader.KRYO_SHIM_SERVICE, shimService); /////////// final StringBuilder params = new StringBuilder(); this.sparkConfiguration.getKeys().forEachRemaining(key -> { if (KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES.contains(key)) { params.append(" -D").append("tinkerpop.").append(key).append("=") .append(this.sparkConfiguration.getProperty(key)); System.setProperty("tinkerpop." + key, this.sparkConfiguration.getProperty(key).toString()); }// w ww . ja v a2s . c om }); if (params.length() > 0) { this.sparkConfiguration.setProperty(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS, (this.sparkConfiguration.getString(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS, "") + params.toString()).trim()); this.sparkConfiguration.setProperty(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, (this.sparkConfiguration.getString(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, "") + params.toString()).trim()); } KryoShimServiceLoader.applyConfiguration(this.sparkConfiguration); ////////////////////////////////////////////////// ////////////////////////////////////////////////// ////////////////////////////////////////////////// // apache and hadoop configurations that are used throughout the graph computer computation final org.apache.commons.configuration.Configuration graphComputerConfiguration = new HadoopConfiguration( this.sparkConfiguration); if (!graphComputerConfiguration.containsKey(Constants.SPARK_SERIALIZER)) { graphComputerConfiguration.setProperty(Constants.SPARK_SERIALIZER, KryoSerializer.class.getCanonicalName()); if (!graphComputerConfiguration.containsKey(Constants.SPARK_KRYO_REGISTRATOR)) { graphComputerConfiguration.setProperty(Constants.SPARK_KRYO_REGISTRATOR, GryoRegistrator.class.getCanonicalName()); } } graphComputerConfiguration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES, this.persist.equals(GraphComputer.Persist.EDGES)); final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(graphComputerConfiguration); final Storage fileSystemStorage = FileSystemStorage.open(hadoopConfiguration); final boolean inputFromHDFS = FileInputFormat.class.isAssignableFrom( hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class)); final boolean inputFromSpark = PersistedInputRDD.class.isAssignableFrom( hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class)); final boolean outputToHDFS = FileOutputFormat.class.isAssignableFrom( 
hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class)); final boolean outputToSpark = PersistedOutputRDD.class.isAssignableFrom( hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class)); final boolean skipPartitioner = graphComputerConfiguration .getBoolean(Constants.GREMLIN_SPARK_SKIP_PARTITIONER, false); final boolean skipPersist = graphComputerConfiguration .getBoolean(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE, false); if (inputFromHDFS) { String inputLocation = Constants .getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION), fileSystemStorage) .orElse(null); if (null != inputLocation) { try { graphComputerConfiguration.setProperty(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, FileSystem.get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath() .toString()); hadoopConfiguration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, FileSystem.get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath() .toString()); } catch (final IOException e) { throw new IllegalStateException(e.getMessage(), e); } } } final InputRDD inputRDD; final OutputRDD outputRDD; final boolean filtered; try { inputRDD = InputRDD.class.isAssignableFrom( hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class)) ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, InputRDD.class, InputRDD.class).newInstance() : InputFormatRDD.class.newInstance(); outputRDD = OutputRDD.class.isAssignableFrom( hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class)) ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, OutputRDD.class, OutputRDD.class).newInstance() : OutputFormatRDD.class.newInstance(); // if the input class can filter on load, then set the filters if (inputRDD instanceof InputFormatRDD && GraphFilterAware.class.isAssignableFrom(hadoopConfiguration.getClass( Constants.GREMLIN_HADOOP_GRAPH_READER, InputFormat.class, InputFormat.class))) { GraphFilterAware.storeGraphFilter(graphComputerConfiguration, hadoopConfiguration, this.graphFilter); filtered = false; } else if (inputRDD instanceof GraphFilterAware) { ((GraphFilterAware) inputRDD).setGraphFilter(this.graphFilter); filtered = false; } else filtered = this.graphFilter.hasFilter(); } catch (final InstantiationException | IllegalAccessException e) { throw new IllegalStateException(e.getMessage(), e); } // create the spark context from the graph computer configuration final JavaSparkContext sparkContext = new JavaSparkContext(Spark.create(hadoopConfiguration)); final Storage sparkContextStorage = SparkContextStorage.open(); sparkContext.setJobGroup(jobGroupId, jobDescription); GraknSparkMemory memory = null; // delete output location final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null); if (null != outputLocation) { if (outputToHDFS && fileSystemStorage.exists(outputLocation)) { fileSystemStorage.rm(outputLocation); } if (outputToSpark && sparkContextStorage.exists(outputLocation)) { sparkContextStorage.rm(outputLocation); } } // the Spark application name will always be set by SparkContextStorage, // thus, INFO the name to make it easier to debug logger.debug(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX + (null == this.vertexProgram ? 
"No VertexProgram" : this.vertexProgram) + "[" + this.mapReducers + "]"); // add the project jars to the cluster this.loadJars(hadoopConfiguration, sparkContext); updateLocalConfiguration(sparkContext, hadoopConfiguration); // create a message-passing friendly rdd from the input rdd boolean partitioned = false; JavaPairRDD<Object, VertexWritable> loadedGraphRDD = inputRDD.readGraphRDD(graphComputerConfiguration, sparkContext); // if there are vertex or edge filters, filter the loaded graph rdd prior to partitioning and persisting if (filtered) { this.logger.debug("Filtering the loaded graphRDD: " + this.graphFilter); loadedGraphRDD = GraknSparkExecutor.applyGraphFilter(loadedGraphRDD, this.graphFilter); } // if the loaded graph RDD is already partitioned use that partitioner, // else partition it with HashPartitioner if (loadedGraphRDD.partitioner().isPresent()) { this.logger.debug("Using the existing partitioner associated with the loaded graphRDD: " + loadedGraphRDD.partitioner().get()); } else { if (!skipPartitioner) { final Partitioner partitioner = new HashPartitioner( this.workersSet ? this.workers : loadedGraphRDD.partitions().size()); this.logger.debug("Partitioning the loaded graphRDD: " + partitioner); loadedGraphRDD = loadedGraphRDD.partitionBy(partitioner); partitioned = true; assert loadedGraphRDD.partitioner().isPresent(); } else { // no easy way to test this with a test case assert skipPartitioner == !loadedGraphRDD.partitioner().isPresent(); this.logger.debug("Partitioning has been skipped for the loaded graphRDD via " + Constants.GREMLIN_SPARK_SKIP_PARTITIONER); } } // if the loaded graphRDD was already partitioned previous, // then this coalesce/repartition will not take place if (this.workersSet) { // ensures that the loaded graphRDD does not have more partitions than workers if (loadedGraphRDD.partitions().size() > this.workers) { loadedGraphRDD = loadedGraphRDD.coalesce(this.workers); } else { // ensures that the loaded graphRDD does not have less partitions than workers if (loadedGraphRDD.partitions().size() < this.workers) { loadedGraphRDD = loadedGraphRDD.repartition(this.workers); } } } // persist the vertex program loaded graph as specified by configuration // or else use default cache() which is MEMORY_ONLY if (!skipPersist && (!inputFromSpark || partitioned || filtered)) { loadedGraphRDD = loadedGraphRDD.persist(StorageLevel.fromString( hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY"))); } // final graph with view // (for persisting and/or mapReducing -- may be null and thus, possible to save space/time) JavaPairRDD<Object, VertexWritable> computedGraphRDD = null; try { //////////////////////////////// // process the vertex program // //////////////////////////////// if (null != this.vertexProgram) { memory = new GraknSparkMemory(this.vertexProgram, this.mapReducers, sparkContext); ///////////////// // if there is a registered VertexProgramInterceptor, use it to bypass the GraphComputer semantics if (graphComputerConfiguration .containsKey(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR)) { try { final GraknSparkVertexProgramInterceptor<VertexProgram> interceptor = (GraknSparkVertexProgramInterceptor) Class .forName(graphComputerConfiguration .getString(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR)) .newInstance(); computedGraphRDD = interceptor.apply(this.vertexProgram, loadedGraphRDD, memory); } catch (final ClassNotFoundException | IllegalAccessException | InstantiationException e) { throw new 
IllegalStateException(e.getMessage()); } } else { // standard GraphComputer semantics // get a configuration that will be propagated to all workers final HadoopConfiguration vertexProgramConfiguration = new HadoopConfiguration(); this.vertexProgram.storeState(vertexProgramConfiguration); // set up the vertex program and wire up configurations this.vertexProgram.setup(memory); JavaPairRDD<Object, ViewIncomingPayload<Object>> viewIncomingRDD = null; memory.broadcastMemory(sparkContext); // execute the vertex program while (true) { if (Thread.interrupted()) { sparkContext.cancelAllJobs(); throw new TraversalInterruptedException(); } memory.setInExecute(true); viewIncomingRDD = GraknSparkExecutor.executeVertexProgramIteration(loadedGraphRDD, viewIncomingRDD, memory, graphComputerConfiguration, vertexProgramConfiguration); memory.setInExecute(false); if (this.vertexProgram.terminate(memory)) { break; } else { memory.incrIteration(); memory.broadcastMemory(sparkContext); } } // if the graph will be continued to be used (persisted or mapreduced), // then generate a view+graph if ((null != outputRDD && !this.persist.equals(Persist.NOTHING)) || !this.mapReducers.isEmpty()) { computedGraphRDD = GraknSparkExecutor.prepareFinalGraphRDD(loadedGraphRDD, viewIncomingRDD, this.vertexProgram.getVertexComputeKeys()); assert null != computedGraphRDD && computedGraphRDD != loadedGraphRDD; } else { // ensure that the computedGraphRDD was not created assert null == computedGraphRDD; } } ///////////////// memory.complete(); // drop all transient memory keys // write the computed graph to the respective output (rdd or output format) if (null != outputRDD && !this.persist.equals(Persist.NOTHING)) { // the logic holds that a computeGraphRDD must be created at this point assert null != computedGraphRDD; outputRDD.writeGraphRDD(graphComputerConfiguration, computedGraphRDD); } } final boolean computedGraphCreated = computedGraphRDD != null && computedGraphRDD != loadedGraphRDD; if (!computedGraphCreated) { computedGraphRDD = loadedGraphRDD; } final Memory.Admin finalMemory = null == memory ? new MapMemory() : new MapMemory(memory); ////////////////////////////// // process the map reducers // ////////////////////////////// if (!this.mapReducers.isEmpty()) { // create a mapReduceRDD for executing the map reduce jobs on JavaPairRDD<Object, VertexWritable> mapReduceRDD = computedGraphRDD; if (computedGraphCreated && !outputToSpark) { // drop all the edges of the graph as they are not used in mapReduce processing mapReduceRDD = computedGraphRDD.mapValues(vertexWritable -> { vertexWritable.get().dropEdges(Direction.BOTH); return vertexWritable; }); // if there is only one MapReduce to execute, don't bother wasting the clock cycles. if (this.mapReducers.size() > 1) { mapReduceRDD = mapReduceRDD.persist(StorageLevel.fromString(hadoopConfiguration .get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY"))); } } for (final MapReduce mapReduce : this.mapReducers) { // execute the map reduce job final HadoopConfiguration newApacheConfiguration = new HadoopConfiguration( graphComputerConfiguration); mapReduce.storeState(newApacheConfiguration); // map final JavaPairRDD mapRDD = GraknSparkExecutor.executeMap(mapReduceRDD, mapReduce, newApacheConfiguration); // combine final JavaPairRDD combineRDD = mapReduce.doStage(MapReduce.Stage.COMBINE) ? GraknSparkExecutor.executeCombine(mapRDD, newApacheConfiguration) : mapRDD; // reduce final JavaPairRDD reduceRDD = mapReduce.doStage(MapReduce.Stage.REDUCE) ? 
GraknSparkExecutor.executeReduce(combineRDD, mapReduce, newApacheConfiguration) : combineRDD; // write the map reduce output back to disk and computer result memory if (null != outputRDD) { mapReduce.addResultToMemory(finalMemory, outputRDD.writeMemoryRDD( graphComputerConfiguration, mapReduce.getMemoryKey(), reduceRDD)); } } // if the mapReduceRDD is not simply the computed graph, unpersist the mapReduceRDD if (computedGraphCreated && !outputToSpark) { assert loadedGraphRDD != computedGraphRDD; assert mapReduceRDD != computedGraphRDD; mapReduceRDD.unpersist(); } else { assert mapReduceRDD == computedGraphRDD; } } // unpersist the loaded graph if it will not be used again (no PersistedInputRDD) // if the graphRDD was loaded from Spark, but then partitioned or filtered, its a different RDD if (!inputFromSpark || partitioned || filtered) { loadedGraphRDD.unpersist(); } // unpersist the computed graph if it will not be used again (no PersistedOutputRDD) // if the computed graph is the loadedGraphRDD because it was not mutated and not-unpersisted, // then don't unpersist the computedGraphRDD/loadedGraphRDD if ((!outputToSpark || this.persist.equals(GraphComputer.Persist.NOTHING)) && computedGraphCreated) { computedGraphRDD.unpersist(); } // delete any file system or rdd data if persist nothing if (null != outputLocation && this.persist.equals(GraphComputer.Persist.NOTHING)) { if (outputToHDFS) { fileSystemStorage.rm(outputLocation); } if (outputToSpark) { sparkContextStorage.rm(outputLocation); } } // update runtime and return the newly computed graph finalMemory.setRuntime(System.currentTimeMillis() - startTime); // clear properties that should not be propagated in an OLAP chain graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_GRAPH_FILTER); graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR); graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE); graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_PARTITIONER); return new DefaultComputerResult(InputOutputHelper.getOutputGraph(graphComputerConfiguration, this.resultGraph, this.persist), finalMemory.asImmutable()); } catch (Exception e) { // So it throws the same exception as tinker does throw new RuntimeException(e); } }); computerService.shutdown(); return result; }
From source file:grakn.core.server.session.reader.GraknBinaryInputFormat.java
License:Open Source License
@Override
public void setConf(final Configuration config) {
    super.setConf(config);

    // Copy some JanusGraph configuration keys to the Hadoop Configuration keys used by Cassandra's ColumnFamilyInputFormat
    ConfigHelper.setInputInitialAddress(config, janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
    if (janusgraphConf.has(GraphDatabaseConfiguration.STORAGE_PORT)) {
        ConfigHelper.setInputRpcPort(config,
                String.valueOf(janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    }
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_USERNAME)) {
        ConfigHelper.setInputKeyspaceUserName(config,
                janusgraphConf.get(GraphDatabaseConfiguration.AUTH_USERNAME));
    }
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD)) {
        ConfigHelper.setInputKeyspacePassword(config,
                janusgraphConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD));
    }

    // Copy keyspace, force the CF setting to edgestore, honor widerows when set
    final boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    // Use the setInputColumnFamily overload that includes a widerows argument;
    // using the overload without this argument forces it false
    ConfigHelper.setInputColumnFamily(config,
            janusgraphConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE),
            mrConf.get(JanusGraphHadoopConfiguration.COLUMN_FAMILY_NAME), wideRows);
    log.debug("Set keyspace: {}", janusgraphConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE));

    // Set the column slice bounds via Faunus' vertex query filter
    final SlicePredicate predicate = new SlicePredicate();
    final int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    predicate.setSlice_range(getSliceRange(rangeBatchSize)); // TODO stop slicing the whole row
    ConfigHelper.setInputSlicePredicate(config, predicate);
}
From source file:hadoop.api.AggregateAndRecommendReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    recommendationsPerUser = conf.getInt(NUM_RECOMMENDATIONS, DEFAULT_NUM_RECOMMENDATIONS);
    booleanData = conf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
    indexItemIDMap = TasteHadoopUtils.readIDIndexMap(conf.get(ITEMID_INDEX_PATH), conf);

    String itemFilePathString = conf.get(ITEMS_FILE);
    if (itemFilePathString != null) {
        itemsToRecommendFor = new FastIDSet();
        for (String line : new FileLineIterable(HadoopUtil.openStream(new Path(itemFilePathString), conf))) {
            try {
                itemsToRecommendFor.add(Long.parseLong(line));
            } catch (NumberFormatException nfe) {
                log.warn("itemsFile line ignored: {}", line);
            }
        }
    }
}
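The flags this setup() reads are plain Configuration entries written on the driver side before the job starts. A minimal sketch of that side, using illustrative key strings rather than Mahout's actual RecommenderJob.BOOLEAN_DATA and NUM_RECOMMENDATIONS constants:

import org.apache.hadoop.conf.Configuration;

public class RecommenderDriverSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Hypothetical key names standing in for the constants used in the reducer above.
        conf.setBoolean("mapreduce.recommender.booleanData", true);
        conf.setInt("mapreduce.recommender.numRecommendations", 10);

        // The reducer's setup() reads them back the same way, with defaults.
        boolean booleanData = conf.getBoolean("mapreduce.recommender.booleanData", false);
        int perUser = conf.getInt("mapreduce.recommender.numRecommendations", 10);
        System.out.println("booleanData=" + booleanData + ", recommendationsPerUser=" + perUser);
    }
}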