List of usage examples for org.apache.hadoop.conf.Configuration.setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean. The name parameter is the property key to set; value is the boolean stored for that property.
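Before the project examples below, here is a minimal, self-contained sketch of the call. The property key "example.feature.enabled" is illustrative only, not a standard Hadoop key; setBoolean stores the value as the string "true"/"false", and getBoolean reads it back with a default.

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store a boolean property; internally it is saved as the string "true" or "false".
        conf.setBoolean("example.feature.enabled", true);
        // Read it back; the second argument is the default returned when the key is absent.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled);
    }
}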
From source file:ml.shifu.shifu.core.processor.VarSelectModelProcessor.java
License:Apache License
private void prepareSEJobConf(SourceType source, Configuration conf) throws IOException {
    // add jars to hadoop mapper and reducer
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars() });

    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(super.getPathFinder().getModelConfigPath(source))).toString());
    conf.set(Constants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(super.getPathFinder().getColumnConfigPath(source))).toString());
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME,
            Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.set(Constants.SHIFU_MODELSET_SOURCE_TYPE, source.toString());
    // set mapreduce.job.max.split.locations to 30 to suppress warnings
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 30);
    // Temporarily set to false because some clusters use gzip by default, and CombineInputFormat
    // would split the gzip file (a bug).
    conf.setBoolean(CombineInputFormat.SHIFU_VS_SPLIT_COMBINABLE, false);
    conf.set("mapred.reduce.slowstart.completed.maps",
            Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.9"));

    Float wrapperRatio = this.modelConfig.getVarSelect().getWrapperRatio();
    if (wrapperRatio == null) {
        log.warn("wrapperRatio in var select is not set. Using default value 0.05.");
        wrapperRatio = 0.05f;
    }
    if (wrapperRatio.compareTo(Float.valueOf(1.0f)) >= 0) {
        throw new IllegalArgumentException("WrapperRatio should be in (0, 1).");
    }
    conf.setFloat(Constants.SHIFU_VARSELECT_WRAPPER_RATIO, wrapperRatio);

    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for HDP 2.2.4, hdp.version should be set and configuration files should be added to the container class path
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
}
From source file:msc.fall2015.stock.kmeans.hbase.mapreduce.pwd.PairWiseAlignment.java
License:Open Source License
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: <sequence_file> <sequence_count> <block_size> <weight>");
        System.exit(2);
    }
    /* input parameters */
    String sequenceFile = args[1];
    System.out.println(sequenceFile);
    // we are limited to ints, as Java loops support only them
    int noOfSequences = Integer.parseInt(args[2]);
    // int noOfSequences = 7322;
    int blockSize = Integer.parseInt(args[3]);
    boolean weightCalculate = Boolean.parseBoolean(args[4]);
    // int blockSize = 7322;
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Pairwise-analysis");

    /* create the base dir for this job. Delete and recreate if it exists */
    Path hdMainDir = new Path(msc.fall2015.stock.kmeans.utils.Constants.HDFS_HOME_PATH + "swg-hadoop");
    FileSystem fs = FileSystem.get(conf);
    fs.delete(hdMainDir, true);
    Path hdInputDir = new Path(hdMainDir, "data");
    if (!fs.mkdirs(hdInputDir)) {
        throw new IOException("Mkdirs failed to create " + "/swg-hadoop/data");
    }
    int noOfDivisions = (int) Math.ceil(noOfSequences / (double) blockSize);
    int noOfBlocks = (noOfDivisions * (noOfDivisions + 1)) / 2;
    System.out.println("No of divisions :" + noOfDivisions + "\nNo of blocks :" + noOfBlocks
            + "\nBlock size :" + blockSize);

    // Retrieving the configuration from the job to set the properties.
    // Setting properties on the original conf does not work (possible Hadoop bug).
    Configuration jobConf = job.getConfiguration();

    // Input dir in HDFS. Create this in the newly created job base dir
    Path inputDir = new Path(hdMainDir, "input");
    if (!fs.mkdirs(inputDir)) {
        throw new IOException("Mkdirs failed to create " + inputDir.toString());
    }

    Long dataPartitionStartTime = System.nanoTime();
    partitionData(sequenceFile, noOfSequences, blockSize, fs, noOfDivisions, jobConf, inputDir);
    distributeData(blockSize, conf, fs, hdInputDir, noOfDivisions);
    long dataPartTime = (System.nanoTime() - dataPartitionStartTime) / 1000000;
    System.out.println("Data Partition & Scatter Completed in (ms):" + dataPartTime);

    // Output dir in HDFS
    Path hdOutDir = new Path(hdMainDir, "out");

    jobConf.setInt(Constants.BLOCK_SIZE, blockSize);
    jobConf.setInt(Constants.NO_OF_DIVISIONS, noOfDivisions);
    jobConf.setInt(Constants.NO_OF_SEQUENCES, noOfSequences);
    jobConf.setBoolean(Constants.WEIGHT_ENABLED, weightCalculate);

    job.setJarByClass(PairWiseAlignment.class);
    job.setMapperClass(SWGMap.class);
    job.setReducerClass(SWGReduce.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(SWGWritable.class);
    FileInputFormat.setInputPaths(job, hdInputDir);
    FileOutputFormat.setOutputPath(job, hdOutDir);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks((int) noOfDivisions);

    long startTime = System.currentTimeMillis();
    int exitStatus = job.waitForCompletion(true) ? 0 : 1;
    double executionTime = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Job Finished in " + executionTime + " seconds");

    if (args.length == 5) {
        FileWriter writer = new FileWriter(args[4]);
        writer.write("# #seq\t#blockS\tTtime\tinput\tdataDistTime\toutput");
        writer.write("\n");
        writer.write(noOfSequences + "\t" + noOfBlocks + "\t" + executionTime + "\t" + sequenceFile + "\t"
                + dataPartTime + "\t" + hdMainDir);
        writer.write("\n");
        writer.flush();
        writer.close();
    }
    return exitStatus;
}
From source file:mvm.rya.accumulo.mr.fileinput.BulkNtripsInputTool.java
License:Apache License
@Override
public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();
    try {
        // conf
        zk = conf.get(MRUtils.AC_ZK_PROP, zk);
        ttl = conf.get(MRUtils.AC_TTL_PROP, ttl);
        instance = conf.get(MRUtils.AC_INSTANCE_PROP, instance);
        userName = conf.get(MRUtils.AC_USERNAME_PROP, userName);
        pwd = conf.get(MRUtils.AC_PWD_PROP, pwd);
        workDirBase = conf.get(WORKDIR_PROP, workDirBase);
        format = conf.get(MRUtils.FORMAT_PROP, format);
        conf.set(MRUtils.FORMAT_PROP, format);
        final String inputDir = args[0];

        ZooKeeperInstance zooKeeperInstance = new ZooKeeperInstance(instance, zk);
        Connector connector = zooKeeperInstance.getConnector(userName, new PasswordToken(pwd));
        TableOperations tableOperations = connector.tableOperations();

        if (conf.get(AccumuloRdfConfiguration.CONF_ADDITIONAL_INDEXERS) != null) {
            throw new IllegalArgumentException("Cannot use Bulk N Trips tool with Additional Indexers");
        }

        String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null);
        if (tablePrefix != null)
            RdfCloudTripleStoreConstants.prefixTables(tablePrefix);
        String[] tables = { tablePrefix + RdfCloudTripleStoreConstants.TBL_OSP_SUFFIX,
                tablePrefix + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX,
                tablePrefix + RdfCloudTripleStoreConstants.TBL_PO_SUFFIX };

        Collection<Job> jobs = new ArrayList<Job>();
        for (final String tableName : tables) {
            PrintStream out = null;
            try {
                String workDir = workDirBase + "/" + tableName;
                System.out.println("Loading data into table[" + tableName + "]");

                Job job = new Job(new Configuration(conf),
                        "Bulk Ingest load data to Generic RDF Table[" + tableName + "]");
                job.setJarByClass(this.getClass());
                // setting long job
                Configuration jobConf = job.getConfiguration();
                jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
                jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
                jobConf.set("io.sort.mb", jobConf.get("io.sort.mb", "256"));
                jobConf.setBoolean("mapred.compress.map.output", true);
                // jobConf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec"); // TODO: I would like LZO compression

                job.setInputFormatClass(TextInputFormat.class);
                job.setMapperClass(ParseNtripsMapper.class);
                job.setMapOutputKeyClass(Key.class);
                job.setMapOutputValueClass(Value.class);
                job.setCombinerClass(OutStmtMutationsReducer.class);
                job.setReducerClass(OutStmtMutationsReducer.class);
                job.setOutputFormatClass(AccumuloFileOutputFormat.class);
                // AccumuloFileOutputFormat.setZooKeeperInstance(jobConf, instance, zk);

                jobConf.set(ParseNtripsMapper.TABLE_PROPERTY, tableName);

                TextInputFormat.setInputPaths(job, new Path(inputDir));

                FileSystem fs = FileSystem.get(conf);
                Path workPath = new Path(workDir);
                if (fs.exists(workPath))
                    fs.delete(workPath, true);

                // make failures dir
                Path failures = new Path(workDir, "failures");
                fs.delete(failures, true);
                fs.mkdirs(new Path(workDir, "failures"));

                AccumuloFileOutputFormat.setOutputPath(job, new Path(workDir + "/files"));

                out = new PrintStream(new BufferedOutputStream(fs.create(new Path(workDir + "/splits.txt"))));
                if (!tableOperations.exists(tableName))
                    tableOperations.create(tableName);
                Collection<Text> splits = tableOperations.getSplits(tableName, Integer.MAX_VALUE);
                for (Text split : splits)
                    out.println(new String(Base64.encodeBase64(TextUtil.getBytes(split))));

                job.setNumReduceTasks(splits.size() + 1);
                out.close();

                job.setPartitionerClass(KeyRangePartitioner.class);
                RangePartitioner.setSplitFile(job, workDir + "/splits.txt");

                jobConf.set(WORKDIR_PROP, workDir);

                job.submit();
                jobs.add(job);
            } catch (Exception re) {
                throw new RuntimeException(re);
            } finally {
                if (out != null)
                    out.close();
            }
        }

        for (Job job : jobs) {
            while (!job.isComplete()) {
                Thread.sleep(1000);
            }
        }

        for (String tableName : tables) {
            String workDir = workDirBase + "/" + tableName;
            String filesDir = workDir + "/files";
            String failuresDir = workDir + "/failures";
            FileSystem fs = FileSystem.get(conf);
            // make sure that the "accumulo" user can read/write/execute these directories
            fs.setPermission(new Path(filesDir), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
            fs.setPermission(new Path(failuresDir), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));

            tableOperations.importDirectory(tableName, filesDir, failuresDir, false);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return 0;
}
From source file:mvm.rya.accumulo.mr.fileinput.BulkNtripsInputToolIndexing.java
License:Apache License
@Override
public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();
    // conf
    zk = conf.get(MRUtils.AC_ZK_PROP, zk);
    instance = conf.get(MRUtils.AC_INSTANCE_PROP, instance);
    userName = conf.get(MRUtils.AC_USERNAME_PROP, userName);
    pwd = conf.get(MRUtils.AC_PWD_PROP, pwd);
    format = conf.get(MRUtils.FORMAT_PROP, format);
    String auths = conf.get(MRUtils.AC_CV_PROP, "");
    conf.set(MRUtils.FORMAT_PROP, format);

    Preconditions.checkNotNull(zk, MRUtils.AC_ZK_PROP + " not set");
    Preconditions.checkNotNull(instance, MRUtils.AC_INSTANCE_PROP + " not set");
    Preconditions.checkNotNull(userName, MRUtils.AC_USERNAME_PROP + " not set");
    Preconditions.checkNotNull(pwd, MRUtils.AC_PWD_PROP + " not set");

    // map the config values to free text config values
    conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk);
    conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
    conf.set(ConfigUtils.CLOUDBASE_USER, userName);
    conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
    conf.set(ConfigUtils.CLOUDBASE_AUTHS, auths);
    final String inputDir = args[0];

    String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null);
    Preconditions.checkNotNull(tablePrefix, MRUtils.TABLE_PREFIX_PROPERTY + " not set");

    String docTextTable = tablePrefix + "text";
    conf.set(ConfigUtils.FREE_TEXT_DOC_TABLENAME, docTextTable);

    String docTermTable = tablePrefix + "terms";
    conf.set(ConfigUtils.FREE_TEXT_TERM_TABLENAME, docTermTable);

    String geoTable = tablePrefix + "geo";
    conf.set(ConfigUtils.GEO_TABLENAME, geoTable);

    System.out.println("Loading data into tables[freetext, geo]");
    System.out.println("Loading data into tables[" + docTermTable + " " + docTextTable + " " + geoTable + "]");

    Job job = new Job(new Configuration(conf), "Bulk Ingest load data into Indexing Tables");
    job.setJarByClass(this.getClass());

    // setting long job
    Configuration jobConf = job.getConfiguration();
    jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    jobConf.set("io.sort.mb", jobConf.get("io.sort.mb", "256"));
    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ParseNtripsMapper.class);

    // I'm not actually going to write output.
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    TextInputFormat.setInputPaths(job, new Path(inputDir));

    job.setNumReduceTasks(0);

    job.waitForCompletion(true);

    return 0;
}
From source file:mvm.rya.accumulo.mr.fileinput.RyaBatchWriterInputTool.java
License:Apache License
@Override
public int run(final String[] args) throws Exception {
    String userName = null;
    String pwd = null;
    String instance = null;
    String zk = null;
    String format = null;

    final Configuration conf = getConf();
    // conf
    zk = conf.get(MRUtils.AC_ZK_PROP, zk);
    instance = conf.get(MRUtils.AC_INSTANCE_PROP, instance);
    userName = conf.get(MRUtils.AC_USERNAME_PROP, userName);
    pwd = conf.get(MRUtils.AC_PWD_PROP, pwd);
    format = conf.get(MRUtils.FORMAT_PROP, RDFFormat.NTRIPLES.getName());
    String auths = conf.get(MRUtils.AC_CV_PROP, "");
    conf.set(MRUtils.FORMAT_PROP, format);

    Preconditions.checkNotNull(zk, MRUtils.AC_ZK_PROP + " not set");
    Preconditions.checkNotNull(instance, MRUtils.AC_INSTANCE_PROP + " not set");
    Preconditions.checkNotNull(userName, MRUtils.AC_USERNAME_PROP + " not set");
    Preconditions.checkNotNull(pwd, MRUtils.AC_PWD_PROP + " not set");

    // map the config values to free text config values
    conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk);
    conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
    conf.set(ConfigUtils.CLOUDBASE_USER, userName);
    conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
    conf.set(ConfigUtils.CLOUDBASE_AUTHS, auths);
    final String inputDir = args[0];

    String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null);
    Preconditions.checkNotNull(tablePrefix, MRUtils.TABLE_PREFIX_PROPERTY + " not set");

    String docTextTable = tablePrefix + "text";
    conf.set(ConfigUtils.FREE_TEXT_DOC_TABLENAME, docTextTable);

    String docTermTable = tablePrefix + "terms";
    conf.set(ConfigUtils.FREE_TEXT_TERM_TABLENAME, docTermTable);

    String geoTable = tablePrefix + "geo";
    conf.set(ConfigUtils.GEO_TABLENAME, geoTable);

    logger.info("Loading data into tables[rya, freetext, geo]");
    logger.info("Loading data into tables[" + docTermTable + " " + docTextTable + " " + geoTable + "]");

    Job job = new Job(new Configuration(conf), "Batch Writer load data into Rya Core and Indexing Tables");
    job.setJarByClass(this.getClass());

    // setting long job
    Configuration jobConf = job.getConfiguration();
    jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    jobConf.setInt("mapred.task.timeout", 1000 * 60 * 60 * 24); // timeout after 1 day

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ParseNtripsMapper.class);
    job.setNumReduceTasks(0);

    // Use Rya Output Format
    job.setOutputFormatClass(RyaOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(StatementWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(StatementWritable.class);

    TextInputFormat.setInputPaths(job, new Path(inputDir));

    job.waitForCompletion(true);

    return 0;
}
From source file:mvm.rya.accumulo.mr.utils.AccumuloHDFSFileInputFormat.java
License:Apache License
public static void main(String[] args) {
    try {
        Job job = new Job(new Configuration());
        job.setJarByClass(AccumuloHDFSFileInputFormat.class);
        Configuration conf = job.getConfiguration();
        conf.setBoolean("mapred.map.tasks.speculative.execution", false);
        conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
        AccumuloInputFormat.setConnectorInfo(job, "root", new PasswordToken("secret"));
        AccumuloInputFormat.setInputTableName(job, "l_spo");
        AccumuloInputFormat.setScanAuthorizations(job, Constants.NO_AUTHS);
        AccumuloInputFormat.setZooKeeperInstance(job, "acu13", "stratus25:2181");
        AccumuloInputFormat.setRanges(job, Collections.singleton(ALLRANGE));
        job.setMapperClass(NullMapper.class);
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(NullOutputFormat.class);
        if (args.length == 0) {
            job.setInputFormatClass(AccumuloHDFSFileInputFormat.class);
        } else {
            job.setInputFormatClass(AccumuloInputFormat.class);
        }
        job.waitForCompletion(true);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:mvm.rya.accumulo.pig.IndexWritingTool.java
License:Apache License
@Override
public int run(final String[] args) throws Exception {
    Preconditions.checkArgument(args.length == 7, "java " + IndexWritingTool.class.getCanonicalName()
            + " hdfsSaveLocation sparqlFile cbinstance cbzk cbuser cbpassword rdfTablePrefix.");

    final String inputDir = args[0];
    final String sparqlFile = args[1];
    final String instStr = args[2];
    final String zooStr = args[3];
    final String userStr = args[4];
    final String passStr = args[5];
    final String tablePrefix = args[6];

    String sparql = FileUtils.readFileToString(new File(sparqlFile));

    Job job = new Job(getConf(), "Write HDFS Index to Accumulo");
    job.setJarByClass(this.getClass());

    Configuration jobConf = job.getConfiguration();
    jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    setVarOrders(sparql, jobConf);

    TextInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    String tableName;
    if (zooStr.equals("mock")) {
        tableName = tablePrefix;
    } else {
        tableName = tablePrefix + "INDEX_" + UUID.randomUUID().toString().replace("-", "").toUpperCase();
    }
    setAccumuloOutput(instStr, zooStr, userStr, passStr, job, tableName);

    jobConf.set(sparql_key, sparql);

    int complete = job.waitForCompletion(true) ? 0 : -1;

    if (complete == 0) {
        String[] varOrders = jobConf.getStrings("varOrders");
        String orders = Joiner.on("\u0000").join(varOrders);
        Instance inst;
        if (zooStr.equals("mock")) {
            inst = new MockInstance(instStr);
        } else {
            inst = new ZooKeeperInstance(instStr, zooStr);
        }
        Connector conn = inst.getConnector(userStr, passStr.getBytes());
        BatchWriter bw = conn.createBatchWriter(tableName, 10, 5000, 1);

        Counters counters = job.getCounters();
        Counter c1 = counters.findCounter(cardCounter, cardCounter);

        Mutation m = new Mutation("~SPARQL");
        Value v = new Value(sparql.getBytes());
        m.put(new Text("" + c1.getValue()), new Text(orders), v);
        bw.addMutation(m);
        bw.close();

        return complete;
    } else {
        return complete;
    }
}
From source file:mvm.rya.cloudbase.giraph.format.BspCase.java
License:Apache License
/**
 * Adjust the configuration to the basic test case
 */
public final void setupConfiguration(GiraphJob job) {
    Configuration conf = job.getConfiguration();
    conf.set("mapred.jar", getJarLocation());

    // Allow this test to be run on a real Hadoop setup
    if (getJobTracker() != null) {
        System.out.println("setup: Sending job to job tracker " + getJobTracker() + " with jar path "
                + getJarLocation() + " for " + getName());
        conf.set("mapred.job.tracker", getJobTracker());
        job.setWorkerConfiguration(getNumWorkers(), getNumWorkers(), 100.0f);
    } else {
        System.out.println(
                "setup: Using local job runner with " + "location " + getJarLocation() + " for " + getName());
        job.setWorkerConfiguration(1, 1, 100.0f);
        // Single node testing
        conf.setBoolean(GiraphJob.SPLIT_MASTER_WORKER, false);
    }
    conf.setInt(GiraphJob.POLL_ATTEMPTS, 10);
    conf.setInt(GiraphJob.POLL_MSECS, 3 * 1000);
    conf.setInt(GiraphJob.ZOOKEEPER_SERVERLIST_POLL_MSECS, 500);
    if (getZooKeeperList() != null) {
        job.setZooKeeperConfiguration(getZooKeeperList());
    }
    // GeneratedInputSplit will generate 5 vertices
    conf.setLong(GeneratedVertexReader.READER_VERTICES, 5);
}
From source file:mvm.rya.indexing.external.ExternalIndexMain.java
License:Apache License
private static Configuration getConf() {
    Configuration conf = new Configuration();

    conf.set(ConfigUtils.CLOUDBASE_USER, userStr);
    conf.set(ConfigUtils.CLOUDBASE_PASSWORD, passStr);

    conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instStr);
    conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zooStr);
    conf.set(ConfigUtils.CLOUDBASE_AUTHS, AUTHS);
    conf.setBoolean(ConfigUtils.DISPLAY_QUERY_PLAN, true);

    return conf;
}
From source file:mvm.rya.joinselect.mr.JoinSelectAggregate.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    String inPath1 = conf.get(PROSPECTS_OUTPUTPATH);
    String inPath2 = conf.get(SPO_OUTPUTPATH);
    String auths = conf.get(AUTHS);
    String outPath = conf.get(OUTPUTPATH);

    assert inPath1 != null && inPath2 != null && outPath != null;

    Job job = new Job(conf, this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);

    JoinSelectStatsUtil.initJoinMRJob(job, inPath1, inPath2, JoinSelectAggregateMapper.class, outPath, auths);

    job.setSortComparatorClass(JoinSelectSortComparator.class);
    job.setGroupingComparatorClass(JoinSelectGroupComparator.class);
    job.setPartitionerClass(JoinSelectPartitioner.class);
    job.setReducerClass(JoinReducer.class);
    job.setNumReduceTasks(32);
    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}