List of usage examples for org.apache.hadoop.conf.Configuration#setInt

public void setInt(String name, int value)

Sets the value of the name property to an int.
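A minimal round trip with setInt and getInt, as a standalone sketch (the property name and values here are arbitrary illustrations, not taken from the examples below):

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store an int under a property name of our choosing.
        conf.setInt("example.block.size", 64);
        // Read it back; the second argument to getInt is the default returned when the property is unset.
        int blockSize = conf.getInt("example.block.size", 128);
        System.out.println(blockSize);                           // prints 64
        System.out.println(conf.getInt("example.missing", 128)); // prints the default, 128
    }
}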
From source file:jobs.MatrixBlockSum.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    conf.setInt("mapred.reduce.tasks", Integer.parseInt(args[2]));
    if (args.length >= 4)
        conf.setInt("SR", Integer.parseInt(args[3]));
    if (args.length >= 5)
        conf.setInt("SC", Integer.parseInt(args[4]));
    conf.set("RESNAME", args[1]);

    // heap space - again - should be passed with the -D option
    conf.set("mapred.map.child.java.opts", "-Xmx3G");
    conf.set("mapred.reduce.child.java.opts", "-Xmx3G");

    // job
    Job job1 = new Job(conf, "MatrixBlockSum");
    job1.setJarByClass(MatrixBlockSum.class);

    // No Map
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(SequenceFileInputFormat.class);

    // Reduce
    job1.setCombinerClass(MatrixBlockSumReducer.class);
    job1.setReducerClass(MatrixBlockSumReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(MatrixBlock.class);

    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    //job1.setOutputFormatClass(TextOutputFormat.class);

    return job1.waitForCompletion(false) ? 0 : 1;
}
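As the comment in run() notes, the heap options (and the reducer count) are better supplied at submission time with -D than hard-coded. That pattern relies on run() obtaining its Configuration through getConf(), which suggests the class is launched via ToolRunner; a minimal sketch of such an entry point, assuming MatrixBlockSum implements the Tool interface (the driver class name here is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class MatrixBlockSumDriver {
    public static void main(String[] args) throws Exception {
        // ToolRunner's GenericOptionsParser strips -D name=value pairs from args and
        // applies them to the Configuration that run() later sees through getConf(),
        // e.g. -Dmapred.reduce.tasks=8 -Dmapred.map.child.java.opts=-Xmx3G
        int exitCode = ToolRunner.run(new Configuration(), new MatrixBlockSum(), args);
        System.exit(exitCode);
    }
}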
From source file:jobs.MatrixBlockToTextRows.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    conf.setInt("N", Integer.parseInt(args[2]));
    conf.setInt("NC", Integer.parseInt(args[3]));
    conf.setInt("SC", Integer.parseInt(args[4]));
    conf.setInt("mapred.reduce.tasks", Integer.parseInt(args[5]));
    conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");

    // heap space - again - should be passed with the -D option
    conf.set("mapred.map.child.java.opts", "-Xmx3G");
    conf.set("mapred.reduce.child.java.opts", "-Xmx3G");

    // job
    Job job1 = new Job(conf, "MatrixBlockToTextRows");
    job1.setJarByClass(MatrixBlockToTextRows.class);

    // Map
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setMapperClass(MatrixBlockRowPartMapper.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(RowPart.class);

    // Reduce
    job1.setReducerClass(RowPartTextRowsReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(MatrixBlock.class);

    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    //job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    job1.setOutputFormatClass(TextOutputFormat.class);

    return job1.waitForCompletion(true) ? 0 : 1;
}
From source file:jobs.MatrixBlockTraceMult.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    conf.setFloat("SCALAR", Float.parseFloat(args[3]));
    conf.setBoolean("LTRANS", Boolean.parseBoolean(args[4]));
    conf.setBoolean("RTRANS", Boolean.parseBoolean(args[5]));

    // set # of reducers
    conf.setInt("mapred.reduce.tasks", Integer.parseInt(args[6]));

    // Get optional blocksize parameters
    if (args.length >= 8)
        conf.setInt("SRL", Integer.parseInt(args[7]));
    if (args.length >= 9)
        conf.setInt("SCL", Integer.parseInt(args[8]));
    if (args.length >= 10)
        conf.setInt("SRR", Integer.parseInt(args[9]));
    if (args.length >= 11)
        conf.setInt("SCR", Integer.parseInt(args[10]));

    conf.set("LEFTNAME", args[0]);
    conf.set("RIGHTNAME", args[1]);

    // heap space - should be entered with the -D format and not dealt with by the program.
    conf.set("mapred.map.child.java.opts", "-Xmx3G");
    conf.set("mapred.reduce.child.java.opts", "-Xmx3G");

    // job
    Job job1 = new Job(conf, "MatrixBlockTraceMult");
    job1.setJarByClass(MatrixBlockMult.class);

    // Map
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    FileInputFormat.addInputPath(job1, new Path(args[1]));
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setMapperClass(SquareBlockTraceMultiplicationGroupingMapper.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(MatrixBlock.class);

    // Reduce
    job1.setReducerClass(SquareMatrixBlockTraceMultReducer.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(DoubleWritable.class);

    FileOutputFormat.setOutputPath(job1, new Path(args[2]));
    job1.setOutputFormatClass(TextOutputFormat.class);

    return job1.waitForCompletion(false) ? 0 : 1;
}
From source file:kafka.bridge.hadoop.KafkaOutputFormat.java
License:Apache License
@Override
public RecordWriter<NullWritable, W> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path outputPath = getOutputPath(context);
    if (outputPath == null)
        throw new IllegalArgumentException("no kafka output url specified");
    URI uri = outputPath.toUri();
    Configuration job = context.getConfiguration();

    final String topic = uri.getPath().substring(1); // ignore the initial '/' in the path
    final int queueSize = job.getInt("kafka.output.queue_size", KAFKA_QUEUE_SIZE);
    final int timeout = job.getInt("kafka.output.connect_timeout", KAFKA_PRODUCER_CONNECT_TIMEOUT);
    final int interval = job.getInt("kafka.output.reconnect_interval", KAFKA_PRODUCER_RECONNECT_INTERVAL);
    final int bufSize = job.getInt("kafka.output.bufsize", KAFKA_PRODUCER_BUFFER_SIZE);
    final int maxSize = job.getInt("kafka.output.max_msgsize", KAFKA_PRODUCER_MAX_MESSAGE_SIZE);

    job.set("kafka.output.server", String.format("%s:%d", uri.getHost(), uri.getPort()));
    job.set("kafka.output.topic", topic);
    job.setInt("kafka.output.queue_size", queueSize);
    job.setInt("kafka.output.connect_timeout", timeout);
    job.setInt("kafka.output.reconnect_interval", interval);
    job.setInt("kafka.output.bufsize", bufSize);
    job.setInt("kafka.output.max_msgsize", maxSize);

    if (uri.getHost().isEmpty())
        throw new IllegalArgumentException("missing kafka server");
    if (uri.getPath().isEmpty())
        throw new IllegalArgumentException("missing kafka topic");

    Properties props = new Properties();
    props.setProperty("host", uri.getHost());
    props.setProperty("port", Integer.toString(uri.getPort()));
    props.setProperty("buffer.size", Integer.toString(bufSize));
    props.setProperty("connect.timeout.ms", Integer.toString(timeout));
    props.setProperty("reconnect.interval", Integer.toString(interval));
    props.setProperty("max.message.size", Integer.toString(maxSize));

    SyncProducer producer = new SyncProducer(new SyncProducerConfig(props));
    return new KafkaRecordWriter<W>(producer, topic, queueSize);
}
From source file:kogiri.hadoop.common.config.ClusterConfiguration.java
License:Open Source License
@JsonIgnore
public void configureTo(Configuration conf) {
    for (ConfigurationParam param : this.externalParams) {
        if (param.isValueInt()) {
            conf.setInt(param.getKey(), param.getValueAsInt());
        } else {
            conf.set(param.getKey(), param.getValue());
        }
    }
}
From source file:ldbc.socialnet.dbgen.generator.MRGenerateUsers.java
License:Open Source License
public int runGenerateJob(Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(conf);
    String hadoopDir = new String(conf.get("outputDir") + "/hadoop");
    String socialNetDir = new String(conf.get("outputDir") + "/social_network");
    int numThreads = Integer.parseInt(conf.get("numThreads"));
    System.out.println("NUMBER OF THREADS " + numThreads);

    /// --------- Execute Jobs ------
    long start = System.currentTimeMillis();

    /// --------------- First job Generating users ----------------
    printProgress("Starting: Person generation");
    conf.set("pass", Integer.toString(0));
    Job job = new Job(conf, "SIB Generate Users & 1st Dimension");
    job.setMapOutputKeyClass(TupleKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(GenerateUsersMapper.class);
    job.setMapperClass(GenerateUsersMapper.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(NLineInputFormat.class);
    conf.setInt("mapred.line.input.format.linespermap", 1);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir) + "/mrInputFile");
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib"));
    job.waitForCompletion(true);

    /// --------------- Sorting by first dimension ----------------
    printProgress("Starting: Sorting by first dimension");
    HadoopFileRanker fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib", hadoopDir + "/sibSorting");
    fs.delete(new Path(hadoopDir + "/sib"), true);

    /// --------------- job Generating First dimension Friendships ----------------
    printProgress("Starting: Friendship generation 1.");
    conf.set("pass", Integer.toString(0));
    conf.set("dimension", Integer.toString(1));
    job = new Job(conf, "SIB Generate Friendship - Interest");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(DimensionReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib2"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sibSorting"), true);

    /// --------------- Sorting phase 2 ----------------
    printProgress("Starting: Sorting by second dimension");
    fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib2", hadoopDir + "/sibSorting2");
    fs.delete(new Path(hadoopDir + "/sib2"), true);

    /// --------------- Second job Generating Friendships ----------------
    printProgress("Starting: Friendship generation 2.");
    conf.set("pass", Integer.toString(1));
    conf.set("dimension", Integer.toString(2));
    job = new Job(conf, "SIB Generate Friendship - Interest");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(DimensionReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting2"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib3"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sibSorting2"), true);

    /// --------------- Sorting phase 3 ----------------
    printProgress("Starting: Sorting by third dimension");
    fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib3", hadoopDir + "/sibSorting3");
    fs.delete(new Path(hadoopDir + "/sib3"), true);

    /// --------------- Third job Generating Friendships ----------------
    printProgress("Starting: Friendship generation 3.");
    conf.set("pass", Integer.toString(2));
    conf.set("dimension", Integer.toString(2));
    job = new Job(conf, "SIB Generate Friendship - Random");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(DimensionReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting3"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib4"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sibSorting3"), true);

    /// --------------- Sorting phase 3 ----------------
    printProgress("Starting: Sorting by third dimension (for activity generation)");
    fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib4", hadoopDir + "/sibSorting4");
    fs.delete(new Path(hadoopDir + "/sib4"), true);

    /// --------------- Fourth job: Serialize static network ----------------
    printProgress("Starting: Generating person activity");
    job = new Job(conf, "Generate user activity");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(UserActivityReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting4"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib5"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sib5"), true);

    int numEvents = 0;
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;
    if (conf.getBoolean("updateStreams", false)) {
        for (int i = 0; i < numThreads; ++i) {
            int numPartitions = conf.getInt("numUpdatePartitions", 1);
            for (int j = 0; j < numPartitions; ++j) {
                /// --------------- Fifth job: Sort update streams ----------------
                conf.setInt("mapred.line.input.format.linespermap", 1000000);
                conf.setInt("reducerId", i);
                conf.setInt("partitionId", j);
                conf.set("streamType", "forum");
                Job jobForum = new Job(conf, "Sorting update streams " + j + " of reducer " + i);
                jobForum.setMapOutputKeyClass(LongWritable.class);
                jobForum.setMapOutputValueClass(Text.class);
                jobForum.setOutputKeyClass(LongWritable.class);
                jobForum.setOutputValueClass(Text.class);
                jobForum.setJarByClass(UpdateEventMapper.class);
                jobForum.setMapperClass(UpdateEventMapper.class);
                jobForum.setReducerClass(UpdateEventReducer.class);
                jobForum.setNumReduceTasks(1);
                jobForum.setInputFormatClass(SequenceFileInputFormat.class);
                jobForum.setOutputFormatClass(SequenceFileOutputFormat.class);
                jobForum.setPartitionerClass(UpdateEventPartitioner.class);
                FileInputFormat.addInputPath(jobForum,
                        new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_forum"));
                FileOutputFormat.setOutputPath(jobForum, new Path(hadoopDir + "/sibEnd"));
                printProgress("Starting: Sorting update streams");
                jobForum.waitForCompletion(true);
                fs.delete(new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_forum"), false);
                fs.delete(new Path(hadoopDir + "/sibEnd"), true);

                conf.setInt("mapred.line.input.format.linespermap", 1000000);
                conf.setInt("reducerId", i);
                conf.setInt("partitionId", j);
                conf.set("streamType", "person");
                Job jobPerson = new Job(conf, "Sorting update streams " + j + " of reducer " + i);
                jobPerson.setMapOutputKeyClass(LongWritable.class);
                jobPerson.setMapOutputValueClass(Text.class);
                jobPerson.setOutputKeyClass(LongWritable.class);
                jobPerson.setOutputValueClass(Text.class);
                jobPerson.setJarByClass(UpdateEventMapper.class);
                jobPerson.setMapperClass(UpdateEventMapper.class);
                jobPerson.setReducerClass(UpdateEventReducer.class);
                jobPerson.setNumReduceTasks(1);
                jobPerson.setInputFormatClass(SequenceFileInputFormat.class);
                jobPerson.setOutputFormatClass(SequenceFileOutputFormat.class);
                jobPerson.setPartitionerClass(UpdateEventPartitioner.class);
                FileInputFormat.addInputPath(jobPerson,
                        new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_person"));
                FileOutputFormat.setOutputPath(jobPerson, new Path(hadoopDir + "/sibEnd"));
                printProgress("Starting: Sorting update streams");
                jobPerson.waitForCompletion(true);
                fs.delete(new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_person"), false);
                fs.delete(new Path(hadoopDir + "/sibEnd"), true);

                if (conf.getBoolean("updateStreams", false)) {
                    Properties properties = new Properties();
                    FSDataInputStream file = fs.open(new Path(conf.get("outputDir")
                            + "/social_network/updateStream_" + i + "_" + j + "_person.properties"));
                    properties.load(file);
                    if (properties.getProperty("min_write_event_start_time") != null) {
                        Long auxMin = Long.parseLong(properties.getProperty("min_write_event_start_time"));
                        min = auxMin < min ? auxMin : min;
                        Long auxMax = Long.parseLong(properties.getProperty("max_write_event_start_time"));
                        max = auxMax > max ? auxMax : max;
                        numEvents += Long.parseLong(properties.getProperty("num_events"));
                    }
                    file.close();
                    file = fs.open(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_"
                            + j + "_forum.properties"));
                    properties.load(file);
                    if (properties.getProperty("min_write_event_start_time") != null) {
                        Long auxMin = Long.parseLong(properties.getProperty("min_write_event_start_time"));
                        min = auxMin < min ? auxMin : min;
                        Long auxMax = Long.parseLong(properties.getProperty("max_write_event_start_time"));
                        max = auxMax > max ? auxMax : max;
                        numEvents += Long.parseLong(properties.getProperty("num_events"));
                    }
                    file.close();
                    fs.delete(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_" + j
                            + "_person.properties"), true);
                    fs.delete(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_" + j
                            + "_forum.properties"), true);
                }
            }
        }
        if (conf.getBoolean("updateStreams", false)) {
            OutputStream output = fs
                    .create(new Path(conf.get("outputDir") + "/social_network/updateStream.properties"));
            output.write(new String("ldbc.snb.interactive.gct_delta_duration:" + conf.get("deltaTime") + "\n")
                    .getBytes());
            output.write(new String("ldbc.snb.interactive.min_write_event_start_time:" + min + "\n").getBytes());
            output.write(new String("ldbc.snb.interactive.max_write_event_start_time:" + max + "\n").getBytes());
            output.write(new String("ldbc.snb.interactive.update_interleave:" + (max - min) / numEvents + "\n")
                    .getBytes());
            output.write(new String("ldbc.snb.interactive.num_events:" + numEvents).getBytes());
            output.close();
        }
    }

    /// --------------- Sixth job: Materialize the friends lists ----------------
    /*
    Job job6 = new Job(conf, "Dump the friends lists");
    job6.setMapOutputKeyClass(ComposedKey.class);
    job6.setMapOutputValueClass(ReducedUserProfile.class);
    job6.setOutputKeyClass(ComposedKey.class);
    job6.setOutputValueClass(ReducedUserProfile.class);
    job6.setJarByClass(HadoopBlockMapper.class);
    job6.setMapperClass(HadoopBlockMapper.class);
    job6.setReducerClass(FriendListOutputReducer.class);
    job6.setNumReduceTasks(numThreads);
    job6.setInputFormatClass(SequenceFileInputFormat.class);
    job6.setOutputFormatClass(SequenceFileOutputFormat.class);
    job6.setPartitionerClass(HadoopBlockPartitioner.class);
    job6.setSortComparatorClass(ComposedKeyComparator.class);
    job6.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job6, new Path(hadoopDir + "/sibSorting4"));
    FileOutputFormat.setOutputPath(job6, new Path(hadoopDir + "/job6"));
    printProgress("Starting: Materialize friends for substitution parameters");
    int resMaterializeFriends = job6.waitForCompletion(true) ? 0 : 1;
    fs.delete(new Path(hadoopDir + "/sibSorting3"), true);
    */

    long end = System.currentTimeMillis();
    System.out.println(((end - start) / 1000) + " total seconds");
    for (int i = 0; i < numThreads; ++i) {
        fs.copyToLocalFile(new Path(socialNetDir + "/m" + i + "factors.txt"), new Path("./"));
        fs.copyToLocalFile(new Path(socialNetDir + "/m0friendList" + i + ".csv"), new Path("./"));
    }
    return 0;
}
From source file:libra.common.hadoop.io.format.fasta.FastaKmerInputFormat.java
License:Apache License
public static void setKmerSize(Configuration conf, int k) {
    conf.setInt(CONF_KMER_SIZE, k);
}
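A driver would call this static helper before submitting the job; a minimal hedged sketch (the k value, job name, and surrounding job wiring are illustrative assumptions, not taken from the library):

import libra.common.hadoop.io.format.fasta.FastaKmerInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class KmerJobSetup {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Record the k-mer size in the job configuration (21 is an arbitrary example value).
        FastaKmerInputFormat.setKmerSize(conf, 21);
        Job job = Job.getInstance(conf, "fasta k-mer job");   // placeholder job name
        job.setInputFormatClass(FastaKmerInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0])); // input path supplied by the caller
        // Mapper, reducer, and output settings would follow here as in the other examples.
    }
}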
From source file:ml.shifu.guagua.mapreduce.example.nn.NNMapReduceClient.java
License:Apache License
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 6) {
        throw new IllegalArgumentException(
                "NNMapReduceClient: Must have at least 6 arguments <guagua.iteration.count> <guagua.zk.servers> <nn.test.scale> <nn.record.scales> <input path or folder> <guagua.nn.output>. ");
    }

    conf.set(GuaguaConstants.WORKER_COMPUTABLE_CLASS, NNWorker.class.getName());
    conf.set(GuaguaConstants.MASTER_COMPUTABLE_CLASS, NNMaster.class.getName());
    conf.set(GuaguaConstants.GUAGUA_ITERATION_COUNT, otherArgs[0]);
    conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, otherArgs[1]);
    conf.set(NNConstants.NN_TEST_SCALE, otherArgs[2]);
    conf.set(NNConstants.NN_RECORD_SCALE, otherArgs[3]);
    conf.set(GuaguaConstants.GUAGUA_MASTER_RESULT_CLASS, NNParams.class.getName());
    conf.set(GuaguaConstants.GUAGUA_WORKER_RESULT_CLASS, NNParams.class.getName());

    conf.setInt(NNConstants.GUAGUA_NN_INPUT_NODES, NNConstants.GUAGUA_NN_DEFAULT_INPUT_NODES);
    conf.setInt(NNConstants.GUAGUA_NN_HIDDEN_NODES, NNConstants.GUAGUA_NN_DEFAULT_HIDDEN_NODES);
    conf.setInt(NNConstants.GUAGUA_NN_OUTPUT_NODES, NNConstants.GUAGUA_NN_DEFAULT_OUTPUT_NODES);
    conf.set(NNConstants.GUAGUA_NN_ALGORITHM, NNConstants.GUAGUA_NN_DEFAULT_ALGORITHM);
    conf.setInt(NNConstants.GUAGUA_NN_THREAD_COUNT, NNConstants.GUAGUA_NN_DEFAULT_THREAD_COUNT);
    conf.set(NNConstants.GUAGUA_NN_LEARNING_RATE, NNConstants.GUAGUA_NN_DEFAULT_LEARNING_RATE);
    conf.set(NNConstants.GUAGUA_NN_OUTPUT, otherArgs[5]);
    conf.set(GuaguaConstants.GUAGUA_MASTER_INTERCEPTERS, NNOutput.class.getName());

    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setInt(GuaguaMapReduceConstants.MAPRED_TASK_TIMEOUT, 3600000);
    conf.setInt(GuaguaMapReduceConstants.IO_SORT_MB, 0);

    Job job = new Job(conf, "Guagua NN Master-Workers Job");
    job.setJarByClass(NNMapReduceClient.class);
    job.setMapperClass(GuaguaMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(NNInputFormat.class);
    job.setOutputFormatClass(GuaguaOutputFormat.class);
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path(otherArgs[4]));
    job.waitForCompletion(true);
}
From source file:ml.shifu.guagua.mapreduce.example.sum.SumMapReduceClient.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 3) {
        throw new IllegalArgumentException(
                "SumMapReduceClient: Must have at least 3 arguments <guagua.iteration.count> <guagua.zk.servers> <input path or folder>. ");
    }

    conf.set(GuaguaConstants.WORKER_COMPUTABLE_CLASS, SumWorker.class.getName());
    conf.set(GuaguaConstants.MASTER_COMPUTABLE_CLASS, SumMaster.class.getName());
    conf.set(GuaguaConstants.GUAGUA_ITERATION_COUNT, otherArgs[0]);
    conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, otherArgs[1]);
    conf.setInt(GuaguaConstants.GUAGUA_ZK_SESSION_TIMEOUT, 300 * 1000);
    conf.setInt(GuaguaConstants.GUAGUA_ZK_MAX_ATTEMPTS, 5);
    conf.setInt(GuaguaConstants.GUAGUA_ZK_RETRY_WAIT_MILLS, 1000);

    // if you set result class to hadoop Writable, you must use GuaguaWritableSerializer, this can be avoided by
    // using GuaguaMapReduceClient
    conf.set(GuaguaConstants.GUAGUA_MASTER_RESULT_CLASS, LongWritable.class.getName());
    conf.set(GuaguaConstants.GUAGUA_WORKER_RESULT_CLASS, LongWritable.class.getName());
    conf.set(GuaguaConstants.GUAGUA_MASTER_IO_SERIALIZER, "ml.shifu.guagua.mapreduce.GuaguaWritableSerializer");
    conf.set(GuaguaConstants.GUAGUA_WORKER_IO_SERIALIZER, "ml.shifu.guagua.mapreduce.GuaguaWritableSerializer");

    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setInt(GuaguaMapReduceConstants.MAPRED_TASK_TIMEOUT, 3600000);
    conf.setInt(GuaguaMapReduceConstants.IO_SORT_MB, 0);

    Job job = new Job(conf, "Guagua Sum Master-Workers Job");
    job.setJarByClass(SumMapReduceClient.class);
    job.setMapperClass(GuaguaMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(GuaguaInputFormat.class);
    job.setOutputFormatClass(GuaguaOutputFormat.class);
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path(otherArgs[2]));
    job.waitForCompletion(true);
}