Example usage for org.apache.hadoop.conf.Configuration.setInt

List of usage examples for org.apache.hadoop.conf.Configuration.setInt

Introduction

On this page you can find example usage for org.apache.hadoop.conf.Configuration.setInt.

Prototype

public void setInt(String name, int value) 

Document

Set the value of the name property to an int.
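
A minimal sketch of the round trip (the property name "examples.block.size" is a hypothetical placeholder, not taken from the snippets below): setInt stores an int under a property name, and getInt reads it back, falling back to a supplied default when the property is unset. It only assumes hadoop-common on the classpath.

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store an int under the given property name.
        conf.setInt("examples.block.size", 64);

        // Read it back; the second argument is the default used when the property is unset.
        int blockSize = conf.getInt("examples.block.size", 128);
        System.out.println(blockSize); // prints 64
    }
}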

Usage

From source file:jobs.MatrixBlockSum.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    conf.setInt("mapred.reduce.tasks", Integer.parseInt(args[2]));

    if (args.length >= 4)
        conf.setInt("SR", Integer.parseInt(args[3]));

    if (args.length >= 5)
        conf.setInt("SC", Integer.parseInt(args[4]));

    conf.set("RESNAME", args[1]);

    //heap space - again - should be passed with the -D option
    conf.set("mapred.map.child.java.opts", "-Xmx3G");
    conf.set("mapred.reduce.child.java.opts", "-Xmx3G");

    //job
    Job job1 = new Job(conf, "MatrixBlockSum");
    job1.setJarByClass(MatrixBlockSum.class);

    // No Map
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(SequenceFileInputFormat.class);

    //Reduce       
    job1.setCombinerClass(MatrixBlockSumReducer.class);
    job1.setReducerClass(MatrixBlockSumReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(MatrixBlock.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    //job1.setOutputFormatClass(TextOutputFormat.class);

    return job1.waitForCompletion(false) ? 0 : 1;
}

From source file:jobs.MatrixBlockToTextRows.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    conf.setInt("N", Integer.parseInt(args[2]));
    conf.setInt("NC", Integer.parseInt(args[3]));
    conf.setInt("SC", Integer.parseInt(args[4]));

    conf.setInt("mapred.reduce.tasks", Integer.parseInt(args[5]));

    conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");

    //heap space - again - should be passed with the -D option
    conf.set("mapred.map.child.java.opts", "-Xmx3G");
    conf.set("mapred.reduce.child.java.opts", "-Xmx3G");

    //job
    Job job1 = new Job(conf, "MatrixBlockToTextRows");
    job1.setJarByClass(MatrixBlockToTextRows.class);

    // No Map
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setMapperClass(MatrixBlockRowPartMapper.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(RowPart.class);

    //Reduce       
    job1.setReducerClass(RowPartTextRowsReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(MatrixBlock.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    //job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    job1.setOutputFormatClass(TextOutputFormat.class);

    return job1.waitForCompletion(true) ? 0 : 1;
}

From source file:jobs.MatrixBlockTraceMult.java

License:Apache License

public int run(String[] args) throws Exception {

    Configuration conf = getConf();

    conf.setFloat("SCALAR", Float.parseFloat(args[3]));

    conf.setBoolean("LTRANS", Boolean.parseBoolean(args[4]));
    conf.setBoolean("RTRANS", Boolean.parseBoolean(args[5]));

    //set # of reducers
    conf.setInt("mapred.reduce.tasks", Integer.parseInt(args[6]));

    //Get optional blocksize parameters
    if (args.length >= 8)
        conf.setInt("SRL", Integer.parseInt(args[7]));

    if (args.length >= 9)
        conf.setInt("SCL", Integer.parseInt(args[8]));

    if (args.length >= 10)
        conf.setInt("SRR", Integer.parseInt(args[9]));

    if (args.length >= 11)
        conf.setInt("SCR", Integer.parseInt(args[10]));

    conf.set("LEFTNAME", args[0]);
    conf.set("RIGHTNAME", args[1]);

    //heap space - should be entered with the -D format and not dealt with by the program.    
    conf.set("mapred.map.child.java.opts", "-Xmx3G");
    conf.set("mapred.reduce.child.java.opts", "-Xmx3G");

    //job
    Job job1 = new Job(conf, "MatrixBlockTraceMult");
    job1.setJarByClass(MatrixBlockMult.class);

    // Map
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    FileInputFormat.addInputPath(job1, new Path(args[1]));
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setMapperClass(SquareBlockTraceMultiplicationGroupingMapper.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(MatrixBlock.class);

    //Reduce       
    job1.setReducerClass(SquareMatrixBlockTraceMultReducer.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(DoubleWritable.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[2]));
    job1.setOutputFormatClass(TextOutputFormat.class);

    return job1.waitForCompletion(false) ? 0 : 1;
}

From source file:kafka.bridge.hadoop.KafkaOutputFormat.java

License:Apache License

@Override
public RecordWriter<NullWritable, W> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path outputPath = getOutputPath(context);
    if (outputPath == null)
        throw new IllegalArgumentException("no kafka output url specified");
    URI uri = outputPath.toUri();
    Configuration job = context.getConfiguration();

    final String topic = uri.getPath().substring(1); // ignore the initial '/' in the path

    final int queueSize = job.getInt("kafka.output.queue_size", KAFKA_QUEUE_SIZE);
    final int timeout = job.getInt("kafka.output.connect_timeout", KAFKA_PRODUCER_CONNECT_TIMEOUT);
    final int interval = job.getInt("kafka.output.reconnect_interval", KAFKA_PRODUCER_RECONNECT_INTERVAL);
    final int bufSize = job.getInt("kafka.output.bufsize", KAFKA_PRODUCER_BUFFER_SIZE);
    final int maxSize = job.getInt("kafka.output.max_msgsize", KAFKA_PRODUCER_MAX_MESSAGE_SIZE);

    job.set("kafka.output.server", String.format("%s:%d", uri.getHost(), uri.getPort()));
    job.set("kafka.output.topic", topic);
    job.setInt("kafka.output.queue_size", queueSize);
    job.setInt("kafka.output.connect_timeout", timeout);
    job.setInt("kafka.output.reconnect_interval", interval);
    job.setInt("kafka.output.bufsize", bufSize);
    job.setInt("kafka.output.max_msgsize", maxSize);

    if (uri.getHost().isEmpty())
        throw new IllegalArgumentException("missing kafka server");
    if (uri.getPath().isEmpty())
        throw new IllegalArgumentException("missing kafka topic");

    Properties props = new Properties();
    props.setProperty("host", uri.getHost());
    props.setProperty("port", Integer.toString(uri.getPort()));
    props.setProperty("buffer.size", Integer.toString(bufSize));
    props.setProperty("connect.timeout.ms", Integer.toString(timeout));
    props.setProperty("reconnect.interval", Integer.toString(interval));
    props.setProperty("max.message.size", Integer.toString(maxSize));

    SyncProducer producer = new SyncProducer(new SyncProducerConfig(props));
    return new KafkaRecordWriter<W>(producer, topic, queueSize);
}

From source file:kogiri.hadoop.common.config.ClusterConfiguration.java

License:Open Source License

@JsonIgnore
public void configureTo(Configuration conf) {
    for (ConfigurationParam param : this.externalParams) {
        if (param.isValueInt()) {
            conf.setInt(param.getKey(), param.getValueAsInt());
        } else {
            conf.set(param.getKey(), param.getValue());
        }
    }
}

From source file:ldbc.socialnet.dbgen.generator.MRGenerateUsers.java

License:Open Source License

public int runGenerateJob(Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(conf);
    String hadoopDir = new String(conf.get("outputDir") + "/hadoop");
    String socialNetDir = new String(conf.get("outputDir") + "/social_network");
    int numThreads = Integer.parseInt(conf.get("numThreads"));
    System.out.println("NUMBER OF THREADS " + numThreads);

    /// --------- Execute Jobs ------
    long start = System.currentTimeMillis();

    /// --------------- First job Generating users----------------
    printProgress("Starting: Person generation");
    conf.set("pass", Integer.toString(0));
    Job job = new Job(conf, "SIB Generate Users & 1st Dimension");
    job.setMapOutputKeyClass(TupleKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(GenerateUsersMapper.class);
    job.setMapperClass(GenerateUsersMapper.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(NLineInputFormat.class);
    conf.setInt("mapred.line.input.format.linespermap", 1);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir) + "/mrInputFile");
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib"));
    job.waitForCompletion(true);

    /// --------------- Sorting by first dimension  ----------------
    printProgress("Starting: Sorting by first dimension");
    HadoopFileRanker fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib", hadoopDir + "/sibSorting");
    fs.delete(new Path(hadoopDir + "/sib"), true);

    /// --------------- job Generating First dimension Friendships  ----------------
    printProgress("Starting: Friendship generation 1.");
    conf.set("pass", Integer.toString(0));
    conf.set("dimension", Integer.toString(1));
    job = new Job(conf, "SIB Generate Friendship - Interest");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(DimensionReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);

    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib2"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sibSorting"), true);

    /// --------------- Sorting phase 2  ----------------
    printProgress("Starting: Sorting by second dimension");
    fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib2", hadoopDir + "/sibSorting2");
    fs.delete(new Path(hadoopDir + "/sib2"), true);

    /// --------------- Second job Generating Friendships  ----------------
    printProgress("Starting: Friendship generation 2.");
    conf.set("pass", Integer.toString(1));
    conf.set("dimension", Integer.toString(2));
    job = new Job(conf, "SIB Generate Friendship - Interest");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(DimensionReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting2"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib3"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sibSorting2"), true);

    /// --------------- Sorting phase 3--------------
    printProgress("Starting: Sorting by third dimension");
    fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib3", hadoopDir + "/sibSorting3");
    fs.delete(new Path(hadoopDir + "/sib3"), true);

    /// --------------- Third job Generating Friendships----------------
    printProgress("Starting: Friendship generation 3.");
    conf.set("pass", Integer.toString(2));
    conf.set("dimension", Integer.toString(2));
    job = new Job(conf, "SIB Generate Friendship - Random");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(DimensionReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting3"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib4"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sibSorting3"), true);

    /// --------------- Sorting phase 3--------------

    printProgress("Starting: Sorting by third dimension (for activity generation)");
    fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib4", hadoopDir + "/sibSorting4");
    fs.delete(new Path(hadoopDir + "/sib4"), true);

    /// --------------- Fourth job: Serialize static network ----------------

    printProgress("Starting: Generating person activity");
    job = new Job(conf, "Generate user activity");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(UserActivityReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting4"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib5"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sib5"), true);

    int numEvents = 0;
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;

    if (conf.getBoolean("updateStreams", false)) {
        for (int i = 0; i < numThreads; ++i) {
            int numPartitions = conf.getInt("numUpdatePartitions", 1);
            for (int j = 0; j < numPartitions; ++j) {
                /// --------------- Fifth job: Sort update streams ----------------
                conf.setInt("mapred.line.input.format.linespermap", 1000000);
                conf.setInt("reducerId", i);
                conf.setInt("partitionId", j);
                conf.set("streamType", "forum");
                Job jobForum = new Job(conf, "Soring update streams " + j + " of reducer " + i);
                jobForum.setMapOutputKeyClass(LongWritable.class);
                jobForum.setMapOutputValueClass(Text.class);
                jobForum.setOutputKeyClass(LongWritable.class);
                jobForum.setOutputValueClass(Text.class);
                jobForum.setJarByClass(UpdateEventMapper.class);
                jobForum.setMapperClass(UpdateEventMapper.class);
                jobForum.setReducerClass(UpdateEventReducer.class);
                jobForum.setNumReduceTasks(1);
                jobForum.setInputFormatClass(SequenceFileInputFormat.class);
                jobForum.setOutputFormatClass(SequenceFileOutputFormat.class);
                jobForum.setPartitionerClass(UpdateEventPartitioner.class);
                FileInputFormat.addInputPath(jobForum,
                        new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_forum"));
                FileOutputFormat.setOutputPath(jobForum, new Path(hadoopDir + "/sibEnd"));
                printProgress("Starting: Sorting update streams");
                jobForum.waitForCompletion(true);
                fs.delete(new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_forum"), false);
                fs.delete(new Path(hadoopDir + "/sibEnd"), true);

                conf.setInt("mapred.line.input.format.linespermap", 1000000);
                conf.setInt("reducerId", i);
                conf.setInt("partitionId", j);
                conf.set("streamType", "person");
                Job jobPerson = new Job(conf, "Soring update streams " + j + " of reducer " + i);
                jobPerson.setMapOutputKeyClass(LongWritable.class);
                jobPerson.setMapOutputValueClass(Text.class);
                jobPerson.setOutputKeyClass(LongWritable.class);
                jobPerson.setOutputValueClass(Text.class);
                jobPerson.setJarByClass(UpdateEventMapper.class);
                jobPerson.setMapperClass(UpdateEventMapper.class);
                jobPerson.setReducerClass(UpdateEventReducer.class);
                jobPerson.setNumReduceTasks(1);
                jobPerson.setInputFormatClass(SequenceFileInputFormat.class);
                jobPerson.setOutputFormatClass(SequenceFileOutputFormat.class);
                jobPerson.setPartitionerClass(UpdateEventPartitioner.class);
                FileInputFormat.addInputPath(jobPerson,
                        new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_person"));
                FileOutputFormat.setOutputPath(jobPerson, new Path(hadoopDir + "/sibEnd"));
                printProgress("Starting: Sorting update streams");
                jobPerson.waitForCompletion(true);
                fs.delete(new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_person"), false);
                fs.delete(new Path(hadoopDir + "/sibEnd"), true);

                if (conf.getBoolean("updateStreams", false)) {
                    Properties properties = new Properties();
                    FSDataInputStream file = fs.open(new Path(conf.get("outputDir")
                            + "/social_network/updateStream_" + i + "_" + j + "_person.properties"));
                    properties.load(file);
                    if (properties.getProperty("min_write_event_start_time") != null) {
                        Long auxMin = Long.parseLong(properties.getProperty("min_write_event_start_time"));
                        min = auxMin < min ? auxMin : min;
                        Long auxMax = Long.parseLong(properties.getProperty("max_write_event_start_time"));
                        max = auxMax > max ? auxMax : max;
                        numEvents += Long.parseLong(properties.getProperty("num_events"));
                    }
                    file.close();
                    file = fs.open(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_"
                            + j + "_forum.properties"));
                    properties.load(file);
                    if (properties.getProperty("min_write_event_start_time") != null) {
                        Long auxMin = Long.parseLong(properties.getProperty("min_write_event_start_time"));
                        min = auxMin < min ? auxMin : min;
                        Long auxMax = Long.parseLong(properties.getProperty("max_write_event_start_time"));
                        max = auxMax > max ? auxMax : max;
                        numEvents += Long.parseLong(properties.getProperty("num_events"));
                    }
                    file.close();
                    fs.delete(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_" + j
                            + "_person.properties"), true);
                    fs.delete(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_" + j
                            + "_forum.properties"), true);
                }
            }
        }

        if (conf.getBoolean("updateStreams", false)) {
            OutputStream output = fs
                    .create(new Path(conf.get("outputDir") + "/social_network/updateStream.properties"));
            output.write(new String("ldbc.snb.interactive.gct_delta_duration:" + conf.get("deltaTime") + "\n")
                    .getBytes());
            output.write(
                    new String("ldbc.snb.interactive.min_write_event_start_time:" + min + "\n").getBytes());
            output.write(
                    new String("ldbc.snb.interactive.max_write_event_start_time:" + max + "\n").getBytes());
            output.write(new String("ldbc.snb.interactive.update_interleave:" + (max - min) / numEvents + "\n")
                    .getBytes());
            output.write(new String("ldbc.snb.interactive.num_events:" + numEvents).getBytes());
            output.close();
        }
    }

    /// --------------- Sixth job: Materialize the friends lists ----------------
    /*        Job job6 = new Job(conf,"Dump the friends lists");
            job6.setMapOutputKeyClass(ComposedKey.class);
            job6.setMapOutputValueClass(ReducedUserProfile.class);
            job6.setOutputKeyClass(ComposedKey.class);
            job6.setOutputValueClass(ReducedUserProfile.class);
            job6.setJarByClass(HadoopBlockMapper.class);
            job6.setMapperClass(HadoopBlockMapper.class);
            job6.setReducerClass(FriendListOutputReducer.class);
            job6.setNumReduceTasks(numThreads);
            job6.setInputFormatClass(SequenceFileInputFormat.class);
            job6.setOutputFormatClass(SequenceFileOutputFormat.class);
            job6.setPartitionerClass(HadoopBlockPartitioner.class);
            job6.setSortComparatorClass(ComposedKeyComparator.class);
            job6.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
            FileInputFormat.setInputPaths(job6, new Path(hadoopDir + "/sibSorting4"));
            FileOutputFormat.setOutputPath(job6, new Path(hadoopDir + "/job6") );
            
            
            printProgress("Starting: Materialize friends for substitution parameters");
            int resMaterializeFriends = job6.waitForCompletion(true) ? 0 : 1;
            fs.delete(new Path(hadoopDir + "/sibSorting3"),true);
            */

    long end = System.currentTimeMillis();
    System.out.println(((end - start) / 1000) + " total seconds");
    for (int i = 0; i < numThreads; ++i) {
        fs.copyToLocalFile(new Path(socialNetDir + "/m" + i + "factors.txt"), new Path("./"));
        fs.copyToLocalFile(new Path(socialNetDir + "/m0friendList" + i + ".csv"), new Path("./"));
    }
    return 0;
}

From source file:libra.common.hadoop.io.format.fasta.FastaKmerInputFormat.java

License:Apache License

public static void setKmerSize(Configuration conf, int k) {
    conf.setInt(CONF_KMER_SIZE, k);
}

From source file:ml.shifu.guagua.mapreduce.example.nn.NNMapReduceClient.java

License:Apache License

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 6) {
        throw new IllegalArgumentException(
                "NNMapReduceClient: Must have at least 5 arguments <guagua.iteration.count> <guagua.zk.servers> <nn.test.scale> <nn.record.scales> <input path or folder> <guagua.nn.output>. ");
    }
    conf.set(GuaguaConstants.WORKER_COMPUTABLE_CLASS, NNWorker.class.getName());
    conf.set(GuaguaConstants.MASTER_COMPUTABLE_CLASS, NNMaster.class.getName());
    conf.set(GuaguaConstants.GUAGUA_ITERATION_COUNT, otherArgs[0]);

    conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, otherArgs[1]);

    conf.set(NNConstants.NN_TEST_SCALE, otherArgs[2]);
    conf.set(NNConstants.NN_RECORD_SCALE, otherArgs[3]);

    conf.set(GuaguaConstants.GUAGUA_MASTER_RESULT_CLASS, NNParams.class.getName());
    conf.set(GuaguaConstants.GUAGUA_WORKER_RESULT_CLASS, NNParams.class.getName());

    conf.setInt(NNConstants.GUAGUA_NN_INPUT_NODES, NNConstants.GUAGUA_NN_DEFAULT_INPUT_NODES);
    conf.setInt(NNConstants.GUAGUA_NN_HIDDEN_NODES, NNConstants.GUAGUA_NN_DEFAULT_HIDDEN_NODES);
    conf.setInt(NNConstants.GUAGUA_NN_OUTPUT_NODES, NNConstants.GUAGUA_NN_DEFAULT_OUTPUT_NODES);
    conf.set(NNConstants.GUAGUA_NN_ALGORITHM, NNConstants.GUAGUA_NN_DEFAULT_ALGORITHM);
    conf.setInt(NNConstants.GUAGUA_NN_THREAD_COUNT, NNConstants.GUAGUA_NN_DEFAULT_THREAD_COUNT);
    conf.set(NNConstants.GUAGUA_NN_LEARNING_RATE, NNConstants.GUAGUA_NN_DEFAULT_LEARNING_RATE);

    conf.set(NNConstants.GUAGUA_NN_OUTPUT, otherArgs[5]);

    conf.set(GuaguaConstants.GUAGUA_MASTER_INTERCEPTERS, NNOutput.class.getName());

    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setInt(GuaguaMapReduceConstants.MAPRED_TASK_TIMEOUT, 3600000);
    conf.setInt(GuaguaMapReduceConstants.IO_SORT_MB, 0);

    Job job = new Job(conf, "Guagua NN Master-Workers Job");
    job.setJarByClass(NNMapReduceClient.class);
    job.setMapperClass(GuaguaMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(NNInputFormat.class);
    job.setOutputFormatClass(GuaguaOutputFormat.class);
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path(otherArgs[4]));
    job.waitForCompletion(true);
}

From source file:ml.shifu.guagua.mapreduce.example.sum.SumMapReduceClient.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 3) {
        throw new IllegalArgumentException(
                "NNMapReduceClient: Must have at least 2 arguments <guagua.iteration.count> <guagua.zk.servers> <input path or folder>. ");
    }
    conf.set(GuaguaConstants.WORKER_COMPUTABLE_CLASS, SumWorker.class.getName());
    conf.set(GuaguaConstants.MASTER_COMPUTABLE_CLASS, SumMaster.class.getName());
    conf.set(GuaguaConstants.GUAGUA_ITERATION_COUNT, otherArgs[0]);

    conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, otherArgs[1]);
    conf.setInt(GuaguaConstants.GUAGUA_ZK_SESSION_TIMEOUT, 300 * 1000);
    conf.setInt(GuaguaConstants.GUAGUA_ZK_MAX_ATTEMPTS, 5);
    conf.setInt(GuaguaConstants.GUAGUA_ZK_RETRY_WAIT_MILLS, 1000);

    // if you set result class to hadoop Writable, you must use GuaguaWritableSerializer, this can be avoided by
    // using GuaguaMapReduceClient
    conf.set(GuaguaConstants.GUAGUA_MASTER_RESULT_CLASS, LongWritable.class.getName());
    conf.set(GuaguaConstants.GUAGUA_WORKER_RESULT_CLASS, LongWritable.class.getName());
    conf.set(GuaguaConstants.GUAGUA_MASTER_IO_SERIALIZER, "ml.shifu.guagua.mapreduce.GuaguaWritableSerializer");
    conf.set(GuaguaConstants.GUAGUA_WORKER_IO_SERIALIZER, "ml.shifu.guagua.mapreduce.GuaguaWritableSerializer");

    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setInt(GuaguaMapReduceConstants.MAPRED_TASK_TIMEOUT, 3600000);
    conf.setInt(GuaguaMapReduceConstants.IO_SORT_MB, 0);

    Job job = new Job(conf, "Guagua Sum Master-Workers Job");
    job.setJarByClass(SumMapReduceClient.class);
    job.setMapperClass(GuaguaMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(GuaguaInputFormat.class);
    job.setOutputFormatClass(GuaguaOutputFormat.class);
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path(otherArgs[2]));
    job.waitForCompletion(true);
}