List of usage examples for org.apache.hadoop.mapreduce.Job.setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
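Before the full examples below, here is a minimal sketch (not taken from any of the source files listed) of the typical reason for calling setMapOutputValueClass: the mapper's value type differs from the job's final output value type, so the map-side types must be declared explicitly. Per the signature above, the call throws IllegalStateException if the job has already been submitted. Class names and paths here are illustrative only.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountExample {

    // Mapper emits (Text, IntWritable): one count per token.
    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String word : value.toString().split("\\s+")) {
                if (!word.isEmpty()) {
                    context.write(new Text(word), ONE);
                }
            }
        }
    }

    // Reducer emits (Text, Text): the final value type differs from the map output value type.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new Text("count=" + sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count example");
        job.setJarByClass(WordCountExample.class);
        job.setMapperClass(TokenMapper.class);
        job.setReducerClass(SumReducer.class);

        // Map output types differ from the final output types, so both pairs
        // must be declared. Without setMapOutputValueClass, the framework
        // would assume the map output value type equals the final output
        // value type (Text) and fail at runtime on the IntWritable values.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}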
From source file:ImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {
    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }

            // See if a non-default Mapper was set.
            String mapperClassName = conf.get(MAPPER_CONF_KEY);
            Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

            TableName tableName = TableName.valueOf(args[0]);
            Path inputDir = new Path(args[1]);

            // Set filter.
            conf.set(EASTCOM_FILTER_PARAMS, args[3]);
            conf.set(EASTCOM_FILTER_DEFINE, args[4]);

            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(mapperClass);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(mapperClass);
            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            String[] columns = conf.getStrings(COLUMNS_CONF_KEY);
            if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
                String fileLoc = conf.get(CREDENTIALS_LOCATION);
                Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
                job.getCredentials().addAll(cred);
            }

            if (hfileOutPath != null) {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
                        LOG.warn(errorMsg);
                        // TODO: this is backwards. Instead of depending on the existence of a table,
                        // create a sane splits file for HFileOutputFormat based on data sampling.
                        createTable(admin, tableName, columns);
                    } else {
                        LOG.error(errorMsg);
                        throw new TableNotFoundException(errorMsg);
                    }
                }
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
                    // If no.strict is false, then check the column families.
                    if (!noStrict) {
                        ArrayList<String> unmatchedFamilies = new ArrayList<String>();
                        Set<String> cfSet = getColumnFamilies(columns);
                        HTableDescriptor tDesc = table.getTableDescriptor();
                        for (String cf : cfSet) {
                            if (tDesc.getFamily(Bytes.toBytes(cf)) == null) {
                                unmatchedFamilies.add(cf);
                            }
                        }
                        if (unmatchedFamilies.size() > 0) {
                            ArrayList<String> familyNames = new ArrayList<String>();
                            for (HColumnDescriptor family : table.getTableDescriptor().getFamilies()) {
                                familyNames.add(family.getNameAsString());
                            }
                            String msg = "Column Families " + unmatchedFamilies + " specified in "
                                    + COLUMNS_CONF_KEY + " does not match with any of the table " + tableName
                                    + " column families " + familyNames + ".\n"
                                    + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
                                    + "=true.\n";
                            usage(msg);
                            System.exit(-1);
                        }
                    }
                    job.setReducerClass(PutSortReducer.class);
                    Path outputDir = new Path(hfileOutPath);
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    if (mapperClass.equals(TsvImporterTextMapper.class)) {
                        job.setMapOutputValueClass(Text.class);
                        job.setReducerClass(TextSortReducer.class);
                    } else {
                        job.setMapOutputValueClass(Put.class);
                        job.setCombinerClass(PutCombiner.class);
                    }
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    LOG.error(errorMsg);
                    throw new TableNotFoundException(errorMsg);
                }
                if (mapperClass.equals(TsvImporterTextMapper.class)) {
                    usage(TsvImporterTextMapper.class.toString()
                            + " should not be used for non bulkloading case. use "
                            + TsvImporterMapper.class.toString()
                            + " or custom mapper whose value type is Put.");
                    System.exit(-1);
                }
                // No reducers. Just write straight to table. Call initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);
            }
            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Function.class /* Guava used by TsvParser */);
        }
    }
    return job;
}
From source file:ReverseIndexer.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: ReverseIndexer <output> <input file(s)>"); System.exit(2);//w ww .j a v a 2 s . c o m } Job job = new Job(conf, "reverse indexer"); job.setJarByClass(ReverseIndexer.class); job.setMapperClass(IndexerMapper.class); job.setReducerClass(IndexerReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LineRecWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); for (int i = 1; i < otherArgs.length; i++) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[0])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:Authset.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2);/* ww w . j ava 2 s .c o m*/ } Job job = new Job(conf, "word count"); job.setJarByClass(Authset.class); job.setMapperClass(TokenizerMapper.class); //job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(NullWritable.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setNumReduceTasks(10); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); //DistributedCache.addCacheFile(new Path(otherArgs[0]).toUri(), //job.getConfiguration()); //DistributedCache.setLocalFiles(job.getConfiguration(), otherArgs[0]); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:DumpRecordsExtended.java
License:Apache License
/** * Runs this tool.//w w w. jav a 2 s.c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); LOG.info("Tool name: " + DumpRecordsExtended.class.getSimpleName()); LOG.info(" - input: " + inputPath); LOG.info(" - output: " + outputPath); Configuration conf = new Configuration(); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); Job job = Job.getInstance(conf); job.setJobName(DumpRecordsExtended.class.getSimpleName()); job.setJarByClass(DumpRecordsExtended.class); job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); // Delete the output directory if it exists already. FileSystem.get(conf).delete(new Path(outputPath), true); job.waitForCompletion(true); return 0; }
From source file:StripesPMI_nocombiner.java
License:Apache License
/** * Runs this tool.//w w w. j a va2 s .c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT) + "_TMP";// cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + StripesPMI_nocombiner.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); Job job_first = Job.getInstance(getConf()); job_first.setJobName(StripesPMI_nocombiner.class.getSimpleName()); job_first.setJarByClass(StripesPMI_nocombiner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); job_first.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job_first, new Path(inputPath)); FileOutputFormat.setOutputPath(job_first, new Path(outputPath)); job_first.setMapOutputKeyClass(Text.class); job_first.setMapOutputValueClass(String2IntOpenHashMapWritable.class); job_first.setOutputKeyClass(PairOfStrings.class);// Text.class);// PairOfStrings.class); job_first.setOutputValueClass(DoubleWritable.class); job_first.setOutputFormatClass(TextOutputFormat.class);// changed job_first.setMapperClass(MyMapper_first.class); // job_first.setCombinerClass(MyCombiner.class); job_first.setReducerClass(MyReducer_first.class); long startTime = System.currentTimeMillis(); job_first.waitForCompletion(true); // ////////////////START.: run the second MR job to just aggregate result//////////////// inputPath = outputPath;// cmdline.getOptionValue(INPUT); outputPath = cmdline.getOptionValue(OUTPUT); Job job_second = Job.getInstance(getConf()); job_second.setJobName(StripesPMI_nocombiner.class.getSimpleName()); job_second.setJarByClass(StripesPMI_nocombiner.class); // Delete the output directory if it exists already. 
outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); job_second.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job_second, new Path(inputPath)); FileOutputFormat.setOutputPath(job_second, new Path(outputPath)); job_second.setMapOutputKeyClass(Text.class); job_second.setMapOutputValueClass(DoubleWritable.class); job_second.setOutputKeyClass(Text.class);// PairOfStrings.class); job_second.setOutputValueClass(DoubleWritable.class); // job_second.setOutputFormatClass(TextOutputFormat.class);// changed job_second.setMapperClass(MyMapper_second.class); // job_second.setCombinerClass(MyCombiner.class); job_second.setReducerClass(MyReducer_second.class); job_second.waitForCompletion(true); // END//////////// System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:BuildInvertedIndex.java
License:Apache License
/** * Runs this tool.//from w w w.j a v a2s . c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool name: " + BuildInvertedIndex.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - num reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(BuildInvertedIndex.class.getSimpleName()); job.setJarByClass(BuildInvertedIndex.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(PairOfInts.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(PairOfWritables.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:RunPageRankSchimmy.java
License:Apache License
private float phase1(String path, int i, int j, int n, boolean useCombiner, boolean useInmapCombiner,
        boolean useRange) throws Exception {
    Configuration conf = getConf();

    String in = path + "/iter" + FORMAT.format(i);
    String out = path + "/iter" + FORMAT.format(j) + "t";
    String outm = out + "-mass";

    FileSystem fs = FileSystem.get(conf);

    // We need to actually count the number of part files to get the number
    // of partitions (because the directory might contain _log).
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(conf).listStatus(new Path(in))) {
        if (s.getPath().getName().contains("part-")) {
            numPartitions++;
        }
    }

    conf.setInt("NodeCount", n);

    Partitioner<IntWritable, Writable> p = null;
    if (useRange) {
        p = new RangePartitioner();
        ((Configurable) p).setConf(conf);
    } else {
        p = new HashPartitioner<IntWritable, Writable>();
    }

    // This is really annoying: the mapping between the partition numbers on
    // disk (i.e., part-XXXX) and what partition the file contains (i.e.,
    // key.hash % #reducer) is arbitrary... so this means that we need to
    // open up each partition, peek inside to find out.
    IntWritable key = new IntWritable();
    PageRankNode value = new PageRankNode();
    FileStatus[] status = fs.listStatus(new Path(in));

    StringBuilder sb = new StringBuilder();

    for (FileStatus f : status) {
        if (!f.getPath().getName().contains("part-")) {
            continue;
        }

        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(f.getPath()));

        reader.next(key, value);
        int np = p.getPartition(key, value, numPartitions);
        reader.close();

        LOG.info(f.getPath() + "\t" + np);
        sb.append(np + "=" + f.getPath() + ";");
    }

    LOG.info(sb.toString().trim());

    LOG.info("PageRankSchimmy: iteration " + j + ": Phase1");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);
    LOG.info(" - nodeCnt: " + n);
    LOG.info(" - useCombiner: " + useCombiner);
    LOG.info(" - useInmapCombiner: " + useInmapCombiner);
    LOG.info(" - numPartitions: " + numPartitions);
    LOG.info(" - useRange: " + useRange);
    LOG.info("computed number of partitions: " + numPartitions);

    int numReduceTasks = numPartitions;

    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    // conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.set("PageRankMassPath", outm);
    conf.set("BasePath", in);
    conf.set("PartitionMapping", sb.toString().trim());

    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    Job job = Job.getInstance(conf);
    job.setJobName("PageRankSchimmy:iteration" + j + ":Phase1");
    job.setJarByClass(RunPageRankSchimmy.class);

    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useInmapCombiner) {
        job.setMapperClass(MapWithInMapperCombiningClass.class);
    } else {
        job.setMapperClass(MapClass.class);
    }

    if (useCombiner) {
        job.setCombinerClass(CombineClass.class);
    }

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    job.setReducerClass(ReduceClass.class);

    FileSystem.get(conf).delete(new Path(out), true);
    FileSystem.get(conf).delete(new Path(outm), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    float mass = Float.NEGATIVE_INFINITY;
    for (FileStatus f : fs.listStatus(new Path(outm))) {
        FSDataInputStream fin = fs.open(f.getPath());
        mass = sumLogProbs(mass, fin.readFloat());
        fin.close();
    }

    return mass;
}
From source file:RunPageRankSchimmy.java
License:Apache License
private void phase2(String path, int i, int j, int n, float missing) throws Exception {
    Configuration conf = getConf();

    LOG.info("missing PageRank mass: " + missing);
    LOG.info("number of nodes: " + n);

    String in = path + "/iter" + FORMAT.format(j) + "t";
    String out = path + "/iter" + FORMAT.format(j);

    LOG.info("PageRankSchimmy: iteration " + j + ": Phase2");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);

    Job job = Job.getInstance(conf);
    job.setJobName("PageRankSchimmy:iteration" + j + ":Phase2");
    job.setJarByClass(RunPageRankSchimmy.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MapPageRankMassDistributionClass.class);

    conf.setFloat("MissingMass", (float) missing);
    conf.setInt("NodeCount", n);

    FileSystem.get(conf).delete(new Path(out), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}
From source file:PartitionGraph.java
License:Apache License
/** * Runs this tool.//from w w w . j a v a 2s. c om */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(RANGE, "use range partitioner")); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions") .create(NUM_PARTITIONS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES) || !cmdline.hasOption(NUM_PARTITIONS)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inPath = cmdline.getOptionValue(INPUT); String outPath = cmdline.getOptionValue(OUTPUT); int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES)); int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS)); boolean useRange = cmdline.hasOption(RANGE); LOG.info("Tool name: " + PartitionGraph.class.getSimpleName()); LOG.info(" - input dir: " + inPath); LOG.info(" - output dir: " + outPath); LOG.info(" - num partitions: " + numParts); LOG.info(" - node cnt: " + nodeCount); LOG.info(" - use range partitioner: " + useRange); Configuration conf = getConf(); conf.setInt("NodeCount", nodeCount); Job job = Job.getInstance(conf); job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath); job.setJarByClass(PartitionGraph.class); job.setNumReduceTasks(numParts); FileInputFormat.setInputPaths(job, new Path(inPath)); FileOutputFormat.setOutputPath(job, new Path(outPath)); job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNodeMultiSrc.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNodeMultiSrc.class); if (useRange) { job.setPartitionerClass(RangePartitioner.class); } FileSystem.get(conf).delete(new Path(outPath), true); job.waitForCompletion(true); return 0; }
From source file:Hw2Part1.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: <input file> <output directory>"); System.exit(2);//from w w w . ja v a 2 s.c o m } // FileSystem hdfs = FileSystem.get(conf); String target = "hdfs://localhost:9000/"; FileSystem fs = FileSystem.get(URI.create(target), conf);//is diffrent Path outputpath = new Path(otherArgs[otherArgs.length - 1]); if (fs.exists(outputpath)) { fs.delete(outputpath, true); } Job job = Job.getInstance(conf, "Hw2Part1"); job.setJarByClass(Hw2Part1.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumCombiner.class); job.setReducerClass(IntSumReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(InfoWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(InfoWritable.class); // add the input paths as given by command line for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } // add the output path as given by the command line FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }