List of usage examples for org.apache.hadoop.conf Configuration setInt
public void setInt(String name, int value)
Set the value of the name property to an int.
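Before the source-file examples below, here is a minimal standalone sketch of the call itself; the property key "my.app.retries" is only an illustrative placeholder, not a real Hadoop setting.

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store an int under a property name (placeholder key for illustration).
        conf.setInt("my.app.retries", 5);
        // Read it back; the second argument is the default returned when the key is unset.
        System.out.println(conf.getInt("my.app.retries", 1));    // prints 5
        System.out.println(conf.getInt("my.app.unset.key", 42)); // prints 42
    }
}

The value is stored as its string form, so getInt parses it back; an unset or unparseable value falls back to the supplied default.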
From source file:hadoop.api.RecommenderJob.java
License:Apache License
/**
 * Calculate the recommender
 *
 * @param args Information about the input path: partialMultiply, explicitFilterPath, numRecommendations
 * @return
 */
public int recommender(String[] args) {
    try {
        prepareRecommender(args);
    } catch (IOException e) {
        e.printStackTrace();
    }
    Path explicitFilterPath = new Path(prepPath, "explicitFilterPath");
    Path partialMultiplyPath = new Path(prepPath, "partialMultiply");
    Path outputPath = getOutputPath();
    String itemsFile = getOption("itemsFile");
    String filterFile = getOption("filterFile");
    boolean booleanData = Boolean.valueOf(getOption("booleanData"));
    int numRecommendations = Integer.parseInt(getOption("numRecommendations"));

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        // filter out any users we don't care about
        if (filterFile != null) {
            Job itemFiltering = null;
            try {
                itemFiltering = prepareJob(new Path(filterFile), explicitFilterPath, TextInputFormat.class,
                        ItemFilterMapper.class, VarLongWritable.class, VarLongWritable.class,
                        ItemFilterAsVectorAndPrefsReducer.class, VarIntWritable.class,
                        VectorAndPrefsWritable.class, SequenceFileOutputFormat.class);
            } catch (IOException e) {
                e.printStackTrace();
            }
            boolean succeeded = false;
            try {
                succeeded = itemFiltering.waitForCompletion(true);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
            }
            if (!succeeded) {
                return -1;
            }
        }

        String aggregateAndRecommendInput = partialMultiplyPath.toString();
        if (filterFile != null) {
            aggregateAndRecommendInput += "," + explicitFilterPath;
        }
        Class<? extends OutputFormat> outputFormat = parsedArgs.containsKey("--sequencefileOutput")
                ? SequenceFileOutputFormat.class
                : TextOutputFormat.class;

        // extract out the recommendations
        Job aggregateAndRecommend = null;
        try {
            aggregateAndRecommend = prepareJob(new Path(aggregateAndRecommendInput), outputPath,
                    SequenceFileInputFormat.class, PartialMultiplyMapper.class, VarLongWritable.class,
                    PrefAndSimilarityColumnWritable.class,
                    org.apache.mahout.cf.taste.hadoop.item.AggregateAndRecommendReducer.class,
                    VarLongWritable.class, RecommendedItemsWritable.class, outputFormat);
        } catch (IOException e) {
            e.printStackTrace();
        }
        Configuration aggregateAndRecommendConf = aggregateAndRecommend.getConfiguration();
        if (itemsFile != null) {
            aggregateAndRecommendConf.set(hadoop.api.AggregateAndRecommendReducer.ITEMS_FILE, itemsFile);
        }
        if (filterFile != null) {
            try {
                setS3SafeCombinedInputPath(aggregateAndRecommend, getTempPath(), partialMultiplyPath,
                        explicitFilterPath);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        setIOSort(aggregateAndRecommend);
        aggregateAndRecommendConf.set(hadoop.api.AggregateAndRecommendReducer.ITEMID_INDEX_PATH,
                new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
        aggregateAndRecommendConf.setInt(hadoop.api.AggregateAndRecommendReducer.NUM_RECOMMENDATIONS,
                numRecommendations);
        aggregateAndRecommendConf.setBoolean(BOOLEAN_DATA, booleanData);

        boolean succeeded = false;
        try {
            succeeded = aggregateAndRecommend.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        if (!succeeded) {
            return -1;
        }
    }
    return 0;
}
From source file:hadoop.api.RecommenderJob.java
License:Apache License
private static void setIOSort(JobContext job) {
    Configuration conf = job.getConfiguration();
    conf.setInt("io.sort.factor", 100);
    String javaOpts = conf.get("mapred.map.child.java.opts"); // new arg name
    if (javaOpts == null) {
        javaOpts = conf.get("mapred.child.java.opts"); // old arg name
    }
    int assumedHeapSize = 512;
    if (javaOpts != null) {
        Matcher m = Pattern.compile("-Xmx([0-9]+)([mMgG])").matcher(javaOpts);
        if (m.find()) {
            assumedHeapSize = Integer.parseInt(m.group(1));
            String megabyteOrGigabyte = m.group(2);
            if ("g".equalsIgnoreCase(megabyteOrGigabyte)) {
                assumedHeapSize *= 1024;
            }
        }
    }
    // Cap this at 1024MB now; see https://issues.apache.org/jira/browse/MAPREDUCE-2308
    conf.setInt("io.sort.mb", Math.min(assumedHeapSize / 2, 1024));
    // For some reason the Merger doesn't report status for a long time; increase
    // timeout when running these jobs
    conf.setInt("mapred.task.timeout", 60 * 60 * 1000);
}
From source file:hadoop.SleepJob.java
License:Apache License
public Job createJob(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount) throws IOException {
    Configuration conf = getConf();
    conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    Job job = new Job(conf, "sleep");
    job.setNumReduceTasks(numReducer);
    job.setJarByClass(SleepJob.class);
    job.setNumReduceTasks(numReducer);
    job.setMapperClass(SleepMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(SleepReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(SleepInputFormat.class);
    job.setPartitionerClass(SleepJobPartitioner.class);
    job.setSpeculativeExecution(false);
    job.setJobName("Sleep job");
    FileInputFormat.addInputPath(job, new Path("ignored"));
    return job;
}
From source file:hd_knn.HD_KNN.java
public static void main(String[] args) throws Exception {
    // Arguments
    // KNN variant: 0 standard, 1 mean, 2 inverse of the distance
    // Distance to use: 0 Euclidean, 1 Manhattan, 2 Chebyshev
    // k
    // test_file
    // Input path
    // Output path
    if (args.length != 6) {
        System.out.println("Arguments: knn_type distance k test_file input_path output_path");
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    conf.set("knn_method", args[0]);
    conf.set("distance", args[1]);
    conf.setInt("k", Integer.parseInt(args[2]));
    conf.set("test_file", args[3]);
    Job job = Job.getInstance(conf, "KNN");
    job.setJarByClass(HD_KNN.class);
    job.setMapperClass(DistanceCalculatorMapper.class);
    job.setCombinerClass(PredictClassReducer.class);
    job.setReducerClass(PredictClassReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DistanceClassOutput.class);
    FileInputFormat.addInputPath(job, new Path(args[4]));
    FileOutputFormat.setOutputPath(job, new Path(args[5]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hudson.gridmaven.gridlayer.NameNodeStartTask.java
License:Open Source License
public Void call() throws IOException { File hadoopRoot = new File(hudsonRoot, "hadoop"); if (hadoopRoot.mkdirs()) format = true;//ww w . j a va 2s .co m final Configuration conf = new Configuration(); // location of the name node conf.set("fs.default.name", hdfsUrl); conf.set("dfs.http.address", "0.0.0.0:" + HTTP_PORT); // namespace node stores information here File namedir = new File(hadoopRoot, "namedir"); if (namedir.mkdirs()) format = true; conf.set("dfs.name.dir", namedir.getPath()); // dfs node stores information here File datadir = new File(hadoopRoot, "datadir"); conf.set("dfs.namenode.logging.level", "ALL"); if (datadir.mkdirs()) format = true; conf.set("dfs.data.dir", datadir.getPath()); conf.setInt("dfs.replication", 1); conf.set("dfs.safemode.extension", "1"); conf.set("dfs.block.size", "1048576"); //if(format) { // This will provide format HDFS with every start System.out.println("Formatting HDFS"); NameNode.format(conf); //} System.out.println("Starting namenode"); NameNode.createNameNode(new String[0], conf); return null; }
From source file:hudson.plugins.hadoop.NameNodeStartTask.java
License:Open Source License
public Void call() throws IOException { File hadoopRoot = new File(hudsonRoot, "hadoop"); if (hadoopRoot.mkdirs()) format = true;/* ww w .j av a2 s . c om*/ final Configuration conf = new Configuration(); // location of the name node conf.set("fs.default.name", hdfsUrl); conf.set("dfs.http.address", "0.0.0.0:" + HTTP_PORT); // namespace node stores information here File namedir = new File(hadoopRoot, "namedir"); if (namedir.mkdirs()) format = true; conf.set("dfs.name.dir", namedir.getPath()); // dfs node stores information here File datadir = new File(hadoopRoot, "datadir"); if (datadir.mkdirs()) format = true; conf.set("dfs.data.dir", datadir.getPath()); conf.setInt("dfs.replication", 1); if (format) { System.out.println("Formatting HDFS"); NameNode.format(conf); } System.out.println("Starting namenode"); NameNode.createNameNode(new String[0], conf); return null; }
From source file:idgs.ConfVar.java
License:Open Source License
public static void initializeWithDefaults(Configuration conf) {
    if (conf.get(CLIPROMPT.varname) == null) {
        conf.set(CLIPROMPT.varname, CLIPROMPT.defaultVal);
    }
    if (conf.get(EXEC_MODE.varname) == null) {
        conf.set(EXEC_MODE.varname, EXEC_MODE.defaultVal);
    }
    if (conf.get(EXPLAIN_MODE.varname) == null) {
        conf.set(EXPLAIN_MODE.varname, EXPLAIN_MODE.defaultVal);
    }
    if (conf.get(COLUMN_INITIALSIZE.varname) == null) {
        conf.setInt(COLUMN_INITIALSIZE.varname, COLUMN_INITIALSIZE.defaultIntVal);
    }
    if (conf.get(CHECK_TABLENAME_FLAG.varname) == null) {
        conf.setBoolean(CHECK_TABLENAME_FLAG.varname, CHECK_TABLENAME_FLAG.defaultBoolVal);
    }
    if (conf.get(COMPRESS_QUERY_PLAN.varname) == null) {
        conf.setBoolean(COMPRESS_QUERY_PLAN.varname, COMPRESS_QUERY_PLAN.defaultBoolVal);
    }
    if (conf.get(MAP_PRUNING.varname) == null) {
        conf.setBoolean(MAP_PRUNING.varname, MAP_PRUNING.defaultBoolVal);
    }
    if (conf.get(MAP_PRUNING_PRINT_DEBUG.varname) == null) {
        conf.setBoolean(MAP_PRUNING_PRINT_DEBUG.varname, MAP_PRUNING_PRINT_DEBUG.defaultBoolVal);
    }
}
From source file:io.amient.kafka.hadoop.io.KafkaInputFormat.java
License:Apache License
public static void configureZkTimeouts(Configuration conf, int sessionTimeoutMs, int connectTimeoutMs) {
    conf.setInt(CONFIG_ZK_SESSION_TIMEOUT_MS, sessionTimeoutMs);
    conf.setInt(CONFIG_ZK_CONNECT_TIMEOUT_MS, connectTimeoutMs);
}
From source file:io.bfscan.clueweb12.BuildDictionary.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of terms").create(COUNT_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(COUNT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    LOG.info("Tool name: " + ComputeTermStatistics.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);

    Configuration conf = getConf();
    conf.set(HADOOP_OUTPUT_OPTION, output);
    conf.setInt(HADOOP_TERMS_COUNT_OPTION, Integer.parseInt(cmdline.getOptionValue(COUNT_OPTION)));
    conf.set("mapreduce.map.memory.mb", "4096");
    conf.set("mapreduce.map.java.opts", "-Xmx4096m");
    conf.set("mapreduce.reduce.memory.mb", "4096");
    conf.set("mapreduce.reduce.java.opts", "-Xmx4096m");

    Job job = Job.getInstance(conf);
    job.setJobName(BuildDictionary.class.getSimpleName() + ":" + input);
    job.setJarByClass(BuildDictionary.class);
    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfIntLong.class);
    job.setOutputKeyClass(Text.class);
    job.setSortComparatorClass(DictionaryTransformationStrategy.WritableComparator.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(MyReducer.class);

    FileSystem.get(getConf()).delete(new Path(output), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:io.bfscan.clueweb12.LMRetrieval.java
License:Apache License
/** * Runs this tool./*from w w w.ja v a 2s . co m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg() .withDescription("input path (pfor format expected, add * to retrieve files)") .create(DOCVECTOR_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("dictionary").create(DICTIONARY_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("queries").create(QUERIES_OPTION)); options.addOption( OptionBuilder.withArgName("float").hasArg().withDescription("smoothing").create(SMOOTHING)); options.addOption(OptionBuilder.withArgName("int").hasArg().withDescription("topk").create(TOPK)); options.addOption(OptionBuilder.withArgName("string " + AnalyzerFactory.getOptions()).hasArg() .withDescription("preprocessing").create(PREPROCESSING)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(DOCVECTOR_OPTION) || !cmdline.hasOption(OUTPUT_OPTION) || !cmdline.hasOption(DICTIONARY_OPTION) || !cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(SMOOTHING) || !cmdline.hasOption(TOPK) || !cmdline.hasOption(PREPROCESSING)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String docvector = cmdline.getOptionValue(DOCVECTOR_OPTION); String output = cmdline.getOptionValue(OUTPUT_OPTION); String dictionary = cmdline.getOptionValue(DICTIONARY_OPTION); String queries = cmdline.getOptionValue(QUERIES_OPTION); String smoothing = cmdline.getOptionValue(SMOOTHING); String topk = cmdline.getOptionValue(TOPK); String preprocessing = cmdline.getOptionValue(PREPROCESSING); LOG.info("Tool name: " + LMRetrieval.class.getSimpleName()); LOG.info(" - docvector: " + docvector); LOG.info(" - output: " + output); LOG.info(" - dictionary: " + dictionary); LOG.info(" - queries: " + queries); LOG.info(" - smoothing: " + smoothing); LOG.info(" - topk: " + topk); LOG.info(" - preprocessing: " + preprocessing); Configuration conf = getConf(); conf.set(DICTIONARY_OPTION, dictionary); conf.set(QUERIES_OPTION, queries); conf.setFloat(SMOOTHING, Float.parseFloat(smoothing)); conf.setInt(TOPK, Integer.parseInt(topk)); conf.set(PREPROCESSING, preprocessing); conf.set("mapreduce.map.memory.mb", "10048"); conf.set("mapreduce.map.java.opts", "-Xmx10048m"); conf.set("mapreduce.reduce.memory.mb", "10048"); conf.set("mapreduce.reduce.java.opts", "-Xmx10048m"); conf.set("mapred.task.timeout", "6000000"); // default is 600000 FileSystem fs = FileSystem.get(conf); if (fs.exists(new Path(output))) { fs.delete(new Path(output), true); } Job job = new Job(conf, LMRetrieval.class.getSimpleName() + ":" + docvector); job.setJarByClass(LMRetrieval.class); FileInputFormat.setInputPaths(job, docvector); FileOutputFormat.setOutputPath(job, new Path(output)); job.setInputFormatClass(SequenceFileInputFormat.class); job.setMapOutputKeyClass(PairOfIntString.class); 
job.setMapOutputValueClass(FloatWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setMapperClass(MyMapper.class); job.setPartitionerClass(MyPartitioner.class); job.setReducerClass(MyReducer.class); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }