List of usage examples for org.apache.hadoop.conf Configuration setInt
public void setInt(String name, int value)
Set the value of the name property to an int.
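Before the source-file examples below, here is a minimal standalone sketch of the call itself; the property key "my.app.retries" is only an illustrative placeholder, not a real Hadoop setting.

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store an int under a property name (placeholder key for illustration).
        conf.setInt("my.app.retries", 5);
        // Read it back; the second argument is the default returned when the key is unset.
        System.out.println(conf.getInt("my.app.retries", 1));    // prints 5
        System.out.println(conf.getInt("my.app.unset.key", 42)); // prints 42
    }
}

The value is stored as its string form, so getInt parses it back; an unset or unparseable value falls back to the supplied default.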
From source file:hadoop.api.RecommenderJob.java
License:Apache License
/**
 * Calculate the recommender
 *
 * @param args Information about the input path: partialMultiply, explicitFilterPath, numRecommendations
 * @return
 */
public int recommender(String[] args) {
    try {
        prepareRecommender(args);
    } catch (IOException e) {
        e.printStackTrace();
    }
    Path explicitFilterPath = new Path(prepPath, "explicitFilterPath");
    Path partialMultiplyPath = new Path(prepPath, "partialMultiply");
    Path outputPath = getOutputPath();
    String itemsFile = getOption("itemsFile");
    String filterFile = getOption("filterFile");
    boolean booleanData = Boolean.valueOf(getOption("booleanData"));
    int numRecommendations = Integer.parseInt(getOption("numRecommendations"));

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        // filter out any users we don't care about
        if (filterFile != null) {
            Job itemFiltering = null;
            try {
                itemFiltering = prepareJob(new Path(filterFile), explicitFilterPath, TextInputFormat.class,
                        ItemFilterMapper.class, VarLongWritable.class, VarLongWritable.class,
                        ItemFilterAsVectorAndPrefsReducer.class, VarIntWritable.class,
                        VectorAndPrefsWritable.class, SequenceFileOutputFormat.class);
            } catch (IOException e) {
                e.printStackTrace();
            }
            boolean succeeded = false;
            try {
                succeeded = itemFiltering.waitForCompletion(true);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
            }
            if (!succeeded) {
                return -1;
            }
        }

        String aggregateAndRecommendInput = partialMultiplyPath.toString();
        if (filterFile != null) {
            aggregateAndRecommendInput += "," + explicitFilterPath;
        }
        Class<? extends OutputFormat> outputFormat = parsedArgs.containsKey("--sequencefileOutput")
                ? SequenceFileOutputFormat.class
                : TextOutputFormat.class;

        // extract out the recommendations
        Job aggregateAndRecommend = null;
        try {
            aggregateAndRecommend = prepareJob(new Path(aggregateAndRecommendInput), outputPath,
                    SequenceFileInputFormat.class, PartialMultiplyMapper.class, VarLongWritable.class,
                    PrefAndSimilarityColumnWritable.class,
                    org.apache.mahout.cf.taste.hadoop.item.AggregateAndRecommendReducer.class,
                    VarLongWritable.class, RecommendedItemsWritable.class, outputFormat);
        } catch (IOException e) {
            e.printStackTrace();
        }
        Configuration aggregateAndRecommendConf = aggregateAndRecommend.getConfiguration();
        if (itemsFile != null) {
            aggregateAndRecommendConf.set(hadoop.api.AggregateAndRecommendReducer.ITEMS_FILE, itemsFile);
        }
        if (filterFile != null) {
            try {
                setS3SafeCombinedInputPath(aggregateAndRecommend, getTempPath(), partialMultiplyPath,
                        explicitFilterPath);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        setIOSort(aggregateAndRecommend);
        aggregateAndRecommendConf.set(hadoop.api.AggregateAndRecommendReducer.ITEMID_INDEX_PATH,
                new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
        aggregateAndRecommendConf.setInt(hadoop.api.AggregateAndRecommendReducer.NUM_RECOMMENDATIONS,
                numRecommendations);
        aggregateAndRecommendConf.setBoolean(BOOLEAN_DATA, booleanData);

        boolean succeeded = false;
        try {
            succeeded = aggregateAndRecommend.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        if (!succeeded) {
            return -1;
        }
    }
    return 0;
}
From source file:hadoop.api.RecommenderJob.java
License:Apache License
private static void setIOSort(JobContext job) {
    Configuration conf = job.getConfiguration();
    conf.setInt("io.sort.factor", 100);
    String javaOpts = conf.get("mapred.map.child.java.opts"); // new arg name
    if (javaOpts == null) {
        javaOpts = conf.get("mapred.child.java.opts"); // old arg name
    }
    int assumedHeapSize = 512;
    if (javaOpts != null) {
        Matcher m = Pattern.compile("-Xmx([0-9]+)([mMgG])").matcher(javaOpts);
        if (m.find()) {
            assumedHeapSize = Integer.parseInt(m.group(1));
            String megabyteOrGigabyte = m.group(2);
            if ("g".equalsIgnoreCase(megabyteOrGigabyte)) {
                assumedHeapSize *= 1024;
            }
        }
    }
    // Cap this at 1024MB now; see https://issues.apache.org/jira/browse/MAPREDUCE-2308
    conf.setInt("io.sort.mb", Math.min(assumedHeapSize / 2, 1024));
    // For some reason the Merger doesn't report status for a long time; increase
    // timeout when running these jobs
    conf.setInt("mapred.task.timeout", 60 * 60 * 1000);
}
From source file:hadoop.SleepJob.java
License:Apache License
public Job createJob(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount) throws IOException {
    Configuration conf = getConf();
    conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    Job job = new Job(conf, "sleep");
    job.setNumReduceTasks(numReducer);
    job.setJarByClass(SleepJob.class);
    job.setNumReduceTasks(numReducer);
    job.setMapperClass(SleepMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(SleepReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(SleepInputFormat.class);
    job.setPartitionerClass(SleepJobPartitioner.class);
    job.setSpeculativeExecution(false);
    job.setJobName("Sleep job");
    FileInputFormat.addInputPath(job, new Path("ignored"));
    return job;
}
From source file:hd_knn.HD_KNN.java
public static void main(String[] args) throws Exception {
    // Arguments
    // KNN variant: 0 standard, 1 mean, 2 inverse of the distance
    // Distance to use: 0 Euclidean, 1 Manhattan, 2 Chebyshev
    // k
    // test_file
    // Input path
    // Output path
    if (args.length != 6) {
        System.out.println("Arguments: knn_type distance k test_file input_path output_path");
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    conf.set("knn_method", args[0]);
    conf.set("distance", args[1]);
    conf.setInt("k", Integer.parseInt(args[2]));
    conf.set("test_file", args[3]);
    Job job = Job.getInstance(conf, "KNN");
    job.setJarByClass(HD_KNN.class);
    job.setMapperClass(DistanceCalculatorMapper.class);
    job.setCombinerClass(PredictClassReducer.class);
    job.setReducerClass(PredictClassReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DistanceClassOutput.class);
    FileInputFormat.addInputPath(job, new Path(args[4]));
    FileOutputFormat.setOutputPath(job, new Path(args[5]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hudson.gridmaven.gridlayer.NameNodeStartTask.java
License:Open Source License
public Void call() throws IOException { File hadoopRoot = new File(hudsonRoot, "hadoop"); if (hadoopRoot.mkdirs()) format = true;//ww w . j a va 2s .co m final Configuration conf = new Configuration(); // location of the name node conf.set("fs.default.name", hdfsUrl); conf.set("dfs.http.address", "0.0.0.0:" + HTTP_PORT); // namespace node stores information here File namedir = new File(hadoopRoot, "namedir"); if (namedir.mkdirs()) format = true; conf.set("dfs.name.dir", namedir.getPath()); // dfs node stores information here File datadir = new File(hadoopRoot, "datadir"); conf.set("dfs.namenode.logging.level", "ALL"); if (datadir.mkdirs()) format = true; conf.set("dfs.data.dir", datadir.getPath()); conf.setInt("dfs.replication", 1); conf.set("dfs.safemode.extension", "1"); conf.set("dfs.block.size", "1048576"); //if(format) { // This will provide format HDFS with every start System.out.println("Formatting HDFS"); NameNode.format(conf); //} System.out.println("Starting namenode"); NameNode.createNameNode(new String[0], conf); return null; }
From source file:hudson.plugins.hadoop.NameNodeStartTask.java
License:Open Source License
public Void call() throws IOException { File hadoopRoot = new File(hudsonRoot, "hadoop"); if (hadoopRoot.mkdirs()) format = true;/* ww w .j av a2 s . c om*/ final Configuration conf = new Configuration(); // location of the name node conf.set("fs.default.name", hdfsUrl); conf.set("dfs.http.address", "0.0.0.0:" + HTTP_PORT); // namespace node stores information here File namedir = new File(hadoopRoot, "namedir"); if (namedir.mkdirs()) format = true; conf.set("dfs.name.dir", namedir.getPath()); // dfs node stores information here File datadir = new File(hadoopRoot, "datadir"); if (datadir.mkdirs()) format = true; conf.set("dfs.data.dir", datadir.getPath()); conf.setInt("dfs.replication", 1); if (format) { System.out.println("Formatting HDFS"); NameNode.format(conf); } System.out.println("Starting namenode"); NameNode.createNameNode(new String[0], conf); return null; }
From source file:idgs.ConfVar.java
License:Open Source License
public static void initializeWithDefaults(Configuration conf) {
    if (conf.get(CLIPROMPT.varname) == null) {
        conf.set(CLIPROMPT.varname, CLIPROMPT.defaultVal);
    }
    if (conf.get(EXEC_MODE.varname) == null) {
        conf.set(EXEC_MODE.varname, EXEC_MODE.defaultVal);
    }
    if (conf.get(EXPLAIN_MODE.varname) == null) {
        conf.set(EXPLAIN_MODE.varname, EXPLAIN_MODE.defaultVal);
    }
    if (conf.get(COLUMN_INITIALSIZE.varname) == null) {
        conf.setInt(COLUMN_INITIALSIZE.varname, COLUMN_INITIALSIZE.defaultIntVal);
    }
    if (conf.get(CHECK_TABLENAME_FLAG.varname) == null) {
        conf.setBoolean(CHECK_TABLENAME_FLAG.varname, CHECK_TABLENAME_FLAG.defaultBoolVal);
    }
    if (conf.get(COMPRESS_QUERY_PLAN.varname) == null) {
        conf.setBoolean(COMPRESS_QUERY_PLAN.varname, COMPRESS_QUERY_PLAN.defaultBoolVal);
    }
    if (conf.get(MAP_PRUNING.varname) == null) {
        conf.setBoolean(MAP_PRUNING.varname, MAP_PRUNING.defaultBoolVal);
    }
    if (conf.get(MAP_PRUNING_PRINT_DEBUG.varname) == null) {
        conf.setBoolean(MAP_PRUNING_PRINT_DEBUG.varname, MAP_PRUNING_PRINT_DEBUG.defaultBoolVal);
    }
}
From source file:io.amient.kafka.hadoop.io.KafkaInputFormat.java
License:Apache License
public static void configureZkTimeouts(Configuration conf, int sessionTimeoutMs, int connectTimeoutMs) {
    conf.setInt(CONFIG_ZK_SESSION_TIMEOUT_MS, sessionTimeoutMs);
    conf.setInt(CONFIG_ZK_CONNECT_TIMEOUT_MS, connectTimeoutMs);
}
From source file:io.bfscan.clueweb12.BuildDictionary.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of terms").create(COUNT_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(COUNT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    LOG.info("Tool name: " + ComputeTermStatistics.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);

    Configuration conf = getConf();
    conf.set(HADOOP_OUTPUT_OPTION, output);
    conf.setInt(HADOOP_TERMS_COUNT_OPTION, Integer.parseInt(cmdline.getOptionValue(COUNT_OPTION)));
    conf.set("mapreduce.map.memory.mb", "4096");
    conf.set("mapreduce.map.java.opts", "-Xmx4096m");
    conf.set("mapreduce.reduce.memory.mb", "4096");
    conf.set("mapreduce.reduce.java.opts", "-Xmx4096m");

    Job job = Job.getInstance(conf);
    job.setJobName(BuildDictionary.class.getSimpleName() + ":" + input);
    job.setJarByClass(BuildDictionary.class);
    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfIntLong.class);
    job.setOutputKeyClass(Text.class);
    job.setSortComparatorClass(DictionaryTransformationStrategy.WritableComparator.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(MyReducer.class);

    FileSystem.get(getConf()).delete(new Path(output), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:io.bfscan.clueweb12.LMRetrieval.java
License:Apache License
/** * Runs this tool./*from w w w.ja v a 2s . co m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg() .withDescription("input path (pfor format expected, add * to retrieve files)") .create(DOCVECTOR_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("dictionary").create(DICTIONARY_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("queries").create(QUERIES_OPTION)); options.addOption( OptionBuilder.withArgName("float").hasArg().withDescription("smoothing").create(SMOOTHING)); options.addOption(OptionBuilder.withArgName("int").hasArg().withDescription("topk").create(TOPK)); options.addOption(OptionBuilder.withArgName("string " + AnalyzerFactory.getOptions()).hasArg() .withDescription("preprocessing").create(PREPROCESSING)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(DOCVECTOR_OPTION) || !cmdline.hasOption(OUTPUT_OPTION) || !cmdline.hasOption(DICTIONARY_OPTION) || !cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(SMOOTHING) || !cmdline.hasOption(TOPK) || !cmdline.hasOption(PREPROCESSING)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String docvector = cmdline.getOptionValue(DOCVECTOR_OPTION); String output = cmdline.getOptionValue(OUTPUT_OPTION); String dictionary = cmdline.getOptionValue(DICTIONARY_OPTION); String queries = cmdline.getOptionValue(QUERIES_OPTION); String smoothing = cmdline.getOptionValue(SMOOTHING); String topk = cmdline.getOptionValue(TOPK); String preprocessing = cmdline.getOptionValue(PREPROCESSING); LOG.info("Tool name: " + LMRetrieval.class.getSimpleName()); LOG.info(" - docvector: " + docvector); LOG.info(" - output: " + output); LOG.info(" - dictionary: " + dictionary); LOG.info(" - queries: " + queries); LOG.info(" - smoothing: " + smoothing); LOG.info(" - topk: " + topk); LOG.info(" - preprocessing: " + preprocessing); Configuration conf = getConf(); conf.set(DICTIONARY_OPTION, dictionary); conf.set(QUERIES_OPTION, queries); conf.setFloat(SMOOTHING, Float.parseFloat(smoothing)); conf.setInt(TOPK, Integer.parseInt(topk)); conf.set(PREPROCESSING, preprocessing); conf.set("mapreduce.map.memory.mb", "10048"); conf.set("mapreduce.map.java.opts", "-Xmx10048m"); conf.set("mapreduce.reduce.memory.mb", "10048"); conf.set("mapreduce.reduce.java.opts", "-Xmx10048m"); conf.set("mapred.task.timeout", "6000000"); // default is 600000 FileSystem fs = FileSystem.get(conf); if (fs.exists(new Path(output))) { fs.delete(new Path(output), true); } Job job = new Job(conf, LMRetrieval.class.getSimpleName() + ":" + docvector); job.setJarByClass(LMRetrieval.class); FileInputFormat.setInputPaths(job, docvector); FileOutputFormat.setOutputPath(job, new Path(output)); job.setInputFormatClass(SequenceFileInputFormat.class); job.setMapOutputKeyClass(PairOfIntString.class); 
job.setMapOutputValueClass(FloatWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setMapperClass(MyMapper.class); job.setPartitionerClass(MyPartitioner.class); job.setReducerClass(MyReducer.class); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }