List of usage examples for org.apache.hadoop.conf Configuration setInt
public void setInt(String name, int value)
Sets the value of the name property to an int.
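Before the real-world examples below, here is a minimal sketch of the typical round trip: store an int under a property name with setInt, then read it back with getInt and a default. The property name "my.example.buffer.size" is only an illustration, not a standard Hadoop key.

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store an int value under a property name (illustrative key, not a real Hadoop setting).
        conf.setInt("my.example.buffer.size", 64 * 1024);
        // Read it back, supplying a default in case the property is unset.
        int bufferSize = conf.getInt("my.example.buffer.size", 4 * 1024);
        System.out.println("buffer size = " + bufferSize);
    }
}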
From source file:com.yahoo.glimmer.indexing.generator.TripleIndexGenerator.java
License:Open Source License
public int run(String[] args) throws Exception {
    SimpleJSAP jsap = new SimpleJSAP(TripleIndexGenerator.class.getName(),
            "Generates a keyword index from RDF data.",
            new Parameter[] {
                    new Switch(NO_CONTEXTS_ARG, 'C', "withoutContexts",
                            "Don't process the contexts for each tuple."),
                    new FlaggedOption(METHOD_ARG, JSAP.STRING_PARSER, "horizontal", JSAP.REQUIRED, 'm',
                            METHOD_ARG, "horizontal or vertical."),
                    new FlaggedOption(PREDICATES_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED,
                            'p', PREDICATES_ARG, "Subset of the properties to be indexed."),
                    new FlaggedOption(RESOURCE_PREFIX_ARG, JSAP.STRING_PARSER, "@", JSAP.NOT_REQUIRED, 'r',
                            RESOURCE_PREFIX_ARG,
                            "Prefix to add to object resource hash values when indexing. Stops queries for numbers matching resource hash values. Default is '@'"),
                    new UnflaggedOption("input", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "HDFS location for the input data."),
                    new UnflaggedOption(NUMBER_OF_DOCS_ARG, JSAP.LONG_PARSER, JSAP.REQUIRED,
                            "Number of documents to index"),
                    new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "HDFS location for the output."),
                    new UnflaggedOption(RESOURCES_HASH_ARG, JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "HDFS location of the resources hash file."), });

    JSAPResult jsapResult = jsap.parse(args);

    // check whether the command line was valid, and if it wasn't,
    // display usage information and exit.
    if (!jsapResult.success()) {
        System.err.println();
        System.err.println("Usage: java " + TripleIndexGenerator.class.getName());
        System.err.println(" " + jsap.getUsage());
        System.err.println();
        System.exit(1);
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(TripleIndexGenerator.class);
    job.setJobName("TripleIndexGenerator" + System.currentTimeMillis());

    FileInputFormat.setInputPaths(job, new Path(jsapResult.getString("input")));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(DocumentMapper.class);
    job.setMapOutputKeyClass(TermKey.class);
    job.setMapOutputValueClass(TermValue.class);

    job.setPartitionerClass(TermKey.FirstPartitioner.class);
    job.setGroupingComparatorClass(TermKey.FirstGroupingComparator.class);

    job.setReducerClass(TermReduce.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IndexRecordWriterValue.class);
    job.setOutputFormatClass(IndexRecordWriter.OutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(jsapResult.getString("output")));

    Configuration conf = job.getConfiguration();

    conf.setClass("mapred.output.key.comparator.class", TermKey.Comparator.class, WritableComparator.class);
    conf.set("mapreduce.user.classpath.first", "true");

    long numDocs = jsapResult.getLong(NUMBER_OF_DOCS_ARG);
    conf.setLong(NUMBER_OF_DOCUMENTS, numDocs);
    // Set this in an attempt to get around the 2GB RAM task limit on our cluster.
    // Setting this in the hope of fixing "Direct buffer memory" errors.
    conf.setInt(INDEX_WRITER_CACHE_SIZE, 1024 * 1024);

    conf.set(OUTPUT_DIR, jsapResult.getString("output"));

    boolean withContexts = !jsapResult.getBoolean(NO_CONTEXTS_ARG, false);
    if (jsapResult.getString(METHOD_ARG).equalsIgnoreCase(METHOD_ARG_VALUE_HORIZONTAL)) {
        HorizontalDocumentFactory.setupConf(conf, withContexts, jsapResult.getString(RESOURCES_HASH_ARG),
                jsapResult.getString(RESOURCE_PREFIX_ARG));
    } else if (jsapResult.getString(METHOD_ARG).equalsIgnoreCase(METHOD_ARG_VALUE_VERTICAL)) {
        if (!jsapResult.contains(PREDICATES_ARG)) {
            throw new IllegalArgumentException("When '" + METHOD_ARG + "' is '" + METHOD_ARG_VALUE_VERTICAL
                    + "' you have to give a predicates file too.");
        }
        VerticalDocumentFactory.setupConf(conf, withContexts, jsapResult.getString(RESOURCES_HASH_ARG),
                jsapResult.getString(RESOURCE_PREFIX_ARG), jsapResult.getString(PREDICATES_ARG));
    } else {
        throw new IllegalArgumentException(METHOD_ARG + " should be '" + METHOD_ARG_VALUE_HORIZONTAL
                + "' or '" + METHOD_ARG_VALUE_VERTICAL + "'");
    }

    conf.setInt("mapreduce.input.linerecordreader.line.maxlength", 1024 * 1024);

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}
From source file:com.yahoo.storm.yarn.TestIntegration.java
License:Open Source License
@SuppressWarnings({ "rawtypes", "unchecked" })
@BeforeClass
public static void setup() {
    try {
        zkServer = new EmbeddedZKServer();
        zkServer.start();

        LOG.info("Starting up MiniYARN cluster");
        if (yarnCluster == null) {
            yarnCluster = new MiniYARNCluster(TestIntegration.class.getName(), 2, 1, 1);
            Configuration conf = new YarnConfiguration();
            conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512);
            conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 2 * 1024);
            yarnCluster.init(conf);
            yarnCluster.start();
        }
        sleep(2000);

        Configuration miniyarn_conf = yarnCluster.getConfig();
        yarn_site_xml = testConf.createYarnSiteConfig(miniyarn_conf);

        storm_home = testConf.stormHomePath();
        LOG.info("Will be using storm found on PATH at " + storm_home);

        // create a storm configuration file with zkport
        final Map storm_conf = Config.readStormConfig();
        storm_conf.put(backtype.storm.Config.STORM_ZOOKEEPER_PORT, zkServer.port());
        storm_conf_file = testConf.createConfigFile(storm_conf);

        List<String> cmd = java.util.Arrays.asList("bin/storm-yarn", "launch", storm_conf_file.toString(),
                "--stormZip", "lib/storm.zip", "--appname", "storm-on-yarn-test", "--output",
                "target/appId.txt");
        execute(cmd);

        // wait for the Storm cluster to be fully launched
        sleep(15000);

        BufferedReader reader = new BufferedReader(new FileReader("target/appId.txt"));
        appId = reader.readLine();
        reader.close();
        if (appId != null)
            appId = appId.trim();
        LOG.info("application ID:" + appId);
    } catch (Exception ex) {
        LOG.error("setup failure", ex);
        Assert.assertEquals(null, ex);
    }
}
From source file:com.zjy.mongo.util.MongoConfigUtil.java
License:Apache License
/**
 * Set the maximum number of documents that should be loaded into memory
 * and sent in a batch to MongoDB as the output of a job.
 *
 * @param conf the Configuration
 * @param size the number of documents
 */
public static void setBatchSize(final Configuration conf, final int size) {
    conf.setInt(OUTPUT_BATCH_SIZE, size);
}
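For reference, a short sketch of how a driver might call this helper before submitting a job; the batch size of 1000 is an arbitrary illustrative value, not a recommended default.

Configuration conf = new Configuration();
// Cap each MongoDB output batch at 1000 documents (arbitrary illustrative value).
MongoConfigUtil.setBatchSize(conf, 1000);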
From source file:com.zjy.mongo.util.MongoConfigUtil.java
License:Apache License
public static Configuration buildConfiguration(final Map<String, Object> data) {
    Configuration newConf = new Configuration();
    for (Entry<String, Object> entry : data.entrySet()) {
        String key = entry.getKey();
        Object val = entry.getValue();
        if (val instanceof String) {
            newConf.set(key, (String) val);
        } else if (val instanceof Boolean) {
            newConf.setBoolean(key, (Boolean) val);
        } else if (val instanceof Integer) {
            newConf.setInt(key, (Integer) val);
        } else if (val instanceof Float) {
            newConf.setFloat(key, (Float) val);
        } else if (val instanceof DBObject) {
            setDBObject(newConf, key, (DBObject) val);
        } else {
            throw new RuntimeException("can't convert " + val.getClass() + " into any type for Configuration");
        }
    }
    return newConf;
}
From source file:Compress.TestLZO.java
License:Open Source License
public static void main(String[] argv) throws IOException {
    System.out.println(System.getProperty("java.library.path"));

    Configuration conf = new Configuration();
    conf.setInt("io.compression.codec.lzo.buffersize", 64 * 1024);
    LzoCodec codec = new LzoCodec();
    codec.setConf(conf);

    OutputStream out = new DataOutputBuffer();
    CompressionOutputStream out2 = codec.createOutputStream(out);

    byte[] str2 = new byte[20];
    int num = 10000;
    int before = 0;
    String part = "hello konten hello konten";
    for (long i = 0; i < num; i++) {
        Util.long2bytes(str2, i);
        out2.write(str2, 0, 8);
    }
    out2.finish();

    byte[] buffer = ((DataOutputBuffer) out).getData();

    System.out.println("org len:" + num * 8 + ", compressed len:" + ((DataOutputBuffer) out).getLength());

    InputStream in = new DataInputBuffer();
    ((DataInputBuffer) in).reset(((DataOutputBuffer) out).getData(), 0, ((DataOutputBuffer) out).getLength());

    CompressionInputStream in2 = codec.createInputStream(in);

    byte[] buf = new byte[100];
    for (long i = 0; i < num; i++) {
        int count = 0;
        count = in2.read(buf, 0, 8);
        if (count > 0) {
            long value = Util.bytes2long(buf, 0, 8);
            if (value != i) {
                System.out.println(i + ",count:" + count + ",value:" + value);
            } else if (i > (num - 20)) {
                System.out.println(i + ",value:" + value);
            }
        } else {
            System.out.println("count:" + count + ", string " + i);
            break;
        }
    }
    in2.close();

    System.out.println("test compress array...");

    OutputStream out3 = new DataOutputBuffer();
    CompressionOutputStream out4 = codec.createOutputStream(out3);
    DataOutputBuffer tout3 = new DataOutputBuffer();
    for (long i = 0; i < num; i++) {
        Util.long2bytes(str2, i);
        out4.write(str2, 0, 8);
    }
    out4.finish();

    buffer = ((DataOutputBuffer) out3).getData();

    System.out.println("org len:" + num * 8 + ", compressed len:" + ((DataOutputBuffer) out3).getLength());

    InputStream in3 = new DataInputBuffer();
    ((DataInputBuffer) in3).reset(((DataOutputBuffer) out3).getData(), 0,
            ((DataOutputBuffer) out3).getLength());

    CompressionInputStream in4 = codec.createInputStream(in3);
    for (long i = 0; i < num; i++) {
        int count = 0;
        count = in4.read(buf, 0, 8);
        if (count > 0) {
            long value = Util.bytes2long(buf, 0, 8);
            if (value != i) {
                System.out.println(i + ",count:" + count + ",value:" + value);
            }
            if (i > (num - 20)) {
                System.out.println(i + ",value:" + value);
            }
        } else {
            System.out.println("count:" + count + ", string " + i);
            break;
        }
    }
    in2.close();
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java
License:Apache License
/**
 * pass1: generate collocations, ngrams
 */
private static long generateCollocations(Path input, Path output, Configuration baseConf,
        boolean emitUnigrams, int maxNGramSize, int reduceTasks, int minSupport, Window mode, int winsize)
        throws IOException, ClassNotFoundException, InterruptedException {

    Configuration con = new Configuration(baseConf);
    con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    con.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
    con.setInt(CollocReducer.MIN_SUPPORT, minSupport);
    con.set(WINDOW_TYPE, mode.toString());
    con.setInt(WINDOW_SIZE, winsize);

    if (mode.toString().equalsIgnoreCase("DOCUMENT")) {
        con.setInt("mapred.job.map.memory.mb", 3000);
        con.set("mapred.child.java.opts", "-Xmx2900M");
        con.set("mapred.reduce.child.java.opts", "-Xmx8000M");
        con.setInt("mapred.job.reduce.memory.mb", 8120);
    } else {
        con.setInt("mapred.job.map.memory.mb", 2000);
        con.set("mapred.child.java.opts", "-Xmx1900M");
        con.set("mapred.reduce.child.java.opts", "-Xmx2900M");
        con.setInt("mapred.job.reduce.memory.mb", 3000);
    }
    con.setBoolean("mapred.compress.map.output", true);
    con.setStrings("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
    con.setBoolean("mapred.compress.output", true);
    con.setStrings("mapred.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
    con.setInt("mapred.task.timeout", 6000000);
    con.setInt("io.sort.factor", 50);
    con.setInt("mapreduce.map.tasks", 256);
    con.setInt("dfs.replication", 1);

    Job job = new Job(con);
    job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(GramKey.class);
    job.setMapOutputValueClass(Gram.class);
    job.setPartitionerClass(GramKeyPartitioner.class);
    job.setGroupingComparatorClass(GramKeyGroupComparator.class);

    job.setOutputKeyClass(Gram.class);
    job.setOutputValueClass(Gram.class);

    job.setCombinerClass(CollocCombiner.class);

    FileInputFormat.setInputPaths(job, input);

    Path outputPath = new Path(output, SUBGRAM_OUTPUT_DIRECTORY);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(CollocMapper.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setReducerClass(CollocReducer.class);
    job.setNumReduceTasks(512);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }

    return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java
License:Apache License
/**
 * pass2: perform the LLR calculation
 */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
        boolean emitUnigrams, float minValue, int reduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration(baseConf);
    conf.setLong(AssocReducer.NGRAM_TOTAL, nGramTotal);
    conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    conf.setFloat(AssocReducer.MIN_VALUE, minValue);
    conf.setInt("mapred.job.map.memory.mb", 1280);
    conf.setInt("mapred.job.reduce.memory.mb", 2560);
    conf.set("mapred.reduce.child.java.opts", "-Xmx2G");
    conf.setInt("mapred.task.timeout", 6000000);
    conf.set(AssocReducer.ASSOC_METRIC, "llr");

    Job job = new Job(conf);
    job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output + " pruning: " + minValue);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(Gram.class);
    job.setMapOutputValueClass(Gram.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
    Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY + "_llr");
    FileOutputFormat.setOutputPath(job, outPath);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);
    job.setReducerClass(AssocReducer.class);
    job.setNumReduceTasks(reduceTasks);

    // Defines additional single text based output 'text' for the job
    MultipleOutputs.addNamedOutput(job, "contingency", TextOutputFormat.class, Text.class, Text.class);

    // Defines additional multi sequencefile based output 'sequence' for the job
    MultipleOutputs.addNamedOutput(job, "llr", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "pmi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "chi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "dice", TextOutputFormat.class, Text.class, DoubleWritable.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:eagle.service.hbase.EmbeddedHbase.java
License:Apache License
public void start() {
    try {
        util = new HBaseTestingUtility();
        Configuration conf = util.getConfiguration();
        conf.setInt("test.hbase.zookeeper.property.clientPort", port);
        conf.set("zookeeper.znode.parent", znode);
        conf.setInt("hbase.zookeeper.property.maxClientCnxns", 200);
        conf.setInt("hbase.master.info.port", -1); // avoid port clobbering

        // start mini hbase cluster
        hBaseCluster = util.startMiniCluster();
        Configuration config = hBaseCluster.getConf();
        config.set("zookeeper.session.timeout", "120000");
        config.set("hbase.zookeeper.property.tickTime", "6000");
        config.set(HConstants.HBASE_CLIENT_PAUSE, "3000");
        config.set(HConstants.HBASE_CLIENT_RETRIES_NUMBER, "1");
        config.set(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, "60000");

        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                shutdown();
            }
        });
    } catch (Throwable t) {
        LOG.error("Got an exception: ", t);
    }
}
From source file:edu.berkeley.chukwa_xtrace.TestXtrExtract.java
License:Apache License
public void testArchiving() throws Exception {
    System.out.println("starting archive test");
    Configuration conf = new Configuration();
    System.setProperty("hadoop.log.dir", System.getProperty("test.build.data", "/tmp"));
    MiniDFSCluster dfs = new MiniDFSCluster(conf, NUM_HADOOP_SLAVES, true, null);
    FileSystem fileSys = dfs.getFileSystem();
    fileSys.delete(OUTPUT_DIR, true); // nuke output dir

    writeASinkFile(conf, fileSys, INPUT_DIR, 1000);

    FileStatus fstat = fileSys.getFileStatus(INPUT_DIR);
    assertTrue(fstat.getLen() > 10);

    System.out.println("filesystem is " + fileSys.getUri());
    conf.set("fs.default.name", fileSys.getUri().toString());
    conf.setInt("io.sort.mb", 1);
    conf.setInt("io.sort.factor", 5);
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 2);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 2);

    MiniMRCluster mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1);
    String[] archiveArgs = { INPUT_DIR.toString(), fileSys.getUri().toString() + OUTPUT_DIR.toString() };

    JobConf jc = mr.createJobConf(new JobConf(conf));
    assertEquals("true", jc.get("archive.groupByClusterName"));
    assertEquals(1, jc.getInt("io.sort.mb", 5));

    int returnVal = ToolRunner.run(jc, new XtrExtract(), archiveArgs);
    assertEquals(0, returnVal);
    fstat = fileSys.getFileStatus(new Path("/chukwa/archives/foocluster/HadoopLogProcessor_2008_05_29.arc"));
    assertTrue(fstat.getLen() > 10);

    Thread.sleep(1000);
    System.out.println("done!");
}
From source file:edu.cuhk.hccl.hadoop.HadoopApp.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args == null || args.length < 4) {
        System.out.println("Please specify parameters: input, output, domain, num-reducers!");
        System.exit(-1);
    }

    String input = args[0];
    String output = args[1];
    String domain = args[2];
    int numReducers = Integer.parseInt(args[3]);
    float similarity = Float.parseFloat(args[4]);
    int range = Integer.parseInt(args[5]);

    Job job = new Job(new Configuration(), this.getClass().getSimpleName());

    // Must come after the job creation above
    Configuration conf = job.getConfiguration();

    // Reuse the JVM
    conf.setInt("mapred.job.reuse.jvm.num.tasks", -1);

    conf.setFloat("SIM_THRESHOLD", similarity);
    conf.setInt("SEARCH_RANGE", range);

    if (domain.equalsIgnoreCase("restaurant")) {
        conf.setStrings("ASPECTS", Constant.RESTAURANT_ASPECTS);
        job.setMapperClass(YelpMapper.class);
        job.setInputFormatClass(TextInputFormat.class);

        // args[6] is the business file used to select business_ids matching restaurants
        String busiFile = args[6];
        DistributedCache.addCacheFile(new URI(busiFile), conf);
    } else if (domain.equalsIgnoreCase("hotel")) {
        conf.setStrings("ASPECTS", Constant.TRIPADVISOR_ASPECTS);
        job.setMapperClass(TripAdvisorMapper.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
    } else {
        System.out.println("Wrong domain type!");
        System.exit(-1);
    }

    job.setJarByClass(HadoopApp.class);
    job.setReducerClass(ReviewReducer.class);
    job.setNumReduceTasks(numReducers);

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(UserItemPair.class);
    job.setOutputValueClass(NounPhrase.class);

    // Delete the output directory if it exists
    Path outputDir = new Path(output);
    FileSystem hdfs = FileSystem.get(conf);
    if (hdfs.exists(outputDir))
        hdfs.delete(outputDir, true);

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.waitForCompletion(true);

    return 0;
}