List of usage examples for org.apache.hadoop.conf.Configuration.setLong
public void setLong(String name, long value)
Set the value of the name property to a long.
From source file: nl.utwente.mirex.AnchorExtract.java
License:Open Source License
/** * Runs the MapReduce job "anchor text extraction" * @param args 0: path to web collection on HDFS; 1: (non-existing) path that will contain anchor texts * @usage. /*w ww .j a va2s . co m*/ * <code> hadoop jar mirex-0.2.jar nl.utwente.mirex.AnchorExtract /user/hadoop/ClueWeb09_English/*/ /user/hadoop/ClueWeb09_Anchors </code> */ public static void main(String[] args) throws Exception { // Set job configuration Configuration conf = new Configuration(); conf.setLong("mapred.task.timeout", 1800 * 1000L); // 30 minutes timeout Job job = new Job(conf, "AnchorExtract"); job.setJarByClass(AnchorExtract.class); if (args.length != 2) { System.out.printf("Usage: %s inputFiles outputFile\n", AnchorExtract.class.getSimpleName()); System.out.println(" inputFiles: path to data"); System.out.println(" outputFile: directory where anchor text is stored"); System.exit(1); } int argc = 0; String inputFiles = args[argc++]; String outputFile = args[argc++]; job.setMapperClass(Map.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setCombinerClass(Combine.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(WarcFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(inputFiles)); // '(conf, args[0])' to accept comma-separated list. FileOutputFormat.setOutputPath(job, new Path(outputFile)); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); job.waitForCompletion(true); }
From source file:org.apache.accumulo.examples.mapreduce.TeraSortIngest.java
License:Apache License
/**
 * Configures and runs the "TeraSortCloud" job: a map-only job (zero reduce tasks) that reads
 * from {@code RangeInputFormat}, maps with {@code SortGenMapper} and writes through
 * {@code AccumuloOutputFormat}.
 *
 * @param args command-line arguments, parsed by {@code Opts}
 * @return 0 if the job reports success, 1 otherwise
 * @throws Exception if argument parsing, job setup or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final Job ingestJob = Job.getInstance(getConf());
    ingestJob.setJobName("TeraSortCloud");
    ingestJob.setJarByClass(this.getClass());

    final Opts options = new Opts();
    options.parseArgs(TeraSortIngest.class.getName(), args);

    // Input side and mapper; no reducers are scheduled.
    ingestJob.setInputFormatClass(RangeInputFormat.class);
    ingestJob.setMapperClass(SortGenMapper.class);
    ingestJob.setMapOutputKeyClass(Text.class);
    ingestJob.setMapOutputValueClass(Mutation.class);
    ingestJob.setNumReduceTasks(0);

    // Output side.
    ingestJob.setOutputFormatClass(AccumuloOutputFormat.class);
    options.setAccumuloConfigs(ingestJob);
    final BatchWriterConfig writerConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
    AccumuloOutputFormat.setBatchWriterOptions(ingestJob, writerConfig);

    // Generator settings handed to the tasks through the job configuration.
    final Configuration settings = ingestJob.getConfiguration();
    settings.setLong(NUMROWS, options.numRows);
    settings.setInt("cloudgen.minkeylength", options.minKeyLength);
    settings.setInt("cloudgen.maxkeylength", options.maxKeyLength);
    settings.setInt("cloudgen.minvaluelength", options.minValueLength);
    settings.setInt("cloudgen.maxvaluelength", options.maxValueLength);
    settings.set("cloudgen.tablename", options.getTableName());

    // The split count is only written when a non-zero value was parsed.
    if (options.splits != 0) {
        settings.setInt(NUMSPLITS, options.splits);
    }

    ingestJob.waitForCompletion(true);
    if (ingestJob.isSuccessful()) {
        return 0;
    }
    return 1;
}
From source file:org.apache.accumulo.examples.simple.mapreduce.TeraSortIngest.java
License:Apache License
/**
 * Configures and runs the "TeraSortCloud" job: a map-only job (zero reduce tasks) that reads
 * from {@code RangeInputFormat}, maps with {@code SortGenMapper} and writes through
 * {@code AccumuloOutputFormat}.
 *
 * @param args command-line arguments, parsed by {@code Opts}
 * @return 0 if the job reports success, 1 otherwise
 * @throws Exception if argument parsing, job setup or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = JobUtil.getJob(getConf());
    job.setJobName("TeraSortCloud");
    job.setJarByClass(this.getClass());

    Opts opts = new Opts();
    opts.parseArgs(TeraSortIngest.class.getName(), args);

    job.setInputFormatClass(RangeInputFormat.class);
    job.setMapperClass(SortGenMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    opts.setAccumuloConfigs(job);
    BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);

    Configuration conf = job.getConfiguration();
    conf.setLong(NUMROWS, opts.numRows);
    conf.setInt("cloudgen.minkeylength", opts.minKeyLength);
    conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
    conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
    conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
    conf.set("cloudgen.tablename", opts.getTableName());

    // Decide from the parsed option itself rather than from the raw argument count, which
    // says nothing about whether a split count was actually supplied.
    // NOTE(review): assumes Opts.splits is 0 when the option was not given - confirm.
    if (opts.splits != 0) {
        conf.setInt(NUMSPLITS, opts.splits);
    }

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file:org.apache.accumulo.examples.wikisearch.output.SortingRFileOutputFormat.java
License:Apache License
/**
 * Stores the given maximum buffer size in the configuration under the
 * {@code MAX_BUFFER_SIZE} property.
 *
 * @param conf configuration to update
 * @param maxBufferSize value written as a long property
 */
public static void setMaxBufferSize(Configuration conf, long maxBufferSize) {
    final String propertyName = MAX_BUFFER_SIZE;
    conf.setLong(propertyName, maxBufferSize);
}
From source file:org.apache.accumulo.server.test.continuous.ContinuousMoru.java
License:Apache License
@Override public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException { if (args.length != 13) { throw new IllegalArgumentException("Usage : " + ContinuousMoru.class.getName() + " <instance name> <zookeepers> <user> <pass> <table> <min> <max> <max cf> <max cq> <max mem> <max latency> <num threads> <max maps>"); }//from ww w . j ava2 s . c o m String instance = args[0]; String zookeepers = args[1]; String user = args[2]; String pass = args[3]; String table = args[4]; String min = args[5]; String max = args[6]; String max_cf = args[7]; String max_cq = args[8]; String maxMem = args[9]; String maxLatency = args[10]; String numThreads = args[11]; String maxMaps = args[12]; Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis()); job.setJarByClass(this.getClass()); job.setInputFormatClass(AccumuloInputFormat.class); AccumuloInputFormat.setInputInfo(job.getConfiguration(), user, pass.getBytes(), table, new Authorizations()); AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), instance, zookeepers); // set up ranges try { AccumuloInputFormat.setRanges(job.getConfiguration(), new ZooKeeperInstance(instance, zookeepers).getConnector(user, pass.getBytes()) .tableOperations().splitRangeByTablets(table, new Range(), Integer.parseInt(maxMaps))); AccumuloInputFormat.disableAutoAdjustRanges(job.getConfiguration()); } catch (Exception e) { throw new IOException(e); } job.setMapperClass(CMapper.class); job.setNumReduceTasks(0); job.setOutputFormatClass(AccumuloOutputFormat.class); Configuration conf = job.getConfiguration(); AccumuloOutputFormat.setOutputInfo(conf, user, pass.getBytes(), false, table); AccumuloOutputFormat.setZooKeeperInstance(conf, instance, zookeepers); AccumuloOutputFormat.setMaxLatency(conf, (int) (Integer.parseInt(maxLatency) / 1000.0)); AccumuloOutputFormat.setMaxMutationBufferSize(conf, Long.parseLong(maxMem)); AccumuloOutputFormat.setMaxWriteThreads(conf, 
Integer.parseInt(numThreads)); conf.setLong(MIN, Long.parseLong(min)); conf.setLong(MAX, Long.parseLong(max)); conf.setInt(MAX_CF, Integer.parseInt(max_cf)); conf.setInt(MAX_CQ, Integer.parseInt(max_cq)); conf.set(CI_ID, UUID.randomUUID().toString()); job.waitForCompletion(true); return job.isSuccessful() ? 0 : 1; }
From source file:org.apache.accumulo.test.continuous.ContinuousMoru.java
License:Apache License
@Override public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, AccumuloSecurityException { Opts opts = new Opts(); BatchWriterOpts bwOpts = new BatchWriterOpts(); MapReduceClientOnDefaultTable clientOpts = new MapReduceClientOnDefaultTable("ci"); clientOpts.parseArgs(ContinuousMoru.class.getName(), args, bwOpts, opts); Job job = Job.getInstance(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis()); job.setJarByClass(this.getClass()); job.setInputFormatClass(AccumuloInputFormat.class); clientOpts.setAccumuloConfigs(job);// w w w . ja va2 s. co m // set up ranges try { Set<Range> ranges = clientOpts.getConnector().tableOperations() .splitRangeByTablets(clientOpts.getTableName(), new Range(), opts.maxMaps); AccumuloInputFormat.setRanges(job, ranges); AccumuloInputFormat.setAutoAdjustRanges(job, false); } catch (Exception e) { throw new IOException(e); } job.setMapperClass(CMapper.class); job.setNumReduceTasks(0); job.setOutputFormatClass(AccumuloOutputFormat.class); AccumuloOutputFormat.setBatchWriterOptions(job, bwOpts.getBatchWriterConfig()); Configuration conf = job.getConfiguration(); conf.setLong(MIN, opts.min); conf.setLong(MAX, opts.max); conf.setInt(MAX_CF, opts.maxColF); conf.setInt(MAX_CQ, opts.maxColQ); conf.set(CI_ID, UUID.randomUUID().toString()); job.waitForCompletion(true); clientOpts.stopTracing(); return job.isSuccessful() ? 0 : 1; }
From source file:org.apache.accumulo.test.mapreduce.TeraSortIngest.java
License:Apache License
/**
 * Configures and runs the "TeraSortCloud" job: a map-only job (zero reduce tasks) that reads
 * from {@code RangeInputFormat}, maps with {@code SortGenMapper} and writes through
 * {@code AccumuloOutputFormat}.
 *
 * @param args command-line arguments, parsed by {@code Opts}
 * @return 0 if the job reports success, 1 otherwise
 * @throws Exception if argument parsing, job setup or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("TeraSortCloud");
    job.setJarByClass(this.getClass());

    Opts opts = new Opts();
    opts.parseArgs(TeraSortIngest.class.getName(), args);

    job.setInputFormatClass(RangeInputFormat.class);
    job.setMapperClass(SortGenMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    opts.setAccumuloConfigs(job);
    BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);

    Configuration conf = job.getConfiguration();
    conf.setLong(NUMROWS, opts.numRows);
    conf.setInt("cloudgen.minkeylength", opts.minKeyLength);
    conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
    conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
    conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
    conf.set("cloudgen.tablename", opts.getTableName());

    // Decide from the parsed option itself rather than from the raw argument count, which
    // says nothing about whether a split count was actually supplied.
    // NOTE(review): assumes Opts.splits is 0 when the option was not given - confirm.
    if (opts.splits != 0) {
        conf.setInt(NUMSPLITS, opts.splits);
    }

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file:org.apache.accumulo.test.mrit.IntegrationTestMapReduce.java
License:Apache License
@Override public int run(String[] args) throws Exception { // read a list of tests from the input, and print out the results if (args.length != 2) { System.err.println("Wrong number of args: <input> <output>"); return 1; }// ww w. j ava 2 s .c o m Configuration conf = getConf(); Job job = Job.getInstance(conf, "accumulo integration test runner"); conf = job.getConfiguration(); // some tests take more than 10 minutes conf.setLong(MRJobConfig.TASK_TIMEOUT, 20 * 60 * 1000); // minicluster uses a lot of ram conf.setInt(MRJobConfig.MAP_MEMORY_MB, 4000); // hadoop puts an ancient version of jline on the classpath conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true); // no need to run a test multiple times job.setSpeculativeExecution(false); // read one line at a time job.setInputFormatClass(NLineInputFormat.class); NLineInputFormat.setNumLinesPerSplit(job, 1); // run the test job.setJarByClass(IntegrationTestMapReduce.class); job.setMapperClass(TestMapper.class); // group test by result code job.setReducerClass(TestReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:org.apache.accumulo.testing.core.continuous.ContinuousMoru.java
License:Apache License
@Override public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, AccumuloSecurityException { Properties props = TestProps.loadFromFile(args[0]); ContinuousEnv env = new ContinuousEnv(props); Job job = Job.getInstance(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis()); job.setJarByClass(this.getClass()); job.setInputFormatClass(AccumuloInputFormat.class); AccumuloInputFormat.setConnectorInfo(job, env.getAccumuloUserName(), env.getToken()); AccumuloInputFormat.setInputTableName(job, env.getAccumuloTableName()); AccumuloInputFormat.setZooKeeperInstance(job, env.getClientConfiguration()); int maxMaps = Integer.parseInt(props.getProperty(TestProps.CI_VERIFY_MAX_MAPS)); // set up ranges try {// www . j a v a2 s . co m Set<Range> ranges = env.getAccumuloConnector().tableOperations() .splitRangeByTablets(env.getAccumuloTableName(), new Range(), maxMaps); AccumuloInputFormat.setRanges(job, ranges); AccumuloInputFormat.setAutoAdjustRanges(job, false); } catch (Exception e) { throw new IOException(e); } job.setMapperClass(CMapper.class); job.setNumReduceTasks(0); job.setOutputFormatClass(AccumuloOutputFormat.class); AccumuloOutputFormat.setBatchWriterOptions(job, env.getBatchWriterConfig()); AccumuloOutputFormat.setConnectorInfo(job, env.getAccumuloUserName(), env.getToken()); AccumuloOutputFormat.setCreateTables(job, true); AccumuloOutputFormat.setDefaultTableName(job, env.getAccumuloTableName()); AccumuloOutputFormat.setZooKeeperInstance(job, env.getClientConfiguration()); Configuration conf = job.getConfiguration(); conf.setLong(MIN, env.getRowMin()); conf.setLong(MAX, env.getRowMax()); conf.setInt(MAX_CF, env.getMaxColF()); conf.setInt(MAX_CQ, env.getMaxColQ()); conf.set(CI_ID, UUID.randomUUID().toString()); job.waitForCompletion(true); return job.isSuccessful() ? 0 : 1; }
From source file:org.apache.apex.engine.YarnAppLauncherImpl.java
License:Apache License
private void setConfiguration(Configuration conf, String property, Object value) { if (value instanceof Integer) { conf.setInt(property, (Integer) value); } else if (value instanceof Boolean) { conf.setBoolean(property, (Boolean) value); } else if (value instanceof Long) { conf.setLong(property, (Long) value); } else if (value instanceof Float) { conf.setFloat(property, (Float) value); } else if (value instanceof Double) { conf.setDouble(property, (Double) value); } else {// ww w . j a v a2 s . c o m conf.set(property, value.toString()); } }