List of usage examples for org.apache.hadoop.conf Configuration setInt
public void setInt(String name, int value)
Set the value of the name property to an int.
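Before the full examples below, a minimal sketch of the call itself, assuming only the standard Configuration API: setInt stores an int under the given property name, and getInt reads it back, falling back to a default when the property is unset. The property key "my.example.parallelism" is a placeholder chosen for illustration, not a real Hadoop setting.

import org.apache.hadoop.conf.Configuration;

public class SetIntSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store an int under a property name (placeholder key for illustration).
        conf.setInt("my.example.parallelism", 8);
        // Read it back; the second argument is the default used when the key is unset.
        int parallelism = conf.getInt("my.example.parallelism", 1);
        System.out.println("parallelism = " + parallelism);
    }
}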
From source file:com.phantom.hadoop.examples.dancing.DistributedPentomino.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    if (args.length == 0) {
        System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    // check for passed parameters, otherwise use defaults
    int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
    int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
    int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-depth")) {
            depth = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-height")) {
            height = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-width")) {
            width = Integer.parseInt(args[++i].trim());
        }
    }
    // now set the values within conf for M/R tasks to read, this
    // will ensure values are set preventing MAPREDUCE-4678
    conf.setInt(Pentomino.WIDTH, width);
    conf.setInt(Pentomino.HEIGHT, height);
    conf.setInt(Pentomino.DEPTH, depth);
    Class<? extends Pentomino> pentClass = conf.getClass(Pentomino.CLASS, OneSidedPentomino.class,
            Pentomino.class);
    int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);
    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        Job job = new Job(conf);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        job.setJarByClass(PentMap.class);
        job.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        long inputSize = createInputDirectory(fileSys, input, pent, depth);
        // for forcing the number of maps
        FileInputFormat.setMaxInputSplitSize(job, (inputSize / numMaps));
        // the keys are the prefix strings
        job.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        job.setOutputValueClass(Text.class);
        job.setMapperClass(PentMap.class);
        job.setReducerClass(Reducer.class);
        job.setNumReduceTasks(1);
        return (job.waitForCompletion(true) ? 0 : 1);
    } finally {
        fileSys.delete(input, true);
    }
}
From source file:com.phantom.hadoop.examples.pi.DistSum.java
License:Apache License
/** Create a job */
private Job createJob(String name, Summation sigma) throws IOException {
    final Job job = new Job(getConf(), parameters.remoteDir + "/" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(DistSum.class);
    jobconf.setInt(N_PARTS, parameters.nParts);
    SummationWritable.write(sigma, DistSum.class, jobconf);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
From source file:com.phantom.hadoop.examples.RandomTextWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job. It
 * runs 10 maps/node and each node writes 1 gig of data to a DFS file. The
 * reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        return printUsage();
    }

    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

    Job job = new Job(conf);
    job.setJarByClass(RandomTextWriter.class);
    job.setJobName("random-text-writer");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
    job.setMapperClass(RandomTextMapper.class);

    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:com.phantom.hadoop.examples.RandomWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job. It
 * runs 10 maps/node and each node writes 1 gig of data to a DFS file. The
 * reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path outDir = new Path(args[0]);
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

    Job job = new Job(conf);
    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:com.philiphubbard.sabe.MRAssembler.java
License:Open Source License
public boolean run(Path inputPath, Path outputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();

    // Job.getInstance() copies the Configuration argument, so set its properties first.
    conf.setBoolean(MRVertex.CONFIG_ALLOW_EDGE_MULTIPLES, true);
    conf.setBoolean(MRVertex.CONFIG_COMPRESS_CHAIN_MULTIPLES_MUST_MATCH, false);
    conf.setInt(MRMerVertex.CONFIG_MER_LENGTH, vertexMerLength);
    conf.setBoolean(MRBuildVertices.CONFIG_PARTITION_BRANCHES_CHAINS, true);
    conf.setInt(MRBuildVertices.CONFIG_COVERAGE, coverage);
    conf.setInt(MRCompressChains.CONFIG_TERMINATION_COUNT, 1);

    Job buildJob = Job.getInstance(conf);
    buildJob.setJobName("mrassemblerbuild");

    Path buildInputPath = inputPath;
    Path buildOutputPath = new Path("sabe.MRAssemblerTmp");

    System.out.println("sabe.MRAssembler starting vertex construction");

    MRBuildMerVertices.setupJob(buildJob, buildInputPath, buildOutputPath);
    if (!buildJob.waitForCompletion(true))
        return false;

    Path compressInputPath = new Path(buildOutputPath.toString() + "/chain");
    Path compressOutputPath = new Path(buildOutputPath.toString() + "/chainCompress");

    int iter = 0;
    boolean keepGoing = true;
    MRCompressChains.beginIteration();
    while (keepGoing) {
        Job compressJob = Job.getInstance(conf);
        compressJob.setJobName("mrassemblercompress");

        System.out.println("sabe.MRAssembler starting compression iteration " + iter);

        MRCompressMerChains.setupIterationJob(compressJob, compressInputPath, compressOutputPath);
        if (!compressJob.waitForCompletion(true))
            System.exit(1);

        iter++;
        keepGoing = MRCompressChains.continueIteration(compressJob, compressInputPath, compressOutputPath);
    }

    System.out.println("sabe.MRAssembler made " + iter + " compression iterations");

    Path branchPath = new Path(buildOutputPath.toString() + "/branch");
    Path chainPath = compressOutputPath;

    FileSystem fileSystem = FileSystem.get(conf);

    Graph graph = buildCompressedGraph(conf, fileSystem, branchPath, chainPath);
    if (graph != null) {
        ArrayList<String> result = graph.assemble();

        FSDataOutputStream out = fileSystem.create(outputPath);
        for (String seq : result) {
            out.writeBytes(seq);
            out.writeBytes("\n");
        }
    }

    fileSystem.delete(buildOutputPath, true);
    fileSystem.close();

    return true;
}
From source file:com.philiphubbard.sabe.MRMerVertexTest.java
License:Open Source License
public static void test() {
    System.out.println("Testing MRMerVertex:");

    Configuration config = new Configuration();
    config.setInt(MRMerVertex.CONFIG_MER_LENGTH, 5);

    String s1 = "ACGTA";
    int m1 = Mer.toInt(s1);
    MRMerVertex mv1 = new MRMerVertex(m1, config);

    String s2 = "CGTAC";
    int m2 = Mer.toInt(s2);
    MRMerVertex mv2 = new MRMerVertex(m2, config);

    String s3 = "GTACG";
    int m3 = Mer.toInt(s3);
    MRMerVertex mv3 = new MRMerVertex(m3, config);

    String s4 = "TACGT";
    int m4 = Mer.toInt(s4);
    MRMerVertex mv4 = new MRMerVertex(m4, config);

    String s5 = "ACGTT";
    int m5 = Mer.toInt(s5);

    mv1.addEdgeTo(m2);
    mv2.addEdgeTo(m3);
    mv3.addEdgeTo(m4);
    mv4.addEdgeTo(m5);

    mv1.compressChain(mv2);
    String s1_2 = "ACGTAC";
    int m1_2 = Mer.toInt(s1_2);
    MerString ms1_2 = new MerString(m1_2, 6);
    assert (mv1.getMerString().equals(ms1_2));

    try {
        BytesWritable t1_2 = mv1.toWritable(MRVertex.EdgeFormat.EDGES_TO);
        MRMerVertex mv1a = new MRMerVertex(t1_2, config);
        assert (mv1a.equals(mv1));
        assert (mv1a.getMerString().equals(ms1_2));

        mv3.compressChain(mv4);
        String s3_4 = "GTACGT";
        int m3_4 = Mer.toInt(s3_4);
        MerString ms3_4 = new MerString(m3_4, 6);
        assert (mv3.getMerString().equals(ms3_4));

        BytesWritable t3_4 = mv3.toWritable(MRVertex.EdgeFormat.EDGES_TO);
        MRMerVertex mv3a = new MRMerVertex(t3_4, config);
        assert (mv3a.equals(mv3));
        assert (mv3a.getMerString().equals(ms3_4));

        mv1.compressChain(mv3);
        String s1_4 = "ACGTACGT";
        int m1_4 = Mer.toInt(s1_4);
        MerString ms1_4 = new MerString(m1_4, 8);
        assert (mv1.getMerString().equals(ms1_4));

        BytesWritable t1_4 = mv1.toWritable(MRVertex.EdgeFormat.EDGES_TO);
        MRMerVertex mv1b = new MRMerVertex(t1_4, config);
        assert (mv1b.equals(mv1));
        assert (mv1b.getMerString().equals(ms1_4));

        config.setInt(MRMerVertex.CONFIG_MER_LENGTH, 3);

        int m10 = Mer.toInt("TCG");
        MRMerVertex mv10 = new MRMerVertex(m10, config);
        BytesWritable t10a = mv10.toWritable(MRVertex.EdgeFormat.EDGES_TO);
        MRMerVertex mv10a = new MRMerVertex(t10a, config);
        assert (mv10.equals(mv10a));

        int m11 = Mer.toInt("CGA");
        MRMerVertex mv11 = new MRMerVertex(m11, config);
        mv10.addEdgeTo(m11);
        mv11.addEdgeTo(Mer.toInt("GAG"));
        mv10.compressChain(mv11);

        BytesWritable t10b = mv10.toWritable(MRVertex.EdgeFormat.EDGES_TO);
        MRMerVertex mv10b = new MRMerVertex(t10b, config);
        assert (mv10.equals(mv10b));
    } catch (IOException exception) {
        System.out.println(exception.getMessage());
        assert (false);
    }

    System.out.println("MRMerVertex passed.");
}
From source file:com.pinterest.terrapin.server.TerrapinServerHandler.java
License:Apache License
public void start() throws Exception {
    String zookeeperQuorum = TerrapinUtil.getZKQuorumFromConf(configuration);
    int thriftPort = configuration.getInt(Constants.THRIFT_PORT, Constants.DEFAULT_THRIFT_PORT);

    // Connect to Helix.
    this.helixManager = HelixManagerFactory.getZKHelixManager(
            configuration.getString(Constants.HELIX_CLUSTER, Constants.HELIX_CLUSTER_NAME_DEFAULT),
            TerrapinUtil.getHelixInstanceFromHDFSHost(InetAddress.getLocalHost().getHostName()),
            InstanceType.PARTICIPANT, zookeeperQuorum);
    StateMachineEngine stateMach = this.helixManager.getStateMachineEngine();

    // Create state model factory for HDFS.
    Configuration conf = new Configuration();
    conf.set("fs.default.name", configuration.getString(Constants.HDFS_NAMENODE));

    // Setup HDFS short circuit parameters.
    conf.setBoolean("dfs.client.read.shortcircuit", true);
    conf.setInt("dfs.client.read.shortcircuit.streams.cache.size", 5000);
    conf.setInt("dfs.client.read.shortcircuit.buffer.size", 131072);
    conf.set("dfs.domain.socket.path", "/var/run/hadoop-hdfs/dn._PORT");

    FileSystem fs = FileSystem.get(conf);
    this.stateModelFactory = new OnlineOfflineStateModelFactory(this.configuration, resourcePartitionMap,
            new ReaderFactory(configuration, new HFileSystem(fs)));
    stateMach.registerStateModelFactory("OnlineOffline", this.stateModelFactory);
    this.helixManager.connect();

    // Start up the thrift server for serving.
    startThriftServer(thriftPort);
}
From source file:com.pinterest.terrapin.TerrapinUtil.java
License:Apache License
public static void setupConfiguration(Configuration conf, long dfsBlockSize, int dfsReplication) {
    conf.setInt("mapred.map.max.attempts", Constants.MAPRED_MAP_MAX_ATTEMPTS);
    conf.setInt("io.bytes.per.checksum", Constants.CHECKSUM_BYTES);
    long dfsBlockSizeAdjusted = dfsBlockSize;
    if (dfsBlockSize % Constants.CHECKSUM_BYTES != 0) {
        dfsBlockSizeAdjusted = (dfsBlockSize / Constants.CHECKSUM_BYTES + 1) * Constants.CHECKSUM_BYTES;
    }
    conf.setLong("dfs.block.size", dfsBlockSizeAdjusted);
    conf.setInt("dfs.replication", dfsReplication);
    conf.set(Constants.HFILE_COMPRESSION,
            System.getProperty(Constants.HFILE_COMPRESSION, Constants.HFILE_COMPRESSION_DEFAULT));
    conf.setInt(Constants.HFILE_BLOCKSIZE, Integer.parseInt(
            System.getProperty(Constants.HFILE_BLOCKSIZE, String.valueOf(Constants.HFILE_BLOCKSIZE_DEFAULT))));
}
From source file:com.placeiq.piqconnect.BlocksBuilder.java
License:Apache License
protected Job configStage1() throws Exception {
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(pathOutput, true); // useful ?

    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.setBoolean(Constants.PROP_IS_VECTOR, isVector);
    conf.set("mapred.output.compression.type", "BLOCK"); // useful ?

    Job job = new Job(conf, "data-piqid.piqconnect.BlocksBuilder");
    job.setJarByClass(BlocksBuilder.class);
    job.setMapperClass(MapStage1.class);
    job.setReducerClass(RedStage1.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(BlockIndexWritable.class);
    job.setMapOutputValueClass(LightBlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);

    FileInputFormat.setInputPaths(job, pathEdges);
    SequenceFileOutputFormat.setOutputPath(job, pathOutput);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    Runner.setCompression(job);
    return job;
}
From source file:com.placeiq.piqconnect.Runner.java
License:Apache License
private Job buildJob1(Path input1, Path input2, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.set("mapred.output.compression.type", "BLOCK");

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage1");
    job.setJarByClass(Runner.class);
    job.setMapperClass(IterationStage1._Mapper.class);
    job.setReducerClass(IterationStage1._Reducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(IterationStage1.JoinKey.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setGroupingComparatorClass(IterationStage1.IndexComparator.class);
    job.setPartitionerClass(IterationStage1.IndexPartitioner.class);
    job.setSortComparatorClass(IterationStage1.SortComparator.class);

    FileInputFormat.setInputPaths(job, input1, input2);
    SequenceFileOutputFormat.setOutputPath(job, output);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    setCompression(job);
    return job;
}