List of usage examples for org.apache.hadoop.conf Configuration getInt
public int getInt(String name, int defaultValue)
Gets the value of the name property as an int. If no such property exists, the provided defaultValue is returned; if the stored value is not a valid int, an error is thrown.
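Before the examples from real projects below, here is a minimal sketch of the basic call pattern. The property names (my.app.retry.count, my.app.timeout.seconds) are hypothetical and used only for illustration; the second argument to getInt is the fallback returned when the property is not set.

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Hypothetical property, set here so the lookup below finds it.
        conf.setInt("my.app.retry.count", 5);

        // Returns 5, the value stored above.
        int retries = conf.getInt("my.app.retry.count", 3);

        // Returns 3, the default, because this property was never set.
        int timeout = conf.getInt("my.app.timeout.seconds", 3);

        System.out.println("retries=" + retries + ", timeout=" + timeout);
    }
}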
From source file:com.odiago.flumebase.exec.HashJoinElement.java
License:Apache License
public HashJoinElement(FlowElementContext ctxt, String leftName, String rightName, TypedField leftKey,
        TypedField rightKey, WindowSpec windowWidth, String outName, List<TypedField> leftFieldNames,
        List<TypedField> rightFieldNames, Configuration conf) {
    super(ctxt);
    mSlackTime = conf.getInt(BucketedAggregationElement.SLACK_INTERVAL_KEY,
            BucketedAggregationElement.DEFAULT_SLACK_INTERVAL);
    if (mSlackTime < 0) {
        mSlackTime = BucketedAggregationElement.DEFAULT_SLACK_INTERVAL;
    }

    mLeftMap = new WindowedHashMap<Object, EventWrapper, Long>();
    mRightMap = new WindowedHashMap<Object, EventWrapper, Long>();
    mLeftName = leftName;
    mRightName = rightName;
    mLeftKey = leftKey;
    mRightKey = rightKey;
    mWindowWidth = windowWidth;
    try {
        assert mWindowWidth.getRangeSpec().isConstant();
        mTimeSpan = (TimeSpan) mWindowWidth.getRangeSpec().eval(new EmptyEventWrapper());
    } catch (IOException ioe) {
        // This should be a constant expression, so this would be quite surprising.
        LOG.error("Unexpected IOE during timespan eval() in HashJoin: " + ioe);
    }

    mOutName = outName;
    initFieldMap(leftFieldNames, rightFieldNames);
}
From source file:com.phantom.hadoop.examples.dancing.DistributedPentomino.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    if (args.length == 0) {
        System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    // check for passed parameters, otherwise use defaults
    int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
    int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
    int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-depth")) {
            depth = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-height")) {
            height = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-width")) {
            width = Integer.parseInt(args[++i].trim());
        }
    }

    // now set the values within conf for M/R tasks to read, this
    // will ensure values are set preventing MAPREDUCE-4678
    conf.setInt(Pentomino.WIDTH, width);
    conf.setInt(Pentomino.HEIGHT, height);
    conf.setInt(Pentomino.DEPTH, depth);

    Class<? extends Pentomino> pentClass = conf.getClass(Pentomino.CLASS, OneSidedPentomino.class,
            Pentomino.class);
    int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);
    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        Job job = new Job(conf);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        job.setJarByClass(PentMap.class);

        job.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        long inputSize = createInputDirectory(fileSys, input, pent, depth);
        // for forcing the number of maps
        FileInputFormat.setMaxInputSplitSize(job, (inputSize / numMaps));

        // the keys are the prefix strings
        job.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        job.setOutputValueClass(Text.class);

        job.setMapperClass(PentMap.class);
        job.setReducerClass(Reducer.class);

        job.setNumReduceTasks(1);

        return (job.waitForCompletion(true) ? 0 : 1);
    } finally {
        fileSys.delete(input, true);
    }
}
From source file:com.phantom.hadoop.examples.RandomTextWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job. It
 * runs 10 maps/node and each node writes 1 gig of data to a DFS file. The
 * reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        return printUsage();
    }

    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

    Job job = new Job(conf);

    job.setJarByClass(RandomTextWriter.class);
    job.setJobName("random-text-writer");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
    job.setMapperClass(RandomTextMapper.class);

    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

    return ret;
}
From source file:com.phantom.hadoop.examples.RandomWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job. It
 * runs 10 maps/node and each node writes 1 gig of data to a DFS file. The
 * reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path outDir = new Path(args[0]);
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

    Job job = new Job(conf);

    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

    return ret;
}
From source file:com.phantom.hadoop.examples.terasort.TeraInputFormat.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param job
 *            the job to sample
 * @param partFile
 *            where to write the output file to
 * @throws Throwable
 *             if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {
                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing partitions took " + (t3 - t2) + "ms");
}
From source file:com.philiphubbard.sabe.MRMerVertex.java
License:Open Source License
public MRMerVertex(int id, Configuration config) {
    super(id, config);
    merString = new MerString(id, config.getInt(CONFIG_MER_LENGTH, 1));
}
From source file:com.philiphubbard.sabe.MRMerVertex.java
License:Open Source License
public String toDisplayString(Configuration config) {
    StringBuilder s = new StringBuilder();

    s.append("MRMerVertex ");
    s.append(getId());
    s.append(" (");
    int merLength = config.getInt(CONFIG_MER_LENGTH, 1);
    s.append(Mer.fromInt(getId(), merLength));
    s.append(") ");

    MRVertex.AdjacencyIterator toIt = createToAdjacencyIterator();
    if (toIt.begin() != NO_VERTEX) {
        s.append("; to: ");
        for (int to = toIt.begin(); !toIt.done(); to = toIt.next()) {
            s.append(to);
            s.append(" ");
        }
    }

    MRVertex.AdjacencyIterator fromIt = createFromAdjacencyIterator();
    if (fromIt.begin() != NO_VERTEX) {
        s.append("; from: ");
        for (int from = fromIt.begin(); !fromIt.done(); from = fromIt.next()) {
            s.append(from);
            s.append(" ");
        }
    }

    if (merString != null) {
        s.append("; mer ");
        s.append(merString.toDisplayString());
    }

    return s.toString();
}
From source file:com.philiphubbard.sabe.MRMerVertex.java
License:Open Source License
@Override
protected void compressChainInternal(MRVertex other, Configuration config) {
    if (other instanceof MRMerVertex) {
        MRMerVertex otherMer = (MRMerVertex) other;

        int merLength = config.getInt(CONFIG_MER_LENGTH, 1);

        if (merString == null)
            merString = new MerString(getId(), merLength);
        MerString otherMerString = otherMer.merString;
        if (otherMerString == null)
            otherMerString = new MerString(other.getId(), merLength);

        merString.merge(otherMerString, merLength - 1);
    }
}
From source file:com.pinterest.terrapin.hadoop.HFileOutputFormat.java
License:Apache License
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputDir.getFileSystem(conf);

    int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
    // Default to snappy.
    Compression.Algorithm compressionAlgorithm = getAlgorithm(conf.get(Constants.HFILE_COMPRESSION));
    final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize)
            .withFilePath(hfilePath(outputPath, context.getTaskAttemptID().getTaskID().getId()))
            .withCompression(compressionAlgorithm).build();
    return new HFileRecordWriter(writer);
}
From source file:com.pinterest.terrapin.hadoop.HFileRecordWriterTest.java
License:Apache License
@Test
public void testWrite() throws Exception {
    Configuration conf = new Configuration();
    HColumnDescriptor columnDescriptor = new HColumnDescriptor();
    // Disable block cache to ensure it reads the actual file content.
    columnDescriptor.setBlockCacheEnabled(false);

    FileSystem fs = FileSystem.get(conf);
    int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
    final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf, columnDescriptor),
            fs, blockSize).withFilePath(new Path(tempFile.toURI())).build();

    /* Create our RecordWriter */
    RecordWriter<BytesWritable, BytesWritable> hfileWriter = new HFileRecordWriter(writer);

    List<String> keys = Lists.newArrayList();
    List<String> values = Lists.newArrayList();
    for (int i = 0; i < 100; ++i) {
        String key = String.format("%03d", i);
        String val = "value " + i;
        keys.add(key);
        values.add(val);
        hfileWriter.write(new BytesWritable(key.getBytes()), new BytesWritable(val.getBytes()));
    }
    /* This internally closes the StoreFile.Writer */
    hfileWriter.close(null);

    HFile.Reader reader = HFile.createReader(fs, new Path(tempFile.toURI()),
            new CacheConfig(conf, columnDescriptor));
    HFileScanner scanner = reader.getScanner(false, false, false);
    boolean valid = scanner.seekTo();
    List<String> gotKeys = Lists.newArrayListWithCapacity(keys.size());
    List<String> gotValues = Lists.newArrayListWithCapacity(values.size());
    while (valid) {
        KeyValue keyValue = scanner.getKeyValue();
        gotKeys.add(new String(keyValue.getRow()));
        gotValues.add(new String(keyValue.getValue()));
        valid = scanner.next();
    }
    assertEquals(keys, gotKeys);
    assertEquals(values, gotValues);
    reader.close();
}