List of usage examples for org.apache.hadoop.conf.Configuration.getInt
public int getInt(String name, int defaultValue)
Parameter: name - the property name; defaultValue - the value to return if the property is not set.
Return: the value of the name property as an int, or defaultValue if no such property exists.
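Before the project examples below, here is a minimal, self-contained sketch of a typical call. The property name my.example.port and its values are made up for illustration only.

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // The (hypothetical) property is not set anywhere, so the default is returned.
        int port = conf.getInt("my.example.port", 8080);
        System.out.println("port = " + port); // 8080

        // Once the property is set, getInt parses the stored value.
        conf.setInt("my.example.port", 9090);
        System.out.println("port = " + conf.getInt("my.example.port", 8080)); // 9090
    }
}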
. From source file:edu.stolaf.cs.wmrserver.ThriftServer.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    int port = conf.getInt("wmr.server.bind.port", 50100);

    SubmissionDatabase.connect(conf);

    JobServiceHandler service = new JobServiceHandler(new Configuration());
    JobService.Processor processor = new JobService.Processor(service);
    TServerTransport transport = new TServerSocket(port);
    TServer server = new TSimpleServer(new Args(transport).processor(processor));

    server.serve();
    return 0;
}
From source file:edu.uci.ics.pregelix.api.util.BspUtils.java
License:Apache License
/**
 * Get the specified frame size
 *
 * @param conf
 *            the job configuration
 * @return the specified frame size; -1 if it is not set by users
 */
public static int getFrameSize(Configuration conf) {
    return conf.getInt(PregelixJob.FRAME_SIZE, -1);
}
From source file:edu.uci.ics.pregelix.example.PageRankVertex2.java
License:Apache License
@Override
public void configure(Configuration conf) {
    maxIteration = conf.getInt(ITERATIONS, 10);
}
From source file:edu.ucla.sspace.hadoop.CooccurrenceExtractor.java
License:Open Source License
/**
 * Creates an unconfigured {@code CooccurrenceMapper}.
 */
public CooccurrenceExtractor(Configuration conf) {
    semanticFilter = new HashSet<String>();
    windowSize = conf.getInt(WINDOW_SIZE_PROPERTY, DEFAULT_WINDOW_SIZE);
}
From source file:edu.udel.mxv.MxvMap.java
@Override
protected void setup(Mapper<LongWritable, Text, IntWritable, DoubleWritable>.Context context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String input_vector = conf.get("vector.path");
    x_i = new double[conf.getInt("vector.n", 0)];

    FileSystem fs = FileSystem.get(URI.create(input_vector), conf);
    FileStatus[] status = fs.listStatus(new Path(input_vector));
    for (int i = 0; i < status.length; ++i) {
        Path file = status[i].getPath();
        System.out.println("status: " + i + " " + file.toString());

        DataInputStream dis = new DataInputStream(fs.open(file));
        String line = null;
        int count = 0;
        while ((line = dis.readLine()) != null) {
            String[] split_line = line.split(",");
            if (split_line.length == 2) {
                int pos = Integer.parseInt(split_line[0]);
                double val = Double.parseDouble(split_line[1]);
                x_i[pos++] = val;
                count++;
            } else {
                LOG.error("Parse error in line: " + line);
            }
        }
        LOG.info("Number of elements read for vector = " + count);
    }
}
From source file:edu.umd.cloud9.webgraph.driver.wt10g.GenericExtractLinks.java
License:Apache License
@Override
public int runTool() throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);

    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");
    String mappingFile = conf.get("Cloud9.DocnoMappingFile");

    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(new Path(mappingFile))) {
        throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!");
    }

    DistributedCache.addCacheFile(new Path(mappingFile).toUri(), job.getConfiguration());

    job.setJobName("ExtractLinks");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);

    job.setNumReduceTasks(numReducers);

    job.setMapperClass(GenericExtractLinks.Map.class);
    job.setCombinerClass(GenericExtractLinks.Reduce.class);
    job.setReducerClass(GenericExtractLinks.Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ArrayListWritable.class);

    configer.applyJobConfig(job);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    recursivelyAddInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    LOG.info("ExtractLinks");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - mapping file: " + mappingFile);
    LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false));

    job.waitForCompletion(true);
    return 0;
}
From source file:edu.umd.cloud9.webgraph.TrecExtractLinks.java
License:Apache License
@Override
public int runTool() throws Exception {
    Configuration conf = getConf();
    conf.set("mapred.child.java.opts", "-Xmx3072m");
    conf.setInt("mapred.task.timeout", 60000000);

    Job job = new Job(conf);
    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");
    String mappingFile = conf.get("Cloud9.DocnoMappingFile");

    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(new Path(mappingFile))) {
        throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!");
    }

    DistributedCache.addCacheFile(new Path(mappingFile).toUri(), job.getConfiguration());

    job.setJobName("ExtractLinks");
    job.setNumReduceTasks(numReducers);
    job.setJarByClass(TrecExtractLinks.class);

    job.setMapperClass(TrecExtractLinks.Map.class);
    job.setCombinerClass(TrecExtractLinks.Reduce.class);
    job.setReducerClass(TrecExtractLinks.Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ArrayListWritable.class);

    configer.applyJobConfig(job);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    recursivelyAddInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    LOG.info("ExtractLinks");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - mapping file: " + mappingFile);
    LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false));

    job.waitForCompletion(true);
    return 0;
}
From source file:edu.umn.cs.spatialHadoop.mapred.RandomShapeGenerator.java
License:Open Source License
/**
 * Initialize from a FileSplit
 * @param job
 * @param split
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public RandomShapeGenerator(Configuration job, RandomInputFormat.GeneratedSplit split) throws IOException {
    this(split.length,
            OperationsParams.getShape(job, "mbr").getMBR(),
            SpatialSite.getDistributionType(job, "type", DistributionType.UNIFORM),
            job.getInt("rectsize", 100),
            split.index + job.getLong("seed", System.currentTimeMillis()),
            job.getFloat("thickness", 1));
    setShape((S) SpatialSite.createStockShape(job));
}
From source file:edu.umn.cs.spatialHadoop.mapred.SpatialRecordReader.java
License:Open Source License
/**
 * Initialize from a path and file range
 * @param job
 * @param s
 * @param l
 * @param p
 * @throws IOException
 */
public SpatialRecordReader(Configuration job, long s, long l, Path p) throws IOException {
    this.start = s;
    this.end = s + l;
    this.path = p;
    LOG.info("Open a SpatialRecordReader to file: " + p + "[" + s + "," + (s + l) + ")");
    this.fs = this.path.getFileSystem(job);
    this.directIn = fs.open(this.path);
    this.blockSize = fs.getFileStatus(this.path).getBlockSize();
    this.cellMbr = new Rectangle();

    codec = new CompressionCodecFactory(job).getCodec(this.path);

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    directIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = cIn;
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            in = codec.createInputStream(directIn, decompressor);
            filePosition = directIn;
        }
    } else {
        directIn.seek(start);
        in = directIn;
        filePosition = directIn;
    }
    this.pos = start;
    this.maxShapesInOneRead = job.getInt(SpatialSite.MaxShapesInOneRead, 1000000);
    this.maxBytesInOneRead = job.getInt(SpatialSite.MaxBytesInOneRead, 32 * 1024 * 1024);

    initializeReader();
}
From source file:edu.umn.cs.spatialHadoop.mapreduce.SpatialInputFormat3.java
License:Open Source License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = super.getSplits(job);
    Configuration jobConf = job.getConfiguration();
    if (jobConf.getInt(CombineSplits, 1) > 1) {
        long t1 = System.currentTimeMillis();
        int combine = jobConf.getInt(CombineSplits, 1);
        /*
         * Combine splits to reduce number of map tasks. Currently, this is done
         * using a greedy algorithm that combines splits based on how many hosts
         * they share.
         * TODO: Use a graph clustering algorithm where each vertex represents a
         * split, and each edge is weighted with number of shared hosts between
         * the two splits
         */
        Vector<Vector<FileSplit>> openSplits = new Vector<Vector<FileSplit>>();
        int maxNumberOfSplits = (int) Math.ceil((float) splits.size() / combine);
        List<InputSplit> combinedSplits = new Vector<InputSplit>();
        for (InputSplit split : splits) {
            FileSplit fsplit = (FileSplit) split;
            int maxSimilarity = -1; // Best similarity found so far
            int bestFit = -1; // Index of a random open split with max similarity
            int numMatches = 0; // Number of splits with max similarity
            for (int i = 0; i < openSplits.size(); i++) {
                Vector<FileSplit> splitList = openSplits.elementAt(i);
                int similarity = 0;
                for (FileSplit otherSplit : splitList) {
                    for (String host1 : fsplit.getLocations())
                        for (String host2 : otherSplit.getLocations())
                            if (host1.equals(host2))
                                similarity++;
                }
                if (similarity > maxSimilarity) {
                    maxSimilarity = similarity;
                    bestFit = i;
                    numMatches = 1;
                } else if (similarity == maxSimilarity) {
                    numMatches++;
                    // Replace with a probability () for a reservoir sample
                    double random = Math.random();
                    if (random < (double) 1 / numMatches) {
                        // Replace the element in the reservoir
                        bestFit = i;
                    }
                }
            }
            if (maxSimilarity > 0 || (openSplits.size() + combinedSplits.size()) >= maxNumberOfSplits) {
                // Good fit || cannot create more open splits,
                // add it to an existing open split.
                Vector<FileSplit> bestList = openSplits.elementAt(bestFit);
                bestList.add(fsplit);
                if (bestList.size() > combine) {
                    // Reached threshold for this list. Add it to combined splits
                    combinedSplits.add(FileSplitUtil.combineFileSplits(bestList, 0, bestList.size()));
                    // Remove it from open splits
                    openSplits.remove(bestFit);
                }
            } else {
                // Bad fit && can add a new split
                // Create a new open split just for this one
                Vector<FileSplit> newOpenSplit = new Vector<FileSplit>();
                newOpenSplit.add(fsplit);
                openSplits.addElement(newOpenSplit);
            }
        }
        // Add all remaining open splits to the list of combined splits
        for (Vector<FileSplit> openSplit : openSplits) {
            combinedSplits.add(FileSplitUtil.combineFileSplits(openSplit, 0, openSplit.size()));
        }
        String msg = String.format("Combined %d splits into %d combined splits", splits.size(),
                combinedSplits.size());
        splits.clear();
        splits.addAll(combinedSplits);
        long t2 = System.currentTimeMillis();
        LOG.info(msg + " in " + ((t2 - t1) / 1000.0) + " seconds");
    }
    return splits;
}