Example usage for org.apache.hadoop.conf Configuration getInt

Introduction

This page collects example usages of org.apache.hadoop.conf.Configuration#getInt drawn from open-source projects.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int. If no such property exists, the provided default value is returned.
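
A minimal, self-contained sketch of the call (the property name example.window.size and its values are chosen purely for illustration): the default is returned while the property is unset, and the parsed value once it has been set.

import org.apache.hadoop.conf.Configuration;

public class GetIntSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Property not set yet, so the supplied default (8) is returned.
        int before = conf.getInt("example.window.size", 8);

        // Set the property, then read it back; getInt parses the stored value.
        conf.setInt("example.window.size", 32);
        int after = conf.getInt("example.window.size", 8);

        System.out.println(before + " -> " + after); // prints "8 -> 32"
    }
}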

Usage

From source file:edu.stolaf.cs.wmrserver.ThriftServer.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    int port = conf.getInt("wmr.server.bind.port", 50100);

    SubmissionDatabase.connect(conf);

    JobServiceHandler service = new JobServiceHandler(new Configuration());
    JobService.Processor processor = new JobService.Processor(service);
    TServerTransport transport = new TServerSocket(port);
    TServer server = new TSimpleServer(new Args(transport).processor(processor));

    server.serve();

    return 0;
}

From source file:edu.uci.ics.pregelix.api.util.BspUtils.java

License:Apache License

/**
 * Get the specified frame size
 * 
 * @param conf
 *            the job configuration
 * @return the specified frame size; -1 if it is not set by users
 */
public static int getFrameSize(Configuration conf) {
    return conf.getInt(PregelixJob.FRAME_SIZE, -1);
}

From source file:edu.uci.ics.pregelix.example.PageRankVertex2.java

License:Apache License

@Override
public void configure(Configuration conf) {
    maxIteration = conf.getInt(ITERATIONS, 10);
}

From source file:edu.ucla.sspace.hadoop.CooccurrenceExtractor.java

License:Open Source License

/**
 * Creates an unconfigured {@code CooccurrenceMapper}.
 */
public CooccurrenceExtractor(Configuration conf) {
    semanticFilter = new HashSet<String>();
    windowSize = conf.getInt(WINDOW_SIZE_PROPERTY, DEFAULT_WINDOW_SIZE);
}

From source file:edu.udel.mxv.MxvMap.java

@Override
protected void setup(Mapper<LongWritable, Text, IntWritable, DoubleWritable>.Context context)
        throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();

    String input_vector = conf.get("vector.path");
    x_i = new double[conf.getInt("vector.n", 0)];

    FileSystem fs = FileSystem.get(URI.create(input_vector), conf);
    FileStatus[] status = fs.listStatus(new Path(input_vector));
    for (int i = 0; i < status.length; ++i) {
        Path file = status[i].getPath();
        System.out.println("status: " + i + " " + file.toString());

        DataInputStream dis = new DataInputStream(fs.open(file));

        String line = null;
        int count = 0;
        while ((line = dis.readLine()) != null) {
            String[] split_line = line.split(",");
            if (split_line.length == 2) {
                int pos = Integer.parseInt(split_line[0]);
                double val = Double.parseDouble(split_line[1]);
                x_i[pos++] = val;
                count++;
            } else
                LOG.error("Parse error in line: " + line);
        }

        LOG.info("Number of elements read for vector = " + count);
    }
}

From source file:edu.umd.cloud9.webgraph.driver.wt10g.GenericExtractLinks.java

License:Apache License

@Override
public int runTool() throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf);

    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");

    String mappingFile = conf.get("Cloud9.DocnoMappingFile");

    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(new Path(mappingFile))) {
        throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!");
    }

    DistributedCache.addCacheFile(new Path(mappingFile).toUri(), job.getConfiguration());

    job.setJobName("ExtractLinks");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);

    job.setNumReduceTasks(numReducers);

    job.setMapperClass(GenericExtractLinks.Map.class);
    job.setCombinerClass(GenericExtractLinks.Reduce.class);
    job.setReducerClass(GenericExtractLinks.Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ArrayListWritable.class);

    configer.applyJobConfig(job);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    recursivelyAddInputPaths(job, inputPath);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    LOG.info("ExtractLinks");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - mapping file: " + mappingFile);
    LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false));

    job.waitForCompletion(true);
    return 0;
}

From source file:edu.umd.cloud9.webgraph.TrecExtractLinks.java

License:Apache License

@Override
public int runTool() throws Exception {

    Configuration conf = getConf();
    conf.set("mapred.child.java.opts", "-Xmx3072m");
    conf.setInt("mapred.task.timeout", 60000000);
    Job job = new Job(conf);

    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");

    String mappingFile = conf.get("Cloud9.DocnoMappingFile");

    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(new Path(mappingFile))) {
        throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!");
    }

    DistributedCache.addCacheFile(new Path(mappingFile).toUri(), job.getConfiguration());

    job.setJobName("ExtractLinks");
    job.setNumReduceTasks(numReducers);

    job.setJarByClass(TrecExtractLinks.class);
    job.setMapperClass(TrecExtractLinks.Map.class);
    job.setCombinerClass(TrecExtractLinks.Reduce.class);
    job.setReducerClass(TrecExtractLinks.Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ArrayListWritable.class);

    configer.applyJobConfig(job);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    recursivelyAddInputPaths(job, inputPath);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    LOG.info("ExtractLinks");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - mapping file: " + mappingFile);
    LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false));

    job.waitForCompletion(true);
    return 0;
}

From source file:edu.umn.cs.spatialHadoop.mapred.RandomShapeGenerator.java

License:Open Source License

/**
 * Initialize from a FileSplit
 * @param job
 * @param split
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public RandomShapeGenerator(Configuration job, RandomInputFormat.GeneratedSplit split) throws IOException {
    this(split.length, OperationsParams.getShape(job, "mbr").getMBR(),
            SpatialSite.getDistributionType(job, "type", DistributionType.UNIFORM), job.getInt("rectsize", 100),
            split.index + job.getLong("seed", System.currentTimeMillis()), job.getFloat("thickness", 1));
    setShape((S) SpatialSite.createStockShape(job));
}

From source file:edu.umn.cs.spatialHadoop.mapred.SpatialRecordReader.java

License:Open Source License

/**
 * Initialize from a path and file range
 * @param job
 * @param s
 * @param l
 * @param p
 * @throws IOException
 */
public SpatialRecordReader(Configuration job, long s, long l, Path p) throws IOException {
    this.start = s;
    this.end = s + l;
    this.path = p;
    LOG.info("Open a SpatialRecordReader to file: " + p + "[" + s + "," + (s + l) + ")");
    this.fs = this.path.getFileSystem(job);
    this.directIn = fs.open(this.path);
    this.blockSize = fs.getFileStatus(this.path).getBlockSize();
    this.cellMbr = new Rectangle();

    codec = new CompressionCodecFactory(job).getCodec(this.path);

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    directIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = cIn;
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            in = codec.createInputStream(directIn, decompressor);
            filePosition = directIn;
        }
    } else {
        directIn.seek(start);
        in = directIn;
        filePosition = directIn;
    }
    this.pos = start;
    this.maxShapesInOneRead = job.getInt(SpatialSite.MaxShapesInOneRead, 1000000);
    this.maxBytesInOneRead = job.getInt(SpatialSite.MaxBytesInOneRead, 32 * 1024 * 1024);

    initializeReader();
}

From source file:edu.umn.cs.spatialHadoop.mapreduce.SpatialInputFormat3.java

License:Open Source License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = super.getSplits(job);
    Configuration jobConf = job.getConfiguration();
    if (jobConf.getInt(CombineSplits, 1) > 1) {
        long t1 = System.currentTimeMillis();
        int combine = jobConf.getInt(CombineSplits, 1);
        /*
         * Combine splits to reduce number of map tasks. Currently, this is done
         * using a greedy algorithm that combines splits based on how many hosts
         * they share.
         * TODO: Use a graph clustering algorithm where each vertex represents a
         * split, and each edge is weighted with number of shared hosts between
         * the two splits
         */
        Vector<Vector<FileSplit>> openSplits = new Vector<Vector<FileSplit>>();
        int maxNumberOfSplits = (int) Math.ceil((float) splits.size() / combine);
        List<InputSplit> combinedSplits = new Vector<InputSplit>();
        for (InputSplit split : splits) {
            FileSplit fsplit = (FileSplit) split;
            int maxSimilarity = -1; // Best similarity found so far
            int bestFit = -1; // Index of a random open split with max similarity
            int numMatches = 0; // Number of splits with max similarity
            for (int i = 0; i < openSplits.size(); i++) {
                Vector<FileSplit> splitList = openSplits.elementAt(i);
                int similarity = 0;
                for (FileSplit otherSplit : splitList) {
                    for (String host1 : fsplit.getLocations())
                        for (String host2 : otherSplit.getLocations())
                            if (host1.equals(host2))
                                similarity++;
                }
                if (similarity > maxSimilarity) {
                    maxSimilarity = similarity;
                    bestFit = i;
                    numMatches = 1;
                } else if (similarity == maxSimilarity) {
                    numMatches++;
                    // Replace with probability 1/numMatches (reservoir sampling)
                    double random = Math.random();
                    if (random < (double) 1 / numMatches) {
                        // Replace the element in the reservoir
                        bestFit = i;
                    }
                }
            }
            if (maxSimilarity > 0 || (openSplits.size() + combinedSplits.size()) >= maxNumberOfSplits) {
                // Good fit || cannot create more open splits,
                // add it to an existing open split.
                Vector<FileSplit> bestList = openSplits.elementAt(bestFit);
                bestList.add(fsplit);
                if (bestList.size() > combine) {
                    // Reached threshold for this list. Add it to combined splits
                    combinedSplits.add(FileSplitUtil.combineFileSplits(bestList, 0, bestList.size()));
                    // Remove it from open splits
                    openSplits.remove(bestFit);
                }
            } else {
                // Bad fit && can add a new split
                // Create a new open split just for this one
                Vector<FileSplit> newOpenSplit = new Vector<FileSplit>();
                newOpenSplit.add(fsplit);
                openSplits.addElement(newOpenSplit);
            }
        }

        // Add all remaining open splits to the list of combined splits
        for (Vector<FileSplit> openSplit : openSplits) {
            combinedSplits.add(FileSplitUtil.combineFileSplits(openSplit, 0, openSplit.size()));
        }

        String msg = String.format("Combined %d splits into %d combined splits", splits.size(),
                combinedSplits.size());
        splits.clear();
        splits.addAll(combinedSplits);
        long t2 = System.currentTimeMillis();
        LOG.info(msg + " in " + ((t2 - t1) / 1000.0) + " seconds");
    }
    return splits;
}