Example usage for org.apache.hadoop.conf Configuration getInt

List of usage examples for org.apache.hadoop.conf Configuration getInt

Introduction

On this page you can find example usages of org.apache.hadoop.conf Configuration getInt.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int. If no such property exists, the provided default value is returned; if the stored value is not a valid int, an error is thrown.
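
A minimal standalone sketch of the getInt/setInt round trip before the usage listings below; the property name example.buffer.size is made up for illustration:

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // "example.buffer.size" is a hypothetical property; it is not set yet,
        // so getInt returns the supplied default.
        int unset = conf.getInt("example.buffer.size", 4096); // 4096

        // After setInt, getInt returns the stored value and ignores the default.
        conf.setInt("example.buffer.size", 8192);
        int stored = conf.getInt("example.buffer.size", 4096); // 8192

        System.out.println(unset + " " + stored);
    }
}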

Usage

From source file:com.odiago.flumebase.exec.HashJoinElement.java

License:Apache License

public HashJoinElement(FlowElementContext ctxt, String leftName, String rightName, TypedField leftKey,
        TypedField rightKey, WindowSpec windowWidth, String outName, List<TypedField> leftFieldNames,
        List<TypedField> rightFieldNames, Configuration conf) {
    super(ctxt);

    mSlackTime = conf.getInt(BucketedAggregationElement.SLACK_INTERVAL_KEY,
            BucketedAggregationElement.DEFAULT_SLACK_INTERVAL);
    if (mSlackTime < 0) {
        mSlackTime = BucketedAggregationElement.DEFAULT_SLACK_INTERVAL;
    }

    mLeftMap = new WindowedHashMap<Object, EventWrapper, Long>();
    mRightMap = new WindowedHashMap<Object, EventWrapper, Long>();

    mLeftName = leftName;
    mRightName = rightName;
    mLeftKey = leftKey;
    mRightKey = rightKey;
    mWindowWidth = windowWidth;
    try {
        assert mWindowWidth.getRangeSpec().isConstant();
        mTimeSpan = (TimeSpan) mWindowWidth.getRangeSpec().eval(new EmptyEventWrapper());
    } catch (IOException ioe) {
        // This should be a constant expression, so this would be quite surprising.
        LOG.error("Unexpected IOE during timespan eval() in HashJoin: " + ioe);
    }
    mOutName = outName;

    initFieldMap(leftFieldNames, rightFieldNames);
}

From source file:com.phantom.hadoop.examples.dancing.DistributedPentomino.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    if (args.length == 0) {
        System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    // check for passed parameters, otherwise use defaults
    int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
    int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
    int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-depth")) {
            depth = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-height")) {
            height = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-width")) {
            width = Integer.parseInt(args[++i].trim());
        }
    }
    // now set the values within conf for M/R tasks to read, this
    // will ensure values are set preventing MAPREDUCE-4678
    conf.setInt(Pentomino.WIDTH, width);
    conf.setInt(Pentomino.HEIGHT, height);
    conf.setInt(Pentomino.DEPTH, depth);
    Class<? extends Pentomino> pentClass = conf.getClass(Pentomino.CLASS, OneSidedPentomino.class,
            Pentomino.class);
    int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);
    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        Job job = new Job(conf);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        job.setJarByClass(PentMap.class);

        job.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        long inputSize = createInputDirectory(fileSys, input, pent, depth);
        // for forcing the number of maps
        FileInputFormat.setMaxInputSplitSize(job, (inputSize / numMaps));

        // the keys are the prefix strings
        job.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        job.setOutputValueClass(Text.class);

        job.setMapperClass(PentMap.class);
        job.setReducerClass(Reducer.class);

        job.setNumReduceTasks(1);

        return (job.waitForCompletion(true) ? 0 : 1);
    } finally {
        fileSys.delete(input, true);
    }
}
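
The run() method above shows a pattern that recurs with getInt: read a value with a default, let command-line flags override it, then write the result back with setInt so that map and reduce tasks read the same value. A minimal sketch of that round trip, using a hypothetical property name and default:

import org.apache.hadoop.conf.Configuration;

public class ResolveWidthExample {
    // Hypothetical property name and default, for illustration only.
    static final String WIDTH_KEY = "example.pent.width";
    static final int DEFAULT_WIDTH = 9;

    static int resolveWidth(Configuration conf, Integer override) {
        // Start from the configured value, falling back to the default.
        int width = conf.getInt(WIDTH_KEY, DEFAULT_WIDTH);
        if (override != null) {
            width = override;
        }
        // Write the resolved value back so downstream tasks see the same number.
        conf.setInt(WIDTH_KEY, width);
        return width;
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        System.out.println(resolveWidth(conf, null)); // 9 (default)
        System.out.println(resolveWidth(conf, 12));   // 12, now stored in conf
        System.out.println(conf.getInt(WIDTH_KEY, DEFAULT_WIDTH)); // 12
    }
}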

From source file:com.phantom.hadoop.examples.RandomTextWriter.java

License:Apache License

/**
 * This is the main routine for launching a distributed random write job. It
 * runs 10 maps/node and each node writes 1 gig of data to a DFS file. The
 * reduce doesn't do anything.
 * 
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        return printUsage();
    }

    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

    Job job = new Job(conf);

    job.setJarByClass(RandomTextWriter.class);
    job.setJobName("random-text-writer");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
    job.setMapperClass(RandomTextMapper.class);

    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

    return ret;
}

From source file:com.phantom.hadoop.examples.RandomWriter.java

License:Apache License

/**
 * This is the main routine for launching a distributed random write job. It
 * runs 10 maps/node and each node writes 1 gig of data to a DFS file. The
 * reduce doesn't do anything.
 * 
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path outDir = new Path(args[0]);
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

    Job job = new Job(conf);

    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

    return ret;
}

From source file:com.phantom.hadoop.examples.terasort.TeraInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param job
 *            the job to sample
 * @param partFile
 *            where to write the output file to
 * @throws Throwable
 *             if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {
                    // Interrupted while sampling; stop reading and let this thread exit.
                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
            // Interrupted while waiting; move on to the next sampler thread.
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}

From source file:com.philiphubbard.sabe.MRMerVertex.java

License:Open Source License

public MRMerVertex(int id, Configuration config) {
    super(id, config);
    merString = new MerString(id, config.getInt(CONFIG_MER_LENGTH, 1));
}

From source file:com.philiphubbard.sabe.MRMerVertex.java

License:Open Source License

public String toDisplayString(Configuration config) {
    StringBuilder s = new StringBuilder();

    s.append("MRMerVertex ");
    s.append(getId());

    s.append(" (");
    int merLength = config.getInt(CONFIG_MER_LENGTH, 1);
    s.append(Mer.fromInt(getId(), merLength));
    s.append(") ");

    MRVertex.AdjacencyIterator toIt = createToAdjacencyIterator();
    if (toIt.begin() != NO_VERTEX) {
        s.append("; to: ");
        for (int to = toIt.begin(); !toIt.done(); to = toIt.next()) {
            s.append(to);
            s.append(" ");
        }
    }

    MRVertex.AdjacencyIterator fromIt = createFromAdjacencyIterator();
    if (fromIt.begin() != NO_VERTEX) {
        s.append("; from: ");
        for (int from = fromIt.begin(); !fromIt.done(); from = fromIt.next()) {
            s.append(from);
            s.append(" ");
        }
    }

    if (merString != null) {
        s.append("; mer ");
        s.append(merString.toDisplayString());
    }

    return s.toString();
}

From source file:com.philiphubbard.sabe.MRMerVertex.java

License:Open Source License

@Override
protected void compressChainInternal(MRVertex other, Configuration config) {
    if (other instanceof MRMerVertex) {
        MRMerVertex otherMer = (MRMerVertex) other;

        int merLength = config.getInt(CONFIG_MER_LENGTH, 1);

        if (merString == null)
            merString = new MerString(getId(), merLength);
        MerString otherMerString = otherMer.merString;
        if (otherMerString == null)
            otherMerString = new MerString(other.getId(), merLength);

        merString.merge(otherMerString, merLength - 1);
    }
}

From source file:com.pinterest.terrapin.hadoop.HFileOutputFormat.java

License:Apache License

public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputDir.getFileSystem(conf);

    int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
    // Default to snappy.
    Compression.Algorithm compressionAlgorithm = getAlgorithm(conf.get(Constants.HFILE_COMPRESSION));
    final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize)
            .withFilePath(hfilePath(outputPath, context.getTaskAttemptID().getTaskID().getId()))
            .withCompression(compressionAlgorithm).build();
    return new HFileRecordWriter(writer);
}

From source file:com.pinterest.terrapin.hadoop.HFileRecordWriterTest.java

License:Apache License

@Test
public void testWrite() throws Exception {
    Configuration conf = new Configuration();
    HColumnDescriptor columnDescriptor = new HColumnDescriptor();
    // Disable block cache to ensure it reads the actual file content.
    columnDescriptor.setBlockCacheEnabled(false);
    FileSystem fs = FileSystem.get(conf);
    int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
    final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf, columnDescriptor),
            fs, blockSize).withFilePath(new Path(tempFile.toURI())).build();
    /* Create our RecordWriter */
    RecordWriter<BytesWritable, BytesWritable> hfileWriter = new HFileRecordWriter(writer);

    List<String> keys = Lists.newArrayList();
    List<String> values = Lists.newArrayList();
    for (int i = 0; i < 100; ++i) {
        String key = String.format("%03d", i);
        String val = "value " + i;
        keys.add(key);
        values.add(val);
        hfileWriter.write(new BytesWritable(key.getBytes()), new BytesWritable(val.getBytes()));
    }
    /* This internally closes the StoreFile.Writer */
    hfileWriter.close(null);

    HFile.Reader reader = HFile.createReader(fs, new Path(tempFile.toURI()),
            new CacheConfig(conf, columnDescriptor));
    HFileScanner scanner = reader.getScanner(false, false, false);
    boolean valid = scanner.seekTo();
    List<String> gotKeys = Lists.newArrayListWithCapacity(keys.size());
    List<String> gotValues = Lists.newArrayListWithCapacity(values.size());
    while (valid) {
        KeyValue keyValue = scanner.getKeyValue();
        gotKeys.add(new String(keyValue.getRow()));
        gotValues.add(new String(keyValue.getValue()));
        valid = scanner.next();
    }
    assertEquals(keys, gotKeys);
    assertEquals(values, gotValues);
    reader.close();
}