Example usage for org.apache.hadoop.conf Configuration getLong

Introduction

This page collects example usages of org.apache.hadoop.conf.Configuration.getLong drawn from open-source projects.

Prototype

public long getLong(String name, long defaultValue) 

Document

Get the value of the name property as a long. If no such property exists, the provided default value is returned; if the stored value cannot be parsed as a long, a NumberFormatException is thrown.
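
A minimal, self-contained sketch of that contract (the property names are invented for the demo):

import org.apache.hadoop.conf.Configuration;

public class GetLongDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("demo.timeout.ms", "30000");

        // Property present: the stored string is parsed as a long.
        long timeout = conf.getLong("demo.timeout.ms", 10000L); // 30000

        // Property absent: the supplied default is returned.
        long retries = conf.getLong("demo.retry.count", 3L); // 3

        System.out.println(timeout + " / " + retries);
    }
}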

Usage

From source file:com.netflix.bdp.s3mper.listing.ConsistentListingAspect.java

License:Apache License

private void updateConfig(Configuration conf) {
    disabled = conf.getBoolean("s3mper.disable", disabled);

    if (disabled) {
        log.warn("S3mper Consistency explicitly disabled.");
        return;
    }

    darkload = conf.getBoolean("s3mper.darkload", darkload);
    failOnError = conf.getBoolean("s3mper.failOnError", failOnError);
    taskFailOnError = conf.getBoolean("s3mper.task.failOnError", taskFailOnError);
    checkTaskListings = conf.getBoolean("s3mper.listing.task.check", checkTaskListings);
    failOnTimeout = conf.getBoolean("s3mper.failOnTimeout", failOnTimeout);
    delistDeleteMarkedFiles = conf.getBoolean("s3mper.listing.delist.deleted", delistDeleteMarkedFiles);
    trackDirectories = conf.getBoolean("s3mper.listing.directory.tracking", trackDirectories);

    fileThreshold = conf.getFloat("s3mper.listing.threshold", fileThreshold);

    recheckCount = conf.getLong("s3mper.listing.recheck.count", recheckCount);
    recheckPeriod = conf.getLong("s3mper.listing.recheck.period", recheckPeriod);
    taskRecheckCount = conf.getLong("s3mper.listing.task.recheck.count", taskRecheckCount);
    taskRecheckPeriod = conf.getLong("s3mper.listing.task.recheck.period", taskRecheckPeriod);

    statOnMissingFile = conf.getBoolean("s3mper.listing.statOnMissingFile", false);
}
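
Note the idiom above: each field is passed to getLong/getBoolean as its own default, so a property that is absent from the Configuration leaves the current value untouched. A caller only sets the keys it wants to override, e.g. (values illustrative):

Configuration conf = new Configuration();
conf.setLong("s3mper.listing.recheck.count", 20L);    // more recheck attempts
conf.setLong("s3mper.listing.recheck.period", 1000L); // milliseconds between rechecks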

From source file:com.netflix.bdp.s3mper.metastore.impl.DynamoDBMetastore.java

License:Apache License

/**
 * Creates the metastore table in DynamoDB with the configured read and
 * write units if it does not already exist.
 * 
 * @param uri  filesystem URI; its scheme selects the credential properties
 * @param conf job configuration supplying credentials and table settings
 * @throws Exception 
 */
@Override
public void initalize(URI uri, Configuration conf) throws Exception {
    scheme = uri.getScheme();

    String keyId = conf.get("fs." + uri.getScheme() + ".awsAccessKeyId");
    String keySecret = conf.get("fs." + uri.getScheme() + ".awsSecretAccessKey");

    //An override option for accessing across accounts
    keyId = conf.get("s3mper.override.awsAccessKeyId", keyId);
    keySecret = conf.get("s3mper.override.awsSecretAccessKey", keySecret);

    db = new AmazonDynamoDBClient(new BasicAWSCredentials(keyId, keySecret));

    readUnits = conf.getLong("s3mper.metastore.read.units", readUnits);
    writeUnits = conf.getLong("s3mper.metastore.write.units", writeUnits);

    retryCount = conf.getInt("s3mper.metastore.retry", retryCount);
    timeout = conf.getInt("s3mper.metastore.timeout", timeout);

    tableName = conf.get("s3mper.metastore.name", tableName);

    deleteMarkerEnabled = conf.getBoolean("s3mper.metastore.deleteMarker.enabled", false);

    boolean checkTableExists = conf.getBoolean("s3mper.metastore.create", false);

    if (checkTableExists) {
        ListTablesResult tables = db.listTables();

        if (!tables.getTableNames().contains(tableName)) {
            createTable();
        }
    }
}
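
The capacity, retry, and naming settings above are all read from the job Configuration; a sketch of how a client might supply them before initialization (the values and table name are illustrative):

Configuration conf = new Configuration();
conf.setLong("s3mper.metastore.read.units", 500L);   // provisioned read capacity
conf.setLong("s3mper.metastore.write.units", 100L);  // provisioned write capacity
conf.setInt("s3mper.metastore.retry", 3);
conf.set("s3mper.metastore.name", "s3mper-metastore");
conf.setBoolean("s3mper.metastore.create", true);    // create the table if missing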

From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormat2.java

License:Apache License

static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
        final TaskAttemptContext context) throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config.  Add to hbase-*.xml if other than default compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, V>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);

            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();
            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new StoreFile writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper StoreFile writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transitions.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /* Create a new StoreFile.Writer.
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
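
Because getLong falls back to HConstants.DEFAULT_MAX_FILE_SIZE, the writers roll at the cluster's configured region size cap. A bulk-load job that wants a different HFile size can override the same key before submission (sketch, assuming job is the org.apache.hadoop.mapreduce.Job being configured; the 2 GiB figure is arbitrary):

Configuration conf = job.getConfiguration();
conf.setLong(HConstants.HREGION_MAX_FILESIZE, 2L * 1024 * 1024 * 1024); // roll HFiles at 2 GiB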

From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java

License:Apache License

// Adapted from HFileOutputFormat2.createRecordWriter, but typed to
// RecordWriter<ImmutableBytesWritable, KeyValue> instead of the generic V extends Cell.
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
        final TaskAttemptContext context) throws IOException, InterruptedException {

    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = getDeleteRowKeyFile(outputPath); // file collecting skipped (delete-marked) row keys

    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default
    // compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {

        // Map of families to writers and how much has been output on the
        // writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
            // No KeyValueUtil.ensureKeyValue(cell) call here: the value is already a KeyValue.

            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();

            if (ignore(kv)) { // record the skipped row key instead of writing the cell
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }

            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory
            // exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new StoreFile writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper StoreFile writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transitions.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         * 
         * @param family
         * 
         * @return A WriterLength, containing a new StoreFile.Writer.
         * 
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush(); // finish the skipped-row-key file before closing the writers
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }

    };
}

From source file:com.odiago.flumebase.exec.BucketedAggregationElement.java

License:Apache License

public BucketedAggregationElement(FlowElementContext ctxt, AggregateNode aggregateNode) {
    super(ctxt, (Schema) aggregateNode.getAttr(PlanNode.OUTPUT_SCHEMA_ATTR));

    Configuration conf = aggregateNode.getConf();
    assert null != conf;
    mNumBuckets = conf.getInt(NUM_BUCKETS_KEY, DEFAULT_NUM_BUCKETS);
    mContinuousOutput = conf.getBoolean(CONTINUOUS_OUTPUT_KEY, DEFAULT_CONTINUOUS_OUTPUT);
    mMaxPriorEmitInterval = conf.getLong(MAX_PRIOR_EMIT_INTERVAL_KEY, DEFAULT_MAX_PRIOR_EMIT_INTERVAL);
    int slackTime = conf.getInt(SLACK_INTERVAL_KEY, DEFAULT_SLACK_INTERVAL);
    if (slackTime < 0) {
        mSlackTime = DEFAULT_SLACK_INTERVAL;
    } else {
        mSlackTime = slackTime;
    }

    assert mMaxPriorEmitInterval > 0;
    assert mMaxPriorEmitInterval > mSlackTime;

    List<TypedField> groupByFields = aggregateNode.getGroupByFields();
    if (null == groupByFields) {
        mGroupByFields = Collections.emptyList();
    } else {
        mGroupByFields = groupByFields;
    }

    mAggregateExprs = aggregateNode.getAggregateExprs();
    assert mAggregateExprs != null;
    mPropagateFields = aggregateNode.getPropagateFields();

    Expr windowExpr = aggregateNode.getWindowExpr();
    assert windowExpr.isConstant();
    try {
        mWindowSpec = (WindowSpec) windowExpr.eval(new EmptyEventWrapper());
        assert mWindowSpec.getRangeSpec().isConstant();
        mTimeSpan = (TimeSpan) mWindowSpec.getRangeSpec().eval(new EmptyEventWrapper());
    } catch (IOException ioe) {
        // The only way this can be thrown is if the window expr isn't actually constant.
        // This should not happen due to the assert above.
        LOG.error("Got IOException when calculating window width: " + ioe);
        throw new RuntimeException(ioe);
    }

    mBucketMap = new HashMap<Pair<Long, HashedEvent>, List<Bucket>>(mNumBuckets);
    mBucketsByGroup = new HashMap<HashedEvent, List<Pair<Long, List<Bucket>>>>();

    // Calculate the width of each bucket.
    mTimeModulus = mTimeSpan.getWidth() / mNumBuckets;
    if (mTimeModulus * mNumBuckets != mTimeSpan.getWidth()) {
        LOG.warn("Aggregation time step does not cleanly divide the time interval; "
                + "results may be inaccurate. Set " + NUM_BUCKETS_KEY + " to a better divisor.");
    }
}
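
To see why the warning matters: mTimeModulus is computed by integer division, so a 60,000 ms window split into 100 buckets gives exactly 600 ms per bucket, while the same window split into 7 buckets gives 8,571 ms, and 7 × 8,571 = 59,997 ≠ 60,000, so the buckets under-cover the window and the warning fires.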

From source file:com.odiago.flumebase.exec.local.LocalEnvironment.java

License:Apache License

/** Given a Configuration that has SUBMITTER_SESSION_ID_KEY set, return the
 * UserSession corresponding to this SessionId. This is used to resolve the
 * submitter of a LocalFlow, FlowSpecification, etc.
 */
private UserSession getSessionForConf(Configuration conf) {
    SessionId id = new SessionId(conf.getLong(SUBMITTER_SESSION_ID_KEY, -1));
    return getSession(id);
}
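
The -1 default acts as a sentinel meaning "no submitter recorded". The submitting side attaches the id with the matching setter (sketch; 42 stands in for a real session id):

conf.setLong(SUBMITTER_SESSION_ID_KEY, 42L); // record the submitter's session id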

From source file:com.phantom.hadoop.examples.RandomTextWriter.java

License:Apache License

/**
 * This is the main routine for launching a distributed random write job. It
 * runs 10 maps/node and each node writes 1 gig of data to a DFS file. The
 * reduce doesn't do anything.
 * 
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        return printUsage();
    }

    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

    Job job = new Job(conf);

    job.setJarByClass(RandomTextWriter.class);
    job.setJobName("random-text-writer");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
    job.setMapperClass(RandomTextMapper.class);

    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

    return ret;
}
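
To make the sizing concrete: on a cluster reporting 4 task trackers, the defaults give TOTAL_BYTES = 10 maps/host × 1 GiB × 4 = 40 GiB, hence numMaps = 40 with 1 GiB per map. If TOTAL_BYTES is set below BYTES_PER_MAP, the integer division yields numMaps = 0 and the guard above bumps it back to a single map that writes the full total.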

From source file:com.phantom.hadoop.examples.RandomWriter.java

License:Apache License

/**
 * This is the main routine for launching a distributed random write job. It
 * runs 10 maps/node and each node writes 1 gig of data to a DFS file. The
 * reduce doesn't do anything.
 * 
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path outDir = new Path(args[0]);
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

    Job job = new Job(conf);

    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

    return ret;
}

From source file:com.phantom.hadoop.examples.terasort.TeraInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param job
 *            the job to sample
 * @param partFile
 *            where to write the output file to
 * @throws Throwable
 *             if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}
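
With the defaults, the sampling arithmetic works out as follows: for an input of, say, 40 splits, samples = min(10, 40) = 10 reader threads, recordsPerSample = 100,000 / 10 = 10,000, and sampleStep = 40 / 10 = 4, so every fourth split contributes up to 10,000 keys to the sampler.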

From source file:com.quantcast.qfs.hadoop.QFSImpl.java

License:Apache License

public QFSImpl(String metaServerHost, int metaServerPort, FileSystem.Statistics stats, Configuration cfg)
        throws IOException {
    kfsAccess = new KfsAccess(metaServerHost, metaServerPort);
    final long kMaxUserGroupId = 0x0FFFFFFFFL;
    final long kDefaultUser = ~0L;
    final long kDefaultGroup = ~0L;
    final long euser = cfg.getLong("fs.qfs.euser", kDefaultUser);
    final long egroup = cfg.getLong("fs.qfs.egroup", kDefaultGroup);
    final String groupsCfgName = "fs.qfs.egroups";
    final String groupsSeparator = ","; // No regex special symbols.
    final String groupsCfg = cfg.get(groupsCfgName, "");
    long[] groups = null;
    CREATE_PARAMS = cfg.get("fs.qfs.createParams", "S");
    if (kDefaultUser != euser && (euser < 0 || kMaxUserGroupId <= euser)) {
        throw new IOException("invalid effective user id: " + euser);
    }
    if (kDefaultGroup != egroup && (egroup < 0 || kMaxUserGroupId <= egroup)) {
        throw new IOException("invalid effective group id: " + egroup);
    }
    if (groupsCfg.contains(groupsSeparator)) {
        try {
            final String[] tokens = groupsCfg.split(groupsSeparator);
            if (0 < tokens.length) {
                groups = new long[tokens.length];
                for (int i = 0; i < tokens.length; i++) {
                    groups[i] = Long.parseLong(tokens[i]);
                    if (groups[i] < 0 || kMaxUserGroupId <= groups[i]) {
                        throw new IOException("invalid group id: " + groups[i]);
                    }
                }
            }
        } catch (Exception ex) {
            throw new IOException(
                    "failed to parse configuration setting " + groupsCfgName + " " + ex.getMessage());
        }
    }
    if (kDefaultUser != euser || kDefaultGroup != egroup || null != groups) {
        // Ignore errors for now.
        // Setting effective user and group has effect for all QFS file system
        // client instances within the process / JVM.
        // If any other KfsAccess method was invoked prior to this point,
        // kfs_setEUserAndEGroup() will return an error.
        // Effective user and group ids have no effect with QFS authentication.
        kfsAccess.kfs_setEUserAndEGroup(euser, egroup, groups);
    }
    statistics = stats;
}
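
A sketch of the corresponding client-side settings (the ids are illustrative; the ~0L defaults above mean "leave the effective ids unchanged"):

Configuration cfg = new Configuration();
cfg.setLong("fs.qfs.euser", 1001L);     // effective user id
cfg.setLong("fs.qfs.egroup", 1001L);    // effective group id
cfg.set("fs.qfs.egroups", "1001,100");  // supplementary group ids, comma-separated

Note that the constructor only splits fs.qfs.egroups when the value contains a comma, so a single group id on its own is never parsed into the groups array.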