List of usage examples for org.apache.hadoop.conf.Configuration.getLong

public long getLong(String name, long defaultValue)

Gets the value of the name property as a long. If no such property exists, the supplied defaultValue is returned.
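Before the collected examples, a minimal self-contained sketch of the call; the property names here are hypothetical and chosen only for illustration, and the snippet assumes hadoop-common is on the classpath:

import org.apache.hadoop.conf.Configuration;

public class GetLongDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical property name, used only for illustration.
        conf.set("example.sample.size", "1048576");

        // Returns 1048576 because the property is set and parses as a long.
        long sampleSize = conf.getLong("example.sample.size", 64 * 1024);

        // Falls back to the default because the property is not set.
        long missing = conf.getLong("example.missing.key", 64 * 1024);

        System.out.println(sampleSize + " " + missing);
    }
}

Because absent keys fall back to the default, callers such as the samplers and indexers below can read tuning values without first checking whether the key exists.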
From source file:edu.uci.ics.pregelix.dataflow.util.IterationUtils.java
License:Apache License
public static void setProperties(String giraphJobId, IHyracksTaskContext ctx, Configuration conf) {
    INCApplicationContext appContext = ctx.getJobletContext().getApplicationContext();
    RuntimeContext context = (RuntimeContext) appContext.getApplicationObject();
    context.setVertexProperties(giraphJobId, conf.getLong(PregelixJob.NUM_VERTICE, -1),
            conf.getLong(PregelixJob.NUM_EDGES, -1));
}
From source file:edu.umn.cs.spatialHadoop.indexing.Indexer.java
License:Open Source License
/**
 * Create a partitioner for a particular job
 * @param ins
 * @param out
 * @param job
 * @param partitionerName
 * @return
 * @throws IOException
 */
public static Partitioner createPartitioner(Path[] ins, Path out, Configuration job, String partitionerName)
        throws IOException {
    try {
        Partitioner partitioner;
        Class<? extends Partitioner> partitionerClass = PartitionerClasses.get(partitionerName.toLowerCase());
        if (partitionerClass == null) {
            // Try to parse the name as a class name
            try {
                partitionerClass = Class.forName(partitionerName).asSubclass(Partitioner.class);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException("Unknown index type '" + partitionerName + "'");
            }
        }
        if (PartitionerReplicate.containsKey(partitionerName.toLowerCase())) {
            boolean replicate = PartitionerReplicate.get(partitionerName.toLowerCase());
            job.setBoolean("replicate", replicate);
        }
        partitioner = partitionerClass.newInstance();

        long t1 = System.currentTimeMillis();
        final Rectangle inMBR = (Rectangle) OperationsParams.getShape(job, "mbr");
        // Determine number of partitions
        long inSize = 0;
        for (Path in : ins) {
            inSize += FileUtil.getPathSize(in.getFileSystem(job), in);
        }
        long estimatedOutSize = (long) (inSize * (1.0 + job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.1f)));
        FileSystem outFS = out.getFileSystem(job);
        long outBlockSize = outFS.getDefaultBlockSize(out);

        final List<Point> sample = new ArrayList<Point>();
        float sample_ratio = job.getFloat(SpatialSite.SAMPLE_RATIO, 0.01f);
        long sample_size = job.getLong(SpatialSite.SAMPLE_SIZE, 100 * 1024 * 1024);

        LOG.info("Reading a sample of " + (int) Math.round(sample_ratio * 100) + "%");
        ResultCollector<Point> resultCollector = new ResultCollector<Point>() {
            @Override
            public void collect(Point p) {
                sample.add(p.clone());
            }
        };
        OperationsParams params2 = new OperationsParams(job);
        params2.setFloat("ratio", sample_ratio);
        params2.setLong("size", sample_size);
        if (job.get("shape") != null)
            params2.set("shape", job.get("shape"));
        if (job.get("local") != null)
            params2.set("local", job.get("local"));
        params2.setClass("outshape", Point.class, Shape.class);
        Sampler.sample(ins, resultCollector, params2);
        long t2 = System.currentTimeMillis();
        System.out.println("Total time for sampling in millis: " + (t2 - t1));
        LOG.info("Finished reading a sample of " + sample.size() + " records");

        int partitionCapacity = (int) Math.max(1,
                Math.floor((double) sample.size() * outBlockSize / estimatedOutSize));
        int numPartitions = Math.max(1, (int) Math.ceil((float) estimatedOutSize / outBlockSize));
        LOG.info("Partitioning the space into " + numPartitions + " partitions with capacity of "
                + partitionCapacity);

        partitioner.createFromPoints(inMBR, sample.toArray(new Point[sample.size()]), partitionCapacity);

        return partitioner;
    } catch (InstantiationException e) {
        e.printStackTrace();
        return null;
    } catch (IllegalAccessException e) {
        e.printStackTrace();
        return null;
    }
}
From source file:edu.umn.cs.spatialHadoop.mapred.RandomShapeGenerator.java
License:Open Source License
/**
 * Initialize from a FileSplit
 * @param job
 * @param split
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public RandomShapeGenerator(Configuration job, RandomInputFormat.GeneratedSplit split) throws IOException {
    this(split.length, OperationsParams.getShape(job, "mbr").getMBR(),
            SpatialSite.getDistributionType(job, "type", DistributionType.UNIFORM), job.getInt("rectsize", 100),
            split.index + job.getLong("seed", System.currentTimeMillis()), job.getFloat("thickness", 1));
    setShape((S) SpatialSite.createStockShape(job));
}
From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java
License:Open Source License
/**
 * Create a partitioner for a particular job
 * @param ins
 * @param out
 * @param job
 * @param partitionerName
 * @return
 * @throws IOException
 */
public static Partitioner createPartitioner(Path[] ins, Path out, Configuration job, String partitionerName)
        throws IOException {
    try {
        Partitioner partitioner = null;
        Class<? extends Partitioner> partitionerClass = PartitionerClasses.get(partitionerName.toLowerCase());
        if (partitionerClass == null) {
            // Try to parse the name as a class name
            try {
                partitionerClass = Class.forName(partitionerName).asSubclass(Partitioner.class);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException("Unknown index type '" + partitionerName + "'");
            }
        }
        if (PartitionerReplicate.containsKey(partitionerName.toLowerCase())) {
            boolean replicate = PartitionerReplicate.get(partitionerName.toLowerCase());
            job.setBoolean("replicate", replicate);
        }
        partitioner = partitionerClass.newInstance();

        long t1 = System.currentTimeMillis();
        final Rectangle inMBR = (Rectangle) OperationsParams.getShape(job, "mbr");
        // Determine number of partitions
        long inSize = 0;
        for (Path in : ins) {
            inSize += FileUtil.getPathSize(in.getFileSystem(job), in);
        }
        long estimatedOutSize = (long) (inSize * (1.0 + job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.1f)));
        FileSystem outFS = out.getFileSystem(job);
        long outBlockSize = outFS.getDefaultBlockSize(out);
        int numPartitions = Math.max(1, (int) Math.ceil((float) estimatedOutSize / outBlockSize));
        LOG.info("Partitioning the space into " + numPartitions + " partitions");

        final Vector<Point> sample = new Vector<Point>();
        float sample_ratio = job.getFloat(SpatialSite.SAMPLE_RATIO, 0.01f);
        long sample_size = job.getLong(SpatialSite.SAMPLE_SIZE, 100 * 1024 * 1024);

        LOG.info("Reading a sample of " + (int) Math.round(sample_ratio * 100) + "%");
        ResultCollector<Point> resultCollector = new ResultCollector<Point>() {
            @Override
            public void collect(Point p) {
                sample.add(p.clone());
            }
        };
        OperationsParams params2 = new OperationsParams(job);
        params2.setFloat("ratio", sample_ratio);
        params2.setLong("size", sample_size);
        params2.setClass("outshape", Point.class, Shape.class);
        Sampler.sample(ins, resultCollector, params2);
        long t2 = System.currentTimeMillis();
        System.out.println("Total time for sampling in millis: " + (t2 - t1));
        LOG.info("Finished reading a sample of " + sample.size() + " records");

        partitioner.createFromPoints(inMBR, sample.toArray(new Point[sample.size()]), numPartitions);

        return partitioner;
    } catch (InstantiationException e) {
        e.printStackTrace();
        return null;
    } catch (IllegalAccessException e) {
        e.printStackTrace();
        return null;
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.LocalSampler.java
License:Open Source License
/**
 * Reads a random sample of up-to count from the given set of file splits.
 * @param files
 * @param ratioOrCount
 * @param output
 * @param conf
 * @return the actual number of lines read from the file
 * @throws IOException
 * @throws InterruptedException
 */
public static long sampleLocal(final FileSplit[] files, final float ratioOrCount,
        final ResultCollector<Text> output, final Configuration conf) throws IOException, InterruptedException {
    // A prefix sum of all files sizes. Used to draw a different sample size
    // from each file according to its size
    long[] fileStartOffset = new long[files.length + 1];
    fileStartOffset[0] = 0;
    for (int i = 0; i < files.length; i++)
        fileStartOffset[i + 1] = fileStartOffset[i] + files[i].getLength();

    // Decide number of samples to read from each file according to its size
    final int[] sampleSizePerFile = new int[files.length];
    Random rand = new Random(conf.getLong("seed", System.currentTimeMillis()));

    if (ratioOrCount > 1) {
        // This indicates a count
        for (int i = 0; i < ratioOrCount; i++) {
            long sampleOffset = Math.abs(rand.nextLong()) % fileStartOffset[files.length];
            int iFile = Arrays.binarySearch(fileStartOffset, sampleOffset);
            // An offset in the middle of a file.
            if (iFile < 0)
                iFile = -iFile - 1 - 1;
            sampleSizePerFile[iFile]++;
        }
    }

    List<Integer> actualSampleSizes = Parallel.forEach(files.length, new RunnableRange<Integer>() {
        @Override
        public Integer run(int i1, int i2) {
            int sampledLines;
            sampledLines = 0;
            for (int iFile = i1; iFile < i2; iFile++) {
                try {
                    long randomSeed = conf.getLong("seed", System.currentTimeMillis()) + iFile;
                    if (ratioOrCount > 1)
                        sampledLines += sampleFileSplitByCount(files[iFile], conf, sampleSizePerFile[iFile],
                                randomSeed, output);
                    else
                        sampledLines += sampleFileSplitByRatio(files[iFile], conf, ratioOrCount, randomSeed,
                                output);
                } catch (IOException e) {
                    throw new RuntimeException("Error while sampling file " + files[iFile]);
                }
            }
            return sampledLines;
        }
    });
    int totalSampledLines = 0;
    for (int actualSampleSize : actualSampleSizes)
        totalSampledLines += actualSampleSize;
    return totalSampledLines;
}
From source file:FormatStorage.FormatDataFile.java
License:Open Source License
public FormatDataFile(Configuration conf) throws Exception {
    this.conf = conf;
    fs = FileSystem.get(conf);
    confUnitSize = conf.getLong(ConstVar.ConfUnitSize, ConstVar.DefaultUnitSize);
    this.conf.setInt("io.compression.codec.lzo.buffersize", 128 * 1024);
    if (confUnitSize < 0) {
        throw new SEException.InvalidParameterException("invalid ConfUnitSize:" + confUnitSize);
    }
    confSegmentSize = fs.getDefaultBlockSize();
    if (confSegmentSize < 0) {
        throw new SEException.InvalidParameterException("invalid confSegmentSize:" + confSegmentSize);
    }
    if (confUnitSize + ConstVar.LineIndexRecordLen + ConstVar.IndexMetaOffset > confSegmentSize) {
        throw new SEException.InvalidParameterException(
                "unitSize(" + confUnitSize + ") > segmentSize(" + confSegmentSize + ")");
    }
    long poolSize = conf.getLong(ConstVar.ConfPoolSize, ConstVar.DefaultPoolSize);
    unitPool = new UnitPoolManager((int) poolSize, this);
    counter++;
}
From source file:FormatStorage1.IFileInfo.java
License:Open Source License
public IFileInfo(Configuration conf) throws IOException {
    this.workStatus = ConstVar.WS_Init;
    this.conf = conf;
    fs = FileSystem.get(conf);
    this.confSegmentSize = conf.getLong("dfs.block.size", ConstVar.DefaultSegmentSize);
    this.confUnitSize = conf.getLong(ConstVar.ConfUnitSize, ConstVar.DefaultUnitSize);
    this.conf.setInt("io.compression.codec.lzo.buffersize", 128 * 1024);
    this.currentline = 0;
    this.printlog = conf.getBoolean("printlog", false);
}
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(final Configuration conf, final JobConf jobConf, final Arguments args)
        throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (null == parent) {
                // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                // will return null. In this case, use '/' as its own parent to
                // prevent NPE errors below.
                parent = args.dst;
            }
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip file if it exceed file limit or size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            // if (LOG.isTraceEnabled()) {
                            // LOG.trace("adding file " + child.getPath());
                            // }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        getLogger().info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

    // Explicitly create the tmpDir to ensure that it can be cleaned
    // up by fullyDelete() later.
    tmpDir.getFileSystem(conf).mkdirs(tmpDir);

    getLogger().info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}
From source file:gobblin.runtime.TaskExecutor.java
License:Apache License
/**
 * Constructor to work with Hadoop {@link org.apache.hadoop.conf.Configuration}.
 */
public TaskExecutor(Configuration conf) {
    this(conf.getInt(ConfigurationKeys.TASK_EXECUTOR_THREADPOOL_SIZE_KEY,
            ConfigurationKeys.DEFAULT_TASK_EXECUTOR_THREADPOOL_SIZE),
            conf.getInt(ConfigurationKeys.TASK_RETRY_THREAD_POOL_CORE_SIZE_KEY,
                    ConfigurationKeys.DEFAULT_TASK_RETRY_THREAD_POOL_CORE_SIZE),
            conf.getLong(ConfigurationKeys.TASK_RETRY_INTERVAL_IN_SEC_KEY,
                    ConfigurationKeys.DEFAULT_TASK_RETRY_INTERVAL_IN_SEC));
}
From source file:gobblin.util.limiter.stressTest.MRStressTest.java
License:Apache License
static Limiter createLimiter(Configuration configuration, SharedResourcesBroker<SimpleScopeType> broker) {
    try {
        Limiter limiter = new NoopLimiter();

        long localQps = configuration.getLong(LOCALLY_ENFORCED_QPS, 0);
        if (localQps > 0) {
            log.info("Setting up local qps " + localQps);
            limiter = new MultiLimiter(limiter, new RateBasedLimiter(localQps));
        }

        if (configuration.getBoolean(USE_THROTTLING_SERVER, false)) {
            log.info("Setting up remote throttling.");
            String resourceId = configuration.get(RESOURCE_ID);
            Limiter globalLimiter = broker.getSharedResource(new RestliLimiterFactory<SimpleScopeType>(),
                    new SharedLimiterKey(resourceId));
            limiter = new MultiLimiter(limiter, globalLimiter);
        }

        return limiter;
    } catch (NotConfiguredException nce) {
        throw new RuntimeException(nce);
    }
}