List of usage examples for org.apache.hadoop.conf.Configuration.getLong

public long getLong(String name, long defaultValue)

Gets the value of the name property as a long. If no such property exists, the supplied defaultValue is returned.
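Before the collected examples, a minimal self-contained sketch of the call; the property names here are hypothetical and chosen only for illustration, and the snippet assumes hadoop-common is on the classpath:

import org.apache.hadoop.conf.Configuration;

public class GetLongDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical property name, used only for illustration.
        conf.set("example.sample.size", "1048576");

        // Returns 1048576 because the property is set and parses as a long.
        long sampleSize = conf.getLong("example.sample.size", 64 * 1024);

        // Falls back to the default because the property is not set.
        long missing = conf.getLong("example.missing.key", 64 * 1024);

        System.out.println(sampleSize + " " + missing);
    }
}

Because absent keys fall back to the default, callers such as the samplers and indexers below can read tuning values without first checking whether the key exists.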
From source file:edu.uci.ics.pregelix.dataflow.util.IterationUtils.java
License:Apache License
public static void setProperties(String giraphJobId, IHyracksTaskContext ctx, Configuration conf) {
    INCApplicationContext appContext = ctx.getJobletContext().getApplicationContext();
    RuntimeContext context = (RuntimeContext) appContext.getApplicationObject();
    context.setVertexProperties(giraphJobId, conf.getLong(PregelixJob.NUM_VERTICE, -1),
            conf.getLong(PregelixJob.NUM_EDGES, -1));
}
From source file:edu.umn.cs.spatialHadoop.indexing.Indexer.java
License:Open Source License
/**
 * Create a partitioner for a particular job
 * @param ins
 * @param out
 * @param job
 * @param partitionerName
 * @return
 * @throws IOException
 */
public static Partitioner createPartitioner(Path[] ins, Path out, Configuration job, String partitionerName)
        throws IOException {
    try {
        Partitioner partitioner;
        Class<? extends Partitioner> partitionerClass = PartitionerClasses.get(partitionerName.toLowerCase());
        if (partitionerClass == null) {
            // Try to parse the name as a class name
            try {
                partitionerClass = Class.forName(partitionerName).asSubclass(Partitioner.class);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException("Unknown index type '" + partitionerName + "'");
            }
        }
        if (PartitionerReplicate.containsKey(partitionerName.toLowerCase())) {
            boolean replicate = PartitionerReplicate.get(partitionerName.toLowerCase());
            job.setBoolean("replicate", replicate);
        }
        partitioner = partitionerClass.newInstance();

        long t1 = System.currentTimeMillis();
        final Rectangle inMBR = (Rectangle) OperationsParams.getShape(job, "mbr");
        // Determine number of partitions
        long inSize = 0;
        for (Path in : ins) {
            inSize += FileUtil.getPathSize(in.getFileSystem(job), in);
        }
        long estimatedOutSize = (long) (inSize * (1.0 + job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.1f)));
        FileSystem outFS = out.getFileSystem(job);
        long outBlockSize = outFS.getDefaultBlockSize(out);

        final List<Point> sample = new ArrayList<Point>();
        float sample_ratio = job.getFloat(SpatialSite.SAMPLE_RATIO, 0.01f);
        long sample_size = job.getLong(SpatialSite.SAMPLE_SIZE, 100 * 1024 * 1024);

        LOG.info("Reading a sample of " + (int) Math.round(sample_ratio * 100) + "%");
        ResultCollector<Point> resultCollector = new ResultCollector<Point>() {
            @Override
            public void collect(Point p) {
                sample.add(p.clone());
            }
        };
        OperationsParams params2 = new OperationsParams(job);
        params2.setFloat("ratio", sample_ratio);
        params2.setLong("size", sample_size);
        if (job.get("shape") != null)
            params2.set("shape", job.get("shape"));
        if (job.get("local") != null)
            params2.set("local", job.get("local"));
        params2.setClass("outshape", Point.class, Shape.class);
        Sampler.sample(ins, resultCollector, params2);
        long t2 = System.currentTimeMillis();
        System.out.println("Total time for sampling in millis: " + (t2 - t1));
        LOG.info("Finished reading a sample of " + sample.size() + " records");

        int partitionCapacity = (int) Math.max(1,
                Math.floor((double) sample.size() * outBlockSize / estimatedOutSize));
        int numPartitions = Math.max(1, (int) Math.ceil((float) estimatedOutSize / outBlockSize));
        LOG.info("Partitioning the space into " + numPartitions + " partitions with capacity of "
                + partitionCapacity);

        partitioner.createFromPoints(inMBR, sample.toArray(new Point[sample.size()]), partitionCapacity);

        return partitioner;
    } catch (InstantiationException e) {
        e.printStackTrace();
        return null;
    } catch (IllegalAccessException e) {
        e.printStackTrace();
        return null;
    }
}
From source file:edu.umn.cs.spatialHadoop.mapred.RandomShapeGenerator.java
License:Open Source License
/**
 * Initialize from a FileSplit
 * @param job
 * @param split
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public RandomShapeGenerator(Configuration job, RandomInputFormat.GeneratedSplit split) throws IOException {
    this(split.length, OperationsParams.getShape(job, "mbr").getMBR(),
            SpatialSite.getDistributionType(job, "type", DistributionType.UNIFORM), job.getInt("rectsize", 100),
            split.index + job.getLong("seed", System.currentTimeMillis()), job.getFloat("thickness", 1));
    setShape((S) SpatialSite.createStockShape(job));
}
From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java
License:Open Source License
/**
 * Create a partitioner for a particular job
 * @param ins
 * @param out
 * @param job
 * @param partitionerName
 * @return
 * @throws IOException
 */
public static Partitioner createPartitioner(Path[] ins, Path out, Configuration job, String partitionerName)
        throws IOException {
    try {
        Partitioner partitioner = null;
        Class<? extends Partitioner> partitionerClass = PartitionerClasses.get(partitionerName.toLowerCase());
        if (partitionerClass == null) {
            // Try to parse the name as a class name
            try {
                partitionerClass = Class.forName(partitionerName).asSubclass(Partitioner.class);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException("Unknown index type '" + partitionerName + "'");
            }
        }
        if (PartitionerReplicate.containsKey(partitionerName.toLowerCase())) {
            boolean replicate = PartitionerReplicate.get(partitionerName.toLowerCase());
            job.setBoolean("replicate", replicate);
        }
        partitioner = partitionerClass.newInstance();

        long t1 = System.currentTimeMillis();
        final Rectangle inMBR = (Rectangle) OperationsParams.getShape(job, "mbr");
        // Determine number of partitions
        long inSize = 0;
        for (Path in : ins) {
            inSize += FileUtil.getPathSize(in.getFileSystem(job), in);
        }
        long estimatedOutSize = (long) (inSize * (1.0 + job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.1f)));
        FileSystem outFS = out.getFileSystem(job);
        long outBlockSize = outFS.getDefaultBlockSize(out);
        int numPartitions = Math.max(1, (int) Math.ceil((float) estimatedOutSize / outBlockSize));
        LOG.info("Partitioning the space into " + numPartitions + " partitions");

        final Vector<Point> sample = new Vector<Point>();
        float sample_ratio = job.getFloat(SpatialSite.SAMPLE_RATIO, 0.01f);
        long sample_size = job.getLong(SpatialSite.SAMPLE_SIZE, 100 * 1024 * 1024);

        LOG.info("Reading a sample of " + (int) Math.round(sample_ratio * 100) + "%");
        ResultCollector<Point> resultCollector = new ResultCollector<Point>() {
            @Override
            public void collect(Point p) {
                sample.add(p.clone());
            }
        };
        OperationsParams params2 = new OperationsParams(job);
        params2.setFloat("ratio", sample_ratio);
        params2.setLong("size", sample_size);
        params2.setClass("outshape", Point.class, Shape.class);
        Sampler.sample(ins, resultCollector, params2);
        long t2 = System.currentTimeMillis();
        System.out.println("Total time for sampling in millis: " + (t2 - t1));
        LOG.info("Finished reading a sample of " + sample.size() + " records");

        partitioner.createFromPoints(inMBR, sample.toArray(new Point[sample.size()]), numPartitions);

        return partitioner;
    } catch (InstantiationException e) {
        e.printStackTrace();
        return null;
    } catch (IllegalAccessException e) {
        e.printStackTrace();
        return null;
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.LocalSampler.java
License:Open Source License
/**
 * Reads a random sample of up-to count from the given set of file splits.
 * @param files
 * @param ratioOrCount
 * @param output
 * @param conf
 * @return the actual number of lines read from the file
 * @throws IOException
 * @throws InterruptedException
 */
public static long sampleLocal(final FileSplit[] files, final float ratioOrCount,
        final ResultCollector<Text> output, final Configuration conf) throws IOException, InterruptedException {
    // A prefix sum of all files sizes. Used to draw a different sample size
    // from each file according to its size
    long[] fileStartOffset = new long[files.length + 1];
    fileStartOffset[0] = 0;
    for (int i = 0; i < files.length; i++)
        fileStartOffset[i + 1] = fileStartOffset[i] + files[i].getLength();

    // Decide number of samples to read from each file according to its size
    final int[] sampleSizePerFile = new int[files.length];
    Random rand = new Random(conf.getLong("seed", System.currentTimeMillis()));

    if (ratioOrCount > 1) {
        // This indicates a count
        for (int i = 0; i < ratioOrCount; i++) {
            long sampleOffset = Math.abs(rand.nextLong()) % fileStartOffset[files.length];
            int iFile = Arrays.binarySearch(fileStartOffset, sampleOffset);
            // An offset in the middle of a file.
            if (iFile < 0)
                iFile = -iFile - 1 - 1;
            sampleSizePerFile[iFile]++;
        }
    }

    List<Integer> actualSampleSizes = Parallel.forEach(files.length, new RunnableRange<Integer>() {
        @Override
        public Integer run(int i1, int i2) {
            int sampledLines;
            sampledLines = 0;
            for (int iFile = i1; iFile < i2; iFile++) {
                try {
                    long randomSeed = conf.getLong("seed", System.currentTimeMillis()) + iFile;
                    if (ratioOrCount > 1)
                        sampledLines += sampleFileSplitByCount(files[iFile], conf, sampleSizePerFile[iFile],
                                randomSeed, output);
                    else
                        sampledLines += sampleFileSplitByRatio(files[iFile], conf, ratioOrCount, randomSeed,
                                output);
                } catch (IOException e) {
                    throw new RuntimeException("Error while sampling file " + files[iFile]);
                }
            }
            return sampledLines;
        }
    });
    int totalSampledLines = 0;
    for (int actualSampleSize : actualSampleSizes)
        totalSampledLines += actualSampleSize;
    return totalSampledLines;
}
From source file:FormatStorage.FormatDataFile.java
License:Open Source License
public FormatDataFile(Configuration conf) throws Exception {
    this.conf = conf;
    fs = FileSystem.get(conf);
    confUnitSize = conf.getLong(ConstVar.ConfUnitSize, ConstVar.DefaultUnitSize);
    this.conf.setInt("io.compression.codec.lzo.buffersize", 128 * 1024);
    if (confUnitSize < 0) {
        throw new SEException.InvalidParameterException("invalid ConfUnitSize:" + confUnitSize);
    }
    confSegmentSize = fs.getDefaultBlockSize();
    if (confSegmentSize < 0) {
        throw new SEException.InvalidParameterException("invalid confSegmentSize:" + confSegmentSize);
    }
    if (confUnitSize + ConstVar.LineIndexRecordLen + ConstVar.IndexMetaOffset > confSegmentSize) {
        throw new SEException.InvalidParameterException(
                "unitSize(" + confUnitSize + ") > segmentSize(" + confSegmentSize + ")");
    }
    long poolSize = conf.getLong(ConstVar.ConfPoolSize, ConstVar.DefaultPoolSize);
    unitPool = new UnitPoolManager((int) poolSize, this);
    counter++;
}
From source file:FormatStorage1.IFileInfo.java
License:Open Source License
public IFileInfo(Configuration conf) throws IOException {
    this.workStatus = ConstVar.WS_Init;
    this.conf = conf;
    fs = FileSystem.get(conf);
    this.confSegmentSize = conf.getLong("dfs.block.size", ConstVar.DefaultSegmentSize);
    this.confUnitSize = conf.getLong(ConstVar.ConfUnitSize, ConstVar.DefaultUnitSize);
    this.conf.setInt("io.compression.codec.lzo.buffersize", 128 * 1024);
    this.currentline = 0;
    this.printlog = conf.getBoolean("printlog", false);
}
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(final Configuration conf, final JobConf jobConf, final Arguments args)
        throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (null == parent) {
                // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                // will return null. In this case, use '/' as its own parent to
                // prevent NPE errors below.
                parent = args.dst;
            }
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip file if it exceed file limit or size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            // if (LOG.isTraceEnabled()) {
                            // LOG.trace("adding file " + child.getPath());
                            // }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        getLogger().info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

    // Explicitly create the tmpDir to ensure that it can be cleaned
    // up by fullyDelete() later.
    tmpDir.getFileSystem(conf).mkdirs(tmpDir);

    getLogger().info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}
From source file:gobblin.runtime.TaskExecutor.java
License:Apache License
/**
 * Constructor to work with Hadoop {@link org.apache.hadoop.conf.Configuration}.
 */
public TaskExecutor(Configuration conf) {
    this(conf.getInt(ConfigurationKeys.TASK_EXECUTOR_THREADPOOL_SIZE_KEY,
            ConfigurationKeys.DEFAULT_TASK_EXECUTOR_THREADPOOL_SIZE),
            conf.getInt(ConfigurationKeys.TASK_RETRY_THREAD_POOL_CORE_SIZE_KEY,
                    ConfigurationKeys.DEFAULT_TASK_RETRY_THREAD_POOL_CORE_SIZE),
            conf.getLong(ConfigurationKeys.TASK_RETRY_INTERVAL_IN_SEC_KEY,
                    ConfigurationKeys.DEFAULT_TASK_RETRY_INTERVAL_IN_SEC));
}
From source file:gobblin.util.limiter.stressTest.MRStressTest.java
License:Apache License
static Limiter createLimiter(Configuration configuration, SharedResourcesBroker<SimpleScopeType> broker) {
    try {
        Limiter limiter = new NoopLimiter();

        long localQps = configuration.getLong(LOCALLY_ENFORCED_QPS, 0);
        if (localQps > 0) {
            log.info("Setting up local qps " + localQps);
            limiter = new MultiLimiter(limiter, new RateBasedLimiter(localQps));
        }

        if (configuration.getBoolean(USE_THROTTLING_SERVER, false)) {
            log.info("Setting up remote throttling.");
            String resourceId = configuration.get(RESOURCE_ID);
            Limiter globalLimiter = broker.getSharedResource(new RestliLimiterFactory<SimpleScopeType>(),
                    new SharedLimiterKey(resourceId));
            limiter = new MultiLimiter(limiter, globalLimiter);
        }

        return limiter;
    } catch (NotConfiguredException nce) {
        throw new RuntimeException(nce);
    }
}