List of usage examples for org.apache.hadoop.conf.Configuration.getLong
public long getLong(String name, long defaultValue)
Gets the value of the name property as a long. If no such property exists, the supplied defaultValue is returned; if the property is set but its value cannot be parsed as a long, a NumberFormatException is thrown.
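Before the longer real-world examples below, here is a minimal self-contained sketch of getLong with and without the property set. The property name "my.app.timeout.ms" and the values are illustrative only, not part of any real configuration.

import org.apache.hadoop.conf.Configuration;

public class GetLongExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // The property has not been set anywhere, so the default (30000L) is returned.
    long timeout = conf.getLong("my.app.timeout.ms", 30000L);
    System.out.println("timeout = " + timeout); // 30000

    // Once the property is set, getLong parses the stored value instead of using the default.
    conf.setLong("my.app.timeout.ms", 60000L);
    System.out.println("timeout = " + conf.getLong("my.app.timeout.ms", 30000L)); // 60000
  }
}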
From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager.java
License:Apache License
/**
 * Construct the MergeManager. Must call start before it becomes usable.
 */
public MergeManager(Configuration conf, FileSystem localFS, LocalDirAllocator localDirAllocator,
    InputContext inputContext, Combiner combiner, TezCounter spilledRecordsCounter,
    TezCounter reduceCombineInputCounter, TezCounter mergedMapOutputsCounter,
    ExceptionReporter exceptionReporter, long initialMemoryAvailable, CompressionCodec codec,
    boolean ifileReadAheadEnabled, int ifileReadAheadLength) {
  this.inputContext = inputContext;
  this.conf = conf;
  this.localDirAllocator = localDirAllocator;
  this.exceptionReporter = exceptionReporter;
  this.initialMemoryAvailable = initialMemoryAvailable;
  this.combiner = combiner;
  this.reduceCombineInputCounter = reduceCombineInputCounter;
  this.spilledRecordsCounter = spilledRecordsCounter;
  this.mergedMapOutputsCounter = mergedMapOutputsCounter;
  this.mapOutputFile = new TezTaskOutputFiles(conf, inputContext.getUniqueIdentifier());
  this.localFS = localFS;
  this.rfs = ((LocalFileSystem) localFS).getRaw();
  this.numDiskToDiskMerges = inputContext.getCounters().findCounter(TaskCounter.NUM_DISK_TO_DISK_MERGES);
  this.numMemToDiskMerges = inputContext.getCounters().findCounter(TaskCounter.NUM_MEM_TO_DISK_MERGES);
  this.additionalBytesWritten = inputContext.getCounters()
      .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  this.additionalBytesRead = inputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
  this.codec = codec;
  this.ifileReadAhead = ifileReadAheadEnabled;
  if (this.ifileReadAhead) {
    this.ifileReadAheadLength = ifileReadAheadLength;
  } else {
    this.ifileReadAheadLength = 0;
  }
  this.ifileBufferSize = conf.getInt("io.file.buffer.size",
      TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);

  // Figure out initial memory req start
  final float maxInMemCopyUse = conf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
  if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
    throw new IllegalArgumentException("Invalid value for "
        + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse);
  }

  // Allow unit tests to fix Runtime memory
  long memLimit = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
      (long) (inputContext.getTotalMemoryAvailableToTask() * maxInMemCopyUse));

  float maxRedPer = conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT,
      TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_BUFFER_PERCENT_DEFAULT);
  if (maxRedPer > 1.0 || maxRedPer < 0.0) {
    throw new TezUncheckedException(
        TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT + maxRedPer);
  }
  long maxRedBuffer = (long) (inputContext.getTotalMemoryAvailableToTask() * maxRedPer);
  // Figure out initial memory req end

  if (this.initialMemoryAvailable < memLimit) {
    this.memoryLimit = this.initialMemoryAvailable;
  } else {
    this.memoryLimit = memLimit;
  }

  if (this.initialMemoryAvailable < maxRedBuffer) {
    this.postMergeMemLimit = this.initialMemoryAvailable;
  } else {
    this.postMergeMemLimit = maxRedBuffer;
  }

  LOG.info("InitialRequest: ShuffleMem=" + memLimit + ", postMergeMem=" + maxRedBuffer
      + ", RuntimeTotalAvailable=" + this.initialMemoryAvailable + ". Updated to: ShuffleMem="
      + this.memoryLimit + ", postMergeMem: " + this.postMergeMemLimit);

  this.ioSortFactor = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR,
      TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT);

  final float singleShuffleMemoryLimitPercent = conf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT);
  if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
    throw new IllegalArgumentException("Invalid value for "
        + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
        + singleShuffleMemoryLimitPercent);
  }

  // TODO: Cap it to MAX_VALUE until MapOutput starts supporting > 2 GB
  this.maxSingleShuffleLimit = (long) Math.min((memoryLimit * singleShuffleMemoryLimitPercent),
      Integer.MAX_VALUE);
  this.memToMemMergeOutputsThreshold = conf
      .getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, ioSortFactor);
  this.mergeThreshold = (long) (this.memoryLimit
      * conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT,
          TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT_DEFAULT));
  LOG.info("MergerManager: memoryLimit=" + memoryLimit + ", "
      + "maxSingleShuffleLimit=" + maxSingleShuffleLimit + ", "
      + "mergeThreshold=" + mergeThreshold + ", "
      + "ioSortFactor=" + ioSortFactor + ", "
      + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);

  if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
    throw new RuntimeException("Invlaid configuration: "
        + "maxSingleShuffleLimit should be less than mergeThreshold"
        + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit
        + ", mergeThreshold: " + this.mergeThreshold);
  }

  boolean allowMemToMemMerge = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM,
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM_DEFAULT);
  if (allowMemToMemMerge) {
    this.memToMemMerger = new IntermediateMemoryToMemoryMerger(this, memToMemMergeOutputsThreshold);
  } else {
    this.memToMemMerger = null;
  }

  this.inMemoryMerger = new InMemoryMerger(this);
  this.onDiskMerger = new OnDiskMerger(this);
}
From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager.java
License:Apache License
/**
 * Exposing this to get an initial memory ask without instantiating the object.
 */
@Private
static long getInitialMemoryRequirement(Configuration conf, long maxAvailableTaskMemory) {
  final float maxInMemCopyUse = conf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
  if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
    throw new IllegalArgumentException("Invalid value for "
        + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse);
  }

  // Allow unit tests to fix Runtime memory
  long memLimit = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
      (long) (maxAvailableTaskMemory * maxInMemCopyUse));
  LOG.info("Initial Shuffle Memory Required: " + memLimit + ", based on INPUT_BUFFER_factor: "
      + maxInMemCopyUse);

  float maxRedPer = conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT,
      TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_BUFFER_PERCENT_DEFAULT);
  if (maxRedPer > 1.0 || maxRedPer < 0.0) {
    throw new TezUncheckedException(
        TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT + maxRedPer);
  }
  long maxRedBuffer = (long) (maxAvailableTaskMemory * maxRedPer);
  LOG.info("Initial Memory required for final merged output: " + maxRedBuffer + ", using factor: "
      + maxRedPer);

  long reqMem = Math.max(maxRedBuffer, memLimit);
  return reqMem;
}
From source file:org.apache.tez.runtime.library.shuffle.common.impl.SimpleFetchedInputAllocator.java
License:Apache License
public SimpleFetchedInputAllocator(String uniqueIdentifier, Configuration conf,
    long maxTaskAvailableMemory, long memoryAvailable) {
  this.conf = conf;
  this.maxAvailableTaskMemory = maxTaskAvailableMemory;
  this.initialMemoryAvailable = memoryAvailable;
  this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier);
  this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

  // Setup configuration
  final float maxInMemCopyUse = conf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
  if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
    throw new IllegalArgumentException("Invalid value for "
        + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse);
  }

  long memReq = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
      Math.min(maxAvailableTaskMemory, Integer.MAX_VALUE)) * maxInMemCopyUse);

  if (memReq <= this.initialMemoryAvailable) {
    this.memoryLimit = memReq;
  } else {
    this.memoryLimit = initialMemoryAvailable;
  }

  LOG.info("RequestedMem=" + memReq + ", Allocated: " + this.memoryLimit);

  final float singleShuffleMemoryLimitPercent = conf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT);
  if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
    throw new IllegalArgumentException("Invalid value for "
        + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
        + singleShuffleMemoryLimitPercent);
  }

  this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent);

  LOG.info("SimpleInputManager -> " + "MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: "
      + this.maxSingleShuffleLimit);
}
From source file:org.apache.twill.yarn.LocationCacheCleaner.java
License:Apache License
LocationCacheCleaner(Configuration config, Location cacheBaseLocation, String sessionId,
    Predicate<Location> cleanupPredicate) {
  this.cacheBaseLocation = cacheBaseLocation;
  this.sessionId = sessionId;
  this.expiry = config.getLong(Configs.Keys.LOCATION_CACHE_EXPIRY_MS,
      Configs.Defaults.LOCATION_CACHE_EXPIRY_MS);
  this.antiqueExpiry = config.getLong(Configs.Keys.LOCATION_CACHE_ANTIQUE_EXPIRY_MS,
      Configs.Defaults.LOCATION_CACHE_ANTIQUE_EXPIRY_MS);
  this.cleanupPredicate = cleanupPredicate;
  this.pendingCleanups = new HashSet<>();
}
From source file:org.dennisit.graph.RandomWalkWorkerContext.java
License:Apache License
/**
 * Initialize sources for Random Walk with Restart. First option (preferential) is single source
 * given from the command line as a parameter. Second option is a file with a list of vertex IDs,
 * one per line. In this second case the preference vector is a uniform distribution over these
 * vertexes.
 *
 * @param configuration The configuration.
 * @return a (possibly empty) set of source vertices
 */
private ImmutableSet<Long> initializeSources(Configuration configuration) {
  ImmutableSet.Builder<Long> builder = ImmutableSet.builder();
  long sourceVertex = configuration.getLong(SOURCE_VERTEX, Long.MIN_VALUE);
  if (sourceVertex != Long.MIN_VALUE) {
    return ImmutableSet.of(sourceVertex);
  } else {
    Path sourceFile = null;
    try {
      Path[] cacheFiles = DistributedCache.getLocalCacheFiles(configuration);
      if (cacheFiles == null || cacheFiles.length == 0) {
        // empty set if no source vertices configured
        return ImmutableSet.of();
      }
      sourceFile = cacheFiles[0];
      FileSystem fs = FileSystem.getLocal(configuration);
      BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(sourceFile)));
      String line;
      while ((line = in.readLine()) != null) {
        builder.add(Long.parseLong(line));
      }
      in.close();
    } catch (IOException e) {
      getContext().setStatus("Could not load local cache files: " + sourceFile);
      LOG.error("Could not load local cache files: " + sourceFile, e);
    }
  }
  return builder.build();
}
From source file:org.elasticsearch.hadoop.yarn.util.YarnUtils.java
License:Apache License
public static long getAmHeartBeatRate(Configuration cfg) {
  return cfg.getLong(RM_AM_EXPIRY_INTERVAL_MS, DEFAULT_RM_AM_EXPIRY_INTERVAL_MS);
}
From source file:org.godhuli.rhipe.RHMRHelper.java
License:Apache License
void setup(Configuration cfg, String argv, boolean doPipe) {
  try {
    // InetAddress addr = InetAddress.getLocalHost();
    // hostname = addr.getHostName();
    doPartitionRelatedSetup(cfg);
    String squote = cfg.get("rhipe_string_quote");
    if (squote == null)
      squote = "";
    REXPHelper.setFieldSep(cfg.get("mapred.field.separator", " "));
    REXPHelper.setStringQuote(squote);
    if (cfg.get("rhipe_test_output") != null && cfg.get("rhipe_test_output").equals("TRUE"))
      writeErr = true;
    else
      writeErr = false;
    BUFFER_SIZE = cfg.getInt("rhipe_stream_buffer", 10 * 1024);
    joinDelay_ = cfg.getLong("rhipe_joindelay_milli", 0);
    nonZeroExitIsFailure_ = cfg.getBoolean("rhipe_non_zero_exit_is_failure", true);
    doPipe_ = doPipe;
    thisfs = FileSystem.get(cfg);
    Class<?> _kc = null;
    if (callID.equals("Mapper")) {
      if (cfg.getInt("mapred.reduce.tasks", 0) == 0)
        _kc = Class.forName(cfg.get("rhipe_outputformat_keyclass"));
      else
        _kc = Class.forName(cfg.get("rhipe_map_output_keyclass"));
    } else {
      _kc = Class.forName(cfg.get("rhipe_outputformat_keyclass"));
    }
    keyclass = _kc.asSubclass(RHBytesWritable.class);
    if (cfg.get("rhipe_output_folder") != null)
      outputFolder = new Path(cfg.get("rhipe_output_folder"));
    if (!doPipe_)
      return;
    copyFile = cfg.get("rhipe_copy_file").equals("TRUE") ? true : false;
    String[] argvSplit = argv.split(" ");
    String prog = argvSplit[0];
    Environment childEnv = (Environment) env().clone();
    cfg.set("io_sort_mb", cfg.get("io.sort.mb"));
    addJobConfToEnvironment(cfg, childEnv);
    childEnv.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    // Start the process
    ProcessBuilder builder = new ProcessBuilder(argvSplit);
    builder.environment().putAll(childEnv.toMap());
    sim = builder.start();
    clientOut_ = new DataOutputStream(new BufferedOutputStream(sim.getOutputStream(), BUFFER_SIZE));
    clientIn_ = new DataInputStream(new BufferedInputStream(sim.getInputStream(), BUFFER_SIZE));
    clientErr_ = new DataInputStream(new BufferedInputStream(sim.getErrorStream()));
    startTime_ = System.currentTimeMillis();
    LOG.info(callID + ":" + "Started external program:" + argv);
    errThread_ = new MRErrorThread();
    LOG.info(callID + ":" + "Started Error Thread");
    errThread_.start();
  } catch (Exception e) {
    e.printStackTrace();
    throw new RuntimeException("configuration exception", e);
  }
}
From source file:org.jd.copier.mapred.DistCp.java
License:Apache License
/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
  jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

  // set boolean values
  final boolean update = args.flags.contains(Options.UPDATE);
  final boolean skipCRCCheck = args.flags.contains(Options.SKIPCRC);
  final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
  jobConf.setBoolean(Options.UPDATE.propertyname, update);
  jobConf.setBoolean(Options.SKIPCRC.propertyname, skipCRCCheck);
  jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
  jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
      args.flags.contains(Options.IGNORE_READ_FAILURES));
  jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

  final String randomId = getRandomId();
  JobClient jClient = new JobClient(jobConf);
  Path stagingArea;
  try {
    stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
  } catch (InterruptedException e) {
    throw new IOException(e);
  }
  Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
  FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
  FileSystem.mkdirs(jClient.getFs(), jobDirectory, mapredSysPerms);
  jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());
  long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

  FileSystem dstfs = args.dst.getFileSystem(conf);

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

  boolean dstExists = dstfs.exists(args.dst);
  boolean dstIsDir = false;
  if (dstExists) {
    dstIsDir = dstfs.getFileStatus(args.dst).isDir();
  }

  // default logPath
  Path logPath = args.log;
  if (logPath == null) {
    String filename = "_distcp_logs_" + randomId;
    if (!dstExists || !dstIsDir) {
      Path parent = args.dst.getParent();
      if (null == parent) {
        // If dst is '/' on S3, it might not exist yet, but dst.getParent()
        // will return null. In this case, use '/' as its own parent to prevent
        // NPE errors below.
        parent = args.dst;
      }
      if (!dstfs.exists(parent)) {
        dstfs.mkdirs(parent);
      }
      logPath = new Path(parent, filename);
    } else {
      logPath = new Path(args.dst, filename);
    }
  }
  FileOutputFormat.setOutputPath(jobConf, logPath);

  // create src list, dst list
  FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

  Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
  jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
  SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
      LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

  Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
  SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist,
      Text.class, Text.class, SequenceFile.CompressionType.NONE);

  Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
  jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
  SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist,
      Text.class, FilePair.class, SequenceFile.CompressionType.NONE);

  // handle the case where the destination directory doesn't exist
  // and we've only a single src directory OR we're updating/overwriting
  // the contents of the destination directory.
  final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
  int srcCount = 0, cnsyncf = 0, dirsyn = 0;
  long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
  try {
    for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
      final Path src = srcItr.next();
      FileSystem srcfs = src.getFileSystem(conf);
      FileStatus srcfilestat = srcfs.getFileStatus(src);
      Path root = special && srcfilestat.isDir() ? src : src.getParent();
      if (srcfilestat.isDir()) {
        ++srcCount;
      }

      Stack<FileStatus> pathstack = new Stack<FileStatus>();
      for (pathstack.push(srcfilestat); !pathstack.empty();) {
        FileStatus cur = pathstack.pop();
        FileStatus[] children = srcfs.listStatus(cur.getPath());
        for (int i = 0; i < children.length; i++) {
          boolean skipfile = false;
          final FileStatus child = children[i];
          final String dst = makeRelative(root, child.getPath());
          ++srcCount;

          if (child.isDir()) {
            pathstack.push(child);
          } else {
            // skip file if the src and the dst files are the same.
            skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst), skipCRCCheck);
            // skip file if it exceed file limit or size limit
            skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

            if (!skipfile) {
              ++fileCount;
              byteCount += child.getLen();

              if (LOG.isTraceEnabled()) {
                LOG.trace("adding file " + child.getPath());
              }

              ++cnsyncf;
              cbsyncs += child.getLen();
              if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                src_writer.sync();
                dst_writer.sync();
                cnsyncf = 0;
                cbsyncs = 0L;
              }
            }
          }

          if (!skipfile) {
            src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                new FilePair(child, dst));
          }

          dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
        }

        if (cur.isDir()) {
          String dst = makeRelative(root, cur.getPath());
          dir_writer.append(new Text(dst), new FilePair(cur, dst));
          if (++dirsyn > SYNC_FILE_MAX) {
            dirsyn = 0;
            dir_writer.sync();
          }
        }
      }
    }
  } finally {
    checkAndClose(src_writer);
    checkAndClose(dst_writer);
    checkAndClose(dir_writer);
  }

  FileStatus dststatus = null;
  try {
    dststatus = dstfs.getFileStatus(args.dst);
  } catch (FileNotFoundException fnfe) {
    LOG.info(args.dst + " does not exist.");
  }

  // create dest path dir if copying > 1 file
  if (dststatus == null) {
    if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
      throw new IOException("Failed to create" + args.dst);
    }
  }

  final Path sorted = new Path(jobDirectory, "_distcp_sorted");
  checkDuplication(jobfs, dstfilelist, sorted, conf);

  if (dststatus != null && args.flags.contains(Options.DELETE)) {
    deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
  }

  Path tmpDir = new Path(
      (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
      "_distcp_tmp_" + randomId);
  jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

  // Explicitly create the tmpDir to ensure that it can be cleaned
  // up by fullyDelete() later.
  tmpDir.getFileSystem(conf).mkdirs(tmpDir);

  LOG.info("sourcePathsCount=" + srcCount);
  LOG.info("filesToCopyCount=" + fileCount);
  LOG.info("bytesToCopyCount=" + StringUtils.humanReadableInt(byteCount));
  jobConf.setInt(SRC_COUNT_LABEL, srcCount);
  jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
  setMapCount(byteCount, jobConf);
  return fileCount > 0;
}
From source file:org.lab41.hbase.TitanHbaseIdSplitter.java
License:Apache License
public HTableDescriptor createAndSplitTable(String tableName, HBaseAdmin hbaseAdmin,
    Configuration configuration) throws IOException {
  Long maxId = configuration.getLong(MAXID_KEY, MAXID_DEFAULT);
  Long regionSize = configuration.getLong(REGION_SIZE_KEY, REGION_SIZE_DEFAULT);
  ArrayList<byte[]> arrayList = new ArrayList<byte[]>();
  HTableDescriptor hTableDescriptor = new HTableDescriptor(tableName);
  int numSplits = configuration.getInt(NUM_SPLITS_KEY, NUM_SPLITS_DEFAULT);

  for (long i = 1; i < maxId; i += regionSize) {
    byte[] splitPoint = longToBytes(TitanId.toVertexId(i));
    arrayList.add(splitPoint);
  }

  byte[] midStart = new byte[] { 0x01, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00,
      (byte) 0x00, (byte) 0x00, (byte) 0x00 };
  byte[] midEnd = new byte[] { (byte) 0x01, (byte) 0x01, (byte) 0x00, (byte) 0x00, (byte) 0x00,
      (byte) 0x00, (byte) 0x00, (byte) 0x00 };
  byte[][] midsplits = Bytes.split(midStart, midEnd, (int) Math.ceil(numSplits * 0.75));
  midsplits = Arrays.copyOfRange(midsplits, 0, midsplits.length - 1);
  for (int i = 0; i < midsplits.length; i++) {
    arrayList.add(midsplits[i]);
  }

  byte[] highStart = new byte[] { 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 };
  byte[] highEnd = new byte[] { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff,
      (byte) 0xff, (byte) 0xff, (byte) 0xff };
  byte[][] highSplits = Bytes.split(highStart, highEnd, (int) Math.ceil(numSplits * 0.25));
  highSplits = Arrays.copyOfRange(highSplits, 0, highSplits.length - 1);
  for (int i = 0; i < highSplits.length; i++) {
    arrayList.add(highSplits[i]);
  }

  byte[][] splits = new byte[arrayList.size()][8];
  arrayList.toArray(splits);

  // debug loop
  logger.info("Splits : " + splits.length);
  for (int j = 0; j < splits.length; j++) {
    logger.info("createAndSplitTable" + Hex.encodeHexString(splits[j]) + " Bytes.toBytesString : "
        + Bytes.toStringBinary(splits[j]));
  }

  hbaseAdmin.createTable(hTableDescriptor, splits);
  return hTableDescriptor;
}
From source file:org.mrgeo.data.vector.VectorInputFormatContext.java
License:Apache License
public static VectorInputFormatContext load(final Configuration conf) {
  VectorInputFormatContext context = new VectorInputFormatContext();
  context.inputs = new HashSet<String>();
  context.featureCount = conf.getLong(FEATURE_COUNT_KEY, -1L);
  context.minFeaturesPerSplit = conf.getInt(MIN_FEATURES_PER_SPLIT_KEY, -1);
  int inputsCount = conf.getInt(INPUTS_COUNT, 0);
  for (int inputIndex = 0; inputIndex < inputsCount; inputIndex++) {
    String input = conf.get(INPUTS_PREFIX + inputIndex);
    context.inputs.add(input);
  }
  String strProviderProperties = conf.get(PROVIDER_PROPERTY_KEY);
  if (strProviderProperties != null) {
    context.inputProviderProperties = ProviderProperties.fromDelimitedString(strProviderProperties);
  }
  return context;
}
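Several of the Tez examples above follow the same idiom: read a requested size with getLong, then clamp it to the memory actually available at runtime. Below is a condensed sketch of that pattern; the property name example.task.memory.bytes and the numbers are illustrative assumptions, not real configuration keys.

import org.apache.hadoop.conf.Configuration;

public class ClampLongExample {
  // Hypothetical property name, used only for illustration.
  private static final String REQUESTED_MEMORY_KEY = "example.task.memory.bytes";

  static long resolveMemoryLimit(Configuration conf, long memoryActuallyAvailable) {
    // Read the configured request, defaulting to whatever is available.
    long requested = conf.getLong(REQUESTED_MEMORY_KEY, memoryActuallyAvailable);
    // Never grant more than the runtime can actually provide.
    return Math.min(requested, memoryActuallyAvailable);
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setLong(REQUESTED_MEMORY_KEY, 2L * 1024 * 1024 * 1024); // request 2 GB
    // Clamped to the 1 GB that is actually available.
    System.out.println(resolveMemoryLimit(conf, 1024L * 1024 * 1024));
  }
}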