List of usage examples for org.apache.hadoop.conf.Configuration getLong
public long getLong(String name, long defaultValue)
Parameter: name — the property name to look up.
Returns: the value of the name property as a long, or defaultValue if no such property exists.
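Before the extracted examples below, here is a minimal, self-contained sketch of the typical pattern: read a numeric setting with a fallback default. The property name my.example.timeout.ms and its values are hypothetical and not taken from any of the source files listed on this page.

import org.apache.hadoop.conf.Configuration;

public class GetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Optionally set the property so getLong has something to read.
        conf.setLong("my.example.timeout.ms", 30_000L);

        // Falls back to 10_000L when the property is absent.
        long timeoutMs = conf.getLong("my.example.timeout.ms", 10_000L);
        System.out.println("timeout = " + timeoutMs + " ms");
    }
}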
From source file:com.ssamples.hbase.stochasticbalancer.StochasticLoadBalancer.java
License:Apache License
@Override
public synchronized void setConf(Configuration conf) {
    super.setConf(conf);
    maxSteps = conf.getInt(MAX_STEPS_KEY, maxSteps);
    stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion);
    maxRunningTime = conf.getLong(MAX_RUNNING_TIME_KEY, maxRunningTime);
    runMaxSteps = conf.getBoolean(RUN_MAX_STEPS_KEY, runMaxSteps);
    numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);
    isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable);
    minCostNeedBalance = conf.getFloat(MIN_COST_NEED_BALANCE_KEY, minCostNeedBalance);
    if (localityCandidateGenerator == null) {
        localityCandidateGenerator = new LocalityBasedCandidateGenerator(services);
    }
    localityCost = new ServerLocalityCostFunction(conf, services);
    rackLocalityCost = new RackLocalityCostFunction(conf, services);
    if (this.candidateGenerators == null) {
        candidateGenerators = Lists.newArrayList();
        candidateGenerators.add(new RandomCandidateGenerator());
        candidateGenerators.add(new LoadCandidateGenerator());
        candidateGenerators.add(localityCandidateGenerator);
        candidateGenerators.add(new RegionReplicaRackCandidateGenerator());
    }
    regionLoadFunctions = new CostFromRegionLoadFunction[] { new ReadRequestCostFunction(conf),
            new CPRequestCostFunction(conf), new WriteRequestCostFunction(conf),
            new MemStoreSizeCostFunction(conf), new StoreFileCostFunction(conf) };
    regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
    regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
    costFunctions = new CostFunction[] { new RegionCountSkewCostFunction(conf),
            new PrimaryRegionCountSkewCostFunction(conf), new MoveCostFunction(conf), localityCost,
            rackLocalityCost, new TableSkewCostFunction(conf), regionReplicaHostCostFunction,
            regionReplicaRackCostFunction, regionLoadFunctions[0], regionLoadFunctions[1],
            regionLoadFunctions[2], regionLoadFunctions[3], regionLoadFunctions[4] };
    curFunctionCosts = new Double[costFunctions.length];
    tempFunctionCosts = new Double[costFunctions.length];
    LOG.info("Loaded config; maxSteps=" + maxSteps + ", stepsPerRegion=" + stepsPerRegion
            + ", maxRunningTime=" + maxRunningTime + ", isByTable=" + isByTable + ", etc.");
}
From source file:com.ssamples.hbase.stochasticbalancer.StochasticLoadBalancerNew.java
License:Apache License
@Override
public synchronized void setConf(Configuration conf) {
    super.setConf(conf);
    maxSteps = conf.getInt(MAX_STEPS_KEY, maxSteps);
    stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion);
    maxRunningTime = conf.getLong(MAX_RUNNING_TIME_KEY, maxRunningTime);
    runMaxSteps = conf.getBoolean(RUN_MAX_STEPS_KEY, runMaxSteps);
    numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);
    isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable);
    minCostNeedBalance = conf.getFloat(MIN_COST_NEED_BALANCE_KEY, minCostNeedBalance);
    if (localityCandidateGenerator == null) {
        localityCandidateGenerator = new LocalityBasedCandidateGenerator(services);
    }
    localityCost = new ServerLocalityCostFunction(conf, services);
    rackLocalityCost = new RackLocalityCostFunction(conf, services);
    if (this.candidateGenerators == null) {
        candidateGenerators = Lists.newArrayList();
        candidateGenerators.add(new RandomCandidateGenerator());
        candidateGenerators.add(new LoadCandidateGenerator());
        candidateGenerators.add(localityCandidateGenerator);
        candidateGenerators.add(new RegionReplicaRackCandidateGenerator());
    }
    regionLoadFunctions = new CostFromRegionLoadFunction[] { new ReadRequestCostFunction(conf),
            new CPRequestCostFunction(conf), new WriteRequestCostFunction(conf),
            new MemStoreSizeCostFunction(conf), new StoreFileCostFunction(conf) };
    regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
    regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
    costFunctions = new CostFunction[] {
            //new RegionCountSkewCostFunction(conf),
            new PrimaryRegionCountSkewCostFunction(conf), new MoveCostFunction(conf), localityCost,
            rackLocalityCost,
            //new TableSkewCostFunction(conf),
            new TableSkewCostFunctionNew(conf),
            //new TableRegionSkewCostFunction(conf),
            new ServerResourceCostFunction(conf), regionReplicaHostCostFunction,
            regionReplicaRackCostFunction, regionLoadFunctions[0], regionLoadFunctions[1],
            regionLoadFunctions[2], regionLoadFunctions[3], regionLoadFunctions[4] };
    curFunctionCosts = new Double[costFunctions.length];
    tempFunctionCosts = new Double[costFunctions.length];
    LOG.info("Loaded config; maxSteps=" + maxSteps + ", stepsPerRegion=" + stepsPerRegion
            + ", maxRunningTime=" + maxRunningTime + ", isByTable=" + isByTable + ", etc.");
}
From source file:com.taobao.adfs.database.MysqlServerController.java
License:Apache License
public String backupData(Configuration conf) throws IOException {
    String mysqlDataPath = Utilities.getNormalPath(conf.get("mysql.server.data.path", "."));
    long expireTimeOfOldMysqlData =
            conf.getLong("mysql.server.data.path.old.expire.time", 30L * 24 * 3600 * 1000);
    return moveData(mysqlDataPath, expireTimeOfOldMysqlData);
}
From source file:com.transwarp.hbase.bulkload.combine.remote.CombineRemoteFileInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
    Configuration conf = job.getConfiguration();

    // the values specified by setxxxSplitSize() take precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = conf.getLong("mapred.min.split.size.per.node", 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = conf.getLong("mapred.min.split.size.per.rack", 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = conf.getLong("mapred.max.split.size", 0);
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack" + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node" + minSizeNode
                + " cannot be smaller than minimum split " + "size per rack " + minSizeRack);
    }

    // all the files in input set
    Path[] paths = FileUtil.stat2Paths(listStatus(job).toArray(new FileStatus[0]));
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (paths.length == 0) {
        return splits;
    }

    // Convert them to Paths first. This is a costly operation and
    // we should do it first, otherwise we will incur doing it multiple
    // times, one time each for each pool in the next loop.
    List<Path> newpaths = new LinkedList<Path>();
    for (int i = 0; i < paths.length; i++) {
        //Path p = new Path(paths[i].toUri().getPath());
        Path p = paths[i];
        newpaths.add(p);
    }
    paths = null;

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();
        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (Iterator<Path> iter = newpaths.iterator(); iter.hasNext();) {
            Path p = iter.next();
            if (onepool.accept(p)) {
                myPaths.add(p); // add it to my output set
                iter.remove();
            }
        }
        // create splits for all files in this pool.
        getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack, splits);
    }

    // create splits for all files that are not in any pool.
    getMoreSplits(job, newpaths.toArray(new Path[newpaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
    return splits;
}
From source file:com.tuplejump.calliope.hadoop.AbstractColumnFamilyRecordWriter.java
License:Apache License
protected AbstractColumnFamilyRecordWriter(Configuration conf) {
    this.conf = conf;
    this.ringCache = new RingCache(conf);
    this.queueSize = conf.getInt(AbstractColumnFamilyOutputFormat.QUEUE_SIZE,
            32 * FBUtilities.getAvailableProcessors());
    batchThreshold = conf.getLong(AbstractColumnFamilyOutputFormat.BATCH_THRESHOLD, 32);
    consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getWriteConsistencyLevel(conf));
}
From source file:com.twitter.algebra.nmf.NMFDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.methodOption().create());
    addOption(ROWSOPTION, "rows", "Number of rows");
    addOption(COLSOPTION, "cols", "Number of cols");
    addOption(PRINCIPALSOPTION, "pcs", "Number of principal components");
    addOption(PARTITIONSOPTION, "parts", "Number of partitions in principal components");
    addOption(SAMPLE_RATE, SAMPLE_RATE, "sample rate for error calculation");
    if (parseArguments(args) == null) {
        return -1;
    }

    Path input = getInputPath();
    Path output = getOutputPath();
    final int nRows = Integer.parseInt(getOption(ROWSOPTION));
    final int nCols = Integer.parseInt(getOption(COLSOPTION));
    final int nPCs = Integer.parseInt(getOption(PRINCIPALSOPTION));
    final int nColPartitions = Integer.parseInt(getOption(PARTITIONSOPTION));

    alpha1 = Float.parseFloat(getOption(ALPHA1, "0.01"));
    alpha2 = Float.parseFloat(getOption(ALPHA2, "1"));
    lambda1 = Float.parseFloat(getOption(LAMBDA1, "0.01"));
    lambda2 = Float.parseFloat(getOption(LAMBDA2, "0"));
    sampleRate = Float.parseFloat(getOption(SAMPLE_RATE, "0.0001f"));

    Configuration conf = getConf();
    if (conf == null) {
        throw new IOException("No Hadoop configuration present");
    }
    MIN_ERROR_CHANGE = conf.getLong(MIN_ERROR_CHANGE_STR, Long.MAX_VALUE);
    MAX_ROUNDS = conf.getInt(MAX_ROUNDS_STR, 100);

    run(conf, input, output, nRows, nCols, nPCs, nColPartitions);
    return 0;
}
From source file:com.twitter.elephanttwin.lzo.retrieval.LZOBlockOffsetMapper.java
License:Open Source License
@Override
protected void setup(
        Mapper<LongWritable, BinaryWritable<M>, TextLongPairWritable, LongPairWritable>.Context context)
        throws IOException, InterruptedException {
    long splitStart; // the start offset of the input split
    long splitLength; // the length of the input split
    long splitEnd; // the last byte of this input split

    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    splitStart = fileSplit.getStart();
    splitLength = fileSplit.getLength();
    splitEnd = splitStart + splitLength;

    Configuration conf = context.getConfiguration();
    // we don't want to create more indexed splits than original splits,
    // the original split size could be more than dfs.block.size.
    maxBlockSize = Math.max(conf.getLong("dfs.block.size", 256 * 1024 * 1024), splitLength);
    gapsize = conf.getInt(GAPSIZE_CONF, defaultGapSize);
    LOG.info("gap size allowed to combine blocks is set:" + gapsize);

    String valueClassName = context.getConfiguration().get(CLASSNAME_CONF);
    Class<?> c = BlockIndexedFileInputFormat.getValueClassByName(valueClassName);
    // column = c.getDeclaredField(context.getConfiguration().get(COLUMNNAME));
    String methodName;
    String columnName = context.getConfiguration().get(COLUMNNAME_CONF);
    methodName = BlockIndexedFileInputFormat.getCamelCaseMethodName(columnName, c);
    try {
        method = c.getMethod(methodName);
    } catch (Exception e) {
        LOG.error("cannot instantiate the column to be read", e);
        throw new IOException(e);
    }

    Path file = fileSplit.getPath();
    fileSize = file.getFileSystem(context.getConfiguration()).getFileStatus(file).getLen();
    LzoIndex lzoBlockIndex = LzoIndex.readIndex(file.getFileSystem(context.getConfiguration()), file);
    if (lzoBlockIndex.isEmpty()) {
        throw new IOException("No LZO index file exists for the input file " + file.toString()
                + " cannot index the input file");
    }

    int num_lzo_blocks = lzoBlockIndex.getNumberOfBlocks();
    lastLZOBlockStartOffset = lzoBlockIndex.getPosition(num_lzo_blocks - 1);

    LOG.info(context.getTaskAttemptID() + " splitStart= " + splitStart + " splitEnd=" + splitEnd
            + " splitLength=" + splitLength);
    LOG.info(context.getTaskAttemptID() + ":total LZOblocks in this file: " + num_lzo_blocks);

    // first loop to get the range of block offsets in lzoBlockIndex this mapper
    // is responsible for
    int startPos = 0;
    int endPos = 0;
    boolean foundStartPos = false;
    boolean foundEndPos = false;
    for (int i = 0; i < num_lzo_blocks; i++) {
        long currentBlockOffset = lzoBlockIndex.getPosition(i);
        if (currentBlockOffset >= splitStart) {
            if (!foundStartPos) {
                startPos = i;
                foundStartPos = true;
            }
        }
        if (currentBlockOffset >= splitEnd) {
            if (!foundEndPos) {
                endPos = i;
                foundEndPos = true;
            }
        }
        if (foundStartPos && foundEndPos)
            break;
    }

    if (!foundEndPos) {
        // the last split: we need to copy from startPos to the end and additionally
        // add the end of the file to the array lzoBlockOffset
        endPos = num_lzo_blocks - 1;
        totalLZOBlocks = endPos - startPos + 1;
    } else {
        if (endPos < num_lzo_blocks - 1)
            endPos++;
        if (endPos == num_lzo_blocks - 1) { // treat as if it's the last split
            totalLZOBlocks = endPos - startPos + 1;
            foundEndPos = false;
        } else
            totalLZOBlocks = endPos - startPos;
    }

    // special treatment for the first lzoblock offset: due to the current
    // lzoindex implementation, we have to use 0 for the first lzo block in any
    // lzo compressed file though in fact the actual start offset to the first
    // lzoblock is not 0. later we may consider changing the lzo related package
    // to make sure all lzo block start offsets are treated the same way.
    lzoOffsets = new long[totalLZOBlocks + 1];
    if (foundEndPos) {
        for (int i = 0; i <= totalLZOBlocks; i++)
            lzoOffsets[i] = lzoBlockIndex.getPosition(i + startPos);
    } else {
        // treat the last InputSplit differently
        if (LOG.isDebugEnabled()) {
            LOG.debug("read the last lzo block offset, add the file end offset to the last element in the index array");
        }
        for (int i = 0; i < totalLZOBlocks; i++)
            lzoOffsets[i] = lzoBlockIndex.getPosition(i + startPos);
        lzoOffsets[totalLZOBlocks] = fileSize;
    }

    if (splitStart == 0) {
        lzoOffsets[0] = 0;
    }

    currentLzoBlock = 0;
    outputValue.setFirst(lzoOffsets[0]);
    outputValue.setSecond(lzoOffsets[1]);
    previousRowLineOffset = lzoOffsets[0];

    if (LOG.isDebugEnabled()) {
        LOG.debug("lzoOffsets= " + Arrays.toString(lzoOffsets));
        LOG.debug("lzoOffsets # of elements:" + lzoOffsets.length);
    }
}
From source file:com.twitter.elephanttwin.retrieval.OneSplitInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = job.getConfiguration();
    FileSplit split = (FileSplit) super.getSplits(job).get(0);
    List<InputSplit> lists = new ArrayList<InputSplit>();
    lists.add(new FileSplit(split.getPath(), conf.getLong(START, 0),
            conf.getLong(END, 0) - conf.getLong(START, 0), split.getLocations()));
    return lists;
}
From source file:com.twitter.hraven.mapreduce.CombineFileInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
    Configuration conf = job.getConfiguration();

    // the values specified by setxxxSplitSize() take precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = conf.getLong("mapred.min.split.size.per.node", 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = conf.getLong("mapred.min.split.size.per.rack", 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = conf.getLong("mapred.max.split.size", 0);
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack" + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node" + minSizeNode
                + " cannot be smaller than minimum split " + "size per rack " + minSizeRack);
    }

    // all the files in input set
    Path[] paths = FileUtil.stat2Paths(listStatus(job).toArray(new FileStatus[0]));
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (paths.length == 0) {
        return splits;
    }

    // Convert them to Paths first. This is a costly operation and
    // we should do it first, otherwise we will incur doing it multiple
    // times, one time each for each pool in the next loop.
    List<Path> newpaths = new LinkedList<Path>();
    for (int i = 0; i < paths.length; i++) {
        Path p = new Path(paths[i].toUri().getPath());
        newpaths.add(p);
    }
    paths = null;
    System.out.println("Getting splits for: " + newpaths.size() + " paths.");

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();
        System.out.println("Getting splits for a pool");

        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (Iterator<Path> iter = newpaths.iterator(); iter.hasNext();) {
            Path p = iter.next();
            if (onepool.accept(p)) {
                myPaths.add(p); // add it to my output set
                iter.remove();
            }
        }
        System.out.println("Getting splits. myPaths size: " + myPaths.size());
        // create splits for all files in this pool.
        getMoreSplits(conf, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack, splits);
    }

    // create splits for all files that are not in any pool.
    getMoreSplits(conf, newpaths.toArray(new Path[newpaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
    return splits;
}
From source file:com.twitter.hraven.MRJobDescFactory.java
License:Apache License
@Override
JobDesc create(QualifiedJobId qualifiedJobId, long submitTimeMillis, Configuration jobConf) {
    // TODO: Get the actual values appropriate for the plain Hadoop jobs.
    String appId = getAppId(jobConf);
    long appSubmitTimeMillis = jobConf.getLong(Constants.MR_RUN_CONF_KEY, submitTimeMillis);
    return create(qualifiedJobId, jobConf, appId, Constants.UNKNOWN, Framework.NONE, appSubmitTimeMillis);
}