List of usage examples for org.apache.hadoop.conf.Configuration#getInt

public int getInt(String name, int defaultValue)

Get the value of the name property as an int. If no such property exists, the provided default value is returned.
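Before the examples from real projects below, here is a minimal, self-contained sketch of the call; the property name example.max.retries and its default of 3 are illustrative and not taken from any of the projects listed.

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Returns 3 unless "example.max.retries" has been set to a valid int value.
        int maxRetries = conf.getInt("example.max.retries", 3);
        System.out.println("max retries = " + maxRetries);
    }
}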
From source file:com.twitter.algebra.nmf.NMFDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.methodOption().create());
    addOption(ROWSOPTION, "rows", "Number of rows");
    addOption(COLSOPTION, "cols", "Number of cols");
    addOption(PRINCIPALSOPTION, "pcs", "Number of principal components");
    addOption(PARTITIONSOPTION, "parts", "Number of partitions in principal components");
    addOption(SAMPLE_RATE, SAMPLE_RATE, "sample rate for error calculation");
    if (parseArguments(args) == null) {
        return -1;
    }
    Path input = getInputPath();
    Path output = getOutputPath();
    final int nRows = Integer.parseInt(getOption(ROWSOPTION));
    final int nCols = Integer.parseInt(getOption(COLSOPTION));
    final int nPCs = Integer.parseInt(getOption(PRINCIPALSOPTION));
    final int nColPartitions = Integer.parseInt(getOption(PARTITIONSOPTION));
    alpha1 = Float.parseFloat(getOption(ALPHA1, "0.01"));
    alpha2 = Float.parseFloat(getOption(ALPHA2, "1"));
    lambda1 = Float.parseFloat(getOption(LAMBDA1, "0.01"));
    lambda2 = Float.parseFloat(getOption(LAMBDA2, "0"));
    sampleRate = Float.parseFloat(getOption(SAMPLE_RATE, "0.0001f"));
    Configuration conf = getConf();
    if (conf == null) {
        throw new IOException("No Hadoop configuration present");
    }
    MIN_ERROR_CHANGE = conf.getLong(MIN_ERROR_CHANGE_STR, Long.MAX_VALUE);
    MAX_ROUNDS = conf.getInt(MAX_ROUNDS_STR, 100);
    run(conf, input, output, nRows, nCols, nPCs, nColPartitions);
    return 0;
}
From source file:com.twitter.algebra.nmf.RowColPartitioner.java
License:Apache License
@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    totalRows = conf.getInt(TOTAL_ROWS, -1);
    totalCols = conf.getInt(TOTAL_COLS, -1);
    totalColPartitions = conf.getInt(TOTAL_COL_PARTITIONS, -1);
    checkTotalKeys();
}
From source file:com.twitter.ambrose.hive.AmbroseHivePreHook.java
License:Apache License
/**
 * Waits <tt>ambrose.wf.between.sleep.seconds</tt> before processing the
 * next statement (workflow) in the submitted script.
 *
 * @param hookContext
 * @param reporter
 * @param queryId
 */
private void waitBetween(HookContext hookContext, EmbeddedAmbroseHiveProgressReporter reporter,
        String queryId) {
    Configuration conf = hookContext.getConf();
    boolean justStarted = conf.getBoolean(SCRIPT_STARTED_PARAM, true);
    if (justStarted) {
        conf.setBoolean(SCRIPT_STARTED_PARAM, false);
    } else {
        // sleeping between workflows
        int sleepTimeMs = conf.getInt(WF_BETWEEN_SLEEP_SECS_PARAM, 10);
        try {
            LOG.info("One workflow complete, sleeping for " + sleepTimeMs
                    + " sec(s) before moving to the next one if exists. Hit ctrl-c to exit.");
            Thread.sleep(sleepTimeMs * 1000L);
            // send progressbar reset event
            Map<WorkflowProgressField, String> eventData = Maps.newHashMapWithExpectedSize(1);
            eventData.put(WorkflowProgressField.workflowProgress, "0");
            reporter.pushEvent(queryId, new Event.WorkflowProgressEvent(eventData));
            reporter.saveEventStack();
            reporter.reset();
        } catch (InterruptedException e) {
            LOG.warn("Sleep interrupted", e);
        }
    }
}
From source file:com.twitter.elephanttwin.lzo.retrieval.LZOBlockOffsetMapper.java
License:Open Source License
@Override
protected void setup(
        Mapper<LongWritable, BinaryWritable<M>, TextLongPairWritable, LongPairWritable>.Context context)
        throws IOException, InterruptedException {
    long splitStart; // the start offset of the input split
    long splitLength; // the length of the input split
    long splitEnd; // the last byte of this input split
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    splitStart = fileSplit.getStart();
    splitLength = fileSplit.getLength();
    splitEnd = splitStart + splitLength;
    Configuration conf = context.getConfiguration();
    maxBlockSize = Math.max(conf.getLong("dfs.block.size", 256 * 1024 * 1024), splitLength);
    // we don't want to create more indexed splits than original splits;
    // the original split size could be more than dfs.block.size.
    gapsize = conf.getInt(GAPSIZE_CONF, defaultGapSize);
    LOG.info("gap size allowed to combine blocks is set:" + gapsize);
    String valueClassName = context.getConfiguration().get(CLASSNAME_CONF);
    Class<?> c = BlockIndexedFileInputFormat.getValueClassByName(valueClassName);
    // column = c.getDeclaredField(context.getConfiguration().get(COLUMNNAME));
    String methodName;
    String columnName = context.getConfiguration().get(COLUMNNAME_CONF);
    methodName = BlockIndexedFileInputFormat.getCamelCaseMethodName(columnName, c);
    try {
        method = c.getMethod(methodName);
    } catch (Exception e) {
        LOG.error("cannot instantiate the column to be read", e);
        throw new IOException(e);
    }
    Path file = fileSplit.getPath();
    fileSize = file.getFileSystem(context.getConfiguration()).getFileStatus(file).getLen();
    LzoIndex lzoBlockIndex = LzoIndex.readIndex(file.getFileSystem(context.getConfiguration()), file);
    if (lzoBlockIndex.isEmpty()) {
        throw new IOException("No LZO index file exists for the input file " + file.toString()
                + " cannot index the input file");
    }
    int num_lzo_blocks = lzoBlockIndex.getNumberOfBlocks();
    lastLZOBlockStartOffset = lzoBlockIndex.getPosition(num_lzo_blocks - 1);
    LOG.info(context.getTaskAttemptID() + " splitStart= " + splitStart + " splitEnd=" + splitEnd
            + " splitLength=" + splitLength);
    LOG.info(context.getTaskAttemptID() + ":total LZOblocks in this file: " + num_lzo_blocks);
    // first loop to get the range of block offsets in lzoBlockIndex this mapper
    // is responsible for
    int startPos = 0;
    int endPos = 0;
    boolean foundStartPos = false;
    boolean foundEndPos = false;
    for (int i = 0; i < num_lzo_blocks; i++) {
        long currentBlockOffset = lzoBlockIndex.getPosition(i);
        if (currentBlockOffset >= splitStart) {
            if (!foundStartPos) {
                startPos = i;
                foundStartPos = true;
            }
        }
        if (currentBlockOffset >= splitEnd) {
            if (!foundEndPos) {
                endPos = i;
                foundEndPos = true;
            }
        }
        if (foundStartPos && foundEndPos)
            break;
    }
    if (!foundEndPos) {
        endPos = num_lzo_blocks - 1;
        totalLZOBlocks = endPos - startPos + 1;
        // the last split: we need to copy from startPos to the end and additionally
        // add the end of the file to the array lzoBlockOffset
    } else {
        if (endPos < num_lzo_blocks - 1)
            endPos++;
        if (endPos == num_lzo_blocks - 1) { // treat as if it's the last split
            totalLZOBlocks = endPos - startPos + 1;
            foundEndPos = false;
        } else
            totalLZOBlocks = endPos - startPos;
    }
    // special treatment for the first lzo block offset: due to the current
    // lzoindex implementation, we have to use 0 for the first lzo block in any
    // lzo compressed file, though in fact the actual start offset of the first
    // lzo block is not 0. Later we may consider changing the lzo related package
    // to make sure all lzo block start offsets are treated the same way.
    lzoOffsets = new long[totalLZOBlocks + 1];
    if (foundEndPos) {
        for (int i = 0; i <= totalLZOBlocks; i++)
            lzoOffsets[i] = lzoBlockIndex.getPosition(i + startPos);
    } else {
        // treat the last InputSplit differently
        if (LOG.isDebugEnabled()) {
            LOG.debug("read the last lzo block offset, add the file end offset to the last element in the index array");
        }
        for (int i = 0; i < totalLZOBlocks; i++)
            lzoOffsets[i] = lzoBlockIndex.getPosition(i + startPos);
        lzoOffsets[totalLZOBlocks] = fileSize;
    }
    if (splitStart == 0) {
        lzoOffsets[0] = 0;
    }
    currentLzoBlock = 0;
    outputValue.setFirst(lzoOffsets[0]);
    outputValue.setSecond(lzoOffsets[1]);
    previousRowLineOffset = lzoOffsets[0];
    if (LOG.isDebugEnabled()) {
        LOG.debug("lzoOffsets= " + Arrays.toString(lzoOffsets));
        LOG.debug("lzoOffsets # of elements:" + lzoOffsets.length);
    }
}
From source file:com.twitter.elephanttwin.retrieval.BlockIndexedFileInputFormat.java
License:Apache License
/**
 * Go through each original input split, get its file path, and check the index file:
 * a) keep it when there is no index prebuilt on this file
 *    (or the index file doesn't match the base file's checksum);
 * b) remove it when no matching value is found in the existing index file;
 * c) construct new, smaller input splits using indexed blocks found
 *    in the index file.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    String inputformat = job.getConfiguration().get(REALINPUTFORMAT);
    String valueClass = job.getConfiguration().get(VALUECLASS);
    List<InputSplit> filteredList = new ArrayList<InputSplit>();
    FileInputFormat<K, V> realInputFormat = getInputFormatClass(inputformat, valueClass);
    List<InputSplit> splits = realInputFormat.getSplits(job);
    // if indexing job, don't skip any input splits.
    // if searching job but no searching filter, skip the index as well.
    if (isIndexingJob(job) || getFilterCondition(job) == null)
        return splits;
    Path prevFile = null; // remember the last input file we saw
    boolean foundIndexedFile = false; // is there an index file for prevFile?
    boolean firstTime = true; // is this the first time we see this file?
    long totalOriginalBytes = 0; // the bytes to be scanned without indexes
    totalBytesNewSplits = 0;
    long startTime = System.currentTimeMillis();
    LOG.info("start filtering out original input splits (total " + splits.size() + ") using indexes");
    Configuration conf = job.getConfiguration();
    long splitMaxSize;
    // for each original input split check if we can filter it out.
    for (InputSplit split : splits) {
        FileSplit fileSplit = (FileSplit) split;
        Path path = fileSplit.getPath();
        splitLength = fileSplit.getLength();
        totalOriginalBytes += fileSplit.getLength();
        splitMaxSize = Math.max(splitLength,
                conf.getInt(INDEXED_SPLIT_SIZE, conf.getInt("dfs.block.size", 256 * 1024 * 1024)));
        /*
         * for each new file we see, we first check whether it has been indexed or not;
         * if not, we just add the original input split; if yes, we use the index
         * file to add filtered splits for the file
         */
        if (prevFile != null && path.equals(prevFile)) {
            firstTime = false;
        } else {
            prevFile = path;
            firstTime = true;
            foundIndexedFile = foundIndexFile(job, path);
        }
        // if no index file, we'll have to read all original input splits
        if (!foundIndexedFile)
            filteredList.add(fileSplit);
        else {
            // for each file we only add its filtered input splits once, using the index file
            if (firstTime) {
                // LOG.info("first time saw " + path
                //     + ", adding filtered splits from index file");
                filteredList.addAll(getFilteredSplits(job, path, fileSplit.getLocations(), splitMaxSize));
            }
        }
    }
    long endTime = System.currentTimeMillis();
    LOG.info("finished filtering out input splits, now total splits:" + filteredList.size()
            + ", seconds used: " + (endTime - startTime) / 1000);
    LOG.info(String.format("total bytes to read before filtering: %s,"
            + " after filtering %s, bytes ratio: %s", totalOriginalBytes, totalBytesNewSplits,
            totalOriginalBytes / Math.max(1, totalBytesNewSplits)));
    return filteredList;
}
From source file:com.twitter.hraven.datasource.JobHistoryService.java
License:Apache License
public JobHistoryService(Configuration myConf) throws IOException {
    this.myConf = myConf;
    this.historyTable = new HTable(myConf, Constants.HISTORY_TABLE_BYTES);
    this.taskTable = new HTable(myConf, Constants.HISTORY_TASK_TABLE_BYTES);
    this.idService = new JobHistoryByIdService(this.myConf);
    this.defaultScannerCaching = myConf.getInt("hbase.client.scanner.caching", 100);
}
From source file:com.uber.hoodie.common.file.HoodieAppendLog.java
License:Apache License
/** Get the configured buffer size. */
private static int getBufferSize(Configuration conf) {
    return conf.getInt("io.file.buffer.size", 4096);
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.AbstractSplitter.java
License:Apache License
@Override
public List<InputSplit> split(Configuration conf) throws IOException {
    // load configuration
    boolean interpolate = DynamoDBQueryInputFormat.getInterpolateAcrossRangeKeyValues(conf);
    Types hashKeyType = DynamoDBQueryInputFormat.getHashKeyType(conf);
    AttributeValue hashKeyValue = DynamoDBQueryInputFormat.getHashKeyValue(conf);
    Types rangeKeyType = DynamoDBQueryInputFormat.getRangeKeyType(conf);
    Collection<AttributeValue> rangeKeyValues = DynamoDBQueryInputFormat.getRangeKeyValues(conf);
    ComparisonOperator rangeKeyoperator = DynamoDBQueryInputFormat.getRangeKeyComparisonOperator(conf);
    AttributeValue minRangeKeyValue = DynamoDBQueryInputFormat.getRangeKeyInterpolateMinValue(conf);
    AttributeValue maxRangeKeyValue = DynamoDBQueryInputFormat.getRangeKeyInterpolateMaxValue(conf);
    // ensure DynamoDBQueryInputFormat was configured correctly
    if (interpolate) {
        rangeKeyValues = new ArrayList<AttributeValue>();
    } else {
        minRangeKeyValue = null;
        maxRangeKeyValue = null;
    }
    // compute number of input splits
    int numSplits = conf.getInt("mapred.map.tasks", 1);
    int numHashKeys = 1;
    int numRangeSplits = numSplits / numHashKeys;
    numRangeSplits = (!interpolate) ? 1 : numRangeSplits;
    numRangeSplits = (numRangeSplits <= 0) ? 1 : numRangeSplits;
    // generate input splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    // handle cases where interpolation is turned off or unnecessary
    if (!interpolate || numRangeSplits <= 1 || minRangeKeyValue == null || maxRangeKeyValue == null) {
        LOG.info("Generating 1 split for each HashKey");
        DynamoDBQueryInputFormat.DynamoDBQueryInputSplit split =
                new DynamoDBQueryInputFormat.DynamoDBQueryInputSplit(
                        hashKeyType, hashKeyValue, rangeKeyType, rangeKeyValues, rangeKeyoperator);
        splits.add(split);
    } else {
        // interpolate between RangeKey values
        LOG.info(String.format("Generating %d RangeKey splits for each HashKey", numRangeSplits));
        if (null == hashKeyValue) {
            LOG.error("Cannot create a range when the HashKey is NULL. Ignoring range key interpolation.");
        } else {
            generateRangeKeySplits(conf, splits, hashKeyType, hashKeyValue, rangeKeyType,
                    minRangeKeyValue, maxRangeKeyValue, numRangeSplits);
        }
    }
    return splits;
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java
License:Apache License
public static Types getHashKeyType(Configuration conf) {
    return Types.values()[conf.getInt(DynamoDBConfiguration.HASH_KEY_TYPE_PROPERTY, Types.STRING.ordinal())];
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java
License:Apache License
public static Types getRangeKeyType(Configuration conf) {
    return Types.values()[conf.getInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, Types.STRING.ordinal())];
}
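The two getters above rely on a common idiom: an enum is stored in the Configuration as its ordinal via setInt and decoded with Types.values()[conf.getInt(...)]. A minimal sketch of that round trip, using a standalone enum and an illustrative property key rather than the actual DynamoDBConfiguration constants:

import org.apache.hadoop.conf.Configuration;

public class EnumConfigExample {
    enum KeyType { STRING, NUMBER, BINARY }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store the enum as its ordinal; "example.key.type" is an illustrative property name.
        conf.setInt("example.key.type", KeyType.NUMBER.ordinal());
        // Decode it back, defaulting to STRING when the property is unset.
        KeyType type = KeyType.values()[conf.getInt("example.key.type", KeyType.STRING.ordinal())];
        System.out.println(type); // prints NUMBER
    }
}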