Example usage for org.apache.hadoop.conf Configuration getInt

List of usage examples for org.apache.hadoop.conf Configuration getInt

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration getInt.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int, returning defaultValue if the property is not set.
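To make the contract concrete before the project examples below, here is a minimal, self-contained sketch of getInt in isolation; the property names example.max.retries and example.timeout.secs are made up for illustration.

import org.apache.hadoop.conf.Configuration;

public class GetIntSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // property explicitly set: getInt parses and returns the stored value
        conf.setInt("example.max.retries", 5);
        int retries = conf.getInt("example.max.retries", 3); // -> 5

        // property not set: getInt falls back to the supplied default
        int timeoutSecs = conf.getInt("example.timeout.secs", 30); // -> 30

        System.out.println("retries=" + retries + ", timeoutSecs=" + timeoutSecs);
    }
}

As the examples below show, callers often pass either a sentinel default (such as -1, validated afterwards) or a sensible operational default (such as a buffer size or scanner caching value).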

Usage

From source file:com.twitter.algebra.nmf.NMFDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.methodOption().create());
    addOption(ROWSOPTION, "rows", "Number of rows");
    addOption(COLSOPTION, "cols", "Number of cols");
    addOption(PRINCIPALSOPTION, "pcs", "Number of principal components");
    addOption(PARTITIONSOPTION, "parts", "Number of partitions in principal components");
    addOption(SAMPLE_RATE, SAMPLE_RATE, "sample rate for error calculation");
    if (parseArguments(args) == null) {
        return -1;
    }
    Path input = getInputPath();
    Path output = getOutputPath();
    final int nRows = Integer.parseInt(getOption(ROWSOPTION));
    final int nCols = Integer.parseInt(getOption(COLSOPTION));
    final int nPCs = Integer.parseInt(getOption(PRINCIPALSOPTION));
    final int nColPartitions = Integer.parseInt(getOption(PARTITIONSOPTION));

    alpha1 = Float.parseFloat(getOption(ALPHA1, "0.01"));
    alpha2 = Float.parseFloat(getOption(ALPHA2, "1"));
    lambda1 = Float.parseFloat(getOption(LAMBDA1, "0.01"));
    lambda2 = Float.parseFloat(getOption(LAMBDA2, "0"));

    sampleRate = Float.parseFloat(getOption(SAMPLE_RATE, "0.0001f"));

    Configuration conf = getConf();
    if (conf == null) {
        throw new IOException("No Hadoop configuration present");
    }
    MIN_ERROR_CHANGE = conf.getLong(MIN_ERROR_CHANGE_STR, Long.MAX_VALUE);
    MAX_ROUNDS = conf.getInt(MAX_ROUNDS_STR, 100);

    run(conf, input, output, nRows, nCols, nPCs, nColPartitions);
    return 0;
}

From source file:com.twitter.algebra.nmf.RowColPartitioner.java

License:Apache License

@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    totalRows = conf.getInt(TOTAL_ROWS, -1);
    totalCols = conf.getInt(TOTAL_COLS, -1);
    totalColPartitions = conf.getInt(TOTAL_COL_PARTITIONS, -1);
    checkTotalKeys();
}

From source file:com.twitter.ambrose.hive.AmbroseHivePreHook.java

License:Apache License

/**
 * Waits <tt>ambrose.wf.between.sleep.seconds</tt> seconds before processing the
 * next statement (workflow) in the submitted script.
 *
 * @param hookContext
 * @param reporter
 * @param queryId
 */
private void waitBetween(HookContext hookContext, EmbeddedAmbroseHiveProgressReporter reporter,
        String queryId) {

    Configuration conf = hookContext.getConf();
    boolean justStarted = conf.getBoolean(SCRIPT_STARTED_PARAM, true);
    if (justStarted) {
        conf.setBoolean(SCRIPT_STARTED_PARAM, false);
    } else {
        // sleeping between workflows
        int sleepTimeSecs = conf.getInt(WF_BETWEEN_SLEEP_SECS_PARAM, 10);
        try {

            LOG.info("One workflow complete, sleeping for " + sleepTimeSecs
                    + " sec(s) before moving to the next one, if any. Hit ctrl-c to exit.");
            Thread.sleep(sleepTimeSecs * 1000L);

            //send progressbar reset event
            Map<WorkflowProgressField, String> eventData = Maps.newHashMapWithExpectedSize(1);
            eventData.put(WorkflowProgressField.workflowProgress, "0");
            reporter.pushEvent(queryId, new Event.WorkflowProgressEvent(eventData));

            reporter.saveEventStack();
            reporter.reset();
        } catch (InterruptedException e) {
            LOG.warn("Sleep interrupted", e);
        }
    }
}

From source file:com.twitter.elephanttwin.lzo.retrieval.LZOBlockOffsetMapper.java

License:Open Source License

@Override
protected void setup(
        Mapper<LongWritable, BinaryWritable<M>, TextLongPairWritable, LongPairWritable>.Context context)
        throws IOException, InterruptedException {

    long splitStart; // the start offset of the input split;
    long splitLength; // the length of the input split
    long splitEnd; // the last byte of this input split;

    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    splitStart = fileSplit.getStart();
    splitLength = fileSplit.getLength();
    splitEnd = splitStart + splitLength;
    Configuration conf = context.getConfiguration();
    maxBlockSize = Math.max(conf.getLong("dfs.block.size", 256 * 1024 * 1024), splitLength);
    //we don't want to create more indexed splits than original splits,
    //the original split size could be more than dfs.block.size.

    gapsize = conf.getInt(GAPSIZE_CONF, defaultGapSize);
    LOG.info("gap size allowed to cobmine blocks is set:" + gapsize);
    String valueClassName = context.getConfiguration().get(CLASSNAME_CONF);

    Class<?> c = BlockIndexedFileInputFormat.getValueClassByName(valueClassName);
    // column =
    // c.getDeclaredField(context.getConfiguration().get(COLUMNNAME));
    String methodName;
    String columnName = context.getConfiguration().get(COLUMNNAME_CONF);
    methodName = BlockIndexedFileInputFormat.getCamelCaseMethodName(columnName, c);

    try {
        method = c.getMethod(methodName);
    } catch (Exception e) {
        LOG.error("cannot instantiate the column to be read", e);
        throw new IOException(e);
    }

    Path file = fileSplit.getPath();
    fileSize = file.getFileSystem(context.getConfiguration()).getFileStatus(file).getLen();

    LzoIndex lzoBlockIndex = LzoIndex.readIndex(file.getFileSystem(context.getConfiguration()), file);

    if (lzoBlockIndex.isEmpty()) {

        throw new IOException("No LZO index file exists for the input file " + file.toString()
                + " cannot index the input file");
    }

    int num_lzo_blocks = lzoBlockIndex.getNumberOfBlocks();
    lastLZOBlockStartOffset = lzoBlockIndex.getPosition(num_lzo_blocks - 1);

    LOG.info(context.getTaskAttemptID() + " splitStart= " + splitStart + " splitEnd=" + splitEnd
            + " splitLength=" + splitLength);
    LOG.info(context.getTaskAttemptID() + ":total LZOblocks in this file: " + num_lzo_blocks);

    // first loop to get the range of block offsets in lzoBlockIndex this mapper
    // is responsible for;
    int startPos = 0;
    int endPos = 0;

    boolean foundStartPos = false;
    boolean foundEndPos = false;
    for (int i = 0; i < num_lzo_blocks; i++) {
        long currentBlockOffset = lzoBlockIndex.getPosition(i);
        if (currentBlockOffset >= splitStart) {
            if (!foundStartPos) {
                startPos = i;
                foundStartPos = true;
            }
        }
        if (currentBlockOffset >= splitEnd) {
            if (!foundEndPos) {
                endPos = i;
                foundEndPos = true;
            }
        }

        if (foundStartPos && foundEndPos)
            break;
    }

    if (!foundEndPos) {
        endPos = num_lzo_blocks - 1;
        totalLZOBlocks = endPos - startPos + 1;
        // this is the last split: copy from startPos to the end and additionally
        // append the end-of-file offset to the lzoOffsets array
    } else {
        if (endPos < num_lzo_blocks - 1)
            endPos++;
        if (endPos == num_lzo_blocks - 1) // treat as if it's the last split;
        {
            totalLZOBlocks = endPos - startPos + 1;
            foundEndPos = false;
        } else
            totalLZOBlocks = endPos - startPos;
    }

    // Special treatment for the first LZO block offset: due to the current
    // LzoIndex implementation, we have to use 0 for the first LZO block in any
    // LZO-compressed file, even though the actual start offset of the first
    // LZO block is not 0. Later we may consider changing the LZO-related
    // package so that all LZO block start offsets are treated the same way.

    lzoOffsets = new long[totalLZOBlocks + 1];

    if (foundEndPos) {
        for (int i = 0; i <= totalLZOBlocks; i++)
            lzoOffsets[i] = lzoBlockIndex.getPosition(i + startPos);
    } else {
        // treat the last InputSplit differently
        if (LOG.isDebugEnabled()) {
            LOG.debug(
                    "read the last lzo block offset, add the file end offset to the last element in the index array");
        }
        for (int i = 0; i < totalLZOBlocks; i++)
            lzoOffsets[i] = lzoBlockIndex.getPosition(i + startPos);
        lzoOffsets[totalLZOBlocks] = fileSize;
    }

    if (splitStart == 0) {
        lzoOffsets[0] = 0;
    }

    currentLzoBlock = 0;
    outputValue.setFirst(lzoOffsets[0]);
    outputValue.setSecond(lzoOffsets[1]);
    previousRowLineOffset = lzoOffsets[0];

    if (LOG.isDebugEnabled()) {
        LOG.debug("lzoOffsets= " + Arrays.toString(lzoOffsets));
        LOG.debug("lzoOffsets # of elements:" + lzoOffsets.length);
    }
}

From source file:com.twitter.elephanttwin.retrieval.BlockIndexedFileInputFormat.java

License:Apache License

/**
 * Go through each original inputsplit, get its file path, and check the
 * index file:
 * a) keep the split when there is no index prebuilt on this file
 *    (or the index file doesn't match the base file's checksum);
 * b) remove the split when no matching value is found in the existing index file;
 * c) construct new, smaller inputsplits using the indexed blocks found
 *    in the index file.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {

    String inputformat = job.getConfiguration().get(REALINPUTFORMAT);
    String valueClass = job.getConfiguration().get(VALUECLASS);

    List<InputSplit> filteredList = new ArrayList<InputSplit>();

    FileInputFormat<K, V> realInputFormat = getInputFormatClass(inputformat, valueClass);

    List<InputSplit> splits = realInputFormat.getSplits(job);

    // if this is an indexing job, don't skip any input splits;
    // if this is a searching job without a search filter, skip the index as well.
    if (isIndexingJob(job) || getFilterCondition(job) == null)
        return splits;

    Path prevFile = null; // remember the last input file we saw
    boolean foundIndexedFile = false; // is there an index file for prevFile?
    boolean firstTime = true; // is this the first time we see this file?

    long totalOriginalBytes = 0; //the bytes to be scanned without indexes.
    totalBytesNewSplits = 0;
    long startTime = System.currentTimeMillis();
    LOG.info("start filtering out original input splits (total " + splits.size() + ") using indexes");
    Configuration conf = job.getConfiguration();
    long splitMaxSize;

    // for each original input split check if we can filter it out.
    for (InputSplit split : splits) {
        FileSplit fileSplit = (FileSplit) split;
        Path path = fileSplit.getPath();
        splitLength = fileSplit.getLength();
        totalOriginalBytes += fileSplit.getLength();
        splitMaxSize = Math.max(splitLength,
                conf.getInt(INDEXED_SPLIT_SIZE, conf.getInt("dfs.block.size", 256 * 1024 * 1024)));

        /*
         * for each new file we see, we first check if it has been indexed or not;
         * if not, we just add the original input split; if yes, we use the index
         * file to add filtered splits for the file
         */
        if (prevFile != null && path.equals(prevFile)) {
            firstTime = false;
        } else {
            prevFile = path;
            firstTime = true;
            foundIndexedFile = foundIndexFile(job, path);
        }

        // if no index file, we'll have to read all original input
        // splits
        if (!foundIndexedFile)
            filteredList.add(fileSplit);
        else {
            // for each file, add its filtered input splits (built from the index
            // file) only once
            if (firstTime) {
                // LOG.info("first time saw " + path
                // + ", adding filtered splits from index file");
                filteredList.addAll(getFilteredSplits(job, path, fileSplit.getLocations(), splitMaxSize));
            }
        }
    }

    long endTime = System.currentTimeMillis();
    LOG.info("finished filtering out input splits, now total splits:" + filteredList.size() + ", seconds used: "
            + (endTime - startTime) / 1000);
    LOG.info(String.format("total bytes to read before filtering: %s," + " after filtering %s, bytes ratio: %s",
            totalOriginalBytes, totalBytesNewSplits, totalOriginalBytes / Math.max(1, totalBytesNewSplits)));
    return filteredList;
}

From source file:com.twitter.hraven.datasource.JobHistoryService.java

License:Apache License

public JobHistoryService(Configuration myConf) throws IOException {
    this.myConf = myConf;
    this.historyTable = new HTable(myConf, Constants.HISTORY_TABLE_BYTES);
    this.taskTable = new HTable(myConf, Constants.HISTORY_TASK_TABLE_BYTES);
    this.idService = new JobHistoryByIdService(this.myConf);
    this.defaultScannerCaching = myConf.getInt("hbase.client.scanner.caching", 100);
}

From source file:com.uber.hoodie.common.file.HoodieAppendLog.java

License:Apache License

/** Get the configured buffer size */
private static int getBufferSize(Configuration conf) {
    return conf.getInt("io.file.buffer.size", 4096);
}

From source file:com.willetinc.hadoop.mapreduce.dynamodb.AbstractSplitter.java

License:Apache License

@Override
public List<InputSplit> split(Configuration conf) throws IOException {

    // load configuration
    boolean interpolate = DynamoDBQueryInputFormat.getInterpolateAcrossRangeKeyValues(conf);

    Types hashKeyType = DynamoDBQueryInputFormat.getHashKeyType(conf);
    AttributeValue hashKeyValue = DynamoDBQueryInputFormat.getHashKeyValue(conf);

    Types rangeKeyType = DynamoDBQueryInputFormat.getRangeKeyType(conf);
    Collection<AttributeValue> rangeKeyValues = DynamoDBQueryInputFormat.getRangeKeyValues(conf);
    ComparisonOperator rangeKeyoperator = DynamoDBQueryInputFormat.getRangeKeyComparisonOperator(conf);
    AttributeValue minRangeKeyValue = DynamoDBQueryInputFormat.getRangeKeyInterpolateMinValue(conf);
    AttributeValue maxRangeKeyValue = DynamoDBQueryInputFormat.getRangeKeyInterpolateMaxValue(conf);

    // ensure DynamoDBQueryInputFormat was configured correctly
    if (interpolate) {
        rangeKeyValues = new ArrayList<AttributeValue>();
    } else {
        minRangeKeyValue = null;
        maxRangeKeyValue = null;
    }

    // compute number of input splits
    int numSplits = conf.getInt("mapred.map.tasks", 1);
    int numHashKeys = 1;
    int numRangeSplits = numSplits / numHashKeys;
    numRangeSplits = (!interpolate) ? 1 : numRangeSplits;
    numRangeSplits = (numRangeSplits <= 0) ? 1 : numRangeSplits;

    // generate input splits
    List<InputSplit> splits = new ArrayList<InputSplit>();

    // handle cases where interpolation is turned off or unnecessary
    if (!interpolate || numRangeSplits <= 1 || minRangeKeyValue == null || maxRangeKeyValue == null) {
        LOG.info("Generating 1 split for each HashKey");

        DynamoDBQueryInputFormat.DynamoDBQueryInputSplit split = new DynamoDBQueryInputFormat.DynamoDBQueryInputSplit(
                hashKeyType, hashKeyValue, rangeKeyType, rangeKeyValues, rangeKeyoperator);

        splits.add(split);
    } else {
        // interpolate between RangeKey values
        LOG.info(String.format("Generating %d RangeKey splits for each HashKey", numRangeSplits));

        if (null == hashKeyValue) {
            LOG.error("Cannot create a range when the HashKey is NULL. Ignoring range key interpolation.");
        } else {
            generateRangeKeySplits(conf, splits, hashKeyType, hashKeyValue, rangeKeyType, minRangeKeyValue,
                    maxRangeKeyValue, numRangeSplits);
        }
    }

    return splits;
}

From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java

License:Apache License

public static Types getHashKeyType(Configuration conf) {
    return Types.values()[conf.getInt(DynamoDBConfiguration.HASH_KEY_TYPE_PROPERTY, Types.STRING.ordinal())];
}

From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java

License:Apache License

public static Types getRangeKeyType(Configuration conf) {
    return Types.values()[conf.getInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, Types.STRING.ordinal())];
}