Example usage for org.apache.hadoop.conf Configuration setInt


Introduction

This page collects example usages of org.apache.hadoop.conf.Configuration.setInt.

Prototype

public void setInt(String name, int value) 

Document

Set the value of the name property to an int.
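
Before the real-world usages below, here is a minimal, self-contained sketch (not taken from any of the projects on this page) showing the typical round trip: a value stored with setInt is later read back with getInt, whose second argument is the default returned when the key is absent. The property names are made up for illustration.

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store an int under a hypothetical property name.
        conf.setInt("example.num.partitions", 8);

        // Read it back; 1 would be used only if the key were absent.
        int partitions = conf.getInt("example.num.partitions", 1);
        System.out.println("partitions = " + partitions); // prints 8

        // A key that was never set falls back to the supplied default.
        int missing = conf.getInt("example.not.set", 42);
        System.out.println("missing = " + missing); // prints 42
    }
}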

Usage

From source file:com.twitter.algebra.nmf.ErrDMJ.java

License:Apache License

public Job run(Configuration conf, Path xPath, Path matrixAInputPath, Path ytPath, Path outPath, int aRows,
        int ytRows, int ytCols) throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.set(MAPDIRMATRIXX, xPath.toString());
    conf.set(MAPDIRMATRIXYT, ytPath.toString());
    conf.setInt(YTROWS, ytRows);
    conf.setInt(YTCOLS, ytCols);
    FileSystem fs = FileSystem.get(outPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixAInputPath, "err");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(ErrDMJ.class);
    job.setJobName(ErrDMJ.class.getSimpleName() + "-" + outPath.getName());

    matrixAInputPath = fs.makeQualified(matrixAInputPath);
    MultipleInputs.addInputPath(job, matrixAInputPath, SequenceFileInputFormat.class);

    outPath = fs.makeQualified(outPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    int numReducers = 1;
    job.setNumReduceTasks(numReducers);
    job.setCombinerClass(SumVectorsReducer.class);
    job.setReducerClass(SumVectorsReducer.class);

    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed! ");
    return job;
}

From source file:com.twitter.algebra.nmf.SampleColsJob.java

License:Apache License

public void run(Configuration conf, Path matrixInputPath, int cols, Path matrixOutputPath, float sampleRate)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.setFloat(SAMPLERATE, sampleRate);
    conf.setInt(COLS, cols);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "samplecol");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(SampleColsJob.class);
    job.setJobName(SampleColsJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}

From source file:com.twitter.algebra.nmf.XtXJob.java

License:Apache License

public void run(Configuration conf, Path matrixInputPath, int numCols, String xmPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.setInt(MATRIXCOLS, numCols);
    //    conf.set(XMPATH, xmPath);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, new Path[] { matrixInputPath }, "xtx");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJobName("XtXJob-" + matrixOutputPath.getName());
    job.setJarByClass(XtXJob.class);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "xtx");
    job.setNumReduceTasks(numReducers);
    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, numCols);

    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    job.waitForCompletion(true);
}

From source file:com.twitter.algebra.TransposeJob.java

License:Apache License

/**
 * Perform transpose of A, where A refers to the path that contains a matrix
 * in {@link SequenceFileInputFormat}.
 * 
 * @param conf
 *          the initial configuration
 * @param matrixInputPath
 *          the path to the input files that we process
 * @param matrixOutputPath
 *          the path of the resulting transpose matrix
 * @param numInputRows
 *          the number of rows in the input matrix
 * @param numInputCols
 *          the number of columns in the input matrix
 * @return the running job
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int numInputRows,
        int numInputCols) throws IOException, InterruptedException, ClassNotFoundException {
    conf.setInt(NUM_ORIG_ROWS_KEY, numInputRows);
    conf.setInt(RowPartitioner.TOTAL_KEYS, numInputCols);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "transpose");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(TransposeJob.class);
    job.setJobName(TransposeJob.class.getSimpleName());

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(TransposeMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "transpose");
    job.setNumReduceTasks(numReducers);
    //    job.setPartitionerClass(RowPartitioner.IntRowPartitioner.class);
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, numInputCols);
    job.setCombinerClass(MergeVectorsCombiner.class);
    job.setReducerClass(MergeVectorsReducer.class);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}

From source file:com.twitter.elephanttwin.lucene.indexing.AbstractLuceneIndexingJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    LOG = Logger.getLogger(this.getClass());
    params = newIndexConfig();

    LOG.info("Starting up indexer...");
    LOG.info(" - input: " + Joiner.on(" ").join(IndexConfig.input.get()));
    LOG.info(" - index: " + IndexConfig.index);
    LOG.info(" - number of shards: " + IndexConfig.numPartitions.get());

    Configuration conf = getConf();

    conf.set(AbstractLuceneIndexingReducer.HDFS_INDEX_LOCATION, IndexConfig.index.get());
    conf.set(AbstractLuceneIndexingReducer.ANALYZER, IndexConfig.analyzer.get());
    conf.set(AbstractLuceneIndexingReducer.SIMILARITY, IndexConfig.similarity.get());
    conf.setInt(AbstractSamplingIndexingMapper.SAMPLE_PERCENTAGE, IndexConfig.samplePercentage.get());

    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    Job job = new Job(conf, getJobName(params));

    // Job's constructor copies conf; we need a reference to the one the
    // job is actually using
    conf = job.getConfiguration();

    job.setJarByClass(this.getClass());

    job.setNumReduceTasks(IndexConfig.numPartitions.get());

    for (String s : IndexConfig.input.get()) {
        Path spath = new Path(s);
        FileSystem fs = spath.getFileSystem(getConf());
        List<FileStatus> stats = Lists.newArrayList();
        addInputPathRecursively(stats, fs, spath, HdfsUtils.HIDDEN_FILE_FILTER);
        for (FileStatus foundStat : stats) {
            FileInputFormat.addInputPath(job, foundStat.getPath());
        }
    }

    FileOutputFormat.setOutputPath(job, new Path(IndexConfig.index.get()));

    setupJob(job);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(IndexConfig.index.get());
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    LOG.info("Job " + getJobName(params) + " started.");
    // TODO Jimmy has a parameter that controls whether we wait in Thud but not in ES.
    // when would we not want to wait?
    job.waitForCompletion(true);
    LOG.info("Job " + getJobName(params) + " Finished in " + (System.currentTimeMillis() - startTime) / 1000.0
            + " seconds");

    if (job.isSuccessful()) {
        writeIndexDescriptors(getIndexDescriptor());
    }
    return job.isSuccessful() ? 0 : 1;
}

From source file:com.twitter.hraven.etl.JobFileProcessor.java

License:Apache License

public int run(String[] args) throws Exception {

    Configuration hbaseConf = HBaseConfiguration.create(getConf());

    // Grab input args and allow for -Dxyz style arguments
    String[] otherArgs = new GenericOptionsParser(hbaseConf, args).getRemainingArgs();

    // Grab the arguments we're looking for.
    CommandLine commandLine = parseArgs(otherArgs);

    // Grab the cluster argument
    String cluster = commandLine.getOptionValue("c");
    LOG.info("cluster=" + cluster);

    // Number of parallel threads to use
    int threadCount = 1;
    if (commandLine.hasOption("t")) {
        try {
            threadCount = Integer.parseInt(commandLine.getOptionValue("t"));
        } catch (NumberFormatException nfe) {
            throw new IllegalArgumentException(
                    "Provided thread-count argument (-t) is not a number: " + commandLine.getOptionValue("t"),
                    nfe);
        }
        if (threadCount < 1) {
            throw new IllegalArgumentException(
                    "Cannot run fewer than 1 thread. Provided thread-count argument (-t): " + threadCount);
        }
    }
    LOG.info("threadCount=" + threadCount);

    boolean reprocess = commandLine.hasOption("r");
    LOG.info("reprocess=" + reprocess);

    // Grab the batch-size argument
    int batchSize;
    if (commandLine.hasOption("b")) {
        try {
            batchSize = Integer.parseInt(commandLine.getOptionValue("b"));
        } catch (NumberFormatException nfe) {
            throw new IllegalArgumentException(
                    "batch size option -b is is not a valid number: " + commandLine.getOptionValue("b"), nfe);
        }
        // Additional check
        if (batchSize < 1) {
            throw new IllegalArgumentException(
                    "Cannot process files in batches smaller than 1. Specified batch size option -b is: "
                            + commandLine.getOptionValue("b"));
        }
    } else {
        batchSize = DEFAULT_BATCH_SIZE;
    }

    // Grab the costfile argument

    String costFilePath = commandLine.getOptionValue("zf");
    LOG.info("cost properties file on hdfs=" + costFilePath);
    if (costFilePath == null)
        costFilePath = Constants.COST_PROPERTIES_HDFS_DIR;
    Path hdfsPath = new Path(costFilePath + Constants.COST_PROPERTIES_FILENAME);
    // add to distributed cache
    DistributedCache.addCacheFile(hdfsPath.toUri(), hbaseConf);

    // Grab the machine type argument
    String machineType = commandLine.getOptionValue("m");
    // set it as part of conf so that the
    // hRaven job can access it in the mapper
    hbaseConf.set(Constants.HRAVEN_MACHINE_TYPE, machineType);

    // check if the re-aggregate option is forced on;
    // if yes, we need to aggregate for this job in spite of the
    // job having aggregation-done status in the raw table
    boolean reAggregateFlagValue = false;
    if (commandLine.hasOption("ra")) {
        String reaggregateFlag = commandLine.getOptionValue("ra");
        // set it as part of conf so that the
        // hRaven jobProcessor can access it in the mapper
        if (StringUtils.isNotBlank(reaggregateFlag)) {
            LOG.info(" reaggregateFlag is: " + reaggregateFlag);
            if (StringUtils.equalsIgnoreCase(reaggregateFlag, Boolean.TRUE.toString())) {
                reAggregateFlagValue = true;
            }
        }
    }
    LOG.info(AggregationConstants.RE_AGGREGATION_FLAG_NAME + "=" + reAggregateFlagValue);
    hbaseConf.setBoolean(AggregationConstants.RE_AGGREGATION_FLAG_NAME, reAggregateFlagValue);

    // set aggregation to off by default
    boolean aggFlagValue = false;
    if (commandLine.hasOption("a")) {
        String aggregateFlag = commandLine.getOptionValue("a");
        // set it as part of conf so that the
        // hRaven jobProcessor can access it in the mapper
        if (StringUtils.isNotBlank(aggregateFlag)) {
            LOG.info(" aggregateFlag is: " + aggregateFlag);
            if (StringUtils.equalsIgnoreCase(aggregateFlag, Boolean.TRUE.toString())) {
                aggFlagValue = true;
            }
        }
    }
    if (reprocess) {
        // turn off aggregation if reprocessing is true
        // we don't want to inadvertently aggregate again while re-processing
        // re-aggregation needs to be a conscious setting
        aggFlagValue = false;
    }
    LOG.info(AggregationConstants.AGGREGATION_FLAG_NAME + "=" + aggFlagValue);
    hbaseConf.setBoolean(AggregationConstants.AGGREGATION_FLAG_NAME, aggFlagValue);

    String processFileSubstring = null;
    if (commandLine.hasOption("p")) {
        processFileSubstring = commandLine.getOptionValue("p");
    }
    LOG.info("processFileSubstring=" + processFileSubstring);

    // hbase.client.keyvalue.maxsize somehow defaults to 10 MB and we have
    // history files exceeding that. Disable limit.
    hbaseConf.setInt("hbase.client.keyvalue.maxsize", 0);

    // Shove this into the jobConf so that we can get it out on the task side.
    hbaseConf.setStrings(Constants.CLUSTER_JOB_CONF_KEY, cluster);

    boolean success = false;
    if (reprocess) {
        success = reProcessRecords(hbaseConf, cluster, batchSize, threadCount);
    } else {
        success = processRecords(hbaseConf, cluster, batchSize, threadCount, processFileSubstring);
    }

    // Return the status
    return success ? 0 : 1;
}

From source file:com.twitter.hraven.etl.JobFileRawLoader.java

License:Apache License

public int run(String[] args) throws ParseException, IOException, ClassNotFoundException, InterruptedException {

    Configuration myHBaseConf = HBaseConfiguration.create(getConf());
    hdfs = FileSystem.get(myHBaseConf);

    // Grab input args and allow for -Dxyz style arguments
    String[] otherArgs = new GenericOptionsParser(myHBaseConf, args).getRemainingArgs();

    // Grab the arguments we're looking for.
    CommandLine commandLine = parseArgs(otherArgs);

    String input = null;
    boolean inputSpecified = commandLine.hasOption("i");
    if (inputSpecified) {
        // Grab the input path argument
        input = commandLine.getOptionValue("i");
        LOG.info("input=" + input);
    } else {
        LOG.info("Processing input from HBase ProcessRecords");
    }

    // Grab the cluster argument
    String cluster = commandLine.getOptionValue("c");
    LOG.info("cluster=" + cluster);

    String processFileSubstring = null;
    if (commandLine.hasOption("p")) {
        processFileSubstring = commandLine.getOptionValue("p");
    }
    LOG.info("processFileSubstring=" + processFileSubstring);

    boolean forceReprocess = commandLine.hasOption("f");
    LOG.info("forceReprocess: " + forceReprocess);

    // hbase.client.keyvalue.maxsize somehow defaults to 10 MB and we have
    // history files exceeding that. Disable limit.
    myHBaseConf.setInt("hbase.client.keyvalue.maxsize", 0);

    // Shove this into the jobConf so that we can get it out on the task side.
    myHBaseConf.setStrings(Constants.CLUSTER_JOB_CONF_KEY, cluster);

    boolean success = processRecordsFromHBase(myHBaseConf, cluster, processFileSubstring, forceReprocess);

    // Return the status
    return success ? 0 : 1;
}

From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java

License:Apache License

public boolean testTask(TaskType taskType, String confParamName, long durationMin, final int MAX_RUN,
        float progress, boolean enforce, boolean dryRun, TIPStatus status, boolean wellBahaved, boolean killed)
        throws Exception {
    setTaskAttemptXML(durationMin * MIN, progress);

    TaskReport taskReport = mock(TaskReport.class);
    when(taskReport.getCurrentStatus()).thenReturn(status);
    Collection<TaskAttemptID> attempts = new ArrayList<TaskAttemptID>();
    attempts.add(taskAttemptId);
    when(taskReport.getRunningTaskAttemptIds()).thenReturn(attempts);
    when(taskReport.getTaskID()).thenReturn(org.apache.hadoop.mapred.TaskID.downgrade(taskId));
    when(taskReport.getProgress()).thenReturn(progress);

    vConf.setBoolean(HadoopJobMonitorConfiguration.DRY_RUN, dryRun);
    Configuration remoteAppConf = new Configuration();
    remoteAppConf.setInt(confParamName, MAX_RUN);
    remoteAppConf.setBoolean(HadoopJobMonitorConfiguration.enforced(confParamName), enforce);
    when(taskReport.getStartTime()).thenReturn(now - durationMin * MIN);
    AppConfiguraiton appConf = new AppConfiguraiton(remoteAppConf, vConf);
    AppConfCache.getInstance().put(appId, appConf);
    appStatusChecker.init();
    appStatusChecker.loadClientService();

    boolean res = appStatusChecker.checkTask(taskType, taskReport, now);

    if (wellBahaved)
        assertEquals("Well-behaved task does not pass the check", wellBahaved, res);
    else
        assertEquals("Not well-behaved task passes the check", wellBahaved, res);
    if (killed) {
        killCounter++;
        verify(clientService, times(killCounter)).killTask(any(TaskAttemptID.class), Mockito.anyBoolean());
    } else
        verify(clientService, times(killCounter)).killTask(any(TaskAttemptID.class), Mockito.anyBoolean());
    return res;
}

From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java

License:Apache License

@Test
public void testUnsetEnforce() throws IOException, ConfigurationAccessException {
    Configuration remoteAppConf = new Configuration();
    remoteAppConf.setInt(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN, 10);
    //remoteAppConf.setBoolean(HadoopJobMonitorConfiguration.enforced(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN), true);
    when(appReport.getStartTime()).thenReturn(now - 15 * MIN);

    AppConfiguraiton appConf = new AppConfiguraiton(remoteAppConf, vConf);
    AppConfCache.getInstance().put(appId, appConf);
    appStatusChecker.init();

    boolean res = appStatusChecker.checkApp();
    Assert.assertTrue("fails job duration check even though enforce is not set", res);
}

From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java

License:Apache License

@Test
public void testLongJobDryRun() throws IOException, ConfigurationAccessException, YarnException {
    Configuration remoteAppConf = new Configuration();
    remoteAppConf.setInt(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN, 10);
    remoteAppConf.setBoolean(
            HadoopJobMonitorConfiguration.enforced(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN), true);
    when(appReport.getStartTime()).thenReturn(now - 15 * MIN);

    AppConfiguraiton appConf = new AppConfiguraiton(remoteAppConf, vConf);
    AppConfCache.getInstance().put(appId, appConf);
    appStatusChecker.init();

    boolean res = appStatusChecker.checkApp();
    Assert.assertFalse("does not fail job duration check even though enforce is set", res);
    verify(rm, times(0)).killApplication(appId);
}