Example usage for org.apache.hadoop.conf Configuration setBoolean

List of usage examples for org.apache.hadoop.conf Configuration setBoolean

Introduction

On this page you can find example usages of org.apache.hadoop.conf Configuration setBoolean.

Prototype

public void setBoolean(String name, boolean value) 

Document

Set the value of the name property to a boolean.
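
Before the project examples below, here is a minimal, self-contained sketch of the call together with its getBoolean counterpart. The class name SetBooleanSketch and the property name my.example.flag are purely illustrative.

import org.apache.hadoop.conf.Configuration;

public class SetBooleanSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // store a boolean flag under an illustrative property name
        conf.setBoolean("my.example.flag", true);
        // read it back; the second argument is the default returned when the property is unset
        boolean flag = conf.getBoolean("my.example.flag", false);
        System.out.println("my.example.flag = " + flag);
    }
}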

Usage

From source file: com.trendmicro.hdfs.webdav.test.TestPropfindSimple.java

License: Apache License

@BeforeClass
public static void setup() throws Exception {
    Configuration conf = minicluster.getConfiguration();
    conf.set("hadoop.proxyuser." + UserGroupInformation.getCurrentUser().getShortUserName() + ".groups",
            "users");
    conf.set("hadoop.proxyuser." + UserGroupInformation.getCurrentUser().getShortUserName() + ".hosts",
            "localhost");
    conf.set("hadoop.webdav.authentication.type", "simple");
    conf.setBoolean("hadoop.webdav.authentication.simple.anonymous.allowed", true);

    minicluster.startMiniCluster(gatewayUser);
    LOG.info("Gateway started on port " + minicluster.getGatewayPort());

    FsPermission.setUMask(conf, new FsPermission((short) 0));

    FileSystem fs = minicluster.getTestFileSystem();
    Path path = new Path("/test");
    assertTrue(fs.mkdirs(path, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)));
    fs.setOwner(path, ownerUser.getShortUserName(), ownerUser.getGroupNames()[0]);

    ownerUser.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            FileSystem fs = minicluster.getTestFileSystem();
            for (Path dir : publicDirPaths) {
                assertTrue(
                        fs.mkdirs(dir, new FsPermission(FsAction.ALL, FsAction.READ_EXECUTE, FsAction.NONE)));
            }
            for (Path dir : privateDirPaths) {
                assertTrue(fs.mkdirs(dir, new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE)));
            }
            for (Path path : publicFilePaths) {
                FSDataOutputStream os = fs.create(path,
                        new FsPermission(FsAction.ALL, FsAction.READ, FsAction.NONE), true, 4096, (short) 1,
                        65536, null);
                assertNotNull(os);
                os.write(testPublicData.getBytes());
                os.close();
            }
            for (Path path : privateFilePaths) {
                FSDataOutputStream os = fs.create(path,
                        new FsPermission(FsAction.ALL, FsAction.READ, FsAction.NONE), true, 4096, (short) 1,
                        65536, null);
                assertNotNull(os);
                os.write(testPrivateData.getBytes());
                os.close();
            }
            return null;
        }
    });

}

From source file: com.trendmicro.hdfs.webdav.test.TestPutSimple.java

License: Apache License

@BeforeClass
public static void setup() throws Exception {
    Configuration conf = minicluster.getConfiguration();
    conf.set("hadoop.proxyuser." + UserGroupInformation.getCurrentUser().getShortUserName() + ".groups",
            "users");
    conf.set("hadoop.proxyuser." + UserGroupInformation.getCurrentUser().getShortUserName() + ".hosts",
            "localhost");
    conf.set("hadoop.webdav.authentication.type", "simple");
    conf.setBoolean("hadoop.webdav.authentication.simple.anonymous.allowed", true);

    minicluster.startMiniCluster(gatewayUser);
    LOG.info("Gateway started on port " + minicluster.getGatewayPort());

    FsPermission.setUMask(conf, new FsPermission((short) 0));

    FileSystem fs = minicluster.getTestFileSystem();
    Path path = new Path("/test");
    assertTrue(fs.mkdirs(path, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)));
    fs.setOwner(path, ownerUser.getShortUserName(), ownerUser.getGroupNames()[0]);

    ownerUser.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            FileSystem fs = minicluster.getTestFileSystem();
            assertTrue(fs.mkdirs(new Path("/test/rw"),
                    new FsPermission(FsAction.ALL, FsAction.WRITE_EXECUTE, FsAction.NONE)));
            assertTrue(fs.mkdirs(new Path("/test/ro"),
                    new FsPermission(FsAction.READ_EXECUTE, FsAction.NONE, FsAction.NONE)));
            assertTrue(fs.mkdirs(new Path("/test/public"),
                    new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)));
            return null;
        }
    });
}

From source file: com.tuplejump.calliope.hadoop.cql3.CqlConfigHelper.java

License: Apache License

public static void setMultirangeInputSplit(Configuration conf, boolean useMultirange) {
    conf.setBoolean(MULTIRANGE_INPUT_SPLIT, useMultirange);
}
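
A caller would typically invoke this helper while configuring a job, along the lines of the hedged snippet below; the job name and the use of Job.getInstance are illustrative and not taken from the project.

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "cql-multirange-job");
// the helper above stores the flag in the job configuration via setBoolean
CqlConfigHelper.setMultirangeInputSplit(job.getConfiguration(), true);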

From source file: com.twitter.algebra.matrix.multiply.AtB_DMJ.java

License: Apache License

/**
 * Perform A x B, where At and B refer to the paths that contain matrices in
 * {@link SequenceFileInputFormat}. One of At and B must also conform with
 * {@link MapDir} format. Refer to {@link AtB_DMJ} for further details.
 *
 * @param conf the initial configuration
 * @param mapDirPath path to the matrix in {@link MapDir} format
 * @param matrixInputPaths the list of paths to matrix input partitions over
 *          which we iterate
 * @param matrixOutputPath path to which AxB will be written
 * @param atCols number of columns of At (rows of A)
 * @param bCols number of columns of B
 * @param colsPerPartition cols per partition of the input matrix (whether At or B)
 * @param aIsMapDir is A chosen to be loaded as MapDir
 * @param useInMemCombiner whether to combine the map output in memory rather than with a combiner class
 * @return the running job
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public Job run(Configuration conf, Path mapDirPath, Path matrixInputPaths, Path matrixOutputPath, int atCols,
        int bCols, int colsPerPartition, boolean aIsMapDir, boolean useInMemCombiner)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.set(MATRIXINMEMORY, mapDirPath.toString());
    conf.setBoolean(AISMAPDIR, aIsMapDir);
    conf.setBoolean(USEINMEMCOMBINER, useInMemCombiner);
    conf.setInt(RESULTROWS, atCols);
    conf.setInt(RESULTCOLS, bCols);
    conf.setInt(PARTITIONCOLS, colsPerPartition);
    FileSystem fs = FileSystem.get(matrixOutputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPaths, "dmj");

    if (useInMemCombiner) {
        Configuration newConf = new Configuration(conf);
        newConf.set("mapreduce.task.io.sort.mb", "1");
        conf = newConf;
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(AtB_DMJ.class);
    job.setJobName(AtB_DMJ.class.getSimpleName());
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    matrixInputPaths = fs.makeQualified(matrixInputPaths);
    MultipleInputs.addInputPath(job, matrixInputPaths, SequenceFileInputFormat.class);

    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    if (!useInMemCombiner)
        job.setCombinerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "dmj");
    job.setNumReduceTasks(numReducers);
    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, atCols);

    job.setReducerClass(EpsilonReducer.class);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    return job;
}
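
The flags stored above with setBoolean are intended to be read back on the task side. A minimal, hypothetical sketch of a mapper recovering them in setup() follows; FlagAwareMapper is not the project's actual MyMapper, it only reuses the AISMAPDIR and USEINMEMCOMBINER constants referenced in the snippet above, and the default values are illustrative.

public static class FlagAwareMapper extends Mapper<IntWritable, VectorWritable, IntWritable, VectorWritable> {
    private boolean aIsMapDir;
    private boolean useInMemCombiner;

    @Override
    protected void setup(Context context) {
        Configuration conf = context.getConfiguration();
        // recover the flags the driver stored with conf.setBoolean(...)
        aIsMapDir = conf.getBoolean(AISMAPDIR, true);
        useInMemCombiner = conf.getBoolean(USEINMEMCOMBINER, false);
    }
}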

From source file: com.twitter.algebra.nmf.CompositeDMJ.java

License: Apache License

public Job run(Configuration conf, Path mapDirPath, Path matrixInputPaths, Path matrixOutputPath, int atCols,
        boolean aIsMapDir, String inMemCStr, int inMemCRows, int inMemCCols, float alpha1, float alpha2)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.set(MATRIXINMEMORY, inMemCStr);
    conf.setInt(MATRIXINMEMORYROWS, inMemCRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemCCols);

    conf.setFloat(ALPHA1, alpha1);
    conf.setFloat(ALPHA2, alpha2);

    FileSystem fs = FileSystem.get(matrixOutputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPaths, "compositedmj");

    conf.set(MAPDIRMATRIX, mapDirPath.toString());
    conf.setBoolean(AISMAPDIR, aIsMapDir);
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(CompositeDMJ.class);
    job.setJobName(CompositeDMJ.class.getSimpleName() + "-" + matrixOutputPath.getName());
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    matrixInputPaths = fs.makeQualified(matrixInputPaths);
    MultipleInputs.addInputPath(job, matrixInputPaths, SequenceFileInputFormat.class);

    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, atCols);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    return job;
}

From source file: com.twitter.ambrose.hive.AmbroseHivePreHook.java

License: Apache License

/**
 * Waits <tt>ambrose.wf.between.sleep.seconds</tt> before processing the
 * next statement (workflow) in the submitted script.
 *
 * @param hookContext
 * @param reporter
 * @param queryId
 */
private void waitBetween(HookContext hookContext, EmbeddedAmbroseHiveProgressReporter reporter,
        String queryId) {

    Configuration conf = hookContext.getConf();
    boolean justStarted = conf.getBoolean(SCRIPT_STARTED_PARAM, true);
    if (justStarted) {
        conf.setBoolean(SCRIPT_STARTED_PARAM, false);
    } else {
        // sleeping between workflows
        int sleepTimeSec = conf.getInt(WF_BETWEEN_SLEEP_SECS_PARAM, 10);
        try {

            LOG.info("One workflow complete, sleeping for " + sleepTimeSec
                    + " sec(s) before moving to the next one if exists. Hit ctrl-c to exit.");
            Thread.sleep(sleepTimeSec * 1000L);

            //send progressbar reset event
            Map<WorkflowProgressField, String> eventData = Maps.newHashMapWithExpectedSize(1);
            eventData.put(WorkflowProgressField.workflowProgress, "0");
            reporter.pushEvent(queryId, new Event.WorkflowProgressEvent(eventData));

            reporter.saveEventStack();
            reporter.reset();
        } catch (InterruptedException e) {
            LOG.warn("Sleep interrupted", e);
        }
    }
}

From source file: com.twitter.elephanttwin.indexing.AbstractBlockIndexingJob.java

License: Open Source License

public int work(Calendar start, Calendar end, int batchId) {

    LOG.info("Starting up indexer...");
    LOG.info(" - input: " + Joiner.on(" ").join(getInput()));
    LOG.info(" - index: " + getIndex());
    LOG.info(" - number of reducers: " + getNumPartitions());

    Configuration conf = getConf();
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    totalInputFiles = 0; // total number files from input directories

    try {
        ExecutorService pool = Executors.newFixedThreadPool(getJobPoolSize());

        for (String s : getInput()) {
            Path spath = new Path(s);
            FileSystem fs = spath.getFileSystem(conf);

            List<FileStatus> stats = Lists.newArrayList();

            // get all files from the input paths/directories
            HdfsUtils.addInputPathRecursively(stats, fs, spath, HdfsUtils.hiddenDirectoryFilter,
                    getFileFilter());

            totalInputFiles += stats.size();
            LOG.info(" total files under " + s + ":" + stats.size());

            if (isDryRun()) {
                continue;
            }

            int filesToIndex = 0;
            for (FileStatus stat : stats) {
                // check if we still want to index the file
                if (!fileIsOk(stat, fs)) {
                    continue;
                }
                if (!doOverwrite()) {
                    if (hasPreviousIndex(stat, fs))
                        continue;
                }
                filesToIndex++;
                totalFiles2Index++;
                Job job = setupJob(conf);
                job = setMapper(job);
                FileInputFormat.setInputPaths(job, stat.getPath());
                job.setJobName(getJobName() + ":" + stat.getPath());
                Thread.sleep(getSleepTime());
                pool.execute(new IndexingWorker(job, stat, fs));
            }

            LOG.info("total files submitted for indexing under" + s + ":" + filesToIndex);
        }

        if (isDryRun()) {
            return 0;
        }

        while (finishedJobs.get() < totalFiles2Index) {
            Thread.sleep(getSleepTime());
        }
        LOG.info(" total number of files from input directories: " + totalInputFiles);
        LOG.info(" total number of files submitted for indexing job: " + totalFiles2Index);
        LOG.info(" number of files successfully indexed is: " + indexedFiles);
        if (failedFiles.size() > 0)
            LOG.info(" these files were not indexed:" + Arrays.toString(failedFiles.toArray()));
        else
            LOG.info(" all files have been successfully indexed");
        pool.shutdown();
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    }

    if (totalFiles2Index == 0)
        return 0;
    else if (totalFiles2Index != indexedFiles.get())
        return -1;
    else
        return 1;
}

From source file: com.twitter.elephanttwin.lucene.indexing.AbstractLuceneIndexingJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    LOG = Logger.getLogger(this.getClass());
    params = newIndexConfig();

    LOG.info("Starting up indexer...");
    LOG.info(" - input: " + Joiner.on(" ").join(IndexConfig.input.get()));
    LOG.info(" - index: " + IndexConfig.index);
    LOG.info(" - number of shards: " + IndexConfig.numPartitions.get());

    Configuration conf = getConf();

    conf.set(AbstractLuceneIndexingReducer.HDFS_INDEX_LOCATION, IndexConfig.index.get());
    conf.set(AbstractLuceneIndexingReducer.ANALYZER, IndexConfig.analyzer.get());
    conf.set(AbstractLuceneIndexingReducer.SIMILARITY, IndexConfig.similarity.get());
    conf.setInt(AbstractSamplingIndexingMapper.SAMPLE_PERCENTAGE, IndexConfig.samplePercentage.get());

    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    Job job = new Job(conf, getJobName(params));

    // Job's constructor copies conf; we need a reference to the one the job
    // is actually using
    conf = job.getConfiguration();

    job.setJarByClass(this.getClass());

    job.setNumReduceTasks(IndexConfig.numPartitions.get());

    for (String s : IndexConfig.input.get()) {
        Path spath = new Path(s);
        FileSystem fs = spath.getFileSystem(getConf());
        List<FileStatus> stats = Lists.newArrayList();
        addInputPathRecursively(stats, fs, spath, HdfsUtils.HIDDEN_FILE_FILTER);
        for (FileStatus foundStat : stats) {
            FileInputFormat.addInputPath(job, foundStat.getPath());
        }
    }

    FileOutputFormat.setOutputPath(job, new Path(IndexConfig.index.get()));

    setupJob(job);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(IndexConfig.index.get());
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    LOG.info("Job " + getJobName(params) + " started.");
    // TODO Jimmy has a parameter that controls whether we wait in Thud but not in ES.
    // when would we not want to wait?
    job.waitForCompletion(true);
    LOG.info("Job " + getJobName(params) + " Finished in " + (System.currentTimeMillis() - startTime) / 1000.0
            + " seconds");

    if (job.isSuccessful()) {
        writeIndexDescriptors(getIndexDescriptor());
    }
    return job.isSuccessful() ? 0 : 1;
}

From source file: com.twitter.elephanttwin.retrieval.BlockIndexedFileInputFormat.java

License: Apache License

/** Set values for the underlying inputformat class.
 * We allow the indexing job to have filters for debugging/testing purposes.
 * Thus, relying on whether there is a filter to determine
 * whether it is an indexing or a searching job is no longer sufficient.
 * @param job
 * @param inputformatClass
 * @param valueClass
 * @param indexDir
 * @param filterConditions
 * @param indexColumn
 * @param indexingFlag true for indexing jobs; false for searching jobs
 */

private static void setOptions(Job job, String inputformatClass, String valueClass, String indexDir,
        String filterConditions, String indexColumn, boolean indexingFlag) {
    Configuration conf = job.getConfiguration();
    conf.set(REALINPUTFORMAT, inputformatClass);
    conf.set(VALUECLASS, valueClass);
    conf.set(INDEXDIR, indexDir);
    if (filterConditions != null)
        conf.set(FILTERCONDITIONS, filterConditions);
    if (indexColumn != null)
        conf.set(COLUMNNAME, indexColumn);
    conf.setBoolean(INDEXINGJOBFLAG, indexingFlag);
}

From source file: com.twitter.hraven.etl.JobFileProcessor.java

License: Apache License

public int run(String[] args) throws Exception {

    Configuration hbaseConf = HBaseConfiguration.create(getConf());

    // Grab input args and allow for -Dxyz style arguments
    String[] otherArgs = new GenericOptionsParser(hbaseConf, args).getRemainingArgs();

    // Grab the arguments we're looking for.
    CommandLine commandLine = parseArgs(otherArgs);

    // Grab the cluster argument
    String cluster = commandLine.getOptionValue("c");
    LOG.info("cluster=" + cluster);

    // Number of parallel threads to use
    int threadCount = 1;
    if (commandLine.hasOption("t")) {
        try {
            threadCount = Integer.parseInt(commandLine.getOptionValue("t"));
        } catch (NumberFormatException nfe) {
            throw new IllegalArgumentException(
                    "Provided thread-count argument (-t) is not a number: " + commandLine.getOptionValue("t"),
                    nfe);
        }
        if (threadCount < 1) {
            throw new IllegalArgumentException(
                    "Cannot run fewer than 1 thread. Provided thread-count argument (-t): " + threadCount);
        }
    }
    LOG.info("threadCount=" + threadCount);

    boolean reprocess = commandLine.hasOption("r");
    LOG.info("reprocess=" + reprocess);

    // Grab the batch-size argument
    int batchSize;
    if (commandLine.hasOption("b")) {
        try {
            batchSize = Integer.parseInt(commandLine.getOptionValue("b"));
        } catch (NumberFormatException nfe) {
            throw new IllegalArgumentException(
                    "batch size option -b is is not a valid number: " + commandLine.getOptionValue("b"), nfe);
        }
        // Additional check
        if (batchSize < 1) {
            throw new IllegalArgumentException(
                    "Cannot process files in batches smaller than 1. Specified batch size option -b is: "
                            + commandLine.getOptionValue("b"));
        }
    } else {
        batchSize = DEFAULT_BATCH_SIZE;
    }

    // Grab the costfile argument

    String costFilePath = commandLine.getOptionValue("zf");
    LOG.info("cost properties file on hdfs=" + costFilePath);
    if (costFilePath == null)
        costFilePath = Constants.COST_PROPERTIES_HDFS_DIR;
    Path hdfsPath = new Path(costFilePath + Constants.COST_PROPERTIES_FILENAME);
    // add to distributed cache
    DistributedCache.addCacheFile(hdfsPath.toUri(), hbaseConf);

    // Grab the machine type argument
    String machineType = commandLine.getOptionValue("m");
    // set it as part of conf so that the
    // hRaven job can access it in the mapper
    hbaseConf.set(Constants.HRAVEN_MACHINE_TYPE, machineType);

    // check if the re-aggregate option is forced on
    // if yes, we need to aggregate for this job in spite of
    // the job having aggregation-done status in the raw table
    boolean reAggregateFlagValue = false;
    if (commandLine.hasOption("ra")) {
        String reaggregateFlag = commandLine.getOptionValue("ra");
        // set it as part of conf so that the
        // hRaven jobProcessor can access it in the mapper
        if (StringUtils.isNotBlank(reaggregateFlag)) {
            LOG.info(" reaggregateFlag is: " + reaggregateFlag);
            if (StringUtils.equalsIgnoreCase(reaggregateFlag, Boolean.TRUE.toString())) {
                reAggregateFlagValue = true;
            }
        }
    }
    LOG.info(AggregationConstants.RE_AGGREGATION_FLAG_NAME + "=" + reAggregateFlagValue);
    hbaseConf.setBoolean(AggregationConstants.RE_AGGREGATION_FLAG_NAME, reAggregateFlagValue);

    // set aggregation to off by default
    boolean aggFlagValue = false;
    if (commandLine.hasOption("a")) {
        String aggregateFlag = commandLine.getOptionValue("a");
        // set it as part of conf so that the
        // hRaven jobProcessor can access it in the mapper
        if (StringUtils.isNotBlank(aggregateFlag)) {
            LOG.info(" aggregateFlag is: " + aggregateFlag);
            if (StringUtils.equalsIgnoreCase(aggregateFlag, Boolean.TRUE.toString())) {
                aggFlagValue = true;
            }
        }
    }
    if (reprocess) {
        // turn off aggregation if reprocessing is true
        // we don't want to inadvertently aggregate again while re-processing
        // re-aggregation needs to be a conscious setting
        aggFlagValue = false;
    }
    LOG.info(AggregationConstants.AGGREGATION_FLAG_NAME + "=" + aggFlagValue);
    hbaseConf.setBoolean(AggregationConstants.AGGREGATION_FLAG_NAME, aggFlagValue);

    String processFileSubstring = null;
    if (commandLine.hasOption("p")) {
        processFileSubstring = commandLine.getOptionValue("p");
    }
    LOG.info("processFileSubstring=" + processFileSubstring);

    // hbase.client.keyvalue.maxsize somehow defaults to 10 MB and we have
    // history files exceeding that. Disable limit.
    hbaseConf.setInt("hbase.client.keyvalue.maxsize", 0);

    // Shove this into the jobConf so that we can get it out on the task side.
    hbaseConf.setStrings(Constants.CLUSTER_JOB_CONF_KEY, cluster);

    boolean success = false;
    if (reprocess) {
        success = reProcessRecords(hbaseConf, cluster, batchSize, threadCount);
    } else {
        success = processRecords(hbaseConf, cluster, batchSize, threadCount, processFileSubstring);
    }

    // Return the status
    return success ? 0 : 1;
}