List of usage examples for org.apache.hadoop.conf Configuration setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean.
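Before the project-specific examples below, a minimal, self-contained sketch of the call itself; the property name example.feature.enabled is made up for illustration and does not appear in any of the source files listed here:

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a boolean flag under an illustrative (made-up) property name.
        conf.setBoolean("example.feature.enabled", true);

        // getBoolean returns the stored value, or the supplied default
        // when the property has never been set.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled);
    }
}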
From source file:com.trendmicro.hdfs.webdav.test.TestPropfindSimple.java
License:Apache License
@BeforeClass
public static void setup() throws Exception {
    Configuration conf = minicluster.getConfiguration();
    conf.set("hadoop.proxyuser." + UserGroupInformation.getCurrentUser().getShortUserName() + ".groups",
            "users");
    conf.set("hadoop.proxyuser." + UserGroupInformation.getCurrentUser().getShortUserName() + ".hosts",
            "localhost");
    conf.set("hadoop.webdav.authentication.type", "simple");
    conf.setBoolean("hadoop.webdav.authentication.simple.anonymous.allowed", true);

    minicluster.startMiniCluster(gatewayUser);
    LOG.info("Gateway started on port " + minicluster.getGatewayPort());

    FsPermission.setUMask(conf, new FsPermission((short) 0));

    FileSystem fs = minicluster.getTestFileSystem();
    Path path = new Path("/test");
    assertTrue(fs.mkdirs(path, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)));
    fs.setOwner(path, ownerUser.getShortUserName(), ownerUser.getGroupNames()[0]);

    ownerUser.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            FileSystem fs = minicluster.getTestFileSystem();
            for (Path dir : publicDirPaths) {
                assertTrue(
                        fs.mkdirs(dir, new FsPermission(FsAction.ALL, FsAction.READ_EXECUTE, FsAction.NONE)));
            }
            for (Path dir : privateDirPaths) {
                assertTrue(fs.mkdirs(dir, new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE)));
            }
            for (Path path : publicFilePaths) {
                FSDataOutputStream os = fs.create(path,
                        new FsPermission(FsAction.ALL, FsAction.READ, FsAction.NONE), true, 4096, (short) 1,
                        65536, null);
                assertNotNull(os);
                os.write(testPublicData.getBytes());
                os.close();
            }
            for (Path path : privateFilePaths) {
                FSDataOutputStream os = fs.create(path,
                        new FsPermission(FsAction.ALL, FsAction.READ, FsAction.NONE), true, 4096, (short) 1,
                        65536, null);
                assertNotNull(os);
                os.write(testPrivateData.getBytes());
                os.close();
            }
            return null;
        }
    });
}
From source file:com.trendmicro.hdfs.webdav.test.TestPutSimple.java
License:Apache License
@BeforeClass
public static void setup() throws Exception {
    Configuration conf = minicluster.getConfiguration();
    conf.set("hadoop.proxyuser." + UserGroupInformation.getCurrentUser().getShortUserName() + ".groups",
            "users");
    conf.set("hadoop.proxyuser." + UserGroupInformation.getCurrentUser().getShortUserName() + ".hosts",
            "localhost");
    conf.set("hadoop.webdav.authentication.type", "simple");
    conf.setBoolean("hadoop.webdav.authentication.simple.anonymous.allowed", true);

    minicluster.startMiniCluster(gatewayUser);
    LOG.info("Gateway started on port " + minicluster.getGatewayPort());

    FsPermission.setUMask(conf, new FsPermission((short) 0));

    FileSystem fs = minicluster.getTestFileSystem();
    Path path = new Path("/test");
    assertTrue(fs.mkdirs(path, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)));
    fs.setOwner(path, ownerUser.getShortUserName(), ownerUser.getGroupNames()[0]);

    ownerUser.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            FileSystem fs = minicluster.getTestFileSystem();
            assertTrue(fs.mkdirs(new Path("/test/rw"),
                    new FsPermission(FsAction.ALL, FsAction.WRITE_EXECUTE, FsAction.NONE)));
            assertTrue(fs.mkdirs(new Path("/test/ro"),
                    new FsPermission(FsAction.READ_EXECUTE, FsAction.NONE, FsAction.NONE)));
            assertTrue(fs.mkdirs(new Path("/test/public"),
                    new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)));
            return null;
        }
    });
}
From source file:com.tuplejump.calliope.hadoop.cql3.CqlConfigHelper.java
License:Apache License
public static void setMultirangeInputSplit(Configuration conf, boolean useMultirange) {
    conf.setBoolean(MULTIRANGE_INPUT_SPLIT, useMultirange);
}
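setMultirangeInputSplit only stores the flag in the job Configuration; downstream code would read it back with getBoolean and a default. A minimal sketch of such a companion getter, assuming the MULTIRANGE_INPUT_SPLIT key from the class above (the method name and the false default are illustrative, not taken from CqlConfigHelper):

// Illustrative companion getter; the method name and the default value are
// assumptions, not copied from CqlConfigHelper.
public static boolean getMultirangeInputSplit(Configuration conf) {
    // Falls back to false when the property was never set on this job.
    return conf.getBoolean(MULTIRANGE_INPUT_SPLIT, false);
}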
From source file:com.twitter.algebra.matrix.multiply.AtB_DMJ.java
License:Apache License
/**
 * Perform A x B, where At and B refer to the paths that contain matrices in
 * {@link SequenceFileInputFormat}. One of At and B must also conform with
 * {@link MapDir} format. Refer to {@link AtB_DMJ} for further details.
 *
 * @param conf the initial configuration
 * @param mapDirPath path to the matrix in {@link MapDir} format
 * @param matrixInputPaths the list of paths to matrix input partitions over
 *          which we iterate
 * @param matrixOutputPath path to which AxB will be written
 * @param atCols number of columns of At (rows of A)
 * @param bCols number of columns of B
 * @param colsPerPartition cols per partition of the input matrix (whether At or B)
 * @param aIsMapDir is A chosen to be loaded as MapDir
 * @param useInMemCombiner whether to use the in-memory combiner (otherwise a combiner class is set)
 * @return the running job
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public Job run(Configuration conf, Path mapDirPath, Path matrixInputPaths, Path matrixOutputPath, int atCols,
        int bCols, int colsPerPartition, boolean aIsMapDir, boolean useInMemCombiner)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.set(MATRIXINMEMORY, mapDirPath.toString());
    conf.setBoolean(AISMAPDIR, aIsMapDir);
    conf.setBoolean(USEINMEMCOMBINER, useInMemCombiner);
    conf.setInt(RESULTROWS, atCols);
    conf.setInt(RESULTCOLS, bCols);
    conf.setInt(PARTITIONCOLS, colsPerPartition);
    FileSystem fs = FileSystem.get(matrixOutputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPaths, "dmj");

    if (useInMemCombiner) {
        Configuration newConf = new Configuration(conf);
        newConf.set("mapreduce.task.io.sort.mb", "1");
        conf = newConf;
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(AtB_DMJ.class);
    job.setJobName(AtB_DMJ.class.getSimpleName());
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    matrixInputPaths = fs.makeQualified(matrixInputPaths);

    MultipleInputs.addInputPath(job, matrixInputPaths, SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);
    if (!useInMemCombiner)
        job.setCombinerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "dmj");
    job.setNumReduceTasks(numReducers);
    // ensures total order (when used with {@link MatrixOutputFormat})
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, atCols);

    job.setReducerClass(EpsilonReducer.class);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    return job;
}
From source file:com.twitter.algebra.nmf.CompositeDMJ.java
License:Apache License
public Job run(Configuration conf, Path mapDirPath, Path matrixInputPaths, Path matrixOutputPath, int atCols,
        boolean aIsMapDir, String inMemCStr, int inMemCRows, int inMemCCols, float alpha1, float alpha2)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.set(MATRIXINMEMORY, inMemCStr);
    conf.setInt(MATRIXINMEMORYROWS, inMemCRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemCCols);
    conf.setFloat(ALPHA1, alpha1);
    conf.setFloat(ALPHA2, alpha2);

    FileSystem fs = FileSystem.get(matrixOutputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPaths, "compositedmj");

    conf.set(MAPDIRMATRIX, mapDirPath.toString());
    conf.setBoolean(AISMAPDIR, aIsMapDir);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(CompositeDMJ.class);
    job.setJobName(CompositeDMJ.class.getSimpleName() + "-" + matrixOutputPath.getName());
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    matrixInputPaths = fs.makeQualified(matrixInputPaths);

    MultipleInputs.addInputPath(job, matrixInputPaths, SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    // ensures total order (when used with {@link MatrixOutputFormat})
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, atCols);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    return job;
}
From source file:com.twitter.ambrose.hive.AmbroseHivePreHook.java
License:Apache License
/**
 * Waiting <tt>ambrose.wf.between.sleep.seconds</tt> before processing the
 * next statement (workflow) in the submitted script.
 *
 * @param hookContext
 * @param reporter
 * @param queryId
 */
private void waitBetween(HookContext hookContext, EmbeddedAmbroseHiveProgressReporter reporter,
        String queryId) {
    Configuration conf = hookContext.getConf();
    boolean justStarted = conf.getBoolean(SCRIPT_STARTED_PARAM, true);
    if (justStarted) {
        conf.setBoolean(SCRIPT_STARTED_PARAM, false);
    } else {
        // sleeping between workflows
        int sleepTimeMs = conf.getInt(WF_BETWEEN_SLEEP_SECS_PARAM, 10);
        try {
            LOG.info("One workflow complete, sleeping for " + sleepTimeMs
                    + " sec(s) before moving to the next one if exists. Hit ctrl-c to exit.");
            Thread.sleep(sleepTimeMs * 1000L);

            // send progressbar reset event
            Map<WorkflowProgressField, String> eventData = Maps.newHashMapWithExpectedSize(1);
            eventData.put(WorkflowProgressField.workflowProgress, "0");
            reporter.pushEvent(queryId, new Event.WorkflowProgressEvent(eventData));

            reporter.saveEventStack();
            reporter.reset();
        } catch (InterruptedException e) {
            LOG.warn("Sleep interrupted", e);
        }
    }
}
From source file:com.twitter.elephanttwin.indexing.AbstractBlockIndexingJob.java
License:Open Source License
public int work(Calendar start, Calendar end, int batchId) {
    LOG.info("Starting up indexer...");
    LOG.info(" - input: " + Joiner.on(" ").join(getInput()));
    LOG.info(" - index: " + getIndex());
    LOG.info(" - number of reducers: " + getNumPartitions());

    Configuration conf = getConf();
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    totalInputFiles = 0; // total number files from input directories
    try {
        ExecutorService pool = Executors.newFixedThreadPool(getJobPoolSize());
        for (String s : getInput()) {
            Path spath = new Path(s);
            FileSystem fs = spath.getFileSystem(conf);
            List<FileStatus> stats = Lists.newArrayList();
            // get all files from the input paths/directories
            HdfsUtils.addInputPathRecursively(stats, fs, spath, HdfsUtils.hiddenDirectoryFilter,
                    getFileFilter());
            totalInputFiles += stats.size();
            LOG.info(" total files under " + s + ":" + stats.size());
            if (isDryRun()) {
                continue;
            }
            int filesToIndex = 0;
            for (FileStatus stat : stats) {
                // check if we still want to index the file
                if (!fileIsOk(stat, fs)) {
                    continue;
                }
                if (!doOverwrite()) {
                    if (hasPreviousIndex(stat, fs))
                        continue;
                }
                filesToIndex++;
                totalFiles2Index++;
                Job job = setupJob(conf);
                job = setMapper(job);
                FileInputFormat.setInputPaths(job, stat.getPath());
                job.setJobName(getJobName() + ":" + stat.getPath());
                Thread.sleep(getSleepTime());
                pool.execute(new IndexingWorker(job, stat, fs));
            }
            LOG.info("total files submitted for indexing under" + s + ":" + filesToIndex);
        }
        if (isDryRun()) {
            return 0;
        }
        while (finishedJobs.get() < totalFiles2Index) {
            Thread.sleep(getSleepTime());
        }
        LOG.info(" total number of files from input directories: " + totalInputFiles);
        LOG.info(" total number of files submitted for indexing job: " + totalFiles2Index);
        LOG.info(" number of files successfully indexed is: " + indexedFiles);
        if (failedFiles.size() > 0)
            LOG.info(" these files were not indexed:" + Arrays.toString(failedFiles.toArray()));
        else
            LOG.info(" all files have been successfully indexed");
        pool.shutdown();
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    }
    if (totalFiles2Index == 0)
        return 0;
    else if (totalFiles2Index != indexedFiles.get())
        return -1;
    else
        return 1;
}
From source file:com.twitter.elephanttwin.lucene.indexing.AbstractLuceneIndexingJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    LOG = Logger.getLogger(this.getClass());
    params = newIndexConfig();

    LOG.info("Starting up indexer...");
    LOG.info(" - input: " + Joiner.on(" ").join(IndexConfig.input.get()));
    LOG.info(" - index: " + IndexConfig.index);
    LOG.info(" - number of shards: " + IndexConfig.numPartitions.get());

    Configuration conf = getConf();
    conf.set(AbstractLuceneIndexingReducer.HDFS_INDEX_LOCATION, IndexConfig.index.get());
    conf.set(AbstractLuceneIndexingReducer.ANALYZER, IndexConfig.analyzer.get());
    conf.set(AbstractLuceneIndexingReducer.SIMILARITY, IndexConfig.similarity.get());
    conf.setInt(AbstractSamplingIndexingMapper.SAMPLE_PERCENTAGE, IndexConfig.samplePercentage.get());
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    Job job = new Job(conf, getJobName(params));

    // Job's constructor copies conf, we need a reference to the one job
    // is actually using
    conf = job.getConfiguration();

    job.setJarByClass(this.getClass());
    job.setNumReduceTasks(IndexConfig.numPartitions.get());

    for (String s : IndexConfig.input.get()) {
        Path spath = new Path(s);
        FileSystem fs = spath.getFileSystem(getConf());
        List<FileStatus> stats = Lists.newArrayList();
        addInputPathRecursively(stats, fs, spath, HdfsUtils.HIDDEN_FILE_FILTER);
        for (FileStatus foundStat : stats) {
            FileInputFormat.addInputPath(job, foundStat.getPath());
        }
    }

    FileOutputFormat.setOutputPath(job, new Path(IndexConfig.index.get()));

    setupJob(job);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(IndexConfig.index.get());
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    LOG.info("Job " + getJobName(params) + " started.");
    // TODO Jimmy has a parameter that controls whether we wait in Thud but not in ES.
    // when would we not want to wait?
    job.waitForCompletion(true);
    LOG.info("Job " + getJobName(params) + " Finished in "
            + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    if (job.isSuccessful()) {
        writeIndexDescriptors(getIndexDescriptor());
    }
    return job.isSuccessful() ? 0 : 1;
}
From source file:com.twitter.elephanttwin.retrieval.BlockIndexedFileInputFormat.java
License:Apache License
/**
 * Set values for the underlying inputformat class.
 * We allow an indexing job to have filters for debugging/testing purposes.
 * Thus relying on whether there is a filter to determine
 * whether it is an indexing or a searching job is not sufficient anymore.
 * @param job
 * @param inputformatClass
 * @param valueClass
 * @param indexDir
 * @param filterConditions
 * @param indexColumn
 * @param indexingFlag true for indexing jobs; false for searching jobs
 */
private static void setOptions(Job job, String inputformatClass, String valueClass, String indexDir,
        String filterConditions, String indexColumn, boolean indexingFlag) {
    Configuration conf = job.getConfiguration();
    conf.set(REALINPUTFORMAT, inputformatClass);
    conf.set(VALUECLASS, valueClass);
    conf.set(INDEXDIR, indexDir);
    if (filterConditions != null)
        conf.set(FILTERCONDITIONS, filterConditions);
    if (indexColumn != null)
        conf.set(COLUMNNAME, indexColumn);
    conf.setBoolean(INDEXINGJOBFLAG, indexingFlag);
}
From source file:com.twitter.hraven.etl.JobFileProcessor.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration hbaseConf = HBaseConfiguration.create(getConf());

    // Grab input args and allow for -Dxyz style arguments
    String[] otherArgs = new GenericOptionsParser(hbaseConf, args).getRemainingArgs();

    // Grab the arguments we're looking for.
    CommandLine commandLine = parseArgs(otherArgs);

    // Grab the cluster argument
    String cluster = commandLine.getOptionValue("c");
    LOG.info("cluster=" + cluster);

    // Number of parallel threads to use
    int threadCount = 1;
    if (commandLine.hasOption("t")) {
        try {
            threadCount = Integer.parseInt(commandLine.getOptionValue("t"));
        } catch (NumberFormatException nfe) {
            throw new IllegalArgumentException(
                    "Provided thread-count argument (-t) is not a number: " + commandLine.getOptionValue("t"),
                    nfe);
        }
        if (threadCount < 1) {
            throw new IllegalArgumentException(
                    "Cannot run fewer than 1 thread. Provided thread-count argument (-t): " + threadCount);
        }
    }
    LOG.info("threadCount=" + threadCount);

    boolean reprocess = commandLine.hasOption("r");
    LOG.info("reprocess=" + reprocess);

    // Grab the batch-size argument
    int batchSize;
    if (commandLine.hasOption("b")) {
        try {
            batchSize = Integer.parseInt(commandLine.getOptionValue("b"));
        } catch (NumberFormatException nfe) {
            throw new IllegalArgumentException(
                    "batch size option -b is not a valid number: " + commandLine.getOptionValue("b"), nfe);
        }
        // Additional check
        if (batchSize < 1) {
            throw new IllegalArgumentException(
                    "Cannot process files in batches smaller than 1. Specified batch size option -b is: "
                            + commandLine.getOptionValue("b"));
        }
    } else {
        batchSize = DEFAULT_BATCH_SIZE;
    }

    // Grab the costfile argument
    String costFilePath = commandLine.getOptionValue("zf");
    LOG.info("cost properties file on hdfs=" + costFilePath);
    if (costFilePath == null)
        costFilePath = Constants.COST_PROPERTIES_HDFS_DIR;
    Path hdfsPath = new Path(costFilePath + Constants.COST_PROPERTIES_FILENAME);
    // add to distributed cache
    DistributedCache.addCacheFile(hdfsPath.toUri(), hbaseConf);

    // Grab the machine type argument
    String machineType = commandLine.getOptionValue("m");
    // set it as part of conf so that the
    // hRaven job can access it in the mapper
    hbaseConf.set(Constants.HRAVEN_MACHINE_TYPE, machineType);

    // check if re-aggregate option is forced on
    // if yes, we need to aggregate for this job in spite of
    // job having aggregation done status in raw table
    boolean reAggregateFlagValue = false;
    if (commandLine.hasOption("ra")) {
        String reaggregateFlag = commandLine.getOptionValue("ra");
        // set it as part of conf so that the
        // hRaven jobProcessor can access it in the mapper
        if (StringUtils.isNotBlank(reaggregateFlag)) {
            LOG.info(" reaggregateFlag is: " + reaggregateFlag);
            if (StringUtils.equalsIgnoreCase(reaggregateFlag, Boolean.TRUE.toString())) {
                reAggregateFlagValue = true;
            }
        }
    }
    LOG.info(AggregationConstants.RE_AGGREGATION_FLAG_NAME + "=" + reAggregateFlagValue);
    hbaseConf.setBoolean(AggregationConstants.RE_AGGREGATION_FLAG_NAME, reAggregateFlagValue);

    // set aggregation to off by default
    boolean aggFlagValue = false;
    if (commandLine.hasOption("a")) {
        String aggregateFlag = commandLine.getOptionValue("a");
        // set it as part of conf so that the
        // hRaven jobProcessor can access it in the mapper
        if (StringUtils.isNotBlank(aggregateFlag)) {
            LOG.info(" aggregateFlag is: " + aggregateFlag);
            if (StringUtils.equalsIgnoreCase(aggregateFlag, Boolean.TRUE.toString())) {
                aggFlagValue = true;
            }
        }
    }
    if (reprocess) {
        // turn off aggregation if reprocessing is true
        // we don't want to inadvertently aggregate again while re-processing
        // re-aggregation needs to be a conscious setting
        aggFlagValue = false;
    }
    LOG.info(AggregationConstants.AGGREGATION_FLAG_NAME + "=" + aggFlagValue);
    hbaseConf.setBoolean(AggregationConstants.AGGREGATION_FLAG_NAME, aggFlagValue);

    String processFileSubstring = null;
    if (commandLine.hasOption("p")) {
        processFileSubstring = commandLine.getOptionValue("p");
    }
    LOG.info("processFileSubstring=" + processFileSubstring);

    // hbase.client.keyvalue.maxsize somehow defaults to 10 MB and we have
    // history files exceeding that. Disable limit.
    hbaseConf.setInt("hbase.client.keyvalue.maxsize", 0);

    // Shove this into the jobConf so that we can get it out on the task side.
    hbaseConf.setStrings(Constants.CLUSTER_JOB_CONF_KEY, cluster);

    boolean success = false;
    if (reprocess) {
        success = reProcessRecords(hbaseConf, cluster, batchSize, threadCount);
    } else {
        success = processRecords(hbaseConf, cluster, batchSize, threadCount, processFileSubstring);
    }

    // Return the status
    return success ? 0 : 1;
}