List of usage examples for org.apache.hadoop.conf.Configuration.setBoolean

public void setBoolean(String name, boolean value)

Sets the value of the name property to a boolean.
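Before the per-project examples below, here is a minimal, self-contained sketch of the call, assuming only a stock Hadoop client on the classpath; the property keys are illustrative choices taken from the examples that follow, not values required by the API:

import org.apache.hadoop.conf.Configuration;

public class SetBooleanSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Turn on compression of intermediate map output.
        conf.setBoolean("mapreduce.map.output.compress", true);

        // Disable speculative execution of map tasks.
        conf.setBoolean("mapreduce.map.speculative", false);

        // Read a value back; the second argument is the default used when the key is unset.
        boolean compress = conf.getBoolean("mapreduce.map.output.compress", false);
        System.out.println("map output compression enabled: " + compress);
    }
}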
From source file:gaffer.analytic.impl.GraphStatistics.java
License:Apache License
public int run(String[] args) throws Exception {
    // Usage
    if (args.length != 6 && args.length != 7) {
        System.err.println(USAGE);
        return 1;
    }

    // Parse options
    Path outputPath = new Path(args[0]);
    String accumuloPropertiesFile = args[1];
    int numReduceTasks;
    try {
        numReduceTasks = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println(USAGE);
        return 1;
    }
    Date startDate = null;
    Date endDate = null;
    boolean useTimeWindow = false;
    if (!args[3].equals("null") && !args[4].equals("null")) {
        try {
            startDate = DATE_FORMAT.parse(args[3]);
            endDate = DATE_FORMAT.parse(args[4]);
        } catch (ParseException e) {
            System.err.println("Error parsing dates: " + args[3] + " " + args[4] + " " + e.getMessage());
            return 1;
        }
        useTimeWindow = true;
    }
    boolean rollUpOverTimeAndVisibility = Boolean.parseBoolean(args[5]);
    boolean seedsSpecified = (args.length == 7);
    String seedsFile = "";
    if (seedsSpecified) {
        seedsFile = args[6];
    }

    // Hadoop configuration
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Connect to Accumulo, so we can check connection and check that the table exists
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();
    Authorizations authorizations = conn.securityOperations().getUserAuthorizations(accConf.getUserName());

    // Check if the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist.");
        return 1;
    }

    // Create graph and update configuration based on the view
    AccumuloBackedGraph graph = new AccumuloBackedGraph(conn, tableName);
    if (useTimeWindow) {
        graph.setTimeWindow(startDate, endDate);
    }
    graph.rollUpOverTimeAndVisibility(rollUpOverTimeAndVisibility);
    if (seedsSpecified) {
        Set<TypeValue> typeValues = new HashSet<TypeValue>();
        BufferedReader reader = new BufferedReader(new FileReader(seedsFile));
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\\|");
            if (tokens.length != 2) {
                System.err.println("Invalid line: " + line);
                continue;
            }
            String type = tokens[0];
            String value = tokens[1];
            typeValues.add(new TypeValue(type, value));
        }
        reader.close();
        graph.setConfiguration(conf, typeValues, accConf);
    } else {
        graph.setConfiguration(conf, accConf);
    }

    // Conf
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    // Job
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Running MapReduce against Gaffer data in Accumulo: input = " + tableName + ", output = "
            + outputPath);

    // Input format - use BatchScannerElementInputFormat if seeds have been specified (as that creates fewer
    // splits); otherwise use ElementInputFormat which is based on the standard AccumuloInputFormat.
    if (seedsSpecified) {
        job.setInputFormatClass(BatchScannerElementInputFormat.class);
    } else {
        job.setInputFormatClass(ElementInputFormat.class);
    }

    // Mapper
    job.setMapperClass(GraphStatisticsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SetOfStatistics.class);

    // Combiner
    job.setCombinerClass(GraphStatisticsReducer.class);

    // Reducer
    job.setReducerClass(GraphStatisticsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SetOfStatistics.class);
    job.setNumReduceTasks(numReduceTasks);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    System.out.println("Running MapReduce job over:");
    System.out.println("\tTable: " + accConf.getTable());
    System.out.println("\tUser: " + accConf.getUserName());
    System.out.println("\tAuths: " + authorizations);
    if (useTimeWindow) {
        System.out.println("\tFilter by time: start time is " + DATE_FORMAT.format(startDate) + ", "
                + DATE_FORMAT.format(endDate));
    } else {
        System.out.println("\tFilter by time is off");
    }
    System.out.println("\tRoll up over time and visibility: " + rollUpOverTimeAndVisibility);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    // Write results out
    System.out.println("Summary of graph");
    for (FileStatus file : fs.listStatus(outputPath)) {
        if (!file.isDirectory() && !file.getPath().getName().contains("_SUCCESS")) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
            Text text = new Text();
            SetOfStatistics stats = new SetOfStatistics();
            while (reader.next(text, stats)) {
                System.out.println(text + ", " + stats);
            }
            reader.close();
        }
    }
    return 0;
}
From source file:gobblin.compaction.mapreduce.MRCompactorJobRunner.java
License:Apache License
@Override
public void run() {
    Configuration conf = HadoopUtils.getConfFromState(this.dataset.jobProps());

    // Turn on mapreduce output compression by default
    if (conf.get("mapreduce.output.fileoutputformat.compress") == null
            && conf.get("mapred.output.compress") == null) {
        conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    }

    // Disable delegation token cancellation by default
    if (conf.get("mapreduce.job.complete.cancel.delegation.tokens") == null) {
        conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
    }

    try {
        DateTime compactionTimestamp = getCompactionTimestamp();
        LOG.info("MR Compaction Job Timestamp " + compactionTimestamp.getMillis());
        if (this.dataset.jobProps().getPropAsBoolean(MRCompactor.COMPACTION_JOB_LATE_DATA_MOVEMENT_TASK, false)) {
            List<Path> newLateFilePaths = Lists.newArrayList();
            for (String filePathString : this.dataset.jobProps()
                    .getPropAsList(MRCompactor.COMPACTION_JOB_LATE_DATA_FILES)) {
                if (FilenameUtils.isExtension(filePathString, getApplicableFileExtensions())) {
                    newLateFilePaths.add(new Path(filePathString));
                }
            }
            Path lateDataOutputPath = this.outputDeduplicated ? this.dataset.outputLatePath()
                    : this.dataset.outputPath();
            LOG.info(String.format("Copying %d late data files to %s", newLateFilePaths.size(),
                    lateDataOutputPath));
            if (this.outputDeduplicated) {
                if (!this.fs.exists(lateDataOutputPath)) {
                    if (!this.fs.mkdirs(lateDataOutputPath)) {
                        throw new RuntimeException(String.format("Failed to create late data output directory: %s.",
                                lateDataOutputPath.toString()));
                    }
                }
            }
            this.copyDataFiles(lateDataOutputPath, newLateFilePaths);
            if (this.outputDeduplicated) {
                dataset.checkIfNeedToRecompact(datasetHelper);
            }
            this.status = Status.COMMITTED;
        } else {
            if (this.fs.exists(this.dataset.outputPath()) && !canOverwriteOutputDir()) {
                LOG.warn(String.format("Output paths %s exists. Will not compact %s.", this.dataset.outputPath(),
                        this.dataset.inputPaths()));
                this.status = Status.COMMITTED;
                return;
            }
            addJars(conf);
            Job job = Job.getInstance(conf);
            this.configureJob(job);
            this.submitAndWait(job);
            if (shouldPublishData(compactionTimestamp)) {
                if (!this.recompactAllData && this.recompactFromDestPaths) {
                    // append new files without deleting output directory
                    addFilesInTmpPathToOutputPath();
                    // clean up late data from outputLateDirectory, which has been set to inputPath
                    deleteFilesByPaths(this.dataset.inputPaths());
                } else {
                    moveTmpPathToOutputPath();
                    if (this.recompactFromDestPaths) {
                        deleteFilesByPaths(this.dataset.additionalInputPaths());
                    }
                }
                submitSlaEvent(job);
                LOG.info("Successfully published data for input folder " + this.dataset.inputPaths());
                this.status = Status.COMMITTED;
            } else {
                LOG.info("Data not published for input folder " + this.dataset.inputPaths()
                        + " due to incompleteness");
                this.status = Status.ABORTED;
                return;
            }
        }
        if (renameSourceDir) {
            MRCompactor.renameSourceDirAsCompactionComplete(this.fs, this.dataset);
        } else {
            this.markOutputDirAsCompleted(compactionTimestamp);
        }
        this.submitRecordsCountsEvent();
    } catch (Throwable t) {
        throw Throwables.propagate(t);
    }
}
From source file:gobblin.util.limiter.stressTest.MRStressTest.java
License:Apache License
public static void main(String[] args) throws Exception {
    CommandLine cli = StressTestUtils.parseCommandLine(OPTIONS, args);

    Configuration configuration = new Configuration();
    if (cli.hasOption(THROTTLING_SERVER_URI.getOpt())) {
        configuration.setBoolean(USE_THROTTLING_SERVER, true);
        String resourceLimited = cli.getOptionValue(RESOURCE_ID_OPT.getOpt(), "MRStressTest");
        configuration.set(RESOURCE_ID, resourceLimited);
        configuration.set(
                BrokerConfigurationKeyGenerator.generateKey(new SharedRestClientFactory(),
                        new SharedRestClientKey(RestliLimiterFactory.RESTLI_SERVICE_NAME), null,
                        SharedRestClientFactory.SERVER_URI_KEY),
                cli.getOptionValue(THROTTLING_SERVER_URI.getOpt()));
    }
    if (cli.hasOption(LOCAL_QPS_OPT.getOpt())) {
        configuration.set(LOCALLY_ENFORCED_QPS, cli.getOptionValue(LOCAL_QPS_OPT.getOpt()));
    }

    Job job = Job.getInstance(configuration, "ThrottlingStressTest");
    job.getConfiguration().setBoolean("mapreduce.job.user.classpath.first", true);
    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
    job.getConfiguration().set(NUM_MAPPERS, cli.getOptionValue(NUM_MAPPERS_OPT.getOpt(), DEFAULT_MAPPERS));
    StressTestUtils.populateConfigFromCli(job.getConfiguration(), cli);

    job.setJarByClass(MRStressTest.class);
    job.setMapperClass(StresserMapper.class);
    job.setReducerClass(AggregatorReducer.class);
    job.setInputFormatClass(MyInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/MRStressTest" + System.currentTimeMillis()));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hadoop.api.RecommenderJob.java
License:Apache License
/**
 * Calculates the multiplication of the co-occurrence matrix by the user vectors.
 *
 * @param args information about the input path, partialMultiply, similarityClassname, maxObservationsPerRow
 * @return 0 on success, -1 if the partialMultiply job fails
 */
public int multiplication(String[] args, String path1, String path2) {
    try {
        prepareRecommender(args);
    } catch (IOException e) {
        e.printStackTrace();
    }
    Path similarityMatrixPath = new Path(path1);
    Path partialMultiplyPath = new Path(prepPath, "partialMultiply");
    int maxPrefsPerUser = Integer.parseInt(getOption("maxPrefsPerUser"));
    String usersFile = getOption("usersFile");

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job partialMultiply = null;
        try {
            partialMultiply = new Job(getConf(), "partialMultiply");
        } catch (IOException e) {
            e.printStackTrace();
        }
        Configuration partialMultiplyConf = partialMultiply.getConfiguration();
        MultipleInputs.addInputPath(partialMultiply, similarityMatrixPath, SequenceFileInputFormat.class,
                SimilarityMatrixRowWrapperMapper.class);
        MultipleInputs.addInputPath(partialMultiply, new Path(path2), SequenceFileInputFormat.class,
                UserVectorSplitterMapper.class);
        partialMultiply.setJarByClass(org.apache.mahout.cf.taste.hadoop.item.ToVectorAndPrefReducer.class);
        partialMultiply.setMapOutputKeyClass(VarIntWritable.class);
        partialMultiply.setMapOutputValueClass(VectorOrPrefWritable.class);
        partialMultiply.setReducerClass(ToVectorAndPrefReducer.class);
        partialMultiply.setOutputFormatClass(SequenceFileOutputFormat.class);
        partialMultiply.setOutputKeyClass(VarIntWritable.class);
        partialMultiply.setOutputValueClass(VectorAndPrefsWritable.class);
        partialMultiplyConf.setBoolean("mapred.compress.map.output", true);
        partialMultiplyConf.set("mapred.output.dir", partialMultiplyPath.toString());
        if (usersFile != null) {
            partialMultiplyConf.set(UserVectorSplitterMapper.USERS_FILE, usersFile);
        }
        partialMultiplyConf.setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED, maxPrefsPerUser);

        boolean succeeded = false;
        try {
            succeeded = partialMultiply.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        if (!succeeded) {
            return -1;
        }
    }
    return 0;
}
From source file:hadoop.api.RecommenderJob.java
License:Apache License
/**
 * Calculates the recommendations.
 *
 * @param args information about the input path, partialMultiply, explicitFilterPath, numRecommendations
 * @return 0 on success, -1 if a job fails
 */
public int recommender(String[] args) {
    try {
        prepareRecommender(args);
    } catch (IOException e) {
        e.printStackTrace();
    }
    Path explicitFilterPath = new Path(prepPath, "explicitFilterPath");
    Path partialMultiplyPath = new Path(prepPath, "partialMultiply");
    Path outputPath = getOutputPath();
    String itemsFile = getOption("itemsFile");
    String filterFile = getOption("filterFile");
    boolean booleanData = Boolean.valueOf(getOption("booleanData"));
    int numRecommendations = Integer.parseInt(getOption("numRecommendations"));

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        // filter out any users we don't care about
        if (filterFile != null) {
            Job itemFiltering = null;
            try {
                itemFiltering = prepareJob(new Path(filterFile), explicitFilterPath, TextInputFormat.class,
                        ItemFilterMapper.class, VarLongWritable.class, VarLongWritable.class,
                        ItemFilterAsVectorAndPrefsReducer.class, VarIntWritable.class,
                        VectorAndPrefsWritable.class, SequenceFileOutputFormat.class);
            } catch (IOException e) {
                e.printStackTrace();
            }
            boolean succeeded = false;
            try {
                succeeded = itemFiltering.waitForCompletion(true);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
            }
            if (!succeeded) {
                return -1;
            }
        }
        String aggregateAndRecommendInput = partialMultiplyPath.toString();
        if (filterFile != null) {
            aggregateAndRecommendInput += "," + explicitFilterPath;
        }
        Class<? extends OutputFormat> outputFormat = parsedArgs.containsKey("--sequencefileOutput")
                ? SequenceFileOutputFormat.class
                : TextOutputFormat.class;
        // extract out the recommendations
        Job aggregateAndRecommend = null;
        try {
            aggregateAndRecommend = prepareJob(new Path(aggregateAndRecommendInput), outputPath,
                    SequenceFileInputFormat.class, PartialMultiplyMapper.class, VarLongWritable.class,
                    PrefAndSimilarityColumnWritable.class,
                    org.apache.mahout.cf.taste.hadoop.item.AggregateAndRecommendReducer.class,
                    VarLongWritable.class, RecommendedItemsWritable.class, outputFormat);
        } catch (IOException e) {
            e.printStackTrace();
        }
        Configuration aggregateAndRecommendConf = aggregateAndRecommend.getConfiguration();
        if (itemsFile != null) {
            aggregateAndRecommendConf.set(hadoop.api.AggregateAndRecommendReducer.ITEMS_FILE, itemsFile);
        }
        if (filterFile != null) {
            try {
                setS3SafeCombinedInputPath(aggregateAndRecommend, getTempPath(), partialMultiplyPath,
                        explicitFilterPath);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        setIOSort(aggregateAndRecommend);
        aggregateAndRecommendConf.set(hadoop.api.AggregateAndRecommendReducer.ITEMID_INDEX_PATH,
                new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
        aggregateAndRecommendConf.setInt(hadoop.api.AggregateAndRecommendReducer.NUM_RECOMMENDATIONS,
                numRecommendations);
        aggregateAndRecommendConf.setBoolean(BOOLEAN_DATA, booleanData);

        boolean succeeded = false;
        try {
            succeeded = aggregateAndRecommend.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        if (!succeeded) {
            return -1;
        }
    }
    return 0;
}
From source file:hadoop.examples.mapreduce.WordCountV2.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    List<String> other_args = new ArrayList<String>();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    for (int i = 0; i < args.length; i++) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
            conf.setBoolean("wordcount.skip.patterns", true);
        } else if ("-D".equals(args[i])) {
            String[] arr = args[++i].split("=");
            conf.setBoolean(arr[0], Boolean.valueOf(arr[1]));
        } else {
            other_args.add(args[i]);
        }
    }

    Job job = new Job(conf);
    job.setJarByClass(WordCountV2.class);
    job.setJobName("word count version 2");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(TokenizeMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(job, new Path(other_args.get(1)));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:hydrograph.engine.cascading.scheme.TextDelimitedAndFixedWidth.java
License:Apache License
@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (hasZippedFiles(FileInputFormat.getInputPaths(asJobConfInstance(conf))))
        throw new IllegalStateException("cannot read zip files: "
                + Arrays.toString(FileInputFormat.getInputPaths(asJobConfInstance(conf))));
    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", DelimitedAndFixedWidthInputFormat.class, InputFormat.class);
    conf.set("charsetName", charsetName);
    conf.set("quote", quote);
    conf.set("lengthsAndDelimiters", DelimitedAndFixedWidthHelper.arrayToString(lengthsAndDelimiters));
    conf.setStrings("lengthsAndDelimitersType", lengthsAndDelimitersType);
}
From source file:hydrograph.engine.cascading.scheme.TextDelimitedAndFixedWidth.java
License:Apache License
@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (tap.getFullIdentifier(conf).endsWith(".zip"))
        throw new IllegalStateException("cannot write zip files: " + getOutputPath(conf));
    conf.setBoolean("mapred.mapper.new-api", false);
    if (getSinkCompression() == Compress.DISABLE)
        conf.setBoolean("mapred.output.compress", false);
    else if (getSinkCompression() == Compress.ENABLE)
        conf.setBoolean("mapred.output.compress", true);
    conf.setClass("mapred.output.key.class", Text.class, Object.class);
    conf.setClass("mapred.output.value.class", Text.class, Object.class);
    conf.setClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class);
}
From source file:idgs.ConfVar.java
License:Open Source License
public static void initializeWithDefaults(Configuration conf) {
    if (conf.get(CLIPROMPT.varname) == null) {
        conf.set(CLIPROMPT.varname, CLIPROMPT.defaultVal);
    }
    if (conf.get(EXEC_MODE.varname) == null) {
        conf.set(EXEC_MODE.varname, EXEC_MODE.defaultVal);
    }
    if (conf.get(EXPLAIN_MODE.varname) == null) {
        conf.set(EXPLAIN_MODE.varname, EXPLAIN_MODE.defaultVal);
    }
    if (conf.get(COLUMN_INITIALSIZE.varname) == null) {
        conf.setInt(COLUMN_INITIALSIZE.varname, COLUMN_INITIALSIZE.defaultIntVal);
    }
    if (conf.get(CHECK_TABLENAME_FLAG.varname) == null) {
        conf.setBoolean(CHECK_TABLENAME_FLAG.varname, CHECK_TABLENAME_FLAG.defaultBoolVal);
    }
    if (conf.get(COMPRESS_QUERY_PLAN.varname) == null) {
        conf.setBoolean(COMPRESS_QUERY_PLAN.varname, COMPRESS_QUERY_PLAN.defaultBoolVal);
    }
    if (conf.get(MAP_PRUNING.varname) == null) {
        conf.setBoolean(MAP_PRUNING.varname, MAP_PRUNING.defaultBoolVal);
    }
    if (conf.get(MAP_PRUNING_PRINT_DEBUG.varname) == null) {
        conf.setBoolean(MAP_PRUNING_PRINT_DEBUG.varname, MAP_PRUNING_PRINT_DEBUG.defaultBoolVal);
    }
}
From source file:idgs.ConfVar.java
License:Open Source License
public static void setBoolVar(Configuration conf, ConfVar variable, Boolean value) {
    require(variable.valClass == Boolean.class);
    conf.setBoolean(variable.varname, value);
}
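A hypothetical call to this helper might look like the following; it assumes MAP_PRUNING is one of the Boolean-typed ConfVar constants shown in the previous example and that the method is invoked from outside the ConfVar class:

// Hypothetical usage sketch: toggle the map-pruning flag through the typed helper.
Configuration conf = new Configuration();
ConfVar.setBoolVar(conf, ConfVar.MAP_PRUNING, true);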