List of usage examples for org.apache.hadoop.conf Configuration setBoolean
public void setBoolean(String name, boolean value)
Set the value of the name property to a boolean.
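Before the project examples below, here is a minimal, self-contained sketch of the call; the property name example.feature.enabled and the driver class are invented purely for illustration:

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store a boolean under a (hypothetical) property name.
        conf.setBoolean("example.feature.enabled", true);
        // Read it back; the second argument is the default used when the property is unset.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled);
    }
}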
From source file:mvm.rya.joinselect.mr.JoinSelectProspectOutput.java
License:Apache License
@Override
public int run(String[] args) throws AccumuloSecurityException, IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = getConf();
    String inTable = conf.get(PROSPECTS_TABLE);
    String auths = conf.get(AUTHS);
    String outPath = conf.get(PROSPECTS_OUTPUTPATH);
    assert inTable != null && outPath != null;

    Job job = new Job(conf, this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);

    JoinSelectStatsUtil.initTabToSeqFileJob(job, inTable, outPath, auths);
    job.setMapperClass(CardinalityMapper.class);
    job.setNumReduceTasks(0);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file:mvm.rya.joinselect.mr.JoinSelectSpoTableOutput.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String inTable = conf.get(SPO_TABLE);
    String auths = conf.get(AUTHS);
    String outPath = conf.get(SPO_OUTPUTPATH);
    assert inTable != null && outPath != null;

    Job job = new Job(conf, this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);

    JoinSelectStatsUtil.initTabToSeqFileJob(job, inTable, outPath, auths);
    job.setMapperClass(JoinSelectMapper.class);
    job.setNumReduceTasks(0);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file:net.iponweb.hadoop.streaming.parquet.ParquetAsTextOutputFormat.java
License:Apache License
public static void setEnableDictionary(Configuration configuration, boolean enableDictionary) {
    configuration.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, enableDictionary);
}
From source file:net.java.jatextmining.JaCoOccurrence.java
License:Apache License
/**
 * Set the POS tags that will be extracted on the configuration.
 * @param conf Specify the Hadoop configuration object.
 */
private void setPos(Configuration conf) {
    if (pos == null) {
        pos = conf.get("jatextmining.wordCountposSet");
    }
    if (pos == null) {
        conf.setBoolean("compNoun", true);
    } else {
        String[] poses = pos.split(",");
        for (String posBuf : poses) {
            String posCleared = posBuf.replaceAll(" ", "");
            if (posSet.contains(posCleared)) {
                conf.setBoolean(posCleared, true);
            } else {
                System.err.println("[error] unknown pos: " + posCleared);
                System.exit(-1);
            }
        }
    }
}
From source file:net.java.jatextmining.JaCoOccurrence.java
License:Apache License
/**
 * The implementation that starts counting the co-occurrence words.
 * @param conf Specify the Hadoop Configuration object.
 * @param cache Specify the distributed cache file path.
 * @return If successful return true, otherwise return false.
 * @throws IOException Exception for IO.
 * @throws URISyntaxException Exception for the distributed cache file path.
 * @throws InterruptedException Exception for threads, waitForCompletion().
 * @throws ClassNotFoundException Exception for waitForCompletion().
 */
private boolean runJaCoOccurrence(Configuration conf, String cache)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    String reducerNum = conf.get("jatextmining.JaWordCounterReducerNum");
    conf.setBoolean("df", true);
    Job job = new Job(conf);
    job.setJarByClass(JaCoOccurrence.class);
    TextInputFormat.addInputPath(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(cache));
    FileSystem fs = FileSystem.get(new URI(cache), conf);
    FileStatus[] status = fs.listStatus(new Path(cache));
    if (status != null) {
        fs.delete(new Path(cache), true);
    }
    fs.close();
    job.setMapperClass(CoOccurrenceMapper.class);
    job.setReducerClass(CountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setNumReduceTasks(Integer.valueOf(reducerNum));
    boolean rv = job.waitForCompletion(true);
    if (rv) {
        writeDocNumFile(conf, job);
    }
    return rv;
}
From source file:net.java.jatextmining.JaWordCounter.java
License:Apache License
/**
 * Clear the POS flags from the configuration.
 * @param conf Specify the Hadoop configuration object.
 */
private void clearPos(Configuration conf) {
    for (String posBuf : posSet) {
        // Clear the flag stored under each POS tag name.
        conf.setBoolean(posBuf, false);
    }
}
From source file:net.java.jatextmining.JaWordCounter.java
License:Apache License
/**
 * The implementation that starts counting words in documents with MapReduce.
 * @param conf Specify the Hadoop configuration object.
 * @return If successful return true, otherwise return false.
 * @throws IOException Exception for IO.
 * @throws InterruptedException Exception for threads.
 * @throws ClassNotFoundException Exception for finding class.
 */
private boolean runCount(Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    String reducerNum = conf.get("jatextmining.JaWordCounterReducerNum");
    conf.setBoolean("df", true);
    if (weightingFlag) {
        conf.setBoolean("weighting", true);
    }
    Job job = new Job(conf);
    job.setJarByClass(JaWordCounter.class);
    TextInputFormat.addInputPath(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));
    job.setMapperClass(CountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setReducerClass(CountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setNumReduceTasks(Integer.valueOf(reducerNum));
    return job.waitForCompletion(true);
}
From source file:net.java.jatextmining.util.Compressor.java
License:Apache License
/**
 * Run the MapReduce job that compresses files on HDFS.
 * @param conf Specify the Hadoop Configuration object.
 * @return If successful return true, otherwise return false.
 * @throws IOException Exception for IO.
 * @throws InterruptedException Exception for threads (waitForCompletion()).
 * @throws ClassNotFoundException Exception for waitForCompletion().
 */
private boolean runCompressor(Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.setBoolean("mapred.output.compress", true);
    conf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);
    Job job = new Job(conf);
    job.setJarByClass(Compressor.class);
    TextInputFormat.addInputPath(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));
    job.setMapperClass(CompressorMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    return job.waitForCompletion(true);
}
From source file:nl.gridline.zieook.inx.movielens.RowSimilarityZieOok.java
License:Apache License
@Override
@SuppressWarnings("rawtypes")
protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {
    Job job = new Job(new Configuration(getConf()));
    Configuration jobConf = job.getConfiguration();

    // This is not working - we set the jar directly:
    // if (reducer.equals(Reducer.class)) {
    //     if (mapper.equals(Mapper.class)) {
    //         throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    //     }
    //     job.setJarByClass(mapper);
    // } else {
    //     job.setJarByClass(reducer);
    // }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setJobName(getCustomJobName(job, mapper, reducer));

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
From source file:nl.gridline.zieook.runners.cf.RecommenderJobZieOok.java
License:Apache License
@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    addInputOption();
    addOutputOption();
    addOption("numRecommendations", "n", "Number of recommendations per user",
            String.valueOf(AggregateAndRecommendReducer.DEFAULT_NUM_RECOMMENDATIONS));
    addOption("usersFile", "u", "File of users to recommend for", null);
    addOption("itemsFile", "i", "File of items to recommend for", null);
    addOption("filterFile", "f",
            "File containing comma-separated userID,itemID pairs. Used to exclude the item from "
                    + "the recommendations for that user (optional)", null);
    addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
    addOption("maxPrefsPerUser", "mp",
            "Maximum number of preferences considered per user in final recommendation phase",
            String.valueOf(UserVectorSplitterMapper.DEFAULT_MAX_PREFS_PER_USER_CONSIDERED));
    addOption("minPrefsPerUser", "mp",
            "ignore users with less preferences than this in the similarity computation "
                    + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')',
            String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
    addOption("maxSimilaritiesPerItem", "m", "Maximum number of similarities considered per item ",
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ITEM));
    addOption("maxCooccurrencesPerItem", "mo",
            "try to cap the number of cooccurrences per item to this "
                    + "number (default: " + DEFAULT_MAX_COOCCURRENCES_PER_ITEM + ')',
            String.valueOf(DEFAULT_MAX_COOCCURRENCES_PER_ITEM));
    addOption("similarityClassname", "s",
            "Name of distributed similarity class to instantiate, alternatively use "
                    + "one of the predefined similarities (" + SimilarityType.listEnumNames() + ')',
            String.valueOf(SimilarityType.SIMILARITY_COOCCURRENCE));

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Path inputPath = getInputPath();
    Path outputPath = getOutputPath();
    Path tempDirPath = new Path(parsedArgs.get("--tempDir"));
    int numRecommendations = Integer.parseInt(parsedArgs.get("--numRecommendations"));
    String usersFile = parsedArgs.get("--usersFile");
    String itemsFile = parsedArgs.get("--itemsFile");
    String filterFile = parsedArgs.get("--filterFile");
    boolean booleanData = Boolean.valueOf(parsedArgs.get("--booleanData"));
    int maxPrefsPerUser = Integer.parseInt(parsedArgs.get("--maxPrefsPerUser"));
    int minPrefsPerUser = Integer.parseInt(parsedArgs.get("--minPrefsPerUser"));
    int maxSimilaritiesPerItem = Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerItem"));
    int maxCooccurrencesPerItem = Integer.parseInt(parsedArgs.get("--maxCooccurrencesPerItem"));
    String similarityClassname = parsedArgs.get("--similarityClassname");

    Path userVectorPath = new Path(tempDirPath, "userVectors");
    Path itemIDIndexPath = new Path(tempDirPath, "itemIDIndex");
    Path countUsersPath = new Path(tempDirPath, "countUsers");
    Path itemUserMatrixPath = new Path(tempDirPath, "itemUserMatrix");
    Path similarityMatrixPath = new Path(tempDirPath, "similarityMatrix");
    Path prePartialMultiplyPath1 = new Path(tempDirPath, "prePartialMultiply1");
    Path prePartialMultiplyPath2 = new Path(tempDirPath, "prePartialMultiply2");
    Path explicitFilterPath = new Path(tempDirPath, "explicitFilterPath");
    Path partialMultiplyPath = new Path(tempDirPath, "partialMultiply");

    AtomicInteger currentPhase = new AtomicInteger();

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job itemIDIndex = prepareJob(inputPath, itemIDIndexPath, TextInputFormat.class,
                ItemIDIndexMapper.class, VarIntWritable.class, VarLongWritable.class,
                ItemIDIndexReducer.class, VarIntWritable.class, VarLongWritable.class,
                SequenceFileOutputFormat.class);
        itemIDIndex.setCombinerClass(ItemIDIndexReducer.class);
        task.setCurrentJob(itemIDIndex).waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job toUserVector = prepareJob(inputPath, userVectorPath, TextInputFormat.class,
                ToItemPrefsMapper.class, VarLongWritable.class,
                booleanData ? VarLongWritable.class : EntityPrefWritable.class,
                ToUserVectorReducer.class, VarLongWritable.class, VectorWritable.class,
                SequenceFileOutputFormat.class);
        toUserVector.getConfiguration().setBoolean(BOOLEAN_DATA, booleanData);
        toUserVector.getConfiguration().setInt(ToUserVectorReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser);
        task.setCurrentJob(toUserVector).waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job countUsers = prepareJob(userVectorPath, countUsersPath, SequenceFileInputFormat.class,
                CountUsersMapper.class, CountUsersKeyWritable.class, VarLongWritable.class,
                CountUsersReducer.class, VarIntWritable.class, NullWritable.class, TextOutputFormat.class);
        countUsers.setPartitionerClass(CountUsersKeyWritable.CountUsersPartitioner.class);
        countUsers.setGroupingComparatorClass(CountUsersKeyWritable.CountUsersGroupComparator.class);
        task.setCurrentJob(countUsers).waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job maybePruneAndTransponse = prepareJob(userVectorPath, itemUserMatrixPath,
                SequenceFileInputFormat.class, MaybePruneRowsMapper.class, IntWritable.class,
                DistributedRowMatrix.MatrixEntryWritable.class, ToItemVectorsReducer.class,
                IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
        maybePruneAndTransponse.getConfiguration().setInt(MaybePruneRowsMapper.MAX_COOCCURRENCES,
                maxCooccurrencesPerItem);
        task.setCurrentJob(maybePruneAndTransponse).waitForCompletion(true);
    }

    int numberOfUsers = TasteHadoopUtils.readIntFromFile(getConf(), countUsersPath);

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /*
         * Once DistributedRowMatrix uses the hadoop 0.20 API, we should refactor this call to something like
         * new DistributedRowMatrix(...).rowSimilarity(...)
         */
        try {
            ToolRunner.run(getConf(), new RowSimilarityZieOok(), new String[] {
                    "--input", itemUserMatrixPath.toString(),
                    "--output", similarityMatrixPath.toString(),
                    "--numberOfColumns", String.valueOf(numberOfUsers),
                    "--similarityClassname", similarityClassname,
                    "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem + 1),
                    "--tempDir", tempDirPath.toString() });
        } catch (Exception e) {
            throw new IllegalStateException("item-item-similarity computation failed", e);
        }
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job prePartialMultiply1 = prepareJob(similarityMatrixPath, prePartialMultiplyPath1,
                SequenceFileInputFormat.class, SimilarityMatrixRowWrapperMapper.class, VarIntWritable.class,
                VectorOrPrefWritable.class, Reducer.class, VarIntWritable.class, VectorOrPrefWritable.class,
                SequenceFileOutputFormat.class);
        task.setCurrentJob(prePartialMultiply1).waitForCompletion(true);

        Job prePartialMultiply2 = prepareJob(userVectorPath, prePartialMultiplyPath2,
                SequenceFileInputFormat.class, UserVectorSplitterMapper.class, VarIntWritable.class,
                VectorOrPrefWritable.class, Reducer.class, VarIntWritable.class, VectorOrPrefWritable.class,
                SequenceFileOutputFormat.class);
        if (usersFile != null) {
            prePartialMultiply2.getConfiguration().set(UserVectorSplitterMapper.USERS_FILE, usersFile);
        }
        prePartialMultiply2.getConfiguration().setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED,
                maxPrefsPerUser);
        task.setCurrentJob(prePartialMultiply2).waitForCompletion(true);

        Job partialMultiply = prepareJob(new Path(prePartialMultiplyPath1 + "," + prePartialMultiplyPath2),
                partialMultiplyPath, SequenceFileInputFormat.class, Mapper.class, VarIntWritable.class,
                VectorOrPrefWritable.class, ToVectorAndPrefReducer.class, VarIntWritable.class,
                VectorAndPrefsWritable.class, SequenceFileOutputFormat.class);

        /* necessary to make this job (having a combined input path) work on Amazon S3 */
        Configuration partialMultiplyConf = partialMultiply.getConfiguration();
        FileSystem fs = FileSystem.get(tempDirPath.toUri(), partialMultiplyConf);
        prePartialMultiplyPath1 = prePartialMultiplyPath1.makeQualified(fs);
        prePartialMultiplyPath2 = prePartialMultiplyPath2.makeQualified(fs);
        FileInputFormat.setInputPaths(partialMultiply, prePartialMultiplyPath1, prePartialMultiplyPath2);
        task.setCurrentJob(partialMultiply).waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /* convert the user/item pairs to filter if a filterFile has been specified */
        if (filterFile != null) {
            Job itemFiltering = prepareJob(new Path(filterFile), explicitFilterPath, TextInputFormat.class,
                    ItemFilterMapper.class, VarLongWritable.class, VarLongWritable.class,
                    ItemFilterAsVectorAndPrefsReducer.class, VarIntWritable.class, VectorAndPrefsWritable.class,
                    SequenceFileOutputFormat.class);
            task.setCurrentJob(itemFiltering).waitForCompletion(true);
        }

        String aggregateAndRecommendInput = partialMultiplyPath.toString();
        if (filterFile != null) {
            aggregateAndRecommendInput += "," + explicitFilterPath;
        }

        Job aggregateAndRecommend = prepareJob(new Path(aggregateAndRecommendInput), outputPath,
                SequenceFileInputFormat.class, PartialMultiplyMapper.class, VarLongWritable.class,
                PrefAndSimilarityColumnWritable.class, AggregateAndRecommendReducer.class,
                VarLongWritable.class, RecommendedItemsWritable.class, SequenceFileOutputFormat.class);
        Configuration aggregateAndRecommendConf = aggregateAndRecommend.getConfiguration();
        if (itemsFile != null) {
            aggregateAndRecommendConf.set(AggregateAndRecommendReducer.ITEMS_FILE, itemsFile);
        }

        if (filterFile != null) {
            /* necessary to make this job (having a combined input path) work on Amazon S3 */
            FileSystem fs = FileSystem.get(tempDirPath.toUri(), aggregateAndRecommendConf);
            partialMultiplyPath = partialMultiplyPath.makeQualified(fs);
            explicitFilterPath = explicitFilterPath.makeQualified(fs);
            FileInputFormat.setInputPaths(aggregateAndRecommend, partialMultiplyPath, explicitFilterPath);
        }
        setIOSort(aggregateAndRecommend);
        aggregateAndRecommendConf.set(AggregateAndRecommendReducer.ITEMID_INDEX_PATH, itemIDIndexPath.toString());
        aggregateAndRecommendConf.setInt(AggregateAndRecommendReducer.NUM_RECOMMENDATIONS, numRecommendations);
        aggregateAndRecommendConf.setBoolean(BOOLEAN_DATA, booleanData);
        task.setCurrentJob(aggregateAndRecommend).waitForCompletion(true);
    }

    return 0;
}