Example usage for org.apache.hadoop.conf Configuration setBoolean

List of usage examples for org.apache.hadoop.conf Configuration setBoolean

Introduction

This page collects example usages of org.apache.hadoop.conf.Configuration.setBoolean.

Prototype

public void setBoolean(String name, boolean value) 

Document

Set the value of the name property to a boolean.
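
A minimal, self-contained sketch of the call is shown below; the property name my.example.flag is invented for illustration and is not a Hadoop property.

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store a boolean under a property name ("my.example.flag" is a made-up key).
        conf.setBoolean("my.example.flag", true);
        // Read it back; the second argument is the default used when the property is unset.
        boolean flag = conf.getBoolean("my.example.flag", false);
        System.out.println("my.example.flag = " + flag);
    }
}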

Usage

From source file:mvm.rya.joinselect.mr.JoinSelectProspectOutput.java

License:Apache License

@Override
public int run(String[] args)
        throws AccumuloSecurityException, IOException, ClassNotFoundException, InterruptedException {

    Configuration conf = getConf();
    String inTable = conf.get(PROSPECTS_TABLE);
    String auths = conf.get(AUTHS);
    String outPath = conf.get(PROSPECTS_OUTPUTPATH);

    assert inTable != null && outPath != null;

    Job job = new Job(conf, this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);

    JoinSelectStatsUtil.initTabToSeqFileJob(job, inTable, outPath, auths);
    job.setMapperClass(CardinalityMapper.class);

    job.setNumReduceTasks(0);

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;

}
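
Note that the example above calls conf.setBoolean(...) only after the Job has been constructed; since Job typically copies the Configuration it is handed, the flag may not reach the submitted job. A hedged sketch of the safer ordering (not taken from the original source) is:

    Configuration conf = getConf();
    // Set the flag before the Job copies the Configuration ...
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);
    Job job = new Job(conf, "JoinSelectProspectOutput_" + System.currentTimeMillis());

    // ... or set it afterwards on the Job's own Configuration.
    job.getConfiguration().setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);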

From source file:mvm.rya.joinselect.mr.JoinSelectSpoTableOutput.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    String inTable = conf.get(SPO_TABLE);
    String auths = conf.get(AUTHS);
    String outPath = conf.get(SPO_OUTPUTPATH);

    assert inTable != null && outPath != null;

    Job job = new Job(conf, this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);

    JoinSelectStatsUtil.initTabToSeqFileJob(job, inTable, outPath, auths);
    job.setMapperClass(JoinSelectMapper.class);
    job.setNumReduceTasks(0);
    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;

}

From source file:net.iponweb.hadoop.streaming.parquet.ParquetAsTextOutputFormat.java

License:Apache License

public static void setEnableDictionary(Configuration configuration, boolean enableDictionary) {
    configuration.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, enableDictionary);
}
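
A hedged usage sketch for the helper above, assuming a driver that builds its own Job (the variable and job names are illustrative):

    Configuration conf = new Configuration();
    // Enable Parquet dictionary encoding; the helper simply forwards to
    // conf.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, true).
    ParquetAsTextOutputFormat.setEnableDictionary(conf, true);
    Job job = new Job(conf, "parquet-as-text");
    job.setOutputFormatClass(ParquetAsTextOutputFormat.class);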

From source file:net.java.jatextmining.JaCoOccurrence.java

License:Apache License

/**
 * Sets the POS (part-of-speech) tags that will be extracted on the configuration.
 * @param conf Specify the Hadoop configuration object.
 */
private void setPos(Configuration conf) {
    if (pos == null) {
        pos = conf.get("jatextmining.wordCountposSet");
    }
    if (pos == null) {
        conf.setBoolean("compNoun", true);
    } else {
        String[] poses = pos.split(",");
        for (String posBuf : poses) {
            String posCleared = posBuf.replaceAll(" ", "");
            if (posSet.contains(posCleared)) {
                conf.setBoolean(posCleared, true);
            } else {
                System.err.println("[error] unknown pos: " + posCleared);
                System.exit(-1);
            }
        }
    }
}

From source file:net.java.jatextmining.JaCoOccurrence.java

License:Apache License

/**
 * The implementation that starts counting the co-occurrence words.
 * @param conf Specify the Hadoop Configuration object.
 * @param cache Specify the distributed cache file path.
 * @return Returns true on success, false otherwise.
 * @throws IOException Exception for IO.
 * @throws URISyntaxException Exception for the distributed cache file path.
 * @throws InterruptedException Exception for threads (waitForCompletion()).
 * @throws ClassNotFoundException Exception for waitForCompletion().
 */
private boolean runJaCoOccurrence(Configuration conf, String cache)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    String reducerNum = conf.get("jatextmining.JaWordCounterReducerNum");
    conf.setBoolean("df", true);
    Job job = new Job(conf);
    job.setJarByClass(JaCoOccurrence.class);
    TextInputFormat.addInputPath(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(cache));
    FileSystem fs = FileSystem.get(new URI(cache), conf);
    FileStatus[] status = fs.listStatus(new Path(cache));
    if (status != null) {
        fs.delete(new Path(cache), true);
    }
    fs.close();
    job.setMapperClass(CoOccurrenceMapper.class);
    job.setReducerClass(CountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setNumReduceTasks(Integer.valueOf(reducerNum));
    boolean rv = job.waitForCompletion(true);
    if (rv) {
        writeDocNumFile(conf, job);
    }

    return rv;
}

From source file:net.java.jatextmining.JaWordCounter.java

License:Apache License

/**
 * Clear the POS flags from the configuration.
 * @param conf Specify the Hadoop configuration object.
 */
private void clearPos(Configuration conf) {
    for (String posBuf : posSet) {
        conf.setBoolean("posBuf", false);
    }
}

From source file:net.java.jatextmining.JaWordCounter.java

License:Apache License

/**
 * The implementation that starts counting words from documents with MapReduce.
 * @param conf Specify the Hadoop configuration object.
 * @return Returns true on success, false otherwise.
 * @throws IOException Exception for IO.
 * @throws InterruptedException Exception for threads.
 * @throws ClassNotFoundException Exception for finding class.
 */
private boolean runCount(Configuration conf) throws IOException, InterruptedException, ClassNotFoundException {
    String reducerNum = conf.get("jatextmining.JaWordCounterReducerNum");
    conf.setBoolean("df", true);
    if (weightingFlag) {
        conf.setBoolean("weighting", true);
    }
    Job job = new Job(conf);
    job.setJarByClass(JaWordCounter.class);
    TextInputFormat.addInputPath(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));
    job.setMapperClass(CountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setReducerClass(CountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setNumReduceTasks(Integer.valueOf(reducerNum));

    return job.waitForCompletion(true);
}

From source file:net.java.jatextmining.util.Compressor.java

License:Apache License

/**
 * Run the MapReduce job that compresses files on HDFS.
 * @param conf Specify the Hadoop Configuration object.
 * @return Returns true on success, false otherwise.
 * @throws IOException Exception for IO.
 * @throws InterruptedException Exception for threads (waitForCompletion()).
 * @throws ClassNotFoundException Exception for waitForCompletion().
 */
private boolean runCompressor(Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.setBoolean("mapred.output.compress", true);
    conf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);
    Job job = new Job(conf);
    job.setJarByClass(Compressor.class);
    TextInputFormat.addInputPath(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));
    job.setMapperClass(CompressorMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    return job.waitForCompletion(true);
}
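
mapred.output.compress and mapred.output.compression.codec are the pre-2.x property names. A hedged sketch of the same effect through the new-API FileOutputFormat helpers, which write the corresponding properties into the job's Configuration, would look roughly like this:

    Job job = new Job(conf);
    FileOutputFormat.setOutputPath(job, new Path(out));
    // Equivalent to conf.setBoolean("mapred.output.compress", true) on older releases.
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);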

From source file:nl.gridline.zieook.inx.movielens.RowSimilarityZieOok.java

License:Apache License

@Override
@SuppressWarnings("rawtypes")
protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {

    Job job = new Job(new Configuration(getConf()));
    Configuration jobConf = job.getConfiguration();

    // This is not working, so we set the jar directly:
    // if (reducer.equals(Reducer.class))
    // {
    // if (mapper.equals(Mapper.class))
    // {
    // throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    // }
    // job.setJarByClass(mapper);
    // }
    // else
    // {
    // job.setJarByClass(reducer);
    // }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setJobName(getCustomJobName(job, mapper, reducer));

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
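
On Hadoop 2.x and later the same map-output compression flag is exposed through an MRJobConfig constant; a sketch of the equivalent call, assuming that class is on the classpath:

    // "mapreduce.map.output.compress" supersedes the legacy "mapred.compress.map.output" key.
    jobConf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);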

From source file:nl.gridline.zieook.runners.cf.RecommenderJobZieOok.java

License:Apache License

@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    addInputOption();
    addOutputOption();
    addOption("numRecommendations", "n", "Number of recommendations per user",
            String.valueOf(AggregateAndRecommendReducer.DEFAULT_NUM_RECOMMENDATIONS));
    addOption("usersFile", "u", "File of users to recommend for", null);
    addOption("itemsFile", "i", "File of items to recommend for", null);
    addOption("filterFile", "f",
            "File containing comma-separated userID,itemID pairs. Used to exclude the item from "
                    + "the recommendations for that user (optional)",
            null);
    addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
    addOption("maxPrefsPerUser", "mp",
            "Maximum number of preferences considered per user in final recommendation phase",
            String.valueOf(UserVectorSplitterMapper.DEFAULT_MAX_PREFS_PER_USER_CONSIDERED));
    addOption("minPrefsPerUser", "mp",
            "ignore users with less preferences than this in the similarity computation " + "(default: "
                    + DEFAULT_MIN_PREFS_PER_USER + ')',
            String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
    addOption("maxSimilaritiesPerItem", "m", "Maximum number of similarities considered per item ",
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ITEM));
    addOption("maxCooccurrencesPerItem", "mo",
            "try to cap the number of cooccurrences per item to this " + "number (default: "
                    + DEFAULT_MAX_COOCCURRENCES_PER_ITEM + ')',
            String.valueOf(DEFAULT_MAX_COOCCURRENCES_PER_ITEM));
    addOption("similarityClassname", "s",
            "Name of distributed similarity class to instantiate, alternatively use "
                    + "one of the predefined similarities (" + SimilarityType.listEnumNames() + ')',
            String.valueOf(SimilarityType.SIMILARITY_COOCCURRENCE));

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Path inputPath = getInputPath();
    Path outputPath = getOutputPath();
    Path tempDirPath = new Path(parsedArgs.get("--tempDir"));
    int numRecommendations = Integer.parseInt(parsedArgs.get("--numRecommendations"));
    String usersFile = parsedArgs.get("--usersFile");
    String itemsFile = parsedArgs.get("--itemsFile");
    String filterFile = parsedArgs.get("--filterFile");
    boolean booleanData = Boolean.valueOf(parsedArgs.get("--booleanData"));
    int maxPrefsPerUser = Integer.parseInt(parsedArgs.get("--maxPrefsPerUser"));
    int minPrefsPerUser = Integer.parseInt(parsedArgs.get("--minPrefsPerUser"));
    int maxSimilaritiesPerItem = Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerItem"));
    int maxCooccurrencesPerItem = Integer.parseInt(parsedArgs.get("--maxCooccurrencesPerItem"));
    String similarityClassname = parsedArgs.get("--similarityClassname");

    Path userVectorPath = new Path(tempDirPath, "userVectors");
    Path itemIDIndexPath = new Path(tempDirPath, "itemIDIndex");
    Path countUsersPath = new Path(tempDirPath, "countUsers");
    Path itemUserMatrixPath = new Path(tempDirPath, "itemUserMatrix");
    Path similarityMatrixPath = new Path(tempDirPath, "similarityMatrix");
    Path prePartialMultiplyPath1 = new Path(tempDirPath, "prePartialMultiply1");
    Path prePartialMultiplyPath2 = new Path(tempDirPath, "prePartialMultiply2");
    Path explicitFilterPath = new Path(tempDirPath, "explicitFilterPath");
    Path partialMultiplyPath = new Path(tempDirPath, "partialMultiply");

    AtomicInteger currentPhase = new AtomicInteger();

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job itemIDIndex = prepareJob(inputPath, itemIDIndexPath, TextInputFormat.class, ItemIDIndexMapper.class,
                VarIntWritable.class, VarLongWritable.class, ItemIDIndexReducer.class, VarIntWritable.class,
                VarLongWritable.class, SequenceFileOutputFormat.class);
        itemIDIndex.setCombinerClass(ItemIDIndexReducer.class);
        task.setCurrentJob(itemIDIndex).waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job toUserVector = prepareJob(inputPath, userVectorPath, TextInputFormat.class, ToItemPrefsMapper.class,
                VarLongWritable.class, booleanData ? VarLongWritable.class : EntityPrefWritable.class,
                ToUserVectorReducer.class, VarLongWritable.class, VectorWritable.class,
                SequenceFileOutputFormat.class);
        toUserVector.getConfiguration().setBoolean(BOOLEAN_DATA, booleanData);
        toUserVector.getConfiguration().setInt(ToUserVectorReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser);
        task.setCurrentJob(toUserVector).waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job countUsers = prepareJob(userVectorPath, countUsersPath, SequenceFileInputFormat.class,
                CountUsersMapper.class, CountUsersKeyWritable.class, VarLongWritable.class,
                CountUsersReducer.class, VarIntWritable.class, NullWritable.class, TextOutputFormat.class);
        countUsers.setPartitionerClass(CountUsersKeyWritable.CountUsersPartitioner.class);
        countUsers.setGroupingComparatorClass(CountUsersKeyWritable.CountUsersGroupComparator.class);
        task.setCurrentJob(countUsers).waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job maybePruneAndTransponse = prepareJob(userVectorPath, itemUserMatrixPath,
                SequenceFileInputFormat.class, MaybePruneRowsMapper.class, IntWritable.class,
                DistributedRowMatrix.MatrixEntryWritable.class, ToItemVectorsReducer.class, IntWritable.class,
                VectorWritable.class, SequenceFileOutputFormat.class);
        maybePruneAndTransponse.getConfiguration().setInt(MaybePruneRowsMapper.MAX_COOCCURRENCES,
                maxCooccurrencesPerItem);
        task.setCurrentJob(maybePruneAndTransponse).waitForCompletion(true);
    }

    int numberOfUsers = TasteHadoopUtils.readIntFromFile(getConf(), countUsersPath);

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /*
         * Once DistributedRowMatrix uses the hadoop 0.20 API, we should refactor this call to something like
         * new DistributedRowMatrix(...).rowSimilarity(...)
         */
        try {
            ToolRunner.run(getConf(), new RowSimilarityZieOok(), new String[] { //
                    "--input", itemUserMatrixPath.toString(), //
                    "--output", similarityMatrixPath.toString(), //
                    "--numberOfColumns", String.valueOf(numberOfUsers), //
                    "--similarityClassname", similarityClassname, //
                    "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem + 1), //
                    "--tempDir", tempDirPath.toString() });
        } catch (Exception e) {
            throw new IllegalStateException("item-item-similarity computation failed", e);
        }
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job prePartialMultiply1 = prepareJob(similarityMatrixPath, prePartialMultiplyPath1,
                SequenceFileInputFormat.class, SimilarityMatrixRowWrapperMapper.class, VarIntWritable.class,
                VectorOrPrefWritable.class, Reducer.class, VarIntWritable.class, VectorOrPrefWritable.class,
                SequenceFileOutputFormat.class);
        task.setCurrentJob(prePartialMultiply1).waitForCompletion(true);

        Job prePartialMultiply2 = prepareJob(userVectorPath, prePartialMultiplyPath2,
                SequenceFileInputFormat.class, UserVectorSplitterMapper.class, VarIntWritable.class,
                VectorOrPrefWritable.class, Reducer.class, VarIntWritable.class, VectorOrPrefWritable.class,
                SequenceFileOutputFormat.class);
        if (usersFile != null) {
            prePartialMultiply2.getConfiguration().set(UserVectorSplitterMapper.USERS_FILE, usersFile);
        }
        prePartialMultiply2.getConfiguration().setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED,
                maxPrefsPerUser);
        task.setCurrentJob(prePartialMultiply2).waitForCompletion(true);

        Job partialMultiply = prepareJob(new Path(prePartialMultiplyPath1 + "," + prePartialMultiplyPath2),
                partialMultiplyPath, SequenceFileInputFormat.class, Mapper.class, VarIntWritable.class,
                VectorOrPrefWritable.class, ToVectorAndPrefReducer.class, VarIntWritable.class,
                VectorAndPrefsWritable.class, SequenceFileOutputFormat.class);

        /* necessary to make this job (having a combined input path) work on Amazon S3 */
        Configuration partialMultiplyConf = partialMultiply.getConfiguration();
        FileSystem fs = FileSystem.get(tempDirPath.toUri(), partialMultiplyConf);
        prePartialMultiplyPath1 = prePartialMultiplyPath1.makeQualified(fs);
        prePartialMultiplyPath2 = prePartialMultiplyPath2.makeQualified(fs);
        FileInputFormat.setInputPaths(partialMultiply, prePartialMultiplyPath1, prePartialMultiplyPath2);
        task.setCurrentJob(partialMultiply).waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {

        /* convert the user/item pairs to filter if a filterfile has been specified */
        if (filterFile != null) {
            Job itemFiltering = prepareJob(new Path(filterFile), explicitFilterPath, TextInputFormat.class,
                    ItemFilterMapper.class, VarLongWritable.class, VarLongWritable.class,
                    ItemFilterAsVectorAndPrefsReducer.class, VarIntWritable.class, VectorAndPrefsWritable.class,
                    SequenceFileOutputFormat.class);
            task.setCurrentJob(itemFiltering).waitForCompletion(true);
        }

        String aggregateAndRecommendInput = partialMultiplyPath.toString();
        if (filterFile != null) {
            aggregateAndRecommendInput += "," + explicitFilterPath;
        }

        Job aggregateAndRecommend = prepareJob(new Path(aggregateAndRecommendInput), outputPath,
                SequenceFileInputFormat.class, PartialMultiplyMapper.class, VarLongWritable.class,
                PrefAndSimilarityColumnWritable.class, AggregateAndRecommendReducer.class,
                VarLongWritable.class, RecommendedItemsWritable.class, SequenceFileOutputFormat.class);
        Configuration aggregateAndRecommendConf = aggregateAndRecommend.getConfiguration();
        if (itemsFile != null) {
            aggregateAndRecommendConf.set(AggregateAndRecommendReducer.ITEMS_FILE, itemsFile);
        }

        if (filterFile != null) {
            /* necessary to make this job (having a combined input path) work on Amazon S3 */
            FileSystem fs = FileSystem.get(tempDirPath.toUri(), aggregateAndRecommendConf);
            partialMultiplyPath = partialMultiplyPath.makeQualified(fs);
            explicitFilterPath = explicitFilterPath.makeQualified(fs);
            FileInputFormat.setInputPaths(aggregateAndRecommend, partialMultiplyPath, explicitFilterPath);
        }
        setIOSort(aggregateAndRecommend);
        aggregateAndRecommendConf.set(AggregateAndRecommendReducer.ITEMID_INDEX_PATH,
                itemIDIndexPath.toString());
        aggregateAndRecommendConf.setInt(AggregateAndRecommendReducer.NUM_RECOMMENDATIONS, numRecommendations);
        aggregateAndRecommendConf.setBoolean(BOOLEAN_DATA, booleanData);
        task.setCurrentJob(aggregateAndRecommend).waitForCompletion(true);
    }

    return 0;
}
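
For completeness, a hedged sketch of the consumer side of a flag stored with setBoolean (as with BOOLEAN_DATA above): a Mapper typically reads the value back in setup(). The class, key string, and generic types below are illustrative only.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ExampleMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private boolean booleanData;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Read back the flag the driver stored with setBoolean(); the second argument
        // is the default used when the property was never set. "example.boolean.data"
        // is a made-up key, not the actual BOOLEAN_DATA constant.
        booleanData = context.getConfiguration().getBoolean("example.boolean.data", false);
    }
}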