Example usage for org.apache.hadoop.conf Configuration setInt

List of usage examples for org.apache.hadoop.conf Configuration setInt

Introduction

On this page you can find example usages of org.apache.hadoop.conf Configuration setInt.

Prototype

public void setInt(String name, int value) 

Source Link

Document

Set the value of the name property to an int.

Usage

From source file:edu.isi.mavuno.app.distsim.ContextToPattern.java

License:Apache License

/**
 * Derives patterns from a set of contexts.
 *
 * <p>The contexts are divided into splits, patterns are extracted from every
 * {@code .examples} split, global pattern statistics are harvested when requested, and the
 * per-split results are combined under {@code <outputPath>/pattern-stats}. The temporary
 * {@code contexts-split} directory is removed before returning.
 *
 * @return 0 once every stage has run
 */
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    final Configuration conf = getConf();

    // required tool parameters
    final String contextsIn = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.ContextPath", conf);
    final String corpusDir = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.CorpusPath", conf);
    final String corpusFormat = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.CorpusClass", conf);
    final String extractorName = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.ExtractorClass", conf);
    final String extractorParams = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.ExtractorArgs", conf);
    final String minMatchCount = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.MinMatches", conf);
    final String globalStatsFlag = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.GlobalStats", conf);
    final boolean wantGlobalStats = Boolean.parseBoolean(globalStatsFlag);
    final String outDir = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.OutputPath", conf);

    // working directories used by the intermediate stages
    final String splitDir = outDir + "/contexts-split";
    final String patternsDir = splitDir + "/patterns";
    final String patternStatsDir = splitDir + "/pattern-stats";

    MavunoUtils.createDirectory(conf, outDir);

    sLogger.info("Tool name: ContextToPattern");
    sLogger.info(" - Context path: " + contextsIn);
    sLogger.info(" - Corpus path: " + corpusDir);
    sLogger.info(" - Corpus class: " + corpusFormat);
    sLogger.info(" - Extractor class: " + extractorName);
    sLogger.info(" - Extractor args: " + extractorParams);
    sLogger.info(" - Min matches: " + minMatchCount);
    sLogger.info(" - Harvest global stats: " + wantGlobalStats);
    sLogger.info(" - Output path: " + outDir);

    // where the total term counts live
    conf.set("Mavuno.TotalTermsPath", outDir + "/totalTerms");

    // stage 1: break the contexts into manageable chunks
    conf.set("Mavuno.Split.InputPath", contextsIn);
    conf.set("Mavuno.Split.OutputPath", splitDir);
    conf.set("Mavuno.Split.SplitKey", "context");
    new Split(conf).run();

    // stage 2: extract patterns from each ".examples" split, numbering the outputs from 0
    int splitCount = 0;
    for (FileStatus status : MavunoUtils.getDirectoryListing(conf, splitDir)) {
        if (status.getPath().getName().endsWith(".examples")) {
            conf.set("Mavuno.Extract.InputPath", status.getPath().toString());
            conf.set("Mavuno.Extract.CorpusPath", corpusDir);
            conf.set("Mavuno.Extract.CorpusClass", corpusFormat);
            conf.set("Mavuno.Extract.ExtractorClass", extractorName);
            conf.set("Mavuno.Extract.ExtractorArgs", extractorParams);
            conf.set("Mavuno.Extract.ExtractorTarget", "pattern");
            conf.set("Mavuno.Extract.MinMatches", minMatchCount);
            conf.set("Mavuno.Extract.OutputPath", patternsDir + "/" + splitCount);
            new Extract(conf).run();

            splitCount++;
        }
    }

    // stage 3 (optional): global pattern statistics
    if (wantGlobalStats) {
        conf.set("Mavuno.ExtractGlobalStats.InputPath", patternsDir + "/");
        conf.set("Mavuno.ExtractGlobalStats.CorpusPath", corpusDir);
        conf.set("Mavuno.ExtractGlobalStats.CorpusClass", corpusFormat);
        conf.set("Mavuno.ExtractGlobalStats.ExtractorClass", extractorName);
        conf.set("Mavuno.ExtractGlobalStats.ExtractorArgs", extractorParams);
        conf.set("Mavuno.ExtractGlobalStats.ExtractorTarget", "pattern");
        conf.set("Mavuno.ExtractGlobalStats.OutputPath", patternStatsDir + "/");
        new ExtractGlobalStats(conf).run();
    }

    // stage 4: merge the per-split results
    conf.set("Mavuno.CombineSplits.ExamplesPath", patternsDir);
    conf.set("Mavuno.CombineSplits.ExampleStatsPath", patternStatsDir);
    conf.set("Mavuno.CombineSplits.SplitKey", "context");
    conf.setInt("Mavuno.CombineSplits.TotalSplits", splitCount);
    conf.set("Mavuno.CombineSplits.OutputPath", outDir + "/pattern-stats");
    new CombineSplits(conf).run();

    // the intermediate splits are no longer needed
    MavunoUtils.removeDirectory(conf, splitDir);

    return 0;
}

From source file:edu.isi.mavuno.app.distsim.PatternToContext.java

License:Apache License

/**
 * Derives contexts from a set of patterns.
 *
 * <p>The patterns are divided into splits, contexts are extracted from every
 * {@code .examples} split, global context statistics are harvested when requested, and the
 * per-split results are combined under {@code <outputPath>/context-stats}. The temporary
 * {@code patterns-split} directory is removed before returning.
 *
 * @return 0 once every stage has run
 */
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    final Configuration conf = getConf();

    // required tool parameters
    final String patternsIn = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.PatternPath", conf);
    final String corpusDir = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.CorpusPath", conf);
    final String corpusFormat = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.CorpusClass", conf);
    final String extractorName = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.ExtractorClass", conf);
    final String extractorParams = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.ExtractorArgs", conf);
    final String minMatchCount = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.MinMatches", conf);
    final String globalStatsFlag = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.GlobalStats", conf);
    final boolean wantGlobalStats = Boolean.parseBoolean(globalStatsFlag);
    final String outDir = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.OutputPath", conf);

    // working directories used by the intermediate stages
    final String splitDir = outDir + "/patterns-split";
    final String contextsDir = splitDir + "/contexts";
    final String contextStatsDir = splitDir + "/context-stats";

    MavunoUtils.createDirectory(conf, outDir);

    sLogger.info("Tool name: PatternToContext");
    sLogger.info(" - Pattern path: " + patternsIn);
    sLogger.info(" - Corpus path: " + corpusDir);
    sLogger.info(" - Corpus class: " + corpusFormat);
    sLogger.info(" - Extractor class: " + extractorName);
    sLogger.info(" - Extractor args: " + extractorParams);
    sLogger.info(" - Min matches: " + minMatchCount);
    sLogger.info(" - Harvest global stats: " + wantGlobalStats);
    sLogger.info(" - Output path: " + outDir);

    // where the total term counts live
    conf.set("Mavuno.TotalTermsPath", outDir + "/totalTerms");

    // stage 1: break the patterns into manageable chunks
    conf.set("Mavuno.Split.InputPath", patternsIn);
    conf.set("Mavuno.Split.OutputPath", splitDir);
    conf.set("Mavuno.Split.SplitKey", "pattern");
    new Split(conf).run();

    // stage 2: extract contexts from each ".examples" split, numbering the outputs from 0
    int splitCount = 0;
    for (FileStatus status : MavunoUtils.getDirectoryListing(conf, splitDir)) {
        if (status.getPath().getName().endsWith(".examples")) {
            conf.set("Mavuno.Extract.InputPath", status.getPath().toString());
            conf.set("Mavuno.Extract.CorpusPath", corpusDir);
            conf.set("Mavuno.Extract.CorpusClass", corpusFormat);
            conf.set("Mavuno.Extract.ExtractorClass", extractorName);
            conf.set("Mavuno.Extract.ExtractorArgs", extractorParams);
            conf.set("Mavuno.Extract.ExtractorTarget", "context");
            conf.set("Mavuno.Extract.MinMatches", minMatchCount);
            conf.set("Mavuno.Extract.OutputPath", contextsDir + "/" + splitCount);
            new Extract(conf).run();

            splitCount++;
        }
    }

    // stage 3 (optional): global context statistics
    if (wantGlobalStats) {
        conf.set("Mavuno.ExtractGlobalStats.InputPath", contextsDir + "/");
        conf.set("Mavuno.ExtractGlobalStats.CorpusPath", corpusDir);
        conf.set("Mavuno.ExtractGlobalStats.CorpusClass", corpusFormat);
        conf.set("Mavuno.ExtractGlobalStats.ExtractorClass", extractorName);
        conf.set("Mavuno.ExtractGlobalStats.ExtractorArgs", extractorParams);
        conf.set("Mavuno.ExtractGlobalStats.ExtractorTarget", "context");
        conf.set("Mavuno.ExtractGlobalStats.OutputPath", contextStatsDir + "/");
        new ExtractGlobalStats(conf).run();
    }

    // stage 4: merge the per-split results
    conf.set("Mavuno.CombineSplits.ExamplesPath", contextsDir);
    conf.set("Mavuno.CombineSplits.ExampleStatsPath", contextStatsDir);
    conf.set("Mavuno.CombineSplits.SplitKey", "pattern");
    conf.setInt("Mavuno.CombineSplits.TotalSplits", splitCount);
    conf.set("Mavuno.CombineSplits.OutputPath", outDir + "/context-stats");
    new CombineSplits(conf).run();

    // the intermediate splits are no longer needed
    MavunoUtils.removeDirectory(conf, splitDir);

    return 0;
}

From source file:edu.isi.mavuno.app.distsim.PatternToPattern.java

License:Apache License

/**
 * Expands a set of patterns into related patterns by running {@code PatternToContext}
 * followed by {@code ContextToPattern}, both writing beneath the same output path.
 *
 * <p>The second stage reads {@code <outputPath>/context-stats} as its context input.
 *
 * @return 0 once both stages have run
 */
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    final Configuration conf = getConf();

    // required tool parameters
    final String seedPatterns = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.PatternPath", conf);
    final String corpusFormat = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.CorpusClass", conf);
    final String corpusDir = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.CorpusPath", conf);
    final String extractorName = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.ExtractorClass", conf);
    final String extractorParams = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.ExtractorArgs", conf);
    final String minMatchesRaw = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.MinMatches", conf);
    final int minMatchCount = Integer.parseInt(minMatchesRaw);
    final String globalStatsFlag = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.GlobalStats", conf);
    final boolean wantGlobalStats = Boolean.parseBoolean(globalStatsFlag);
    final String outDir = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.OutputPath", conf);

    MavunoUtils.createDirectory(conf, outDir);

    sLogger.info("Tool name: PatternToPattern");
    sLogger.info(" - Pattern path: " + seedPatterns);
    sLogger.info(" - Corpus class: " + corpusFormat);
    sLogger.info(" - Corpus path: " + corpusDir);
    sLogger.info(" - Output path: " + outDir);
    sLogger.info(" - Context class: " + extractorName);
    sLogger.info(" - Context arguments: " + extractorParams);
    sLogger.info(" - Min matches: " + minMatchCount);
    sLogger.info(" - Harvest global stats: " + wantGlobalStats);

    // first hop: patterns -> contexts
    conf.set("Mavuno.PatternToContext.PatternPath", seedPatterns);
    conf.set("Mavuno.PatternToContext.CorpusPath", corpusDir);
    conf.set("Mavuno.PatternToContext.CorpusClass", corpusFormat);
    conf.set("Mavuno.PatternToContext.ExtractorClass", extractorName);
    conf.set("Mavuno.PatternToContext.ExtractorArgs", extractorParams);
    conf.setInt("Mavuno.PatternToContext.MinMatches", minMatchCount);
    conf.setBoolean("Mavuno.PatternToContext.GlobalStats", wantGlobalStats);
    conf.set("Mavuno.PatternToContext.OutputPath", outDir);
    new PatternToContext(conf).run();

    // second hop: contexts -> patterns, fed by the context stats written above
    conf.set("Mavuno.ContextToPattern.ContextPath", outDir + "/context-stats");
    conf.set("Mavuno.ContextToPattern.CorpusPath", corpusDir);
    conf.set("Mavuno.ContextToPattern.CorpusClass", corpusFormat);
    conf.set("Mavuno.ContextToPattern.ExtractorClass", extractorName);
    conf.set("Mavuno.ContextToPattern.ExtractorArgs", extractorParams);
    conf.setInt("Mavuno.ContextToPattern.MinMatches", minMatchCount);
    conf.setBoolean("Mavuno.ContextToPattern.GlobalStats", wantGlobalStats);
    conf.set("Mavuno.ContextToPattern.OutputPath", outDir);
    new ContextToPattern(conf).run();

    return 0;
}

From source file:edu.isi.mavuno.app.ie.HarvestEspressoContexts.java

License:Apache License

/**
 * Iteratively harvests contexts starting from a set of seed contexts.
 *
 * <p>The seed examples are first converted to a sequence file under
 * {@code <outputPath>/0}. Each iteration {@code i} then reads the previous iteration's
 * scored contexts, extracts and scores patterns, optionally keeps only the top
 * {@code NumPatterns * i} patterns (skipped when {@code NumPatterns} is negative),
 * extracts and scores contexts from those patterns, and finally deletes the previous
 * iteration's directory. {@code NumIterations} is optional and defaults to 1.
 *
 * @return 0 once all iterations have run
 * @throws NumberFormatException if a numeric parameter cannot be parsed as an int
 */
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String inputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.InputPath", conf);
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.CorpusClass", conf);
    String extractorClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.ExtractorClass", conf);
    String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.ExtractorArgs", conf);
    String scorerClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.ScorerClass", conf);
    String scorerArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.ScorerArgs", conf);
    int numPatterns = Integer
            .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.NumPatterns", conf));
    int minMatches = Integer
            .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.MinMatches", conf));
    String baseOutputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.OutputPath", conf);

    // the iteration count is optional; a single pass is the default
    String numIterations = MavunoUtils.getOptionalParam("Mavuno.HarvestEspressoContexts.NumIterations", conf);
    int iterations = (numIterations != null) ? Integer.parseInt(numIterations) : 1;

    MavunoUtils.createDirectory(conf, baseOutputPath);

    sLogger.info("Tool name: HarvestEspressoContexts");
    sLogger.info(" - Input path: " + inputPath);
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Extractor class: " + extractorClass);
    sLogger.info(" - Extractor args: " + extractorArgs);
    sLogger.info(" - Scorer class: " + scorerClass);
    sLogger.info(" - Scorer args: " + scorerArgs);
    sLogger.info(" - Number of patterns: " + numPatterns);
    sLogger.info(" - Minimum matches: " + minMatches);
    sLogger.info(" - Iterations: " + iterations);
    sLogger.info(" - Output path: " + baseOutputPath);

    // initial sub output path
    MavunoUtils.createDirectory(conf, baseOutputPath + "/0");
    MavunoUtils.createDirectory(conf, baseOutputPath + "/0/contexts-scored");

    // examples -> sequence file
    conf.set("Mavuno.ExamplesToSequenceFile.InputPath", inputPath);
    conf.set("Mavuno.ExamplesToSequenceFile.OutputPath",
            baseOutputPath + "/0/contexts-scored/scored-contexts-raw");
    new ExamplesToSequenceFile(conf).run();

    // iterate procedure
    for (int i = 1; i <= iterations; i++) {
        // previous output path (input to current iteration)
        String prevOutputPath = baseOutputPath + "/" + (i - 1);

        // current output path
        String curOutputPath = baseOutputPath + "/" + i;
        MavunoUtils.createDirectory(conf, curOutputPath);

        // seeds -> patterns
        conf.set("Mavuno.ContextToPattern.ContextPath",
                prevOutputPath + "/contexts-scored/scored-contexts-raw");
        conf.set("Mavuno.ContextToPattern.CorpusPath", corpusPath);
        conf.set("Mavuno.ContextToPattern.CorpusClass", corpusClass);
        conf.set("Mavuno.ContextToPattern.ExtractorClass", extractorClass);
        conf.set("Mavuno.ContextToPattern.ExtractorArgs", extractorArgs);
        conf.setInt("Mavuno.ContextToPattern.MinMatches", minMatches);
        conf.setBoolean("Mavuno.ContextToPattern.GlobalStats", true);
        conf.set("Mavuno.ContextToPattern.OutputPath", curOutputPath + "/patterns");
        new ContextToPattern(conf).run();

        // score patterns
        conf.set("Mavuno.ComputePatternScores.InputPath", curOutputPath + "/patterns");
        // Configuration.set rejects a null value, so clear the context scorer by removing
        // the property rather than assigning null.
        // NOTE(review): assumes ComputePatternScores treats a missing property as
        // "no context scorer" - confirm against that tool.
        conf.unset("Mavuno.ComputePatternScores.ContextScorerClass");
        conf.set("Mavuno.ComputePatternScores.PatternScorerClass", scorerClass);
        conf.set("Mavuno.ComputePatternScores.PatternScorerArgs", scorerArgs);
        conf.set("Mavuno.ComputePatternScores.OutputPath", curOutputPath + "/patterns-scored");
        new ComputePatternScores(conf).run();

        if (numPatterns >= 0) {
            // only retain top-(k * i) patterns
            conf.set("Mavuno.GetTopResults.InputPath", curOutputPath + "/patterns-scored/scored-patterns");
            conf.set("Mavuno.GetTopResults.OutputPath", curOutputPath + "/patterns-scored-top");
            conf.setInt("Mavuno.GetTopResults.NumResults", numPatterns * i);
            conf.setBoolean("Mavuno.GetTopResults.SequenceFileOutputFormat", true);
            new GetTopResults(conf).run();

            // the pruned list feeds the next stage
            conf.set("Mavuno.PatternToContext.PatternPath", curOutputPath + "/patterns-scored-top");
        } else {
            // no pruning requested: feed every scored pattern to the next stage
            conf.set("Mavuno.PatternToContext.PatternPath",
                    curOutputPath + "/patterns-scored/scored-patterns-raw");
        }

        // patterns -> contexts
        conf.set("Mavuno.PatternToContext.CorpusPath", corpusPath);
        conf.set("Mavuno.PatternToContext.CorpusClass", corpusClass);
        conf.set("Mavuno.PatternToContext.ExtractorClass", extractorClass);
        conf.set("Mavuno.PatternToContext.ExtractorArgs", extractorArgs);
        conf.setInt("Mavuno.PatternToContext.MinMatches", minMatches);
        conf.setBoolean("Mavuno.PatternToContext.GlobalStats", true);
        conf.set("Mavuno.PatternToContext.OutputPath", curOutputPath + "/contexts");
        new PatternToContext(conf).run();

        // score contexts
        conf.set("Mavuno.ComputeContextScores.InputPath", curOutputPath + "/contexts");
        // see above: remove the property instead of setting it to null
        conf.unset("Mavuno.ComputeContextScores.PatternScorerClass");
        conf.set("Mavuno.ComputeContextScores.ContextScorerClass", scorerClass);
        conf.set("Mavuno.ComputeContextScores.ContextScorerArgs", scorerArgs);
        conf.set("Mavuno.ComputeContextScores.OutputPath", curOutputPath + "/contexts-scored");
        new ComputeContextScores(conf).run();

        // delete previous output path
        MavunoUtils.removeDirectory(conf, prevOutputPath);
    }

    return 0;
}

From source file:edu.isi.mavuno.app.ie.HarvestEspressoPatterns.java

License:Apache License

/**
 * Iteratively harvests patterns starting from a set of seed patterns.
 *
 * <p>The seed examples are first converted to a sequence file under
 * {@code <outputPath>/0}. Each iteration {@code i} then reads the previous iteration's
 * scored patterns, extracts and scores contexts, optionally keeps only the top
 * {@code NumContexts * i} contexts (skipped when {@code NumContexts} is negative),
 * extracts and scores patterns from those contexts, and finally deletes the previous
 * iteration's directory. {@code NumIterations} is optional and defaults to 1.
 *
 * @return 0 once all iterations have run
 * @throws NumberFormatException if a numeric parameter cannot be parsed as an int
 */
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String inputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.InputPath", conf);
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.CorpusClass", conf);
    String extractorClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.ExtractorClass", conf);
    String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.ExtractorArgs", conf);
    String scorerClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.ScorerClass", conf);
    String scorerArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.ScorerArgs", conf);
    int numContexts = Integer
            .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.NumContexts", conf));
    int minMatches = Integer
            .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.MinMatches", conf));
    String baseOutputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.OutputPath", conf);

    // the iteration count is optional; a single pass is the default
    String numIterations = MavunoUtils.getOptionalParam("Mavuno.HarvestEspressoPatterns.NumIterations", conf);
    int iterations = (numIterations != null) ? Integer.parseInt(numIterations) : 1;

    MavunoUtils.createDirectory(conf, baseOutputPath);

    sLogger.info("Tool name: HarvestEspressoPatterns");
    sLogger.info(" - Input path: " + inputPath);
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Extractor class: " + extractorClass);
    sLogger.info(" - Extractor args: " + extractorArgs);
    sLogger.info(" - Scorer class: " + scorerClass);
    sLogger.info(" - Scorer args: " + scorerArgs);
    sLogger.info(" - Number of contexts: " + numContexts);
    sLogger.info(" - Minimum matches: " + minMatches);
    sLogger.info(" - Iterations: " + iterations);
    sLogger.info(" - Output path: " + baseOutputPath);

    // initial sub output path
    MavunoUtils.createDirectory(conf, baseOutputPath + "/0");
    MavunoUtils.createDirectory(conf, baseOutputPath + "/0/patterns-scored");

    // patterns -> sequence file
    conf.set("Mavuno.ExamplesToSequenceFile.InputPath", inputPath);
    conf.set("Mavuno.ExamplesToSequenceFile.OutputPath",
            baseOutputPath + "/0/patterns-scored/scored-patterns-raw");
    new ExamplesToSequenceFile(conf).run();

    // iterate procedure
    for (int i = 1; i <= iterations; i++) {
        // previous output path (input to current iteration)
        String prevOutputPath = baseOutputPath + "/" + (i - 1);

        // current output path
        String curOutputPath = baseOutputPath + "/" + i;
        MavunoUtils.createDirectory(conf, curOutputPath);

        // seeds -> contexts
        conf.set("Mavuno.PatternToContext.PatternPath",
                prevOutputPath + "/patterns-scored/scored-patterns-raw");
        conf.set("Mavuno.PatternToContext.CorpusPath", corpusPath);
        conf.set("Mavuno.PatternToContext.CorpusClass", corpusClass);
        conf.set("Mavuno.PatternToContext.ExtractorClass", extractorClass);
        conf.set("Mavuno.PatternToContext.ExtractorArgs", extractorArgs);
        conf.setInt("Mavuno.PatternToContext.MinMatches", minMatches);
        conf.setBoolean("Mavuno.PatternToContext.GlobalStats", true);
        conf.set("Mavuno.PatternToContext.OutputPath", curOutputPath + "/contexts");
        new PatternToContext(conf).run();

        // score contexts
        conf.set("Mavuno.ComputeContextScores.InputPath", curOutputPath + "/contexts");
        // Configuration.set rejects a null value, so clear the pattern scorer by removing
        // the property rather than assigning null.
        // NOTE(review): assumes ComputeContextScores treats a missing property as
        // "no pattern scorer" - confirm against that tool.
        conf.unset("Mavuno.ComputeContextScores.PatternScorerClass");
        conf.set("Mavuno.ComputeContextScores.ContextScorerClass", scorerClass);
        conf.set("Mavuno.ComputeContextScores.ContextScorerArgs", scorerArgs);
        conf.set("Mavuno.ComputeContextScores.OutputPath", curOutputPath + "/contexts-scored");
        new ComputeContextScores(conf).run();

        if (numContexts >= 0) {
            // only retain top-(k * i) contexts
            conf.set("Mavuno.GetTopResults.InputPath", curOutputPath + "/contexts-scored/scored-contexts");
            conf.set("Mavuno.GetTopResults.OutputPath", curOutputPath + "/contexts-scored-top");
            conf.setInt("Mavuno.GetTopResults.NumResults", numContexts * i);
            conf.setBoolean("Mavuno.GetTopResults.SequenceFileOutputFormat", true);
            new GetTopResults(conf).run();

            // the pruned list feeds the next stage
            conf.set("Mavuno.ContextToPattern.ContextPath", curOutputPath + "/contexts-scored-top");
        } else {
            // no pruning requested: feed every scored context to the next stage
            conf.set("Mavuno.ContextToPattern.ContextPath",
                    curOutputPath + "/contexts-scored/scored-contexts-raw");
        }

        // contexts -> patterns
        conf.set("Mavuno.ContextToPattern.CorpusPath", corpusPath);
        conf.set("Mavuno.ContextToPattern.CorpusClass", corpusClass);
        conf.set("Mavuno.ContextToPattern.ExtractorClass", extractorClass);
        conf.set("Mavuno.ContextToPattern.ExtractorArgs", extractorArgs);
        conf.setInt("Mavuno.ContextToPattern.MinMatches", minMatches);
        conf.setBoolean("Mavuno.ContextToPattern.GlobalStats", true);
        conf.set("Mavuno.ContextToPattern.OutputPath", curOutputPath + "/patterns");
        new ContextToPattern(conf).run();

        // score patterns
        conf.set("Mavuno.ComputePatternScores.InputPath", curOutputPath + "/patterns");
        // see above: remove the property instead of setting it to null
        conf.unset("Mavuno.ComputePatternScores.ContextScorerClass");
        conf.set("Mavuno.ComputePatternScores.PatternScorerClass", scorerClass);
        conf.set("Mavuno.ComputePatternScores.PatternScorerArgs", scorerArgs);
        conf.set("Mavuno.ComputePatternScores.OutputPath", curOutputPath + "/patterns-scored");
        new ComputePatternScores(conf).run();

        // delete previous output path
        MavunoUtils.removeDirectory(conf, prevOutputPath);
    }

    return 0;
}

From source file:edu.isi.mavuno.extract.ExtractGlobalStats.java

License:Apache License

/**
 * Computes global statistics for a set of examples over the corpus.
 *
 * <p>The examples are split by the (lower-cased) extractor target into
 * {@code <outputPath>/../split}. One MapReduce job is run per {@code .examples} split,
 * writing block-compressed sequence files to {@code <outputPath>/../split/<n>}. The
 * per-split outputs are then merged into {@code outputPath} by {@code CombineGlobalStats}
 * and the temporary split directory is removed.
 *
 * @return 0 once all stages have run
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String inputPath = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.InputPath", conf);
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.CorpusClass", conf);
    String extractorClass = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.ExtractorClass", conf);
    String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.ExtractorArgs", conf);
    String extractorTarget = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.ExtractorTarget", conf)
            .toLowerCase();
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.OutputPath", conf);

    // split examples
    conf.set("Mavuno.Split.InputPath", inputPath);
    conf.set("Mavuno.Split.OutputPath", outputPath + "/../split");
    conf.set("Mavuno.Split.SplitKey", extractorTarget);
    new Split(conf).run();

    // get splits
    FileStatus[] files = MavunoUtils.getDirectoryListing(conf, outputPath + "/../split");
    int split = 0;
    for (FileStatus file : files) {
        // only the ".examples" files in the split directory are processed
        if (!file.getPath().getName().endsWith(".examples")) {
            continue;
        }

        // set before the Job is created from conf so the job's configuration carries this value
        conf.set("Mavuno.ExtractGlobalStats.ExamplesPath", file.getPath().toString());

        sLogger.info("Tool name: ExtractGlobalStats");
        sLogger.info(" - Input path: " + inputPath);
        sLogger.info(" - Examples path: " + file.getPath());
        sLogger.info(" - Example split: " + split);
        sLogger.info(" - Corpus path: " + corpusPath);
        sLogger.info(" - Corpus class: " + corpusClass);
        sLogger.info(" - Extractor class: " + extractorClass);
        sLogger.info(" - Extractor args: " + extractorArgs);
        sLogger.info(" - Extractor target: " + extractorTarget);
        sLogger.info(" - Output path: " + outputPath);

        Job job = new Job(conf);
        job.setJobName("ExtractGlobalStats");

        // the corpus is the job input; each split writes to its own numbered directory
        MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
        FileOutputFormat.setOutputPath(job, new Path(outputPath + "/../split/" + split));

        // the corpus input format is loaded by name, hence the unchecked cast
        job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        FileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        job.setMapOutputKeyClass(ContextPatternWritable.class);
        job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
        job.setPartitionerClass(ContextPatternWritable.FullPartitioner.class);
        job.setMapOutputValueClass(ContextPatternStatsWritable.class);

        job.setOutputKeyClass(ContextPatternWritable.class);
        job.setOutputValueClass(ContextPatternStatsWritable.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // NOTE(review): the success flag returned here is ignored, so a failed split job
        // does not stop the run - confirm this best-effort behavior is intended.
        job.waitForCompletion(true);

        split++;
    }

    // combine splits
    conf.setInt("Mavuno.CombineGlobalStats.TotalSplits", split);
    conf.set("Mavuno.CombineGlobalStats.InputPath", outputPath + "/../split/");
    conf.set("Mavuno.CombineGlobalStats.OutputPath", outputPath);
    new CombineGlobalStats(conf).run();

    // the intermediate splits are no longer needed
    MavunoUtils.removeDirectory(conf, outputPath + "/../split");

    return 0;
}

From source file:edu.iu.ccd.CCDLauncher.java

License:Apache License

/**
 * Builds the map-only job for one CCD run.
 *
 * <p>The algorithm parameters are written into {@code configuration} before the job is
 * created from it; the job itself is named {@code ccd_job_<jobID>}, uses the
 * {@code map-collective} framework and has no reduce tasks.
 *
 * @return the configured, not yet submitted job
 */
private Job configureCCDJob(Path inputDir, int r, double lambda, int numIterations, int numMapTasks,
        int numThreadsPerWorker, int numModelSlices, Path modelDir, Path outputDir, String testFilePath,
        Configuration configuration, int jobID) throws IOException, URISyntaxException {
    // algorithm parameters, stored before the job is created from this configuration
    configuration.setInt(Constants.R, r);
    configuration.setDouble(Constants.LAMBDA, lambda);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);

    final String modelDirName = modelDir.toString();
    System.out.println("Model Dir Path: " + modelDirName);
    configuration.set(Constants.MODEL_DIR, modelDirName);
    configuration.setInt(Constants.NUM_MODEL_SLICES, numModelSlices);
    configuration.set(Constants.TEST_FILE_PATH, testFilePath);

    final Job ccdJob = Job.getInstance(configuration, "ccd_job_" + jobID);

    // framework-level settings are applied to the job's own configuration
    final JobConf ccdJobConf = (JobConf) ccdJob.getConfiguration();
    ccdJobConf.set("mapreduce.framework.name", "map-collective");
    ccdJobConf.setNumMapTasks(numMapTasks);
    ccdJobConf.setInt("mapreduce.job.max.split.locations", 10000);

    // input/output locations and formats
    FileInputFormat.setInputPaths(ccdJob, inputDir);
    FileOutputFormat.setOutputPath(ccdJob, outputDir);
    ccdJob.setInputFormatClass(MultiFileInputFormat.class);

    // mapper wiring; no reducers are scheduled
    ccdJob.setJarByClass(CCDLauncher.class);
    ccdJob.setMapperClass(CCDMPCollectiveMapper.class);
    ccdJob.setNumReduceTasks(0);

    return ccdJob;
}

From source file:edu.iu.daal_adaboost.ADABOOSTDaalLauncher.java

License:Apache License

/**
 * Launches all the tasks in order./*ww  w  .java 2 s  .c o  m*/
 */
@Override
public int run(String[] args) throws Exception {

    /* Put shared libraries into the distributed cache */
    Configuration conf = this.getConf();

    Initialize init = new Initialize(conf, args);

    /* Put shared libraries into the distributed cache */
    init.loadDistributedLibs();

    // load args
    init.loadSysArgs();

    //load app args
    conf.setInt(HarpDAALConstants.FEATURE_DIM, Integer.parseInt(args[init.getSysArgNum()]));
    conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum() + 1]));
    conf.set(HarpDAALConstants.TEST_FILE_PATH, args[init.getSysArgNum() + 2]);

    // launch job
    System.out.println("Starting Job");
    long perJobSubmitTime = System.currentTimeMillis();
    System.out.println(
            "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    Job adaboostJob = init.createJob("AdaboostJob", ADABOOSTDaalLauncher.class,
            ADABOOSTDaalCollectiveMapper.class);

    // finish job
    boolean jobSuccess = adaboostJob.waitForCompletion(true);
    System.out.println(
            "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    System.out.println(
            "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |");
    if (!jobSuccess) {
        adaboostJob.killJob();
        System.out.println("Adaboost Job failed");
    }

    return 0;
}

From source file:edu.iu.daal_als.ALSDaalLauncher.java

License:Apache License

/**
 * Launches ALS workers./*from w w  w  .ja  v  a 2s  . c om*/
 */
@Override
public int run(String[] args) throws Exception {

    /* Put shared libraries into the distributed cache */
    Configuration conf = this.getConf();

    Initialize init = new Initialize(conf, args);

    /* Put shared libraries into the distributed cache */
    init.loadDistributedLibs();

    // load args
    init.loadSysArgs();

    conf.setInt(HarpDAALConstants.NUM_FACTOR, Integer.parseInt(args[init.getSysArgNum()]));
    conf.setDouble(Constants.ALPHA, Double.parseDouble(args[init.getSysArgNum() + 1]));
    conf.setDouble(Constants.LAMBDA, Double.parseDouble(args[init.getSysArgNum() + 2]));
    conf.set(HarpDAALConstants.TEST_FILE_PATH, args[init.getSysArgNum() + 3]);

    // launch job
    System.out.println("Starting Job");
    long perJobSubmitTime = System.currentTimeMillis();
    System.out.println(
            "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));

    Job alsJob = init.createJob("alsJob", ALSDaalLauncher.class, ALSDaalCollectiveMapper.class);

    // finish job
    boolean jobSuccess = alsJob.waitForCompletion(true);
    System.out.println(
            "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    System.out.println(
            "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |");
    if (!jobSuccess) {
        alsJob.killJob();
        System.out.println("alsJob failed");
    }

    return 0;
}

From source file:edu.iu.daal_als_batch.ALSBatchDaalLauncher.java

License:Apache License

/**
 * Launches all the tasks in order.
 *
 * <p>After the system arguments are loaded, two application arguments are read from
 * {@code args} starting at index {@code init.getSysArgNum()}: the file dimension and the
 * number of factors.
 *
 * @param args system arguments followed by the application arguments described above
 * @return 0 if the job completed successfully, 1 if it failed
 * @throws Exception if initialization, job creation or job execution fails
 */
@Override
public int run(String[] args) throws Exception {

    Configuration conf = this.getConf();

    Initialize init = new Initialize(conf, args);

    /* Put shared libraries into the distributed cache */
    init.loadDistributedLibs();

    // load args
    init.loadSysArgs();

    //load app args
    conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum()]));
    conf.setInt(HarpDAALConstants.NUM_FACTOR, Integer.parseInt(args[init.getSysArgNum() + 1]));

    // launch job
    System.out.println("Starting Job");
    long perJobSubmitTime = System.currentTimeMillis();
    System.out.println(
            "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    Job alsbatchJob = init.createJob("AlsbatchJob", ALSBatchDaalLauncher.class,
            ALSBatchDaalCollectiveMapper.class);

    // finish job
    boolean jobSuccess = alsbatchJob.waitForCompletion(true);
    System.out.println(
            "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    System.out.println(
            "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |");
    if (!jobSuccess) {
        alsbatchJob.killJob();
        System.out.println("ALSbatch Job failed");
        // report the failure to the caller instead of claiming success
        return 1;
    }

    return 0;
}