Example usage for org.apache.hadoop.conf Configuration setBoolean

List of usage examples for org.apache.hadoop.conf Configuration setBoolean

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.setBoolean.

Prototype

public void setBoolean(String name, boolean value) 

Source Link

Document

Sets the value of the property identified by name to the given boolean value.

Usage

From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java

License:Apache License

/**
 * Pass 1: runs the MapReduce job that generates collocations (n-grams).
 *
 * <p>All settings are applied to a copy of {@code baseConf}. The job reads sequence files from
 * {@code input} and writes sequence files to {@code SUBGRAM_OUTPUT_DIRECTORY} below {@code output}.
 *
 * @param input        input path handed to {@code FileInputFormat}
 * @param output       parent directory of the job output
 * @param baseConf     configuration the job configuration is copied from
 * @param emitUnigrams value stored under {@code EMIT_UNIGRAMS}
 * @param maxNGramSize value stored under {@code CollocMapper.MAX_SHINGLE_SIZE}
 * @param reduceTasks  NOTE(review): never read here; the reducer count is fixed at 512 below.
 *                     Confirm whether this parameter is meant to be honoured.
 * @param minSupport   value stored under {@code CollocReducer.MIN_SUPPORT}
 * @param mode         window type; its string form is stored under {@code WINDOW_TYPE} and
 *                     selects the memory profile ("DOCUMENT", case-insensitive, gets the larger one)
 * @param winsize      value stored under {@code WINDOW_SIZE}
 * @return the value of the {@code CollocMapper.Count.NGRAM_TOTAL} counter once the job is done
 * @throws IllegalStateException if the job does not complete successfully
 */
private static long generateCollocations(Path input, Path output, Configuration baseConf, boolean emitUnigrams,
        int maxNGramSize, int reduceTasks, int minSupport, Window mode, int winsize)
        throws IOException, ClassNotFoundException, InterruptedException {

    final Configuration jobConf = new Configuration(baseConf);
    final String windowName = mode.toString();

    // Algorithm parameters read by the mapper and reducer.
    jobConf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    jobConf.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
    jobConf.setInt(CollocReducer.MIN_SUPPORT, minSupport);
    jobConf.set(WINDOW_TYPE, windowName);
    jobConf.setInt(WINDOW_SIZE, winsize);

    // Document-sized windows get a larger memory profile than the other window types.
    final boolean documentWindow = windowName.equalsIgnoreCase("DOCUMENT");
    jobConf.setInt("mapred.job.map.memory.mb", documentWindow ? 3000 : 2000);
    jobConf.set("mapred.child.java.opts", documentWindow ? "-Xmx2900M" : "-Xmx1900M");
    jobConf.set("mapred.reduce.child.java.opts", documentWindow ? "-Xmx8000M" : "-Xmx2900M");
    jobConf.setInt("mapred.job.reduce.memory.mb", documentWindow ? 8120 : 3000);

    // Compress both the intermediate map output and the final job output.
    jobConf.setBoolean("mapred.compress.map.output", true);
    jobConf.setStrings("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
    jobConf.setBoolean("mapred.compress.output", true);
    jobConf.setStrings("mapred.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");

    // Remaining cluster tuning knobs.
    jobConf.setInt("mapred.task.timeout", 6000000);
    jobConf.setInt("io.sort.factor", 50);
    jobConf.setInt("mapreduce.map.tasks", 256);
    jobConf.setInt("dfs.replication", 1);

    final Job job = new Job(jobConf);
    job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
    job.setJarByClass(CollocDriver.class);

    // Map side.
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, input);
    job.setMapperClass(CollocMapper.class);
    job.setMapOutputKeyClass(GramKey.class);
    job.setMapOutputValueClass(Gram.class);

    // Shuffle.
    job.setCombinerClass(CollocCombiner.class);
    job.setPartitionerClass(GramKeyPartitioner.class);
    job.setGroupingComparatorClass(GramKeyGroupComparator.class);

    // Reduce side.
    job.setReducerClass(CollocReducer.class);
    job.setNumReduceTasks(512);
    job.setOutputKeyClass(Gram.class);
    job.setOutputValueClass(Gram.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));

    if (!job.waitForCompletion(true)) {
        throw new IllegalStateException("Job failed!");
    }

    return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}

From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java

License:Apache License

/**
 * pass2: perform the LLR calculation/*  w  w w . ja  v  a2s.  c o  m*/
 */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
        boolean emitUnigrams, float minValue, int reduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    conf.setLong(AssocReducer.NGRAM_TOTAL, nGramTotal);
    conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    conf.setFloat(AssocReducer.MIN_VALUE, minValue);
    conf.setInt("mapred.job.map.memory.mb", 1280);
    conf.setInt("mapred.job.reduce.memory.mb", 2560);
    conf.set("mapred.reduce.child.java.opts", "-Xmx2G");
    conf.setInt("mapred.task.timeout", 6000000);
    conf.set(AssocReducer.ASSOC_METRIC, "llr");

    Job job = new Job(conf);
    job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output + " pruning: " + minValue);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(Gram.class);
    job.setMapOutputValueClass(Gram.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
    Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY + "_llr");
    FileOutputFormat.setOutputPath(job, outPath);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);
    job.setReducerClass(AssocReducer.class);
    job.setNumReduceTasks(reduceTasks);
    // Defines additional single text based output 'text' for the job
    MultipleOutputs.addNamedOutput(job, "contingency", TextOutputFormat.class, Text.class, Text.class);

    // Defines additional multi sequencefile based output 'sequence' for the
    // job
    MultipleOutputs.addNamedOutput(job, "llr", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "pmi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "chi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "dice", TextOutputFormat.class, Text.class, DoubleWritable.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}

From source file:edu.berkeley.chukwa_xtrace.TestXtrAdaptor.java

License:Apache License

/**
 * Registers an {@code XtrAdaptor} with a freshly started agent, emits two X-Trace events and
 * checks that the two resulting chunks carry the expected agent, tag and label fields.
 *
 * <p>The agent is shut down in a {@code finally} block so that a failed assertion or an
 * exception while waiting for a chunk does not leave the agent running.
 */
public void testXtrAdaptor() throws IOException, ChukwaAgent.AlreadyRunningException, InterruptedException {
    Configuration conf = new Configuration();
    File baseDir = new File(System.getProperty("test.build.data", "/tmp"));
    conf.set("chukwaAgent.checkpoint.dir", baseDir.getCanonicalPath());
    conf.setBoolean("chukwaAgent.checkpoint.enabled", false);
    conf.set("chukwaAgent.control.port", "0");
    ChukwaAgent agent = new ChukwaAgent(conf);
    try {
        ChunkCatcherConnector chunks = new ChunkCatcherConnector();
        chunks.start();

        // Reporter settings are supplied to X-Trace through system properties.
        System.setProperty("xtrace.reporter", "edu.berkeley.xtrace.reporting.TcpReporter");
        System.setProperty("xtrace.tcpdest", "localhost:7831");

        assertEquals(0, agent.adaptorCount());
        agent.processAddCommand("add edu.berkeley.chukwa_xtrace.XtrAdaptor XTrace TcpReportSource 0");
        assertEquals(1, agent.adaptorCount());

        XTraceContext.startTrace("test", "testtrace", "atag");
        XTraceContext.logEvent("test", "label");

        // First chunk: expected to be the report for startTrace.
        Chunk c = chunks.waitForAChunk();
        String report = new String(c.getData());
        assertTrue(report.contains("Agent: test"));
        assertTrue(report.contains("Tag: atag"));
        System.out.println(report);
        System.out.println("-- next chunk --- ");

        // Second chunk: expected to be the report for logEvent.
        c = chunks.waitForAChunk();
        report = new String(c.getData());
        assertTrue(report.contains("Agent: test"));
        assertTrue(report.contains("Label: label"));
        System.out.println(report);

        System.out.println("OK");
    } finally {
        agent.shutdown();
    }
}

From source file:edu.indiana.d2i.htrc.io.DataAPISilvermapleConf.java

License:Apache License

/**
 * Populates {@code conf} with the Data API settings for the silvermaple host.
 *
 * NOTE(review): the client id and secret are hard-coded in source; consider loading them
 * from external configuration instead.
 *
 * @param conf         configuration to populate
 * @param maxIdsPerReq value stored under {@code HTRCConstants.MAX_ID_RETRIEVED}
 */
@Override
public void configurate(Configuration conf, int maxIdsPerReq) {
    // The same host:port serves the token endpoint and the data hosts entry.
    final String hostAndPort = "silvermaple.pti.indiana.edu:25443";
    final String tokenUrl = "https://" + hostAndPort + "/oauth2/token?grant_type=client_credentials";

    conf.setInt(HTRCConstants.MAX_ID_RETRIEVED, maxIdsPerReq);

    conf.set(HTRCConstants.DATA_API_URL_DELIMITOR, "|");
    conf.set(HTRCConstants.DATA_API_CLIENTID, "drhtrc");
    conf.set(HTRCConstants.DATA_API_CLIENTSECRETE, "d0ct0r.htrc");
    conf.set(HTRCConstants.DATA_API_TOKENLOC, tokenUrl);
    conf.setBoolean(HTRCConstants.DATA_API_SELFSIGNED, false);
    conf.set(HTRCConstants.HOSTS_SEPARATEDBY_COMMA, hostAndPort);
}

From source file:edu.indiana.d2i.htrc.io.SparseVectorsToMemcached.java

License:Apache License

/**
 * Runs a map-only job that reads the ID list under {@code idListDir} through
 * {@code IDInputFormat}, maps it with {@code SparseVectorUtil.Text2VectorMapper} and writes the
 * resulting vectors through {@code MemCachedOutputFormat}.
 *
 * @throws IllegalStateException if the job does not complete successfully
 */
private void parallelTransform() throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(getConf(), "Create sparse vectors from HTRC data storage, store them in MemCached");
    job.setJarByClass(SparseVectorsToMemcached.class);

    Configuration conf = job.getConfiguration();
    setupConfiguration(conf);

    // no speculation
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    job.setInputFormatClass(IDInputFormat.class);
    job.setOutputFormatClass(MemCachedOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(SparseVectorUtil.Text2VectorMapper.class);
    job.setNumReduceTasks(0); // map-only job

    FileInputFormat.setInputPaths(job, new Path(idListDir));

    // waitForCompletion reports failure through its return value; surface it instead of
    // returning as if the transfer had succeeded.
    if (!job.waitForCompletion(true)) {
        throw new IllegalStateException("Job failed: " + job.getJobName());
    }
}

From source file:edu.indiana.d2i.htrc.io.SVFromHDFS2Memcached.java

License:Apache License

/**
 * Runs a map-only job that reads sequence files under {@code vecDir}, maps them with
 * {@code SparseVectorUtil.Text2VectorMapper} and writes the resulting vectors through
 * {@code MemCachedOutputFormat}.
 *
 * @throws IllegalStateException if the job does not complete successfully
 */
private void parallelTransform() throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(getConf(), "Create sparse vectors from HTRC data storage, store them in MemCached");
    job.setJarByClass(SVFromHDFS2Memcached.class);

    Configuration conf = job.getConfiguration();
    setupConfiguration(conf);

    // no speculation
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(MemCachedOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(SparseVectorUtil.Text2VectorMapper.class);
    job.setNumReduceTasks(0); // map-only job

    FileInputFormat.setInputPaths(job, new Path(vecDir));

    // waitForCompletion reports failure through its return value; surface it instead of
    // returning as if the transfer had succeeded.
    if (!job.waitForCompletion(true)) {
        throw new IllegalStateException("Job failed: " + job.getJobName());
    }
}

From source file:edu.isi.mavuno.app.distsim.ContextToContext.java

License:Apache License

/**
 * Chains {@code ContextToPattern} and {@code PatternToContext}: the first tool is run on the
 * configured context path, the second on the {@code pattern-stats} directory below the output
 * path.
 *
 * <p>All inputs are read from the {@code Mavuno.ContextToContext.*} parameters of the
 * configuration returned by {@code getConf()}; that same configuration object is then
 * populated with the parameters of the two chained tools.
 *
 * @return always 0
 */
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    final Configuration conf = getConf();

    // Read every required parameter up front.
    final String contexts = MavunoUtils.getRequiredParam("Mavuno.ContextToContext.ContextPath", conf);
    final String corpusType = MavunoUtils.getRequiredParam("Mavuno.ContextToContext.CorpusClass", conf);
    final String corpus = MavunoUtils.getRequiredParam("Mavuno.ContextToContext.CorpusPath", conf);
    final String extractor = MavunoUtils.getRequiredParam("Mavuno.ContextToContext.ExtractorClass", conf);
    final String extractorOpts = MavunoUtils.getRequiredParam("Mavuno.ContextToContext.ExtractorArgs", conf);
    final String minMatchesText = MavunoUtils.getRequiredParam("Mavuno.ContextToContext.MinMatches", conf);
    final int matchThreshold = Integer.parseInt(minMatchesText);
    final String globalStatsText = MavunoUtils.getRequiredParam("Mavuno.ContextToContext.GlobalStats", conf);
    final boolean globalStats = Boolean.parseBoolean(globalStatsText);
    final String outDir = MavunoUtils.getRequiredParam("Mavuno.ContextToContext.OutputPath", conf);

    MavunoUtils.createDirectory(conf, outDir);

    sLogger.info("Tool name: ContextToContext");
    sLogger.info(" - Context path: " + contexts);
    sLogger.info(" - Corpus class: " + corpusType);
    sLogger.info(" - Corpus path: " + corpus);
    sLogger.info(" - Output path: " + outDir);
    sLogger.info(" - Extractor class: " + extractor);
    sLogger.info(" - Extractor arguments: " + extractorOpts);
    sLogger.info(" - Min matches: " + matchThreshold);
    sLogger.info(" - Harvest global stats: " + globalStats);

    // Step 1: contexts -> patterns.
    conf.set("Mavuno.ContextToPattern.ContextPath", contexts);
    conf.set("Mavuno.ContextToPattern.OutputPath", outDir);
    conf.set("Mavuno.ContextToPattern.CorpusPath", corpus);
    conf.set("Mavuno.ContextToPattern.CorpusClass", corpusType);
    conf.set("Mavuno.ContextToPattern.ExtractorClass", extractor);
    conf.set("Mavuno.ContextToPattern.ExtractorArgs", extractorOpts);
    conf.setInt("Mavuno.ContextToPattern.MinMatches", matchThreshold);
    conf.setBoolean("Mavuno.ContextToPattern.GlobalStats", globalStats);
    new ContextToPattern(conf).run();

    // Step 2: patterns -> contexts, fed from the pattern-stats directory under the output path.
    conf.set("Mavuno.PatternToContext.PatternPath", outDir + "/pattern-stats");
    conf.set("Mavuno.PatternToContext.OutputPath", outDir);
    conf.set("Mavuno.PatternToContext.CorpusPath", corpus);
    conf.set("Mavuno.PatternToContext.CorpusClass", corpusType);
    conf.set("Mavuno.PatternToContext.ExtractorClass", extractor);
    conf.set("Mavuno.PatternToContext.ExtractorArgs", extractorOpts);
    conf.setInt("Mavuno.PatternToContext.MinMatches", matchThreshold);
    conf.setBoolean("Mavuno.PatternToContext.GlobalStats", globalStats);
    new PatternToContext(conf).run();

    return 0;
}

From source file:edu.isi.mavuno.app.distsim.PatternToPattern.java

License:Apache License

/**
 * Chains {@code PatternToContext} and {@code ContextToPattern}: the first tool is run on the
 * configured pattern path, the second on the {@code context-stats} directory below the output
 * path.
 *
 * <p>All inputs are read from the {@code Mavuno.PatternToPattern.*} parameters of the
 * configuration returned by {@code getConf()}; that same configuration object is then
 * populated with the parameters of the two chained tools.
 *
 * @return always 0
 */
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    final Configuration conf = getConf();

    // Read every required parameter up front.
    final String patterns = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.PatternPath", conf);
    final String corpusType = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.CorpusClass", conf);
    final String corpus = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.CorpusPath", conf);
    final String extractor = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.ExtractorClass", conf);
    final String extractorOpts = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.ExtractorArgs", conf);
    final String minMatchesText = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.MinMatches", conf);
    final int matchThreshold = Integer.parseInt(minMatchesText);
    final String globalStatsText = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.GlobalStats", conf);
    final boolean globalStats = Boolean.parseBoolean(globalStatsText);
    final String outDir = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.OutputPath", conf);

    MavunoUtils.createDirectory(conf, outDir);

    sLogger.info("Tool name: PatternToPattern");
    sLogger.info(" - Pattern path: " + patterns);
    sLogger.info(" - Corpus class: " + corpusType);
    sLogger.info(" - Corpus path: " + corpus);
    sLogger.info(" - Output path: " + outDir);
    sLogger.info(" - Context class: " + extractor);
    sLogger.info(" - Context arguments: " + extractorOpts);
    sLogger.info(" - Min matches: " + matchThreshold);
    sLogger.info(" - Harvest global stats: " + globalStats);

    // Step 1: patterns -> contexts.
    conf.set("Mavuno.PatternToContext.PatternPath", patterns);
    conf.set("Mavuno.PatternToContext.OutputPath", outDir);
    conf.set("Mavuno.PatternToContext.CorpusPath", corpus);
    conf.set("Mavuno.PatternToContext.CorpusClass", corpusType);
    conf.set("Mavuno.PatternToContext.ExtractorClass", extractor);
    conf.set("Mavuno.PatternToContext.ExtractorArgs", extractorOpts);
    conf.setInt("Mavuno.PatternToContext.MinMatches", matchThreshold);
    conf.setBoolean("Mavuno.PatternToContext.GlobalStats", globalStats);
    new PatternToContext(conf).run();

    // Step 2: contexts -> patterns, fed from the context-stats directory under the output path.
    conf.set("Mavuno.ContextToPattern.ContextPath", outDir + "/context-stats");
    conf.set("Mavuno.ContextToPattern.OutputPath", outDir);
    conf.set("Mavuno.ContextToPattern.CorpusPath", corpus);
    conf.set("Mavuno.ContextToPattern.CorpusClass", corpusType);
    conf.set("Mavuno.ContextToPattern.ExtractorClass", extractor);
    conf.set("Mavuno.ContextToPattern.ExtractorArgs", extractorOpts);
    conf.setInt("Mavuno.ContextToPattern.MinMatches", matchThreshold);
    conf.setBoolean("Mavuno.ContextToPattern.GlobalStats", globalStats);
    new ContextToPattern(conf).run();

    return 0;
}

From source file:edu.isi.mavuno.app.ie.HarvestEspressoContexts.java

License:Apache License

/**
 * Iteratively harvests contexts starting from a set of seed contexts.
 *
 * <p>The seed examples are first converted into a sequence file under
 * {@code <output>/0/contexts-scored/scored-contexts-raw}. Each iteration {@code i} (1-based)
 * then runs, in order: contexts to patterns, pattern scoring, an optional top-{@code (k * i)}
 * pattern cut (skipped when {@code NumPatterns} is negative), patterns to contexts, and context
 * scoring. The output directory of iteration {@code i - 1} is removed at the end of iteration
 * {@code i}.
 *
 * <p>All inputs are read from the {@code Mavuno.HarvestEspressoContexts.*} parameters of the
 * configuration returned by {@code getConf()}; {@code NumIterations} is optional and defaults
 * to 1.
 *
 * @return always 0
 * @throws NumberFormatException if a numeric parameter cannot be parsed as an integer
 */
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String inputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.InputPath", conf);
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.CorpusClass", conf);
    String extractorClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.ExtractorClass", conf);
    String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.ExtractorArgs", conf);
    String scorerClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.ScorerClass", conf);
    String scorerArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.ScorerArgs", conf);
    int numPatterns = Integer
            .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.NumPatterns", conf));
    int minMatches = Integer
            .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.MinMatches", conf));
    String baseOutputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.OutputPath", conf);

    String numIterations = MavunoUtils.getOptionalParam("Mavuno.HarvestEspressoContexts.NumIterations", conf);
    int iterations = 1;
    if (numIterations != null) {
        iterations = Integer.parseInt(numIterations);
    }

    MavunoUtils.createDirectory(conf, baseOutputPath);

    sLogger.info("Tool name: HarvestEspressoContexts");
    sLogger.info(" - Input path: " + inputPath);
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Extractor class: " + extractorClass);
    sLogger.info(" - Extractor args: " + extractorArgs);
    sLogger.info(" - Scorer class: " + scorerClass);
    sLogger.info(" - Scorer args: " + scorerArgs);
    sLogger.info(" - Number of patterns: " + numPatterns);
    sLogger.info(" - Minimum matches: " + minMatches);
    sLogger.info(" - Iterations: " + iterations);
    sLogger.info(" - Output path: " + baseOutputPath);

    // initial sub output path
    MavunoUtils.createDirectory(conf, baseOutputPath + "/0");
    MavunoUtils.createDirectory(conf, baseOutputPath + "/0/contexts-scored");

    // examples -> sequence file
    conf.set("Mavuno.ExamplesToSequenceFile.InputPath", inputPath);
    conf.set("Mavuno.ExamplesToSequenceFile.OutputPath",
            baseOutputPath + "/0/contexts-scored/scored-contexts-raw");
    new ExamplesToSequenceFile(conf).run();

    // iterate procedure
    for (int i = 1; i <= iterations; i++) {
        // previous output path (input to current iteration)
        String prevOutputPath = baseOutputPath + "/" + (i - 1);

        // current output path
        String curOutputPath = baseOutputPath + "/" + i;
        MavunoUtils.createDirectory(conf, curOutputPath);

        // seeds -> patterns
        conf.set("Mavuno.ContextToPattern.ContextPath",
                prevOutputPath + "/contexts-scored/scored-contexts-raw");
        conf.set("Mavuno.ContextToPattern.CorpusPath", corpusPath);
        conf.set("Mavuno.ContextToPattern.CorpusClass", corpusClass);
        conf.set("Mavuno.ContextToPattern.ExtractorClass", extractorClass);
        conf.set("Mavuno.ContextToPattern.ExtractorArgs", extractorArgs);
        conf.setInt("Mavuno.ContextToPattern.MinMatches", minMatches);
        conf.setBoolean("Mavuno.ContextToPattern.GlobalStats", true);
        conf.set("Mavuno.ContextToPattern.OutputPath", curOutputPath + "/patterns");
        new ContextToPattern(conf).run();

        // score patterns
        conf.set("Mavuno.ComputePatternScores.InputPath", curOutputPath + "/patterns");
        // Configuration.set rejects a null value, so the unused context scorer is cleared with
        // unset instead. NOTE(review): assumes ComputePatternScores treats an absent key as
        // "no context scorer" -- confirm.
        conf.unset("Mavuno.ComputePatternScores.ContextScorerClass");
        conf.set("Mavuno.ComputePatternScores.PatternScorerClass", scorerClass);
        conf.set("Mavuno.ComputePatternScores.PatternScorerArgs", scorerArgs);
        conf.set("Mavuno.ComputePatternScores.OutputPath", curOutputPath + "/patterns-scored");
        new ComputePatternScores(conf).run();

        if (numPatterns >= 0) {
            // only retain top-(k * i) patterns ...
            conf.set("Mavuno.GetTopResults.InputPath", curOutputPath + "/patterns-scored/scored-patterns");
            conf.set("Mavuno.GetTopResults.OutputPath", curOutputPath + "/patterns-scored-top");
            conf.setInt("Mavuno.GetTopResults.NumResults", numPatterns * i);
            conf.setBoolean("Mavuno.GetTopResults.SequenceFileOutputFormat", true);
            new GetTopResults(conf).run();

            // ... and feed only those into the next step
            conf.set("Mavuno.PatternToContext.PatternPath", curOutputPath + "/patterns-scored-top");
        } else {
            // no cut requested: feed every scored pattern into the next step
            conf.set("Mavuno.PatternToContext.PatternPath",
                    curOutputPath + "/patterns-scored/scored-patterns-raw");
        }

        // patterns -> contexts
        conf.set("Mavuno.PatternToContext.CorpusPath", corpusPath);
        conf.set("Mavuno.PatternToContext.CorpusClass", corpusClass);
        conf.set("Mavuno.PatternToContext.ExtractorClass", extractorClass);
        conf.set("Mavuno.PatternToContext.ExtractorArgs", extractorArgs);
        conf.setInt("Mavuno.PatternToContext.MinMatches", minMatches);
        conf.setBoolean("Mavuno.PatternToContext.GlobalStats", true);
        conf.set("Mavuno.PatternToContext.OutputPath", curOutputPath + "/contexts");
        new PatternToContext(conf).run();

        // score contexts
        conf.set("Mavuno.ComputeContextScores.InputPath", curOutputPath + "/contexts");
        // See above: a null value cannot be stored, so the unused pattern scorer is unset.
        conf.unset("Mavuno.ComputeContextScores.PatternScorerClass");
        conf.set("Mavuno.ComputeContextScores.ContextScorerClass", scorerClass);
        conf.set("Mavuno.ComputeContextScores.ContextScorerArgs", scorerArgs);
        conf.set("Mavuno.ComputeContextScores.OutputPath", curOutputPath + "/contexts-scored");
        new ComputeContextScores(conf).run();

        // delete previous output path
        MavunoUtils.removeDirectory(conf, prevOutputPath);
    }

    return 0;
}

From source file:edu.isi.mavuno.app.ie.HarvestEspressoPatterns.java

License:Apache License

/**
 * Iteratively harvests patterns starting from a set of seed patterns.
 *
 * <p>The seed examples are first converted into a sequence file under
 * {@code <output>/0/patterns-scored/scored-patterns-raw}. Each iteration {@code i} (1-based)
 * then runs, in order: patterns to contexts, context scoring, an optional top-{@code (k * i)}
 * context cut (skipped when {@code NumContexts} is negative), contexts to patterns, and pattern
 * scoring. The output directory of iteration {@code i - 1} is removed at the end of iteration
 * {@code i}.
 *
 * <p>All inputs are read from the {@code Mavuno.HarvestEspressoPatterns.*} parameters of the
 * configuration returned by {@code getConf()}; {@code NumIterations} is optional and defaults
 * to 1.
 *
 * @return always 0
 * @throws NumberFormatException if a numeric parameter cannot be parsed as an integer
 */
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String inputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.InputPath", conf);
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.CorpusClass", conf);
    String extractorClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.ExtractorClass", conf);
    String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.ExtractorArgs", conf);
    String scorerClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.ScorerClass", conf);
    String scorerArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.ScorerArgs", conf);
    int numContexts = Integer
            .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.NumContexts", conf));
    int minMatches = Integer
            .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.MinMatches", conf));
    String baseOutputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.OutputPath", conf);

    String numIterations = MavunoUtils.getOptionalParam("Mavuno.HarvestEspressoPatterns.NumIterations", conf);
    int iterations = 1;
    if (numIterations != null) {
        iterations = Integer.parseInt(numIterations);
    }

    MavunoUtils.createDirectory(conf, baseOutputPath);

    sLogger.info("Tool name: HarvestEspressoPatterns");
    sLogger.info(" - Input path: " + inputPath);
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Extractor class: " + extractorClass);
    sLogger.info(" - Extractor args: " + extractorArgs);
    sLogger.info(" - Scorer class: " + scorerClass);
    sLogger.info(" - Scorer args: " + scorerArgs);
    sLogger.info(" - Number of contexts: " + numContexts);
    sLogger.info(" - Minimum matches: " + minMatches);
    sLogger.info(" - Iterations: " + iterations);
    sLogger.info(" - Output path: " + baseOutputPath);

    // initial sub output path
    MavunoUtils.createDirectory(conf, baseOutputPath + "/0");
    MavunoUtils.createDirectory(conf, baseOutputPath + "/0/patterns-scored");

    // patterns -> sequence file
    conf.set("Mavuno.ExamplesToSequenceFile.InputPath", inputPath);
    conf.set("Mavuno.ExamplesToSequenceFile.OutputPath",
            baseOutputPath + "/0/patterns-scored/scored-patterns-raw");
    new ExamplesToSequenceFile(conf).run();

    // iterate procedure
    for (int i = 1; i <= iterations; i++) {
        // previous output path (input to current iteration)
        String prevOutputPath = baseOutputPath + "/" + (i - 1);

        // current output path
        String curOutputPath = baseOutputPath + "/" + i;
        MavunoUtils.createDirectory(conf, curOutputPath);

        // seeds -> contexts
        conf.set("Mavuno.PatternToContext.PatternPath",
                prevOutputPath + "/patterns-scored/scored-patterns-raw");
        conf.set("Mavuno.PatternToContext.CorpusPath", corpusPath);
        conf.set("Mavuno.PatternToContext.CorpusClass", corpusClass);
        conf.set("Mavuno.PatternToContext.ExtractorClass", extractorClass);
        conf.set("Mavuno.PatternToContext.ExtractorArgs", extractorArgs);
        conf.setInt("Mavuno.PatternToContext.MinMatches", minMatches);
        conf.setBoolean("Mavuno.PatternToContext.GlobalStats", true);
        conf.set("Mavuno.PatternToContext.OutputPath", curOutputPath + "/contexts");
        new PatternToContext(conf).run();

        // score contexts
        conf.set("Mavuno.ComputeContextScores.InputPath", curOutputPath + "/contexts");
        // Configuration.set rejects a null value, so the unused pattern scorer is cleared with
        // unset instead. NOTE(review): assumes ComputeContextScores treats an absent key as
        // "no pattern scorer" -- confirm.
        conf.unset("Mavuno.ComputeContextScores.PatternScorerClass");
        conf.set("Mavuno.ComputeContextScores.ContextScorerClass", scorerClass);
        conf.set("Mavuno.ComputeContextScores.ContextScorerArgs", scorerArgs);
        conf.set("Mavuno.ComputeContextScores.OutputPath", curOutputPath + "/contexts-scored");
        new ComputeContextScores(conf).run();

        if (numContexts >= 0) {
            // only retain top-(k * i) contexts ...
            conf.set("Mavuno.GetTopResults.InputPath", curOutputPath + "/contexts-scored/scored-contexts");
            conf.set("Mavuno.GetTopResults.OutputPath", curOutputPath + "/contexts-scored-top");
            conf.setInt("Mavuno.GetTopResults.NumResults", numContexts * i);
            conf.setBoolean("Mavuno.GetTopResults.SequenceFileOutputFormat", true);
            new GetTopResults(conf).run();

            // ... and feed only those into the next step
            conf.set("Mavuno.ContextToPattern.ContextPath", curOutputPath + "/contexts-scored-top");
        } else {
            // no cut requested: feed every scored context into the next step
            conf.set("Mavuno.ContextToPattern.ContextPath",
                    curOutputPath + "/contexts-scored/scored-contexts-raw");
        }

        // contexts -> patterns
        conf.set("Mavuno.ContextToPattern.CorpusPath", corpusPath);
        conf.set("Mavuno.ContextToPattern.CorpusClass", corpusClass);
        conf.set("Mavuno.ContextToPattern.ExtractorClass", extractorClass);
        conf.set("Mavuno.ContextToPattern.ExtractorArgs", extractorArgs);
        conf.setInt("Mavuno.ContextToPattern.MinMatches", minMatches);
        conf.setBoolean("Mavuno.ContextToPattern.GlobalStats", true);
        conf.set("Mavuno.ContextToPattern.OutputPath", curOutputPath + "/patterns");
        new ContextToPattern(conf).run();

        // score patterns
        conf.set("Mavuno.ComputePatternScores.InputPath", curOutputPath + "/patterns");
        // See above: a null value cannot be stored, so the unused context scorer is unset.
        conf.unset("Mavuno.ComputePatternScores.ContextScorerClass");
        conf.set("Mavuno.ComputePatternScores.PatternScorerClass", scorerClass);
        conf.set("Mavuno.ComputePatternScores.PatternScorerArgs", scorerArgs);
        conf.set("Mavuno.ComputePatternScores.OutputPath", curOutputPath + "/patterns-scored");
        new ComputePatternScores(conf).run();

        // delete previous output path
        MavunoUtils.removeDirectory(conf, prevOutputPath);
    }

    return 0;
}