Example usage for org.apache.hadoop.conf Configuration addResource

Introduction

On this page you can find example usages of org.apache.hadoop.conf Configuration addResource.

Prototype

public void addResource(String name)
public void addResource(Path file)

Configuration defines several addResource overloads; the examples below exercise the String and Path variants shown here.

Document

Add a configuration resource. Resources are evaluated in the order they were added, so a property set in a later resource overrides the same property from an earlier one unless the property is marked final.
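
Before the real-world excerpts below, here is a minimal, self-contained sketch of the String and Path overloads (the resource names are placeholders, not files from any example):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class AddResourceDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // By name: looked up on the classpath via the class loader;
        // a missing named resource is silently skipped.
        conf.addResource("my-site.xml");
        // By Path: an explicit local or HDFS file (it must exist by the
        // time properties are first read, or loading fails).
        conf.addResource(new Path("/etc/hadoop/conf/extra-site.xml"));
        // Properties are loaded lazily, on first access.
        System.out.println(conf.get("fs.defaultFS"));
    }
}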

Usage

From source file:MapReduce.TweetCountPerState.java

public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create(); // the HBaseConfiguration constructor is deprecated
    conf.addResource(TweetUtils.HBASE_CONF);
    Job job = Job.getInstance(conf, "Tweets Per State");
    job.setJarByClass(TweetCountPerState.class);

    Scan sc = new Scan();
    sc.setCaching(500);
    sc.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob("tweetdata", // input table
            sc, // Scan instance to control CF and attribute selection
            MapClass.class, // mapper class
            Text.class, // mapper output key
            LongWritable.class, // mapper output value
            job);

    job.setMapperClass(MapClass.class);
    job.setReducerClass(ReducerClass.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    String dest = TweetUtils.OUTPUT_PREFIX + "TweetsPerState";
    if (args.length > 0) {
        dest = args[0];
    }
    File destination = new File(dest);
    FileUtil.fullyDelete(destination);
    FileOutputFormat.setOutputPath(job, new Path(dest));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:MapReduce.UsersPerTimeZone.java

public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create(); // the HBaseConfiguration constructor is deprecated
    conf.addResource(TweetUtils.HBASE_CONF);
    Job job = Job.getInstance(conf, "Users Per Time Zone");
    job.setJarByClass(UsersPerTimeZone.class);

    Scan sc = new Scan();
    sc.setCaching(500);
    sc.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob("twitteruser", // input table
            sc, // Scan instance to control CF and attribute selection
            MapClass.class, // mapper class
            Text.class, // mapper output key
            LongWritable.class, // mapper output value
            job);

    job.setMapperClass(MapClass.class);
    job.setReducerClass(ReducerClass.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    String dest = TweetUtils.OUTPUT_PREFIX + "UsersPerTimeZone";
    if (args.length > 0) {
        dest = args[0];
    }
    File destination = new File(dest);
    FileUtil.fullyDelete(destination);
    FileOutputFormat.setOutputPath(job, new Path(dest));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
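
Both jobs above attach the HBase configuration file by hand. When hbase-site.xml is already on the classpath, HBaseConfiguration.create() merges it automatically, and an explicit addResource is only needed for a file living elsewhere. A minimal sketch, with a hypothetical path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class HBaseConfDemo {
    public static void main(String[] args) {
        // create() layers hbase-default.xml and hbase-site.xml from the
        // classpath on top of the stock Hadoop defaults.
        Configuration conf = HBaseConfiguration.create();
        // Only needed when the file is not on the classpath; this path
        // is a placeholder for wherever the cluster keeps its config.
        conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));
        System.out.println(conf.get("hbase.zookeeper.quorum"));
    }
}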

From source file:ml.shifu.guagua.hadoop.io.GuaguaOptionsParser.java

License:Apache License

/**
 * Modify the configuration according to the user-specified generic options.
 *
 * @param conf
 *            Configuration to be modified
 * @param line
 *            User-specified generic options
 */
private void processGeneralOptions(Configuration conf, CommandLine line) throws IOException {
    if (line.hasOption("fs")) {
        FileSystem.setDefaultUri(conf, line.getOptionValue("fs"));
    }

    if (line.hasOption("jt")) {
        conf.set("mapred.job.tracker", line.getOptionValue("jt"));
    }
    if (line.hasOption("conf")) {
        String[] values = line.getOptionValues("conf");
        for (String value : values) {
            conf.addResource(new Path(value));
        }
    }
    if (line.hasOption("libjars")) {
        conf.set("tmpjars", validateFiles(line.getOptionValue("libjars"), conf));
        // setting libjars in client classpath
        URL[] libjars = getLibJars(conf);
        if (libjars != null && libjars.length > 0) {
            conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
            Thread.currentThread().setContextClassLoader(
                    new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
        }
    }
    if (line.hasOption("files")) {
        conf.set("tmpfiles", validateFiles(line.getOptionValue("files"), conf));
    }
    if (line.hasOption("archives")) {
        conf.set("tmparchives", validateFiles(line.getOptionValue("archives"), conf));
    }
    if (line.hasOption('D')) {
        String[] property = line.getOptionValues('D');
        for (String prop : property) {
            String[] keyval = prop.split("=", 2);
            if (keyval.length == 2) {
                conf.set(keyval[0], keyval[1]);
            }
        }
    }
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    // tokensFile
    if (line.hasOption("tokenCacheFile")) {
        String fileName = line.getOptionValue("tokenCacheFile");
        // check if the local file exists
        try {
            FileSystem localFs = FileSystem.getLocal(conf);
            Path p = new Path(fileName);
            if (!localFs.exists(p)) {
                throw new FileNotFoundException("File " + fileName + " does not exist.");
            }

            LOG.debug("setting conf tokensFile: {}", fileName);
            conf.set("mapreduce.job.credentials.json", localFs.makeQualified(p).toString());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
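
This parser mirrors Hadoop's stock GenericOptionsParser, where each -conf argument likewise ends up as an addResource call. A minimal sketch of the stock parser doing the same merge (the file name is a placeholder and must exist locally):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class GenericOptsDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Same effect as passing these flags on a real command line:
        // each -conf file is added as a resource, each -D sets a property.
        String[] argv = { "-conf", "extra-site.xml", "-D", "my.key=my.value" };
        GenericOptionsParser parser = new GenericOptionsParser(conf, argv);
        String[] appArgs = parser.getRemainingArgs(); // non-generic arguments
        System.out.println(conf.get("my.key")); // "my.value"
    }
}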

From source file:Model.SparkLDA.java

public void fit(JavaRDD<Vector> parsedData) {
    try {
        System.out
                .println("===================================================================================");

        JavaPairRDD<Long, Vector> corpus = JavaPairRDD.fromJavaRDD(parsedData.zipWithIndex().map(Tuple2::swap));
        corpus.cache();
        OnlineLDAOptimizer olda = new OnlineLDAOptimizer();
        olda.setMiniBatchFraction(0.1);
        LDAModel ldaModel = new LDA().setOptimizer(olda).setK(topicCount).run(corpus);

        long end = System.currentTimeMillis();
        Configuration config = new Configuration();
        config.addResource(new Path("/usr/local/hadoop/conf/core-site.xml"));
        config.addResource(new Path("/usr/local/hadoop/conf/hdfs-site.xml"));
        FileSystem fs = FileSystem.get(config);
        Path outputPath = new Path("output" + outputFile);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true); // recursive; the single-argument delete is deprecated
        }
        OutputStream os = fs.create(outputPath);
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(os));

        Tuple2<int[], double[]>[] to = ldaModel.describeTopics(termPerTopic);
        for (int i = 0; i < to.length; i++) {
            int[] term = to[i]._1();
            double[] wght = to[i]._2();
            for (int j = 0; j < term.length; j++) {
                bw.write(Document.LdaPreprocessor.invDictionary.get(term[j]) + " " + wght[j] + "\n");
            }
            bw.write("\n\n");
        }
        bw.close();
    } catch (IOException ex) {
        Logger.getLogger(SparkLDA.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:my.mahout.AbstractJob.java

License:Apache License

/**
 * Overrides the base implementation to install the Oozie action configuration resource
 * into the provided Configuration object; note that ToolRunner calls setConf on the Tool
 * before it invokes run.
 */
@Override
public void setConf(Configuration conf) {
    super.setConf(conf);

    // If running in an Oozie workflow as a Java action, need to add the
    // Configuration resource provided by Oozie to this job's config.
    String oozieActionConfXml = System.getProperty("oozie.action.conf.xml");
    if (oozieActionConfXml != null && conf != null) {
        conf.addResource(new Path("file:///", oozieActionConfXml));
        log.info("Added Oozie action Configuration resource {} to the Hadoop Configuration",
                oozieActionConfXml);
    }
}
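
The reason this works is addResource's ordering rule: resources added later override earlier ones (final properties excepted), so the Oozie action configuration wins over anything loaded before it. A minimal, self-contained sketch of that rule, using inline XML streams instead of files:

import java.io.ByteArrayInputStream;
import org.apache.hadoop.conf.Configuration;

public class ResourceOrderDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration(false); // skip the default resources
        conf.addResource(new ByteArrayInputStream(
                ("<configuration><property><name>k</name>"
                        + "<value>first</value></property></configuration>").getBytes()));
        conf.addResource(new ByteArrayInputStream(
                ("<configuration><property><name>k</name>"
                        + "<value>second</value></property></configuration>").getBytes()));
        System.out.println(conf.get("k")); // "second": the later resource wins
    }
}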

From source file:net.broomie.JpWordCounter.java

License:Apache License

/**
 * Run the WordCoCount process.
 * @param args Specify the arguments from the command line.
 * @return Return 0 on success, 1 on failure.
 * @throws IOException Exception for input file IO.
 * @throws InterruptedException Exception from waitForCompletion().
 * @throws ClassNotFoundException Exception for the Mapper and Reducer classes.
 */
public int run(final String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    conf.addResource(LIB_NAKAMEGURO_CONF);
    procArgs(args);
    String dfdb = conf.get(PROP_DFDB);
    if (modeName.equals("df")) {
        try {
            runCreateDFDB(conf, dfdb);
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }

    } else if (modeName.equals("count")) {
        if (tfidfFlag) {
            try {
                DistributedCache.addCacheFile(new URI(dfdb), conf);
            } catch (URISyntaxException e) {
                e.printStackTrace();
            }
        }
        runWordCoCount(conf);
    }

    return 0;
}

From source file:net.broomie.reducer.CoCounteReducer.java

License:Apache License

/**
 * The setup method for CoCounteReducer.
 * This method runs before the reduce phase.
 * @param context Specify the hadoop Context object.
 */
@Override
public final void setup(Context context) {
    Configuration conf = context.getConfiguration();
    String resourcePath = conf.get(LIB_NAKAMEGURO_CONF);
    conf.addResource(resourcePath);
}
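
Note the self-referential pattern: the resource path is read out of the job configuration and then added back into it. addResource invalidates the cached property set, so get() calls made afterwards also see the new file, while values applied with set() survive the reload. A minimal sketch of those two behaviors (key names are placeholders):

import java.io.ByteArrayInputStream;
import org.apache.hadoop.conf.Configuration;

public class ReloadDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        conf.set("explicit.key", "set-by-code");
        conf.addResource(new ByteArrayInputStream(
                ("<configuration><property><name>file.key</name>"
                        + "<value>from-file</value></property></configuration>").getBytes()));
        // The reload picks up the newly added resource...
        System.out.println(conf.get("file.key")); // "from-file"
        // ...and values set programmatically are preserved across it.
        System.out.println(conf.get("explicit.key")); // "set-by-code"
    }
}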

From source file:net.broomie.reducer.CoCounteReducerMI.java

License:Apache License

/**
 * The setup method for CoCounteReducerMI.
 * This method runs before the reduce phase.
 * @param context Specify the hadoop Context object.
 */
@Override
public final void setup(Context context) {
    Configuration conf = context.getConfiguration();
    String resourcePath = conf.get(LIB_NAKAMEGURO_CONF);
    conf.addResource(resourcePath);
    try {
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
        if (cacheFiles != null) {
            for (Path cachePath : cacheFiles) {
                loadCacheFile(cachePath, context);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:net.broomie.WordCoCounter.java

License:Apache License

/**
 * Run the WordCoCount process.
 * @param args Specify the arguments from the command line.
 * @return Return 0 on success, 1 on failure.
 * @throws IOException Exception for input file IO.
 * @throws InterruptedException Exception from waitForCompletion().
 * @throws ClassNotFoundException Exception for the Mapper and Reducer classes.
 */
public int run(final String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    conf.addResource(LIB_NAKAMEGURO_CONF);
    procArgs(args);
    String dfdb = conf.get(PROP_DFDB);
    if (tfidfFlag) {
        try {
            runWordCount(conf, dfdb);
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
        try {
            DistributedCache.addCacheFile(new URI(dfdb), conf);
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }
    runWordCoCount(conf);
    return 0;
}

From source file:net.broomie.WordCounter.java

License:Apache License

/**
 * Run the WordCount process.
 * @param args Specify the arguments from the command line.
 * @return Return 0 on success, 1 on failure.
 * @throws IOException Exception for input file IO.
 * @throws InterruptedException Exception from waitForCompletion().
 * @throws ClassNotFoundException Exception for the Mapper and Reducer classes.
 */
public int run(final String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    procArgs(args);
    if (in == null || out == null) {
        printUsage();
    }
    Configuration conf = getConf();
    conf.addResource(LIB_NAKAMEGURO_CONF);
    return runWordCount(conf) ? 0 : 1;
}