Example usage for org.apache.hadoop.conf Configuration addResource

Introduction

On this page you can find example usages of org.apache.hadoop.conf Configuration addResource.

Prototype

public void addResource(String name)
public void addResource(Path file)

Configuration defines several addResource overloads; the examples below exercise the String and Path variants shown here.

Document

Add a configuration resource. Resources are evaluated in the order they were added, so a property set in a later resource overrides the same property from an earlier one unless the property is marked final.
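
Before the real-world excerpts below, here is a minimal, self-contained sketch of the String and Path overloads (the resource names are placeholders, not files from any example):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class AddResourceDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // By name: looked up on the classpath via the class loader;
        // a missing named resource is silently skipped.
        conf.addResource("my-site.xml");
        // By Path: an explicit local or HDFS file (it must exist by the
        // time properties are first read, or loading fails).
        conf.addResource(new Path("/etc/hadoop/conf/extra-site.xml"));
        // Properties are loaded lazily, on first access.
        System.out.println(conf.get("fs.defaultFS"));
    }
}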

Usage

From source file:MapReduce.TweetCountPerState.java

public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create(); // the HBaseConfiguration constructor is deprecated
    conf.addResource(TweetUtils.HBASE_CONF);
    Job job = Job.getInstance(conf, "Tweets Per State");
    job.setJarByClass(TweetCountPerState.class);

    Scan sc = new Scan();
    sc.setCaching(500);
    sc.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob("tweetdata", // input table
            sc, // Scan instance to control CF and attribute selection
            MapClass.class, // mapper class
            Text.class, // mapper output key
            LongWritable.class, // mapper output value
            job);

    job.setMapperClass(MapClass.class);
    job.setReducerClass(ReducerClass.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    String dest = TweetUtils.OUTPUT_PREFIX + "TweetsPerState";
    if (args.length > 0) {
        dest = args[0];
    }
    File destination = new File(dest);
    FileUtil.fullyDelete(destination);
    FileOutputFormat.setOutputPath(job, new Path(dest));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:MapReduce.UsersPerTimeZone.java

public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create(); // the HBaseConfiguration constructor is deprecated
    conf.addResource(TweetUtils.HBASE_CONF);
    Job job = Job.getInstance(conf, "Users Per Time Zone");
    job.setJarByClass(UsersPerTimeZone.class);

    Scan sc = new Scan();
    sc.setCaching(500);
    sc.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob("twitteruser", // input table
            sc, // Scan instance to control CF and attribute selection
            MapClass.class, // mapper class
            Text.class, // mapper output key
            LongWritable.class, // mapper output value
            job);

    job.setMapperClass(MapClass.class);
    job.setReducerClass(ReducerClass.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    String dest = TweetUtils.OUTPUT_PREFIX + "UsersPerTimeZone";
    if (args.length > 0) {
        dest = args[0];
    }
    File destination = new File(dest);
    FileUtil.fullyDelete(destination);
    FileOutputFormat.setOutputPath(job, new Path(dest));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
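
Both jobs above attach the HBase configuration file by hand. When hbase-site.xml is already on the classpath, HBaseConfiguration.create() merges it automatically, and an explicit addResource is only needed for a file living elsewhere. A minimal sketch, with a hypothetical path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class HBaseConfDemo {
    public static void main(String[] args) {
        // create() layers hbase-default.xml and hbase-site.xml from the
        // classpath on top of the stock Hadoop defaults.
        Configuration conf = HBaseConfiguration.create();
        // Only needed when the file is not on the classpath; this path
        // is a placeholder for wherever the cluster keeps its config.
        conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));
        System.out.println(conf.get("hbase.zookeeper.quorum"));
    }
}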

From source file:ml.shifu.guagua.hadoop.io.GuaguaOptionsParser.java

License:Apache License

/**
 * Modify the configuration according to the user-specified generic options.
 *
 * @param conf
 *            Configuration to be modified
 * @param line
 *            User-specified generic options
 */
private void processGeneralOptions(Configuration conf, CommandLine line) throws IOException {
    if (line.hasOption("fs")) {
        FileSystem.setDefaultUri(conf, line.getOptionValue("fs"));
    }

    if (line.hasOption("jt")) {
        conf.set("mapred.job.tracker", line.getOptionValue("jt"));
    }
    if (line.hasOption("conf")) {
        String[] values = line.getOptionValues("conf");
        for (String value : values) {
            conf.addResource(new Path(value));
        }
    }
    if (line.hasOption("libjars")) {
        conf.set("tmpjars", validateFiles(line.getOptionValue("libjars"), conf));
        // setting libjars in client classpath
        URL[] libjars = getLibJars(conf);
        if (libjars != null && libjars.length > 0) {
            conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
            Thread.currentThread().setContextClassLoader(
                    new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
        }
    }
    if (line.hasOption("files")) {
        conf.set("tmpfiles", validateFiles(line.getOptionValue("files"), conf));
    }
    if (line.hasOption("archives")) {
        conf.set("tmparchives", validateFiles(line.getOptionValue("archives"), conf));
    }
    if (line.hasOption('D')) {
        String[] property = line.getOptionValues('D');
        for (String prop : property) {
            String[] keyval = prop.split("=", 2);
            if (keyval.length == 2) {
                conf.set(keyval[0], keyval[1]);
            }
        }
    }
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    // tokensFile
    if (line.hasOption("tokenCacheFile")) {
        String fileName = line.getOptionValue("tokenCacheFile");
        // check if the local file exists
        try {
            FileSystem localFs = FileSystem.getLocal(conf);
            Path p = new Path(fileName);
            if (!localFs.exists(p)) {
                throw new FileNotFoundException("File " + fileName + " does not exist.");
            }

            LOG.debug("setting conf tokensFile: {}", fileName);
            conf.set("mapreduce.job.credentials.json", localFs.makeQualified(p).toString());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
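
This parser mirrors Hadoop's stock GenericOptionsParser, where each -conf argument likewise ends up as an addResource call. A minimal sketch of the stock parser doing the same merge (the file name is a placeholder and must exist locally):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class GenericOptsDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Same effect as passing these flags on a real command line:
        // each -conf file is added as a resource, each -D sets a property.
        String[] argv = { "-conf", "extra-site.xml", "-D", "my.key=my.value" };
        GenericOptionsParser parser = new GenericOptionsParser(conf, argv);
        String[] appArgs = parser.getRemainingArgs(); // non-generic arguments
        System.out.println(conf.get("my.key")); // "my.value"
    }
}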

From source file:Model.SparkLDA.java

public void fit(JavaRDD<Vector> parsedData) {
    try {
        System.out
                .println("===================================================================================");

        JavaPairRDD<Long, Vector> corpus = JavaPairRDD.fromJavaRDD(parsedData.zipWithIndex().map(Tuple2::swap));
        corpus.cache();
        OnlineLDAOptimizer olda = new OnlineLDAOptimizer();
        olda.setMiniBatchFraction(0.1);
        LDAModel ldaModel = new LDA().setOptimizer(olda).setK(topicCount).run(corpus);

        long end = System.currentTimeMillis();
        Configuration config = new Configuration();
        config.addResource(new Path("/usr/local/hadoop/conf/core-site.xml"));
        config.addResource(new Path("/usr/local/hadoop/conf/hdfs-site.xml"));
        FileSystem fs = FileSystem.get(config);
        Path outputPath = new Path("output" + outputFile);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true); // recursive; the single-argument delete is deprecated
        }
        OutputStream os = fs.create(outputPath);
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(os));

        Tuple2<int[], double[]>[] to = ldaModel.describeTopics(termPerTopic);
        for (int i = 0; i < to.length; i++) {
            int[] term = to[i]._1();
            double[] wght = to[i]._2();
            for (int j = 0; j < term.length; j++) {
                bw.write(Document.LdaPreprocessor.invDictionary.get(term[j]) + " " + wght[j] + "\n");
            }
            bw.write("\n\n");
        }
        bw.close();
    } catch (IOException ex) {
        Logger.getLogger(SparkLDA.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:my.mahout.AbstractJob.java

License:Apache License

/**
 * Overrides the base implementation to install the Oozie action configuration resource
 * into the provided Configuration object; note that ToolRunner calls setConf on the Tool
 * before it invokes run.
 */
@Override
public void setConf(Configuration conf) {
    super.setConf(conf);

    // If running in an Oozie workflow as a Java action, need to add the
    // Configuration resource provided by Oozie to this job's config.
    String oozieActionConfXml = System.getProperty("oozie.action.conf.xml");
    if (oozieActionConfXml != null && conf != null) {
        conf.addResource(new Path("file:///", oozieActionConfXml));
        log.info("Added Oozie action Configuration resource {} to the Hadoop Configuration",
                oozieActionConfXml);
    }
}
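
The reason this works is addResource's ordering rule: resources added later override earlier ones (final properties excepted), so the Oozie action configuration wins over anything loaded before it. A minimal, self-contained sketch of that rule, using inline XML streams instead of files:

import java.io.ByteArrayInputStream;
import org.apache.hadoop.conf.Configuration;

public class ResourceOrderDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration(false); // skip the default resources
        conf.addResource(new ByteArrayInputStream(
                ("<configuration><property><name>k</name>"
                        + "<value>first</value></property></configuration>").getBytes()));
        conf.addResource(new ByteArrayInputStream(
                ("<configuration><property><name>k</name>"
                        + "<value>second</value></property></configuration>").getBytes()));
        System.out.println(conf.get("k")); // "second": the later resource wins
    }
}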

From source file:net.broomie.JpWordCounter.java

License:Apache License

/**
 * Run the WordCoCount process.
 * @param args Specify the arguments from the command line.
 * @return Return 0 on success, 1 on failure.
 * @throws IOException Exception for input file IO.
 * @throws InterruptedException Exception from waitForCompletion().
 * @throws ClassNotFoundException Exception for the Mapper and Reducer classes.
 */
public int run(final String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    conf.addResource(LIB_NAKAMEGURO_CONF);
    procArgs(args);
    String dfdb = conf.get(PROP_DFDB);
    if (modeName.equals("df")) {
        try {
            runCreateDFDB(conf, dfdb);
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }

    } else if (modeName.equals("count")) {
        if (tfidfFlag) {
            try {
                DistributedCache.addCacheFile(new URI(dfdb), conf);
            } catch (URISyntaxException e) {
                e.printStackTrace();
            }
        }
        runWordCoCount(conf);
    }

    return 0;
}

From source file:net.broomie.reducer.CoCounteReducer.java

License:Apache License

/**
 * The setup method for CoCounteReducer.
 * This method runs before the reduce phase.
 * @param context Specify the hadoop Context object.
 */
@Override
public final void setup(Context context) {
    Configuration conf = context.getConfiguration();
    String resourcePath = conf.get(LIB_NAKAMEGURO_CONF);
    conf.addResource(resourcePath);
}
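
Note the self-referential pattern: the resource path is read out of the job configuration and then added back into it. addResource invalidates the cached property set, so get() calls made afterwards also see the new file, while values applied with set() survive the reload. A minimal sketch of those two behaviors (key names are placeholders):

import java.io.ByteArrayInputStream;
import org.apache.hadoop.conf.Configuration;

public class ReloadDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        conf.set("explicit.key", "set-by-code");
        conf.addResource(new ByteArrayInputStream(
                ("<configuration><property><name>file.key</name>"
                        + "<value>from-file</value></property></configuration>").getBytes()));
        // The reload picks up the newly added resource...
        System.out.println(conf.get("file.key")); // "from-file"
        // ...and values set programmatically are preserved across it.
        System.out.println(conf.get("explicit.key")); // "set-by-code"
    }
}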

From source file:net.broomie.reducer.CoCounteReducerMI.java

License:Apache License

/**
 * The setup method for CoCounteReducerMI.
 * This method runs before the reduce phase.
 * @param context Specify the hadoop Context object.
 */
@Override
public final void setup(Context context) {
    Configuration conf = context.getConfiguration();
    String resourcePath = conf.get(LIB_NAKAMEGURO_CONF);
    conf.addResource(resourcePath);
    try {
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
        if (cacheFiles != null) {
            for (Path cachePath : cacheFiles) {
                loadCacheFile(cachePath, context);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:net.broomie.WordCoCounter.java

License:Apache License

/**
 * Run the WordCoCount process.
 * @param args Specify the arguments from the command line.
 * @return Return 0 on success, 1 on failure.
 * @throws IOException Exception for input file IO.
 * @throws InterruptedException Exception from waitForCompletion().
 * @throws ClassNotFoundException Exception for the Mapper and Reducer classes.
 */
public int run(final String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    conf.addResource(LIB_NAKAMEGURO_CONF);
    procArgs(args);
    String dfdb = conf.get(PROP_DFDB);
    if (tfidfFlag) {
        try {
            runWordCount(conf, dfdb);
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
        try {
            DistributedCache.addCacheFile(new URI(dfdb), conf);
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }
    runWordCoCount(conf);
    return 0;
}

From source file:net.broomie.WordCounter.java

License:Apache License

/**
 * Run the WordCount process.
 * @param args Specify the arguments from the command line.
 * @return Return 0 on success, 1 on failure.
 * @throws IOException Exception for input file IO.
 * @throws InterruptedException Exception from waitForCompletion().
 * @throws ClassNotFoundException Exception for the Mapper and Reducer classes.
 */
public int run(final String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    procArgs(args);
    if (in == null || out == null) {
        printUsage();
    }
    Configuration conf = getConf();
    conf.addResource(LIB_NAKAMEGURO_CONF);
    return runWordCount(conf) ? 0 : 1;
}