List of usage examples for org.apache.hadoop.conf.Configuration.set
public void set(String name, String value)
Set the value of the name property.
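A minimal, self-contained sketch of the call itself (the property name and value below are arbitrary examples, not taken from any of the projects listed):

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSetExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // store an arbitrary string property under an arbitrary name
        conf.set("my.example.property", "some-value");
        // read it back, falling back to a default if it was never set
        System.out.println(conf.get("my.example.property", "default"));
    }
}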
From source file:com.ambiata.ivory.operation.hadoop.MultipleInputs.java
License:Apache License
/**
 * Add a {@link Path} with a custom {@link InputFormat} and
 * {@link Mapper} to the list of inputs for the map-reduce job.
 *
 * @param job The {@link Job}
 * @param path {@link Path} to be added to the list of inputs for the job
 * @param inputFormatClass {@link InputFormat} class to use for this path
 * @param mapperClass {@link Mapper} class to use for this path
 */
@SuppressWarnings("unchecked")
public static void addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass,
        Class<? extends Mapper> mapperClass) {
    addInputPath(job, path, inputFormatClass);

    Configuration conf = job.getConfiguration();
    /* WAS not encoded */
    String mapperMapping = encode(path.toString() + ";" + mapperClass.getName());
    String mappers = conf.get(DIR_MAPPERS);
    conf.set(DIR_MAPPERS, mappers == null ? mapperMapping : mappers + "," + mapperMapping);

    job.setMapperClass(DelegatingMapper.class);
}
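A hedged sketch of how a job driver might call this helper; the input paths and mapper classes (AMapper, BMapper) are hypothetical, and TextInputFormat/SequenceFileInputFormat stand in for whatever formats the job actually reads:

// Hypothetical driver-side usage of the addInputPath helper above
Job job = Job.getInstance(new Configuration(), "multi-input-job");
MultipleInputs.addInputPath(job, new Path("/data/text"), TextInputFormat.class, AMapper.class);
MultipleInputs.addInputPath(job, new Path("/data/seq"), SequenceFileInputFormat.class, BMapper.class);
// each path now carries its own mapper mapping in the configuration;
// DelegatingMapper dispatches to the right one at runtime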
From source file:com.ambimmort.webos.plugins.vfs4hdfs.HdfsFileSystem.java
License:Apache License
/**
 * @see org.apache.commons.vfs2.provider.AbstractFileSystem#resolveFile(org.apache.commons.vfs2.FileName)
 */
@Override
public FileObject resolveFile(final FileName name) throws FileSystemException {
    synchronized (this) {
        if (null == this.fs) {
            final String hdfsUri = name.getRootURI();
            final Configuration conf = new Configuration(true);
            conf.set(FileSystem.FS_DEFAULT_NAME_KEY, hdfsUri);
            this.fs = null;
            try {
                fs = FileSystem.get(conf);
            } catch (final IOException e) {
                log.error("Error connecting to filesystem " + hdfsUri, e);
                throw new FileSystemException("Error connecting to filesystem " + hdfsUri, e);
            }
        }
    }

    final boolean useCache = null != getContext().getFileSystemManager().getFilesCache();
    FileObject file;
    if (useCache) {
        file = this.getFileFromCache(name);
    } else {
        file = null;
    }
    if (null == file) {
        String path = null;
        try {
            path = URLDecoder.decode(name.getPath(), "UTF-8");
        } catch (final UnsupportedEncodingException e) {
            path = name.getPath();
        }
        final Path filePath = new Path(path);
        file = new HdfsFileObject((AbstractFileName) name, this, fs, filePath);
        if (useCache) {
            this.putFileToCache(file);
        }
    }

    /* resync the file information if requested */
    if (getFileSystemManager().getCacheStrategy().equals(CacheStrategy.ON_RESOLVE)) {
        file.refresh();
    }
    return file;
}
From source file:com.anhth12.lambda.BatchUpdateFunction.java
@Override
public Void call(JavaPairRDD<K, M> newData, Time timestamp) throws Exception {
    if (newData.take(1).isEmpty()) {
        log.info("No data in current generation's RDD; nothing to do");
        return null;
    }

    log.info("Beginning update at {}", timestamp);

    Configuration hadoopConf = sparkContext.hadoopConfiguration();

    JavaPairRDD<K, M> pastData;
    Path inputPathPattern = new Path(dataDirString + "/*/part-*");
    FileSystem fs = FileSystem.get(hadoopConf);
    FileStatus[] inputPathStatuses = fs.globStatus(inputPathPattern);
    if (inputPathStatuses == null || inputPathStatuses.length == 0) {
        log.info("No past data at path(s) {}", inputPathPattern);
        pastData = null;
    } else {
        log.info("Found past data at path(s) like {}", inputPathStatuses[0].getPath());
        Configuration updatedConf = new Configuration(hadoopConf);
        updatedConf.set(FileInputFormat.INPUT_DIR, joinFSPaths(fs, inputPathStatuses));
        JavaPairRDD<Writable, Writable> pastWriteableData = (JavaPairRDD<Writable, Writable>) sparkContext
                .newAPIHadoopRDD(updatedConf, SequenceFileInputFilter.class, keyWritableClass,
                        messageWritableClass);
        pastData = pastWriteableData.mapToPair(
                new WritableToValueFunction<>(keyClass, messageClass, keyWritableClass, messageWritableClass));
    }

    try (TopicProducer<String, U> producer = new TopicProducerImpl<>(updateBroker, updateTopic)) {
        updateInstance.runUpdate(sparkContext, timestamp.milliseconds(), newData, pastData, modelDirString,
                producer);
    }
    return null;
}
From source file:com.anhth12.lambda.BatchUpdateFunction2.java
@Override
public Void call(JavaRDD<MessageAndMetadata> newData, Time timestamp) throws Exception {
    if (newData.take(1).isEmpty()) {
        log.info("No data in current generation's RDD; nothing to do");
        return null;
    }

    log.info("Beginning update at {}", timestamp);

    JavaPairRDD<K, M> newDataKM = newData.mapToPair(new PairFunction<MessageAndMetadata, K, M>() {
        @Override
        public Tuple2<K, M> call(MessageAndMetadata t) throws Exception {
            return (Tuple2<K, M>) new Tuple2<>(new String(t.getKey()), new String(t.getPayload()));
        }
    });

    Configuration hadoopConf = sparkContext.hadoopConfiguration();

    JavaPairRDD<K, M> pastData;
    Path inputPathPattern = new Path(dataDirString + "/*/part-*");
    FileSystem fs = FileSystem.get(hadoopConf);
    FileStatus[] inputPathStatuses = fs.globStatus(inputPathPattern);
    if (inputPathStatuses == null || inputPathStatuses.length == 0) {
        log.info("No past data at path(s) {}", inputPathPattern);
        pastData = null;
    } else {
        log.info("Found past data at path(s) like {}", inputPathStatuses[0].getPath());
        Configuration updatedConf = new Configuration(hadoopConf);
        updatedConf.set(FileInputFormat.INPUT_DIR, joinFSPaths(fs, inputPathStatuses));
        JavaPairRDD<Writable, Writable> pastWriteableData = (JavaPairRDD<Writable, Writable>) sparkContext
                .newAPIHadoopRDD(updatedConf, SequenceFileInputFilter.class, keyWritableClass,
                        messageWritableClass);
        pastData = pastWriteableData.mapToPair(
                new WritableToValueFunction<>(keyClass, messageClass, keyWritableClass, messageWritableClass));
    }

    try (TopicProducer<String, U> producer = new TopicProducerImpl<>(updateBroker, updateTopic)) {
        updateInstance.runUpdate(sparkContext, timestamp.milliseconds(), newDataKM, pastData, modelDirString,
                producer);
    }
    return null;
}
From source file:com.antbrains.crf.hadoop.FeatureCounter.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: wordcount <in> <out> <templatefile>");
        System.exit(2);
    }
    String[] templates = SgdCrf.readTemplates(otherArgs[2]).toArray(new String[0]);
    conf.set("templates", strArr2Str(templates));

    Job job = new Job(conf, FeatureCounter.class.getSimpleName());
    job.setJarByClass(FeatureCounter.class);
    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(SumReducer.class);
    job.setReducerClass(SumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.antbrains.crf.hadoop.FeatureFilter.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 4) { System.err.println("Usage: wordcount <in> <out> filterRuleFile statOnly"); System.exit(-1);//from w ww . ja v a 2 s .c o m } boolean statOnly = true; if (otherArgs[3].equalsIgnoreCase("false")) { statOnly = false; } conf.set("statOnly", statOnly + ""); String rules = FileTools.readFile(otherArgs[2], "UTF8"); conf.set("rules", rules); conf.set("mapred.reduce.tasks", "0"); Job job = new Job(conf, FeatureFilter.class.getSimpleName()); job.setJarByClass(FeatureFilter.class); job.setMapperClass(CounterMapper.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.antbrains.crf.hadoop.InstanceGenerator.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 4) { System.err.println("InstanceGenerator <in> <out> <featuredict> <template>"); System.exit(-1);//from w w w .ja v a2 s . com } Template template = new Template(otherArgs[3], "UTF8"); conf.set("template", object2String(template)); // conf.set("tc", object2String(tc)); DistributedCache.addCacheFile(new URI(otherArgs[2]), conf); conf.set("dict", otherArgs[2]); conf.set("mapred.reduce.tasks", "0"); Job job = new Job(conf, InstanceGenerator.class.getSimpleName()); job.setJarByClass(InstanceGenerator.class); job.setMapperClass(CounterMapper.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.antbrains.crf.hadoop.ParallelTraining.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 4) { System.err.println("ParallelTraining <instanceDir> <outDir> <featurecount> <training-params>"); System.exit(-1);// www . j a va 2 s .co m } int featureCount = Integer.valueOf(otherArgs[2]); // conf.set("tc", object2String(tc)); conf.set("pt.iterate", "1"); conf.set("pt.featureCount", featureCount + ""); TrainingParams params = SgdCrf.loadParams(otherArgs[3]); System.out.println(new Gson().toJson(params)); conf.set("pt.params", object2String(params)); Job job = new Job(conf, ParallelTraining.class.getSimpleName()); job.setJarByClass(ParallelTraining.class); job.setMapperClass(TrainingMapper.class); job.setReducerClass(TrainingReducer.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(TrainingWeights.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.antbrains.crf.hadoop.ParallelTraining2.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    FileSystem fs = FileSystem.get(conf);
    TrainingParams params = SgdCrf.loadParams(otherArgs[3]);
    System.out.println(new Gson().toJson(params));
    if (otherArgs.length != 5) {
        System.err.println(
                "ParallelTraining2 <instanceDir> <outDir> <featurecount> <training-params> <out-iter>");
        System.exit(-1);
    }
    int featureCount = Integer.valueOf(otherArgs[2]);
    // conf.set("tc", object2String(tc));
    int outIter = Integer.valueOf(otherArgs[4]);

    String prevOutDir = "";
    for (int i = 1; i <= outIter; i++) {
        System.out.println("iterator: " + i);
        conf.set("pt.iterate", i + "");
        conf.set("pt.featureCount", featureCount + "");
        conf.set("pt.params", object2String(params));
        String outDir = otherArgs[1] + "/result" + i;
        if (i > 1) {
            conf.set("paramDir", prevOutDir);
        }
        prevOutDir = outDir;
        fs.delete(new Path(outDir), true);

        Job job = new Job(conf, ParallelTraining2.class.getSimpleName());
        job.setJarByClass(ParallelTraining2.class);
        job.setMapperClass(TrainingMapper.class);
        job.setReducerClass(TrainingReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        System.out.println("outDir: " + outDir);
        FileOutputFormat.setOutputPath(job, new Path(outDir));
        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("iter " + i + " failed");
            break;
        }
    }
}
From source file:com.antsdb.saltedfish.storage.HBaseUtilMain.java
License:Open Source License
private void connect(String zkserver) throws IOException {
    println("Connecting to server %s ...", zkserver);
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", zkserver);
    this.conn = ConnectionFactory.createConnection(conf);
    println("hbase connected - " + zkserver + "\n");
}