List of usage examples for org.apache.hadoop.conf.Configuration.set
public void set(String name, String value)
Set the value of the name property.
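A minimal, self-contained sketch of the call itself (the property name and value below are arbitrary examples, not taken from any of the projects listed):

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSetExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // store an arbitrary string property under an arbitrary name
        conf.set("my.example.property", "some-value");
        // read it back, falling back to a default if it was never set
        System.out.println(conf.get("my.example.property", "default"));
    }
}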
From source file:com.ambiata.ivory.operation.hadoop.MultipleInputs.java
License:Apache License
/**
 * Add a {@link Path} with a custom {@link InputFormat} and
 * {@link Mapper} to the list of inputs for the map-reduce job.
 *
 * @param job The {@link Job}
 * @param path {@link Path} to be added to the list of inputs for the job
 * @param inputFormatClass {@link InputFormat} class to use for this path
 * @param mapperClass {@link Mapper} class to use for this path
 */
@SuppressWarnings("unchecked")
public static void addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass,
        Class<? extends Mapper> mapperClass) {
    addInputPath(job, path, inputFormatClass);

    Configuration conf = job.getConfiguration();
    /* WAS not encoded */
    String mapperMapping = encode(path.toString() + ";" + mapperClass.getName());
    String mappers = conf.get(DIR_MAPPERS);
    conf.set(DIR_MAPPERS, mappers == null ? mapperMapping : mappers + "," + mapperMapping);

    job.setMapperClass(DelegatingMapper.class);
}
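A hedged sketch of how a job driver might call this helper; the input paths and mapper classes (AMapper, BMapper) are hypothetical, and TextInputFormat/SequenceFileInputFormat stand in for whatever formats the job actually reads:

// Hypothetical driver-side usage of the addInputPath helper above
Job job = Job.getInstance(new Configuration(), "multi-input-job");
MultipleInputs.addInputPath(job, new Path("/data/text"), TextInputFormat.class, AMapper.class);
MultipleInputs.addInputPath(job, new Path("/data/seq"), SequenceFileInputFormat.class, BMapper.class);
// each path now carries its own mapper mapping in the configuration;
// DelegatingMapper dispatches to the right one at runtime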
From source file:com.ambimmort.webos.plugins.vfs4hdfs.HdfsFileSystem.java
License:Apache License
/**
 * @see org.apache.commons.vfs2.provider.AbstractFileSystem#resolveFile(org.apache.commons.vfs2.FileName)
 */
@Override
public FileObject resolveFile(final FileName name) throws FileSystemException {
    synchronized (this) {
        if (null == this.fs) {
            final String hdfsUri = name.getRootURI();
            final Configuration conf = new Configuration(true);
            conf.set(FileSystem.FS_DEFAULT_NAME_KEY, hdfsUri);
            this.fs = null;
            try {
                fs = FileSystem.get(conf);
            } catch (final IOException e) {
                log.error("Error connecting to filesystem " + hdfsUri, e);
                throw new FileSystemException("Error connecting to filesystem " + hdfsUri, e);
            }
        }
    }

    final boolean useCache = null != getContext().getFileSystemManager().getFilesCache();
    FileObject file;
    if (useCache) {
        file = this.getFileFromCache(name);
    } else {
        file = null;
    }
    if (null == file) {
        String path = null;
        try {
            path = URLDecoder.decode(name.getPath(), "UTF-8");
        } catch (final UnsupportedEncodingException e) {
            path = name.getPath();
        }
        final Path filePath = new Path(path);
        file = new HdfsFileObject((AbstractFileName) name, this, fs, filePath);
        if (useCache) {
            this.putFileToCache(file);
        }
    }

    /* resync the file information if requested */
    if (getFileSystemManager().getCacheStrategy().equals(CacheStrategy.ON_RESOLVE)) {
        file.refresh();
    }
    return file;
}
From source file:com.anhth12.lambda.BatchUpdateFunction.java
@Override
public Void call(JavaPairRDD<K, M> newData, Time timestamp) throws Exception {
    if (newData.take(1).isEmpty()) {
        log.info("No data in current generation's RDD; nothing to do");
        return null;
    }

    log.info("Beginning update at {}", timestamp);

    Configuration hadoopConf = sparkContext.hadoopConfiguration();

    JavaPairRDD<K, M> pastData;
    Path inputPathPattern = new Path(dataDirString + "/*/part-*");
    FileSystem fs = FileSystem.get(hadoopConf);
    FileStatus[] inputPathStatuses = fs.globStatus(inputPathPattern);
    if (inputPathStatuses == null || inputPathStatuses.length == 0) {
        log.info("No past data at path(s) {}", inputPathPattern);
        pastData = null;
    } else {
        log.info("Found past data at path(s) like {}", inputPathStatuses[0].getPath());
        Configuration updatedConf = new Configuration(hadoopConf);
        updatedConf.set(FileInputFormat.INPUT_DIR, joinFSPaths(fs, inputPathStatuses));
        JavaPairRDD<Writable, Writable> pastWriteableData = (JavaPairRDD<Writable, Writable>) sparkContext
                .newAPIHadoopRDD(updatedConf, SequenceFileInputFilter.class, keyWritableClass,
                        messageWritableClass);
        pastData = pastWriteableData.mapToPair(
                new WritableToValueFunction<>(keyClass, messageClass, keyWritableClass, messageWritableClass));
    }

    try (TopicProducer<String, U> producer = new TopicProducerImpl<>(updateBroker, updateTopic)) {
        updateInstance.runUpdate(sparkContext, timestamp.milliseconds(), newData, pastData, modelDirString,
                producer);
    }
    return null;
}
From source file:com.anhth12.lambda.BatchUpdateFunction2.java
@Override
public Void call(JavaRDD<MessageAndMetadata> newData, Time timestamp) throws Exception {
    if (newData.take(1).isEmpty()) {
        log.info("No data in current generation's RDD; nothing to do");
        return null;
    }

    log.info("Beginning update at {}", timestamp);

    JavaPairRDD<K, M> newDataKM = newData.mapToPair(new PairFunction<MessageAndMetadata, K, M>() {
        @Override
        public Tuple2<K, M> call(MessageAndMetadata t) throws Exception {
            return (Tuple2<K, M>) new Tuple2<>(new String(t.getKey()), new String(t.getPayload()));
        }
    });

    Configuration hadoopConf = sparkContext.hadoopConfiguration();

    JavaPairRDD<K, M> pastData;
    Path inputPathPattern = new Path(dataDirString + "/*/part-*");
    FileSystem fs = FileSystem.get(hadoopConf);
    FileStatus[] inputPathStatuses = fs.globStatus(inputPathPattern);
    if (inputPathStatuses == null || inputPathStatuses.length == 0) {
        log.info("No past data at path(s) {}", inputPathPattern);
        pastData = null;
    } else {
        log.info("Found past data at path(s) like {}", inputPathStatuses[0].getPath());
        Configuration updatedConf = new Configuration(hadoopConf);
        updatedConf.set(FileInputFormat.INPUT_DIR, joinFSPaths(fs, inputPathStatuses));
        JavaPairRDD<Writable, Writable> pastWriteableData = (JavaPairRDD<Writable, Writable>) sparkContext
                .newAPIHadoopRDD(updatedConf, SequenceFileInputFilter.class, keyWritableClass,
                        messageWritableClass);
        pastData = pastWriteableData.mapToPair(
                new WritableToValueFunction<>(keyClass, messageClass, keyWritableClass, messageWritableClass));
    }

    try (TopicProducer<String, U> producer = new TopicProducerImpl<>(updateBroker, updateTopic)) {
        updateInstance.runUpdate(sparkContext, timestamp.milliseconds(), newDataKM, pastData, modelDirString,
                producer);
    }
    return null;
}
From source file:com.antbrains.crf.hadoop.FeatureCounter.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: wordcount <in> <out> <templatefile>");
        System.exit(2);
    }
    String[] templates = SgdCrf.readTemplates(otherArgs[2]).toArray(new String[0]);
    conf.set("templates", strArr2Str(templates));

    Job job = new Job(conf, FeatureCounter.class.getSimpleName());
    job.setJarByClass(FeatureCounter.class);
    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(SumReducer.class);
    job.setReducerClass(SumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.antbrains.crf.hadoop.FeatureFilter.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 4) { System.err.println("Usage: wordcount <in> <out> filterRuleFile statOnly"); System.exit(-1);//from w ww . ja v a 2 s .c o m } boolean statOnly = true; if (otherArgs[3].equalsIgnoreCase("false")) { statOnly = false; } conf.set("statOnly", statOnly + ""); String rules = FileTools.readFile(otherArgs[2], "UTF8"); conf.set("rules", rules); conf.set("mapred.reduce.tasks", "0"); Job job = new Job(conf, FeatureFilter.class.getSimpleName()); job.setJarByClass(FeatureFilter.class); job.setMapperClass(CounterMapper.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.antbrains.crf.hadoop.InstanceGenerator.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 4) { System.err.println("InstanceGenerator <in> <out> <featuredict> <template>"); System.exit(-1);//from w w w .ja v a2 s . com } Template template = new Template(otherArgs[3], "UTF8"); conf.set("template", object2String(template)); // conf.set("tc", object2String(tc)); DistributedCache.addCacheFile(new URI(otherArgs[2]), conf); conf.set("dict", otherArgs[2]); conf.set("mapred.reduce.tasks", "0"); Job job = new Job(conf, InstanceGenerator.class.getSimpleName()); job.setJarByClass(InstanceGenerator.class); job.setMapperClass(CounterMapper.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.antbrains.crf.hadoop.ParallelTraining.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 4) { System.err.println("ParallelTraining <instanceDir> <outDir> <featurecount> <training-params>"); System.exit(-1);// www . j a va 2 s .co m } int featureCount = Integer.valueOf(otherArgs[2]); // conf.set("tc", object2String(tc)); conf.set("pt.iterate", "1"); conf.set("pt.featureCount", featureCount + ""); TrainingParams params = SgdCrf.loadParams(otherArgs[3]); System.out.println(new Gson().toJson(params)); conf.set("pt.params", object2String(params)); Job job = new Job(conf, ParallelTraining.class.getSimpleName()); job.setJarByClass(ParallelTraining.class); job.setMapperClass(TrainingMapper.class); job.setReducerClass(TrainingReducer.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(TrainingWeights.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.antbrains.crf.hadoop.ParallelTraining2.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    FileSystem fs = FileSystem.get(conf);
    TrainingParams params = SgdCrf.loadParams(otherArgs[3]);
    System.out.println(new Gson().toJson(params));
    if (otherArgs.length != 5) {
        System.err.println(
                "ParallelTraining2 <instanceDir> <outDir> <featurecount> <training-params> <out-iter>");
        System.exit(-1);
    }
    int featureCount = Integer.valueOf(otherArgs[2]);
    // conf.set("tc", object2String(tc));
    int outIter = Integer.valueOf(otherArgs[4]);

    String prevOutDir = "";
    for (int i = 1; i <= outIter; i++) {
        System.out.println("iterator: " + i);
        conf.set("pt.iterate", i + "");
        conf.set("pt.featureCount", featureCount + "");
        conf.set("pt.params", object2String(params));
        String outDir = otherArgs[1] + "/result" + i;
        if (i > 1) {
            conf.set("paramDir", prevOutDir);
        }
        prevOutDir = outDir;
        fs.delete(new Path(outDir), true);

        Job job = new Job(conf, ParallelTraining2.class.getSimpleName());
        job.setJarByClass(ParallelTraining2.class);
        job.setMapperClass(TrainingMapper.class);
        job.setReducerClass(TrainingReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        System.out.println("outDir: " + outDir);
        FileOutputFormat.setOutputPath(job, new Path(outDir));
        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("iter " + i + " failed");
            break;
        }
    }
}
From source file:com.antsdb.saltedfish.storage.HBaseUtilMain.java
License:Open Source License
private void connect(String zkserver) throws IOException {
    println("Connecting to server %s ...", zkserver);
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", zkserver);
    this.conn = ConnectionFactory.createConnection(conf);
    println("hbase connected - " + zkserver + "\n");
}