List of usage examples for org.apache.hadoop.conf.Configuration.setStrings
public void setStrings(String name, String... values)
Set the array of string values for the name property as comma delimited values.

Parameters:
name - property name
values - the values
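For reference, a minimal round trip, assuming an illustrative property name "my.values": the values are stored as a single comma-delimited string and split back into an array by getStrings. Because storage is comma-delimited, values that themselves contain commas generally do not survive the round trip intact.

    Configuration conf = new Configuration();
    conf.setStrings("my.values", "alpha", "beta", "gamma"); // stored as "alpha,beta,gamma"

    String[] values = conf.getStrings("my.values");          // {"alpha", "beta", "gamma"}
    String raw = conf.get("my.values");                      // "alpha,beta,gamma"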
From source file:com.lightboxtechnologies.spectrum.MRCoffeeJob.java
License:Apache License
public static int run(String imageID, String outpath, String[] command, Configuration conf)
        throws ClassNotFoundException, DecoderException, IOException, InterruptedException {
    conf.setStrings("command", command);
    conf.setLong("timestamp", System.currentTimeMillis());

    final Job job = new Job(conf, "MRCoffeeJob");
    job.setJarByClass(MRCoffeeJob.class);

    job.setMapperClass(MRCoffeeMapper.class);
    // job.setReducerClass(KeyValueSortReducer.class);
    // job.setNumReduceTasks(1);
    job.setNumReduceTasks(0);

    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);

    job.setOutputKeyClass(ImmutableHexWritable.class);
    // job.setOutputValueClass(KeyValue.class);
    job.setOutputValueClass(JsonWritable.class);
    // job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // HFileOutputFormat.setOutputPath(job, new Path(outpath));
    TextOutputFormat.setOutputPath(job, new Path(outpath));

    return job.waitForCompletion(true) ? 0 : 1;
}
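The "command" array set above would typically be read back on the task side with getStrings. The following setup method is only a sketch of that retrieval, not MRCoffeeMapper's actual code:

    // Illustrative only: how a mapper could recover the values set by run().
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        String[] command = conf.getStrings("command");   // split back out of the comma-delimited value
        long timestamp = conf.getLong("timestamp", 0L);  // 0L used as a fallback if the property is missing
        // ... use command[] and timestamp to set up per-task state ...
    }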
From source file:com.lightboxtechnologies.spectrum.SequenceFileExport.java
License:Apache License
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    String imageID;
    String outpath;
    String friendlyname;
    final Set<String> exts = new HashSet<String>();

    if ("-f".equals(otherArgs[0])) {
        if (otherArgs.length != 4) {
            die();
        }

        // load extensions from file
        final Path extpath = new Path(otherArgs[1]);

        InputStream in = null;
        try {
            in = extpath.getFileSystem(conf).open(extpath);

            Reader r = null;
            try {
                r = new InputStreamReader(in);

                BufferedReader br = null;
                try {
                    br = new BufferedReader(r);

                    String line;
                    while ((line = br.readLine()) != null) {
                        exts.add(line.trim().toLowerCase());
                    }

                    br.close();
                } finally {
                    IOUtils.closeQuietly(br);
                }

                r.close();
            } finally {
                IOUtils.closeQuietly(r);
            }

            in.close();
        } finally {
            IOUtils.closeQuietly(in);
        }

        imageID = otherArgs[2];
        friendlyname = otherArgs[3];
        outpath = otherArgs[4];
    } else {
        if (otherArgs.length < 3) {
            die();
        }

        // read extensions from trailing args
        imageID = otherArgs[0];
        friendlyname = otherArgs[1];
        outpath = otherArgs[2];

        // lowercase all file extensions
        for (int i = 2; i < otherArgs.length; ++i) {
            exts.add(otherArgs[i].toLowerCase());
        }
    }

    conf.setStrings("extensions", exts.toArray(new String[exts.size()]));

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyname, "SequenceFileExport", conf);
    job.setJarByClass(SequenceFileExport.class);
    job.setMapperClass(SequenceFileExportMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(MapWritable.class);

    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    FsEntryHBaseInputFormat.setupJob(job, imageID);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    FileOutputFormat.setOutputPath(job, new Path(outpath));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.luogh.learning.lab.hbase.IndexBuilderTest.java
License:Apache License
/**
 * perf_test_schema:perf_test_table_normal_with_partition_key w_cf value
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String columnFamily = args[1];
    System.out.println("****" + tableName);

    conf.set(TableInputFormat.INPUT_TABLE, tableName);
    conf.set("index.tablename", tableName);
    conf.set("index.familyname", columnFamily);

    String[] fields = new String[args.length - 2];
    System.arraycopy(args, 2, fields, 0, fields.length);
    conf.setStrings("index.fields", fields);

    Job job = Job.getInstance(conf);
    TableMapReduceUtil.initTableMapperJob(Lists.newArrayList(new Scan()), Map.class,
            ImmutableBytesWritable.class, Put.class, job);
    job.setJarByClass(IndexBuilderTest.class);
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TableInputFormat.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    return job;
}
From source file:com.marklogic.contentpump.Command.java
License:Apache License
static void applyCommonOutputConfigOptions(Configuration conf, CommandLine cmdline) {
    if (cmdline.hasOption(OUTPUT_URI_REPLACE)) {
        String uriReplace = cmdline.getOptionValue(OUTPUT_URI_REPLACE);
        if (uriReplace == null) {
            throw new IllegalArgumentException("Missing option argument: " + OUTPUT_URI_REPLACE);
        } else {
            String[] replace = uriReplace.split(",");
            // URI replace comes in pattern and replacement pairs.
            if (replace.length % 2 != 0) {
                throw new IllegalArgumentException(
                        "Invalid option argument for " + OUTPUT_URI_REPLACE + " :" + uriReplace);
            }
            // Replacement string is expected to be in ''
            for (int i = 0; i < replace.length - 1; i++) {
                String replacement = replace[++i].trim();
                if (!replacement.startsWith("'") || !replacement.endsWith("'")) {
                    throw new IllegalArgumentException(
                            "Invalid option argument for " + OUTPUT_URI_REPLACE + " :" + uriReplace);
                }
            }
            conf.setStrings(MarkLogicConstants.CONF_OUTPUT_URI_REPLACE, uriReplace);
        }
    }
    if (cmdline.hasOption(OUTPUT_URI_PREFIX)) {
        String outPrefix = cmdline.getOptionValue(OUTPUT_URI_PREFIX);
        conf.set(MarkLogicConstants.CONF_OUTPUT_URI_PREFIX, outPrefix);
    }
    if (cmdline.hasOption(OUTPUT_URI_SUFFIX)) {
        String outSuffix = cmdline.getOptionValue(OUTPUT_URI_SUFFIX);
        conf.set(MarkLogicConstants.CONF_OUTPUT_URI_SUFFIX, outSuffix);
    }
    if (cmdline.hasOption(OUTPUT_COLLECTIONS)) {
        String collectionsString = cmdline.getOptionValue(OUTPUT_COLLECTIONS);
        conf.set(MarkLogicConstants.OUTPUT_COLLECTION, collectionsString);
    }
    if (cmdline.hasOption(OUTPUT_PERMISSIONS)) {
        String permissionString = cmdline.getOptionValue(OUTPUT_PERMISSIONS);
        conf.set(MarkLogicConstants.OUTPUT_PERMISSION, permissionString);
    }
    if (cmdline.hasOption(OUTPUT_QUALITY)) {
        String quantity = cmdline.getOptionValue(OUTPUT_QUALITY);
        conf.set(MarkLogicConstants.OUTPUT_QUALITY, quantity);
    }
    if (cmdline.hasOption(RDF_STREAMING_MEMORY_THRESHOLD)) {
        String thresh = cmdline.getOptionValue(RDF_STREAMING_MEMORY_THRESHOLD);
        conf.set(RDF_STREAMING_MEMORY_THRESHOLD, thresh);
    }
    if (cmdline.hasOption(RDF_TRIPLES_PER_DOCUMENT)) {
        String count = cmdline.getOptionValue(RDF_TRIPLES_PER_DOCUMENT);
        conf.set(RDF_TRIPLES_PER_DOCUMENT, count);
    }
}
From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionMapReduce.java
public static void main(String[] args) throws Exception {
    String[] theta;
    int iteration = 0, num_of_iteration = 1;
    int feature_size = 0, input_data_size = 0;
    FileSystem fs;
    Float alpha = 0.1f;

    do {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);

        Job job = new Job(conf, "LinearRegressionMapReduce");
        job.setJarByClass(MultipleLinearRegressionMapReduce.class);

        // the following two lines are needed for propagating "theta"
        conf = job.getConfiguration();

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(FloatWritable.class);

        job.setMapperClass(MultipleLinearRegressionMap.class);
        job.setReducerClass(MultipleLinearRegressionReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setNumReduceTasks(1); // set mapred.reduce.tasks = 1 (only one reducer)

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path out = new Path(args[1]);
        if (fs.exists(out))
            fs.delete(out, true);
        FileOutputFormat.setOutputPath(job, out);

        alpha = Float.parseFloat(args[2]);
        num_of_iteration = Integer.parseInt(args[3]);
        feature_size = Integer.parseInt(args[4]);
        input_data_size = Integer.parseInt(args[5]);

        conf.setFloat("alpha", alpha);
        conf.setInt("feature_size", feature_size);
        conf.setInt("input_data_size", input_data_size);
        conf.setInt("iteration", iteration);

        theta = new String[feature_size];
        if (iteration == 0) { // first iteration
            for (int i = 0; i < theta.length; i++)
                theta[i] = "0.0";
            conf.setStrings("theta", theta);
        } else {
            try {
                String uri = "/user/hduser/theta.txt";
                fs = FileSystem.get(conf);
                //FSDataInputStream in = fs.open(new Path(uri));
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(uri))));
                theta = br.readLine().split(",");
            } catch (Exception e) {
            }
            conf.setStrings("theta", theta);
        }

        for (int i = 0; i < theta.length; i++)
            System.out.println("In MapRedce main function: theta[ " + i + " ]" + theta[i]);

        try {
            job.waitForCompletion(true);
            iteration++;
        } catch (IOException e) {
            e.printStackTrace();
        }
    } while (iteration < num_of_iteration);
}
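The driver above uses setStrings to propagate the current theta vector to every task on each iteration. A mapper can recover and parse it with getStrings; the setup method below is a hedged sketch of that pattern, not the project's actual MultipleLinearRegressionMap code:

    // Illustrative only: recovering the propagated theta vector inside a mapper's setup().
    @Override
    protected void setup(Context context) {
        Configuration conf = context.getConfiguration();
        String[] thetaStrings = conf.getStrings("theta"); // same array the driver passed to setStrings()
        float[] theta = new float[thetaStrings.length];
        for (int i = 0; i < thetaStrings.length; i++) {
            theta[i] = Float.parseFloat(thetaStrings[i]);
        }
        float alpha = conf.getFloat("alpha", 0.1f);       // learning rate set by the driver
        // ... keep theta and alpha as fields for use in map() ...
    }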
From source file:com.pivotal.hawq.mapreduce.conf.HAWQConfiguration.java
License:Apache License
/**
 * Set the schema of the table into configuration
 *
 * @param conf   the configuration
 * @param schema schema of the table
 */
public static void setInputTableSchema(Configuration conf, HAWQSchema schema) {
    conf.setStrings(TABLE_SCHEMA_PROPERTY, schema.toString());
}
From source file:com.rw.legion.DefaultJob.java
License:Apache License
/**
 * Main method.
 *
 * @param args Arguments should be: 1) input path, 2) output path, 3)
 *             location of Legion objective file.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Load the Legion objective from the JSON doc.
    Path path = new Path(args[2]);
    FileSystem fs = FileSystem.get(new URI(args[2]), conf);
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));

    String json = "";
    String line = br.readLine();

    while (line != null) {
        json += line;
        line = br.readLine();
    }

    br.close();

    /*
     * Save the JSON for the Legion objective to the Hadoop configuration,
     * so we can access it in other containers.
     */
    conf.setStrings("legion_objective", json);

    // De-serialize the objective so we can access the settings here.
    LegionObjective legionObjective = ObjectiveDeserializer.deserialize(json);

    // Start configuring the MapReduce job.
    Job hadoopJob = Job.getInstance(conf, "Legion");
    hadoopJob.setJarByClass(DefaultJob.class);
    hadoopJob.setMapperClass(DefaultMapper.class);
    LazyOutputFormat.setOutputFormatClass(hadoopJob, TextOutputFormat.class);

    // Compress the output to speed things up.
    TextOutputFormat.setCompressOutput(hadoopJob, true);
    TextOutputFormat.setOutputCompressorClass(hadoopJob, GzipCodec.class);

    // What input format do we use?
    try {
        @SuppressWarnings("unchecked")
        Class<? extends FileInputFormat<NullWritable, LegionRecord>> inputClass =
                (Class<? extends FileInputFormat<NullWritable, LegionRecord>>) Class
                        .forName(legionObjective.getInputFormat());
        hadoopJob.setInputFormatClass(inputClass);
    } catch (Exception e) {
        throw new JsonParseException(
                "Problem loading input format " + "class '" + legionObjective.getInputFormat() + "'");
    }

    // Should we set a max combined size?
    if (legionObjective.getMaxCombinedSize() != null) {
        CombineFileInputFormat.setMaxInputSplitSize(hadoopJob, legionObjective.getMaxCombinedSize());
    }

    /*
     * These are just static convenience methods, so it doesn't matter if
     * they come from the wrong class.
     */
    FileInputFormat.setInputDirRecursive(hadoopJob, true);
    FileInputFormat.addInputPath(hadoopJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(hadoopJob, new Path(args[1]));

    // Since a Legion objective can specify multiple output tables.
    for (OutputTable outputTable : legionObjective.getOutputTables()) {
        MultipleOutputs.addNamedOutput(hadoopJob, outputTable.getTitle(), TextOutputFormat.class,
                NullWritable.class, Text.class);
    }

    MultipleOutputs.addNamedOutput(hadoopJob, "skipped", TextOutputFormat.class,
            NullWritable.class, Text.class);

    hadoopJob.waitForCompletion(true);
}
From source file:com.scaleoutsoftware.soss.hserver.GridOutputFormat.java
License:Apache License
/**
 * Sets the {@link NamedMap} to direct output to.
 *
 * @param job job to modify
 * @param map named map to be used for output
 */
public static void setNamedMap(Job job, NamedMap map) {
    Configuration configuration = job.getConfiguration();

    configuration.setBoolean(outputIsNamedMapProperty, true);
    configuration.setStrings(outputNamedMapProperty, map.getMapName());

    CustomSerializer keySerializer = map.getKeySerializer();
    CustomSerializer valueSerializer = map.getValueSerializer();
    SerializationMode serializationMode = map.getSerializationMode();
    AvailabilityMode availabilityMode = map.getAvailabilityMode();

    configuration.setInt(SERIALIZATION_MODE, serializationMode.ordinal());
    configuration.setInt(AVAILABILITY_MODE, availabilityMode.ordinal());

    configuration.setClass(outputNamedMapKeySerializerProperty, keySerializer.getClass(), Object.class);
    configuration.setClass(outputNamedMapValueSerializerProperty, valueSerializer.getClass(), Object.class);

    if (keySerializer.getObjectClass() != null) {
        configuration.setClass(outputNamedMapKeyProperty, keySerializer.getObjectClass(), Object.class);
    }

    if (valueSerializer.getObjectClass() != null) {
        configuration.setClass(outputNamedMapValueProperty, valueSerializer.getObjectClass(), Object.class);
    }
}
From source file:com.toddbodnar.simpleHive.subQueries.select.java
@Override
public void writeConfig(Configuration conf) {
    conf.set("SIMPLE_HIVE.SELECT.QUERY_STR", query);
    conf.set("SIMPLE_HIVE.SELECT.INPUT_SEPERATOR",
            controlCharacterConverter.convertToReadable(getInput().getSeperator()));
    conf.setStrings("SIMPLE_HIVE.SELECT.INPUT_COL_NAMES", getInput().getColNames());
}
From source file:com.topsoft.botspider.io.UnionData.java
License:Apache License
public static void setParseClass(Configuration conf, Class... classez) {
    if (conf == null || classez == null)
        return;

    ArrayList<String> arrParse = new ArrayList<String>(classez.length);
    for (Class clzss : classez) {
        arrParse.add(clzss.getName());
    }

    conf.setStrings(UNION_CLASS, arrParse.toArray(new String[arrParse.size()]));
}