Example usage for org.apache.hadoop.conf Configuration setStrings

List of usage examples for org.apache.hadoop.conf Configuration setStrings

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration setStrings.

Prototype

public void setStrings(String name, String... values) 

Document

Set the array of string values for the name property as comma-delimited values.
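
For orientation, here is a minimal, self-contained sketch of the round trip (not taken from any of the projects below): setStrings stores the values as a single comma-delimited string, and getStrings splits them back into an array. Note that individual values containing commas are not escaped, so they would be split apart on read.

import org.apache.hadoop.conf.Configuration;

public class SetStringsExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Stored internally as the single value "jpg,png,gif"
        conf.setStrings("my.extensions", "jpg", "png", "gif");

        System.out.println(conf.get("my.extensions")); // jpg,png,gif

        // getStrings splits the comma-delimited value back into an array
        for (String ext : conf.getStrings("my.extensions")) {
            System.out.println(ext);
        }
    }
}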

Usage

From source file:com.lightboxtechnologies.spectrum.MRCoffeeJob.java

License:Apache License

public static int run(String imageID, String outpath, String[] command, Configuration conf)
        throws ClassNotFoundException, DecoderException, IOException, InterruptedException {
    conf.setStrings("command", command);
    conf.setLong("timestamp", System.currentTimeMillis());

    final Job job = new Job(conf, "MRCoffeeJob");
    job.setJarByClass(MRCoffeeJob.class);

    job.setMapperClass(MRCoffeeMapper.class);

    //    job.setReducerClass(KeyValueSortReducer.class);
    //    job.setNumReduceTasks(1);
    job.setNumReduceTasks(0);

    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);

    job.setOutputKeyClass(ImmutableHexWritable.class);
    //    job.setOutputValueClass(KeyValue.class);
    job.setOutputValueClass(JsonWritable.class);
    //    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    //    HFileOutputFormat.setOutputPath(job, new Path(outpath));
    TextOutputFormat.setOutputPath(job, new Path(outpath));

    return job.waitForCompletion(true) ? 0 : 1;
}
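
The "command" array would typically be read back on the task side with getStrings. The following is only a hedged sketch of that pattern; the class name, generic types, and field are assumptions, not the actual MRCoffeeMapper code:

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical skeleton; key/value types are placeholders.
public class CommandReadingMapper extends Mapper<Text, Text, Text, Text> {
    private String[] command;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Splits the comma-delimited value written by conf.setStrings("command", command)
        command = context.getConfiguration().getStrings("command");
    }
}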

From source file:com.lightboxtechnologies.spectrum.SequenceFileExport.java

License:Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();

    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    String imageID;
    String outpath;
    String friendlyname;
    final Set<String> exts = new HashSet<String>();

    if ("-f".equals(otherArgs[0])) {
        if (otherArgs.length != 5) {
            die();
        }

        // load extensions from file
        final Path extpath = new Path(otherArgs[1]);

        InputStream in = null;
        try {
            in = extpath.getFileSystem(conf).open(extpath);

            Reader r = null;
            try {
                r = new InputStreamReader(in);

                BufferedReader br = null;
                try {
                    br = new BufferedReader(r);

                    String line;
                    while ((line = br.readLine()) != null) {
                        exts.add(line.trim().toLowerCase());
                    }

                    br.close();
                } finally {
                    IOUtils.closeQuietly(br);
                }

                r.close();
            } finally {
                IOUtils.closeQuietly(r);
            }

            in.close();
        } finally {
            IOUtils.closeQuietly(in);
        }

        imageID = otherArgs[2];
        friendlyname = otherArgs[3];
        outpath = otherArgs[4];
    } else {
        if (otherArgs.length < 3) {
            die();
        }

        // read extensions from trailing args
        imageID = otherArgs[0];
        friendlyname = otherArgs[1];
        outpath = otherArgs[2];

        // lowercase all file extensions
        for (int i = 3; i < otherArgs.length; ++i) {
            exts.add(otherArgs[i].toLowerCase());
        }
    }

    conf.setStrings("extensions", exts.toArray(new String[exts.size()]));

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyname, "SequenceFileExport", conf);
    job.setJarByClass(SequenceFileExport.class);
    job.setMapperClass(SequenceFileExportMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(MapWritable.class);

    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    FsEntryHBaseInputFormat.setupJob(job, imageID);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    FileOutputFormat.setOutputPath(job, new Path(outpath));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
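
On the mapper side, the "extensions" entry would typically be rebuilt into a set with getStrings. A hedged sketch of that pattern follows; the class name and generic types are placeholders, not the actual SequenceFileExportMapper:

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical skeleton; input key/value types are placeholders.
public class ExtensionsReadingMapper extends Mapper<Text, Text, BytesWritable, MapWritable> {
    private final Set<String> extensions = new HashSet<String>();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Recover the set written by conf.setStrings("extensions", ...)
        String[] exts = context.getConfiguration().getStrings("extensions");
        if (exts != null) {
            Collections.addAll(extensions, exts);
        }
    }
}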

From source file:com.luogh.learning.lab.hbase.IndexBuilderTest.java

License:Apache License

/**
 * perf_test_schema:perf_test_table_normal_with_partition_key w_cf value
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String columnFamily = args[1];
    System.out.println("****" + tableName);
    conf.set(TableInputFormat.INPUT_TABLE, tableName);
    conf.set("index.tablename", tableName);
    conf.set("index.familyname", columnFamily);
    String[] fields = new String[args.length - 2];
    System.arraycopy(args, 2, fields, 0, fields.length);
    conf.setStrings("index.fields", fields);
    Job job = Job.getInstance(conf);
    TableMapReduceUtil.initTableMapperJob(Lists.newArrayList(new Scan()), Map.class,
            ImmutableBytesWritable.class, Put.class, job);
    job.setJarByClass(IndexBuilderTest.class);
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TableInputFormat.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    return job;
}
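
The "index.fields", "index.tablename", and "index.familyname" entries would then be read back in the mapper's setup. The sketch below shows that pattern only as an assumption; the real Map class in IndexBuilderTest may be structured differently:

import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical skeleton; the real mapper in IndexBuilderTest may differ.
public class IndexFieldsMapper extends TableMapper<ImmutableBytesWritable, Put> {
    private byte[] family;
    private String[] indexFields;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Read back what configureJob stored with conf.set(...) and conf.setStrings("index.fields", fields)
        family = Bytes.toBytes(context.getConfiguration().get("index.familyname"));
        indexFields = context.getConfiguration().getStrings("index.fields");
    }
}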

From source file:com.marklogic.contentpump.Command.java

License:Apache License

static void applyCommonOutputConfigOptions(Configuration conf, CommandLine cmdline) {

    if (cmdline.hasOption(OUTPUT_URI_REPLACE)) {
        String uriReplace = cmdline.getOptionValue(OUTPUT_URI_REPLACE);
        if (uriReplace == null) {
            throw new IllegalArgumentException("Missing option argument: " + OUTPUT_URI_REPLACE);
        } else {
            String[] replace = uriReplace.split(",");
            // URI replace comes in pattern and replacement pairs.
            if (replace.length % 2 != 0) {
                throw new IllegalArgumentException(
                        "Invalid option argument for " + OUTPUT_URI_REPLACE + " :" + uriReplace);
            }
            // Replacement string is expected to be in ''
            for (int i = 0; i < replace.length - 1; i++) {
                String replacement = replace[++i].trim();
                if (!replacement.startsWith("'") || !replacement.endsWith("'")) {
                    throw new IllegalArgumentException(
                            "Invalid option argument for " + OUTPUT_URI_REPLACE + " :" + uriReplace);
                }
            }
            conf.setStrings(MarkLogicConstants.CONF_OUTPUT_URI_REPLACE, uriReplace);
        }
    }
    if (cmdline.hasOption(OUTPUT_URI_PREFIX)) {
        String outPrefix = cmdline.getOptionValue(OUTPUT_URI_PREFIX);
        conf.set(MarkLogicConstants.CONF_OUTPUT_URI_PREFIX, outPrefix);
    }
    if (cmdline.hasOption(OUTPUT_URI_SUFFIX)) {
        String outSuffix = cmdline.getOptionValue(OUTPUT_URI_SUFFIX);
        conf.set(MarkLogicConstants.CONF_OUTPUT_URI_SUFFIX, outSuffix);
    }
    if (cmdline.hasOption(OUTPUT_COLLECTIONS)) {
        String collectionsString = cmdline.getOptionValue(OUTPUT_COLLECTIONS);
        conf.set(MarkLogicConstants.OUTPUT_COLLECTION, collectionsString);
    }

    if (cmdline.hasOption(OUTPUT_PERMISSIONS)) {
        String permissionString = cmdline.getOptionValue(OUTPUT_PERMISSIONS);
        conf.set(MarkLogicConstants.OUTPUT_PERMISSION, permissionString);
    }
    if (cmdline.hasOption(OUTPUT_QUALITY)) {
        String quantity = cmdline.getOptionValue(OUTPUT_QUALITY);
        conf.set(MarkLogicConstants.OUTPUT_QUALITY, quantity);
    }

    if (cmdline.hasOption(RDF_STREAMING_MEMORY_THRESHOLD)) {
        String thresh = cmdline.getOptionValue(RDF_STREAMING_MEMORY_THRESHOLD);
        conf.set(RDF_STREAMING_MEMORY_THRESHOLD, thresh);
    }
    if (cmdline.hasOption(RDF_TRIPLES_PER_DOCUMENT)) {
        String count = cmdline.getOptionValue(RDF_TRIPLES_PER_DOCUMENT);
        conf.set(RDF_TRIPLES_PER_DOCUMENT, count);
    }
}
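
Because the varargs call receives a single, already comma-delimited string here, this particular setStrings call behaves like a plain conf.set; a reader of the property could still use getStrings to recover the pattern/replacement pairs. The sketch below illustrates that, using a hypothetical key in place of MarkLogicConstants.CONF_OUTPUT_URI_REPLACE:

import org.apache.hadoop.conf.Configuration;

public class UriReplaceReaderSketch {
    // Hypothetical key; the example above uses MarkLogicConstants.CONF_OUTPUT_URI_REPLACE
    private static final String URI_REPLACE_KEY = "output.uri.replace";

    public static void printPairs(Configuration conf) {
        // getStrings splits the comma-delimited value into alternating pattern/replacement entries
        String[] pairs = conf.getStrings(URI_REPLACE_KEY);
        if (pairs == null) {
            return;
        }
        for (int i = 0; i + 1 < pairs.length; i += 2) {
            System.out.println("pattern=" + pairs[i] + " replacement=" + pairs[i + 1]);
        }
    }
}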

From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionMapReduce.java

public static void main(String[] args) throws Exception {
    String[] theta;
    int iteration = 0, num_of_iteration = 1;
    int feature_size = 0, input_data_size = 0;
    FileSystem fs;
    Float alpha = 0.1f;

    do {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);

        Job job = new Job(conf, "LinearRegressionMapReduce");
        job.setJarByClass(MultipleLinearRegressionMapReduce.class);

        // the following two lines are needed for propagating "theta"
        conf = job.getConfiguration();

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(FloatWritable.class);

        job.setMapperClass(MultipleLinearRegressionMap.class);
        job.setReducerClass(MultipleLinearRegressionReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setNumReduceTasks(1); // set mapred.reduce.tasks = 1 (only one reducer)

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path out = new Path(args[1]);
        if (fs.exists(out))
            fs.delete(out, true);

        FileOutputFormat.setOutputPath(job, out);
        alpha = Float.parseFloat(args[2]);
        num_of_iteration = Integer.parseInt(args[3]);
        feature_size = Integer.parseInt(args[4]);
        input_data_size = Integer.parseInt(args[5]);
        conf.setFloat("alpha", alpha);
        conf.setInt("feature_size", feature_size);
        conf.setInt("input_data_size", input_data_size);
        conf.setInt("iteration", iteration);

        theta = new String[feature_size];

        if (iteration == 0) { // first iteration
            for (int i = 0; i < theta.length; i++)
                theta[i] = "0.0";
            conf.setStrings("theta", theta);
        } else {
            try {
                String uri = "/user/hduser/theta.txt";
                fs = FileSystem.get(conf);
                //FSDataInputStream in = fs.open(new Path(uri));
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(uri))));
                theta = br.readLine().split(",");
            } catch (Exception e) {
                // Ignored: if theta.txt cannot be read, theta keeps its default entries.
            }
            conf.setStrings("theta", theta);
        }

        for (int i = 0; i < theta.length; i++)
            System.out.println("In MapRedce main function: theta[ " + i + " ]" + theta[i]);

        try {
            job.waitForCompletion(true);
            iteration++;
        } catch (IOException e) {
            e.printStackTrace();
        }
    } while (iteration < num_of_iteration);

}
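
On the map side, the "theta" values would typically be recovered with getStrings and parsed back to floats. This is only a hedged sketch of that pattern, assuming the driver has always set "theta"; the class name and generic types are placeholders, not the actual MultipleLinearRegressionMap:

import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical skeleton; the real MultipleLinearRegressionMap may differ.
public class ThetaReadingMapper extends Mapper<LongWritable, Text, LongWritable, FloatWritable> {
    private float[] theta;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Parse the coefficient vector stored with conf.setStrings("theta", theta)
        String[] parts = context.getConfiguration().getStrings("theta");
        theta = new float[parts.length];
        for (int i = 0; i < parts.length; i++) {
            theta[i] = Float.parseFloat(parts[i]);
        }
    }
}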

From source file:com.pivotal.hawq.mapreduce.conf.HAWQConfiguration.java

License:Apache License

/**
 * Set the schema of the table into configuration
 *
 * @param conf   the configuration
 * @param schema schema of the table
 */
public static void setInputTableSchema(Configuration conf, HAWQSchema schema) {
    conf.setStrings(TABLE_SCHEMA_PROPERTY, schema.toString());
}

From source file:com.rw.legion.DefaultJob.java

License:Apache License

/**
 * Main method.
 * 
 * @param args  Arguments should be: 1) input path, 2) output path, 3)
 * location of Legion objective file.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Load the Legion objective from the JSON doc.
    Path path = new Path(args[2]);
    FileSystem fs = FileSystem.get(new URI(args[2]), conf);
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String json = "";

    String line = br.readLine();

    while (line != null) {
        json += line;
        line = br.readLine();
    }

    br.close();

    /*
     *  Save the JSON for the Legion objective to the Hadoop configuration,
     *  so we can access it in other containers.
     */
    conf.setStrings("legion_objective", json);

    // De-serialize the objective so we can access the settings here.
    LegionObjective legionObjective = ObjectiveDeserializer.deserialize(json);

    // Start configuring the MapReduce job.
    Job hadoopJob = Job.getInstance(conf, "Legion");

    hadoopJob.setJarByClass(DefaultJob.class);
    hadoopJob.setMapperClass(DefaultMapper.class);
    LazyOutputFormat.setOutputFormatClass(hadoopJob, TextOutputFormat.class);

    // Compress the output to speed things up.
    TextOutputFormat.setCompressOutput(hadoopJob, true);
    TextOutputFormat.setOutputCompressorClass(hadoopJob, GzipCodec.class);

    // What input format do we use?

    try {
        @SuppressWarnings("unchecked")
        Class<? extends FileInputFormat<NullWritable, LegionRecord>> inputClass = (Class<? extends FileInputFormat<NullWritable, LegionRecord>>) Class
                .forName(legionObjective.getInputFormat());

        hadoopJob.setInputFormatClass(inputClass);
    } catch (Exception e) {
        throw new JsonParseException(
                "Problem loading input format " + "class '" + legionObjective.getInputFormat() + "'");
    }

    // Should we set a max combined size?

    if (legionObjective.getMaxCombinedSize() != null) {
        CombineFileInputFormat.setMaxInputSplitSize(hadoopJob, legionObjective.getMaxCombinedSize());
    }

    /* 
     * These are just static convenience methods, so it doesn't matter if
     * they come from the wrong class.
     */
    FileInputFormat.setInputDirRecursive(hadoopJob, true);
    FileInputFormat.addInputPath(hadoopJob, new Path(args[0]));

    FileOutputFormat.setOutputPath(hadoopJob, new Path(args[1]));

    // Since a Legion objective can specify multiple output tables.
    for (OutputTable outputTable : legionObjective.getOutputTables()) {
        MultipleOutputs.addNamedOutput(hadoopJob, outputTable.getTitle(), TextOutputFormat.class,
                NullWritable.class, Text.class);
    }

    MultipleOutputs.addNamedOutput(hadoopJob, "skipped", TextOutputFormat.class, NullWritable.class,
            Text.class);

    hadoopJob.waitForCompletion(true);
}
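
Since a single value is passed, setStrings stores the JSON string as-is, so other containers can read it back with plain get (getStrings would split it at any commas inside the JSON). The sketch below shows that read-back pattern as an assumption only; the real DefaultMapper and LegionRecord types are not reproduced here:

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical skeleton; the real DefaultMapper uses LegionRecord values.
public class ObjectiveReadingMapper extends Mapper<NullWritable, Text, NullWritable, Text> {
    private String json;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // get() returns the JSON intact; it could then be passed to ObjectiveDeserializer.deserialize(json)
        json = context.getConfiguration().get("legion_objective");
    }
}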

From source file:com.scaleoutsoftware.soss.hserver.GridOutputFormat.java

License:Apache License

/**
 * Sets the {@link NamedMap} to direct output to.
 *
 * @param job job to modify
 * @param map named map to be used for output
 */
public static void setNamedMap(Job job, NamedMap map) {
    Configuration configuration = job.getConfiguration();
    configuration.setBoolean(outputIsNamedMapProperty, true);
    configuration.setStrings(outputNamedMapProperty, map.getMapName());
    CustomSerializer keySerializer = map.getKeySerializer();
    CustomSerializer valueSerializer = map.getValueSerializer();
    SerializationMode serializationMode = map.getSerializationMode();
    AvailabilityMode availabilityMode = map.getAvailabilityMode();
    configuration.setInt(SERIALIZATION_MODE, serializationMode.ordinal());
    configuration.setInt(AVAILABILITY_MODE, availabilityMode.ordinal());
    configuration.setClass(outputNamedMapKeySerializerProperty, keySerializer.getClass(), Object.class);
    configuration.setClass(outputNamedMapValueSerializerProperty, valueSerializer.getClass(), Object.class);
    if (keySerializer.getObjectClass() != null) {
        configuration.setClass(outputNamedMapKeyProperty, keySerializer.getObjectClass(), Object.class);
    }
    if (valueSerializer.getObjectClass() != null) {
        configuration.setClass(outputNamedMapValueProperty, valueSerializer.getObjectClass(), Object.class);
    }
}

From source file:com.toddbodnar.simpleHive.subQueries.select.java

@Override
public void writeConfig(Configuration conf) {
    conf.set("SIMPLE_HIVE.SELECT.QUERY_STR", query);
    conf.set("SIMPLE_HIVE.SELECT.INPUT_SEPERATOR",
            controlCharacterConverter.convertToReadable(getInput().getSeperator()));
    conf.setStrings("SIMPLE_HIVE.SELECT.INPUT_COL_NAMES", getInput().getColNames());
}
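
A hedged sketch of how a task-side consumer might read the column names back; the key string is copied from the example, everything else is an assumption:

import org.apache.hadoop.conf.Configuration;

public class SelectConfigReaderSketch {
    public static String[] readColumnNames(Configuration conf) {
        // Splits the comma-delimited value written by
        // conf.setStrings("SIMPLE_HIVE.SELECT.INPUT_COL_NAMES", ...)
        return conf.getStrings("SIMPLE_HIVE.SELECT.INPUT_COL_NAMES");
    }
}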

From source file:com.topsoft.botspider.io.UnionData.java

License:Apache License

public static void setParseClass(Configuration conf, Class... classez) {
    if (conf == null || classez == null)
        return;
    ArrayList<String> arrParse = new ArrayList<String>(classez.length);
    for (Class clzss : classez) {
        arrParse.add(clzss.getName());
    }
    conf.setStrings(UNION_CLASS, arrParse.toArray(new String[arrParse.size()]));
}
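
A plausible counterpart for reading the class list back out, sketched under the assumption that the same UNION_CLASS key is used; the method and key value shown here are hypothetical:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;

public class UnionDataReaderSketch {
    // Hypothetical key value; the example above uses the UNION_CLASS constant.
    private static final String UNION_CLASS = "union.parse.class";

    public static List<Class<?>> getParseClasses(Configuration conf) throws ClassNotFoundException {
        List<Class<?>> classes = new ArrayList<Class<?>>();
        String[] names = conf.getStrings(UNION_CLASS);
        if (names == null) {
            return classes;
        }
        // Each entry is a fully qualified class name stored by setParseClass(...)
        for (String name : names) {
            classes.add(Class.forName(name));
        }
        return classes;
    }
}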