Example usage for org.apache.hadoop.conf Configuration setInt

List of usage examples for org.apache.hadoop.conf Configuration setInt

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration setInt.

Prototype

public void setInt(String name, int value) 

Document

Set the value of the name property to an int.
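
A minimal sketch of the call in isolation (the property name "example.buffer.size" and the values are illustrative, not drawn from the sources below); getInt is the matching reader and takes a default for unset properties:

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store an int under an illustrative property name.
        conf.setInt("example.buffer.size", 131072);
        // Read it back; the second argument is the default returned if the property is unset.
        int size = conf.getInt("example.buffer.size", 65536);
        System.out.println(size); // prints 131072
    }
}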

Usage

From source file: be.ugent.intec.halvade.utils.HalvadeConf.java

License: Open Source License

public static void setSequenceDictionary(Configuration conf, SAMSequenceDictionary dict)
        throws IOException, URISyntaxException {
    int counter = 0;
    for (SAMSequenceRecord seq : dict.getSequences()) {
        conf.set(dictionarySequenceName + counter, seq.getSequenceName());
        conf.setInt(dictionarySequenceLength + counter, seq.getSequenceLength());
        counter++;
    }
    conf.setInt(dictionaryCount, counter);
}
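
The setter above encodes the dictionary as indexed name/length properties plus a count. A hypothetical reader (not taken from HalvadeConf; it assumes the same String constants dictionaryCount, dictionarySequenceName and dictionarySequenceLength, plus htsjdk's SAMSequenceRecord and SAMSequenceDictionary) would pair each setInt with a getInt:

// Hypothetical inverse of setSequenceDictionary, shown only to illustrate the setInt/getInt pairing.
public static SAMSequenceDictionary getSequenceDictionary(Configuration conf) {
    int count = conf.getInt(dictionaryCount, 0);
    List<SAMSequenceRecord> sequences = new ArrayList<>();
    for (int i = 0; i < count; i++) {
        String name = conf.get(dictionarySequenceName + i);
        int length = conf.getInt(dictionarySequenceLength + i, 0);
        sequences.add(new SAMSequenceRecord(name, length));
    }
    return new SAMSequenceDictionary(sequences);
}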

From source file: be.ugent.intec.halvade.utils.HalvadeConf.java

License: Open Source License

public static void setMinChrLength(Configuration conf, int val) {
    Logger.DEBUG("min chr size set to " + val, 3);
    conf.setInt(minChrSize, val);
}

From source file: biglayer.AutoCoder.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
       System.out.println("args: " + Arrays.toString(args));
       HelpFormatter formatter = new HelpFormatter();
       formatter.setWidth(120);
       formatter.printHelp(this.getClass().getName(), options);
       ToolRunner.printGenericCommandUsage(System.out);
       return -1;
    }*/

    //String inputPath = cmdline.getOptionValue(INPUT);
    //String outputPath = cmdline.getOptionValue(OUTPUT);

    String inputPath = "qiwang321/MNIST-mingled-key/part*";
    String outputPath = "shangfu/layeroutput";

    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    Configuration conf = getConf();

    conf.setInt("num_reduce_task", reduceTasks);
    conf.set("sidepath", outputPath + "_side/");

    Job job0 = Job.getInstance(conf);
    job0.setJobName(AutoCoder.class.getSimpleName());
    job0.setJarByClass(AutoCoder.class);
    job0.setNumReduceTasks(reduceTasks);

    job0.getConfiguration().setInt("layer_ind", 0);

    FileInputFormat.setInputPaths(job0, new Path(inputPath));
    FileOutputFormat.setOutputPath(job0, new Path(outputPath + "_0"));

    job0.setInputFormatClass(KeyValueTextInputFormat.class);
    job0.setOutputFormatClass(SequenceFileOutputFormat.class);

    job0.setMapOutputKeyClass(PairOfInts.class);
    job0.setMapOutputValueClass(ModelNode.class);
    job0.setOutputKeyClass(PairOfInts.class);
    job0.setOutputValueClass(ModelNode.class);

    job0.setMapperClass(MyMapper0.class);
    job0.setReducerClass(MyReducer0.class);
    job0.setPartitionerClass(MyPartitioner.class);
    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath + "_0");
    FileSystem.get(getConf()).delete(outputDir, true);
    long startTime = System.currentTimeMillis();
    long codeStart = System.currentTimeMillis();
    double codeTimeSum = 0;
    job0.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    for (int iterations = 1; iterations < GlobalUtil.NUM_LAYER + 1; iterations++) {
        Job job1 = Job.getInstance(conf);
        job1.setJobName(AutoCoder.class.getSimpleName());
        job1.setJarByClass(AutoCoder.class);
        job1.setNumReduceTasks(reduceTasks);
        job1.getConfiguration().setInt("layer_ind", iterations);
        FileInputFormat.setInputPaths(job1, new Path(outputPath + "_" + (iterations - 1)));
        FileOutputFormat.setOutputPath(job1, new Path(outputPath + "_" + iterations + "_train"));

        LOG.info("Tool: " + AutoCoder.class.getSimpleName());
        LOG.info(" - input path: " + outputPath + "_" + (iterations - 1));
        LOG.info(" - output path: " + outputPath + "_" + iterations + "_train");
        LOG.info(" - number of reducers: " + reduceTasks);

        job1.setInputFormatClass(SequenceFileInputFormat.class);
        job1.setOutputFormatClass(SequenceFileOutputFormat.class);

        job1.setMapOutputKeyClass(PairOfInts.class);
        job1.setMapOutputValueClass(ModelNode.class);
        job1.setOutputKeyClass(PairOfInts.class);
        job1.setOutputValueClass(ModelNode.class);

        job1.setMapperClass(MyMapper.class);
        job1.setReducerClass(MyReducer_Train.class);
        job1.setPartitionerClass(MyPartitioner.class);
        // Delete the output directory if it exists already.
        outputDir = new Path(outputPath + "_" + iterations + "_train");
        FileSystem.get(getConf()).delete(outputDir, true);
        startTime = System.currentTimeMillis();
        job1.waitForCompletion(true);
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

        Job job2 = Job.getInstance(conf);
        job2.setJobName(AutoCoder.class.getSimpleName());
        job2.setJarByClass(AutoCoder.class);
        job2.setNumReduceTasks(reduceTasks);
        job2.getConfiguration().setInt("layer_ind", iterations);
        FileInputFormat.setInputPaths(job2, new Path(outputPath + "_" + (iterations + "_train")));
        FileOutputFormat.setOutputPath(job2, new Path(outputPath + "_" + iterations));

        LOG.info("Tool: " + AutoCoder.class.getSimpleName());
        LOG.info(" - input path: " + outputPath + "_" + iterations + "_train");
        LOG.info(" - output path: " + outputPath + "_" + iterations);
        LOG.info(" - number of reducers: " + reduceTasks);

        job2.setInputFormatClass(SequenceFileInputFormat.class);
        job2.setOutputFormatClass(SequenceFileOutputFormat.class);

        job2.setMapOutputKeyClass(PairOfInts.class);
        job2.setMapOutputValueClass(ModelNode.class);
        job2.setOutputKeyClass(PairOfInts.class);
        job2.setOutputValueClass(ModelNode.class);

        job2.setMapperClass(MyMapper.class);
        job2.setReducerClass(MyReducer_GenData.class);
        job2.setPartitionerClass(MyPartitioner.class);
        // Delete the output directory if it exists already.
        outputDir = new Path(outputPath + "_" + iterations);
        FileSystem.get(getConf()).delete(outputDir, true);
        startTime = System.currentTimeMillis();
        job2.waitForCompletion(true);
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    }

    LOG.info(" - input path: " + outputPath + "_" + GlobalUtil.NUM_LAYER);
    LOG.info(" - output path: " + outputPath);
    reduceTasks = 1;
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job_super = Job.getInstance(conf);
    job_super.setJobName(AutoCoder.class.getSimpleName());
    job_super.setJarByClass(AutoCoder.class);
    job_super.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job_super, new Path(outputPath + "_" + GlobalUtil.NUM_LAYER));
    FileOutputFormat.setOutputPath(job_super, new Path(outputPath));

    job_super.setInputFormatClass(SequenceFileInputFormat.class);
    job_super.setOutputFormatClass(SequenceFileOutputFormat.class);

    job_super.setMapOutputKeyClass(PairOfInts.class);
    job_super.setMapOutputValueClass(ModelNode.class);
    job_super.setOutputKeyClass(NullWritable.class);
    job_super.setOutputValueClass(NullWritable.class);

    job_super.setMapperClass(MyMapper_Super.class);
    job_super.setReducerClass(MyReducer_Super.class);
    job_super.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    startTime = System.currentTimeMillis();
    job_super.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    LOG.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds,  " + codeTimeSum
            + " seconds.");
    //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks);

    return 0;
}

From source file: bigsidemodel.AutoCoder.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
       System.out.println("args: " + Arrays.toString(args));
       HelpFormatter formatter = new HelpFormatter();
       formatter.setWidth(120);
       formatter.printHelp(this.getClass().getName(), options);
       ToolRunner.printGenericCommandUsage(System.out);
       return -1;
    }*/

    //String inputPath = cmdline.getOptionValue(INPUT);
    //String outputPath = cmdline.getOptionValue(OUTPUT);

    String inputPath = "qiwang321/best5-mingled-key-56x56/part*";
    String outputPath = "shangfu/bigoutput";
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath + "0");
    LOG.info(" - number of reducers: " + reduceTasks);
    Configuration conf = getConf();
    conf.setInt("num_reduce_task", reduceTasks);
    conf.set("sidepath", outputPath + "_side/");

    Job job0 = Job.getInstance(conf);
    job0.setJobName(AutoCoder.class.getSimpleName());
    job0.setJarByClass(AutoCoder.class);
    // set the path of the information of k clusters in this iteration
    job0.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job0, new Path(inputPath));
    FileOutputFormat.setOutputPath(job0, new Path(outputPath + "0"));

    job0.setInputFormatClass(KeyValueTextInputFormat.class);
    job0.setOutputFormatClass(SequenceFileOutputFormat.class);

    job0.setMapOutputKeyClass(PairOfInts.class);
    job0.setMapOutputValueClass(DataNode.class);
    job0.setOutputKeyClass(PairOfInts.class);
    job0.setOutputValueClass(DataNode.class);

    job0.setMapperClass(MyMapper0.class);
    job0.setReducerClass(MyReducer0.class);
    job0.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath + "0");
    FileSystem.get(getConf()).delete(outputDir, true);

    long codeStart = System.currentTimeMillis();
    double jobTimeSum = 0;

    long startTime = System.currentTimeMillis();
    job0.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    //======= Job 1
    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + outputPath + "0");
    LOG.info(" - output path: " + outputPath + "1");
    LOG.info(" - number of reducers: " + 1);
    int nModel = reduceTasks;
    reduceTasks = 1;

    Job job1 = Job.getInstance(conf);
    job1.setJobName(AutoCoder.class.getSimpleName());
    job1.setJarByClass(AutoCoder.class);
    // set the path of the information of k clusters in this iteration
    job1.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job1, new Path(outputPath + "0"));
    FileOutputFormat.setOutputPath(job1, new Path(outputPath + "1"));

    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);

    job1.setMapOutputKeyClass(PairOfInts.class);
    job1.setMapOutputValueClass(DataNode.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(NullWritable.class);

    job1.setMapperClass(MyMapper1.class);
    job1.setReducerClass(MyReducer1.class);
    job1.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    outputDir = new Path(outputPath + "1");
    FileSystem.get(getConf()).delete(outputDir, true);

    startTime = System.currentTimeMillis();
    job1.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;
    LOG.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds,  " + jobTimeSum
            + " seconds.");

    return 0;
}

From source file: byte_import.HexastoreBulkImport.java

License: Open Source License

public Job createSubmittableJob(String[] args) {
    TABLE_NAME = args[1];
    Job job = null;
    try {
        job = new Job(new Configuration(), NAME);
        job.setJarByClass(HexastoreBulkImport.class);
        job.setMapperClass(sampler.TotalOrderPrep.Map.class);
        job.setReducerClass(Reduce.class);
        job.setCombinerClass(Combiner.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000"));
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000"));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        Path out = new Path("out");
        FileOutputFormat.setOutputPath(job, out);
        Configuration conf = new Configuration();
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        HBaseAdmin hadmin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats");
        HColumnDescriptor family = new HColumnDescriptor("size");
        desc.addFamily(family);
        conf.setInt("zookeeper.session.timeout", 600000);
        if (hadmin.tableExists(TABLE_NAME + "_stats")) {
            //hadmin.disableTable(TABLE_NAME+"_stats");
            //hadmin.deleteTable(TABLE_NAME+"_stats");
        } else {
            hadmin.createTable(desc);
        }

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //job.getConfiguration().setInt("mapred.map.tasks", 18);
        job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
        job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions);
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
        job.getConfiguration().setInt("mapred.tasktracker.map.tasks.maximum", 5);
        job.getConfiguration().setInt("mapred.tasktracker.reduce.tasks.maximum", 5);
        //job.getConfiguration().setInt("io.sort.mb", 100);

    } catch (IOException e2) {
        e2.printStackTrace();
    }

    return job;
}

From source file: ca.sparkera.adapters.mainframe.CobolSerdeUtils.java

License: Apache License

/**
 * Determine the layout that's been provided for Cobol SerDe work.
 *
 * @param properties
 *            containing a key pointing to the layout, one way or another
 * @return layout to use while serdeing the avro file
 * @throws IOException
 *             if error while trying to read the layout from another
 *             location
 * @throws CobolSerdeException
 *             if unable to find a layout or pointer to it in the properties
 */
public static String determineLayoutOrThrowException(Configuration conf, Properties properties)
        throws IOException, CobolSerdeException {

    //For fixed length record get length of the file 
    String fixedRecordLength = properties.getProperty(CobolTableProperties.FB_LENGTH.getPropName());
    if (fixedRecordLength != null) {
        conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, Integer.parseInt(fixedRecordLength));
    }

    String layoutString = properties.getProperty(CobolTableProperties.LAYOUT_LITERAL.getPropName());
    if (layoutString != null && !layoutString.equals(LAYOUT_NONE))
        return CobolSerdeUtils.getLayoutFor(layoutString);

    //For testing purpose
    layoutString = properties.getProperty(CobolTableProperties.LAYOUT_TEST.getPropName());
    if (layoutString != null) {
        return readFile(layoutString, Charset.defaultCharset());
    }

    // Try pulling directly from URL
    layoutString = properties.getProperty(CobolTableProperties.LAYOUT_URL.getPropName());
    if (layoutString == null || layoutString.equals(LAYOUT_NONE))
        throw new CobolSerdeException(EXCEPTION_MESSAGE);

    try {
        String s = getLayoutFromFS(layoutString, conf);
        if (s == null) {
            // in case layout is not a file system

            return CobolSerdeUtils.getLayoutFor(new URL(layoutString).openStream());
        }
        return s;
    } catch (IOException ioe) {
        throw new CobolSerdeException("Unable to read layout from given path: " + layoutString, ioe);
    } catch (URISyntaxException urie) {
        throw new CobolSerdeException("Unable to read layout from given path: " + layoutString, urie);
    }
}
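
The first branch above stores the fixed record length with a raw setInt on FixedLengthInputFormat.FIXED_RECORD_LENGTH. A small sketch of the equivalent calls (assuming org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat, whose setRecordLength/getRecordLength helpers wrap the same property):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat;

public class FixedLengthRecordConfig {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store the record length directly as an int property...
        conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, 100);
        // ...or via the convenience setter for the same property.
        FixedLengthInputFormat.setRecordLength(conf, 100);
        System.out.println(FixedLengthInputFormat.getRecordLength(conf)); // prints 100
    }
}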

From source file: ca.uwaterloo.cs.bigdata2017w.assignment4.BuildPersonalizedPageRankRecords.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(
            OptionBuilder.withArgName("sources").hasArg().withDescription("source nodes").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    String sourcesString = cmdline.getOptionValue(SOURCES);
    String[] sources = sourcesString.split(",");
    for (int i = 0; i < sources.length; i++) {
        sources[i] = sources[i].trim();
    }

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);
    LOG.info(" - use sources: " + sourcesString);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setStrings(SOURCES, sources);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file: cascading.flow.hadoop.util.HadoopUtil.java

License: Open Source License

public static void addComparators(Configuration conf, String property, Map<String, Fields> map,
        BaseFlowStep flowStep, Group group) {
    Iterator<Fields> fieldsIterator = map.values().iterator();

    if (!fieldsIterator.hasNext())
        return;

    Fields fields = fieldsIterator.next();

    if (fields.hasComparators()) {
        conf.set(property, pack(fields, conf));
        return;
    }

    // use resolved fields if there are no comparators.
    Set<Scope> previousScopes = flowStep.getPreviousScopes(group);

    fields = previousScopes.iterator().next().getOutValuesFields();

    if (fields.size() != 0) // allows fields.UNKNOWN to be used
        conf.setInt(property + ".size", fields.size());
}

From source file: cascading.flow.tez.util.TezUtil.java

License: Open Source License

public static void setMRProperties(ProcessorContext context, Configuration config, boolean isMapperOutput) {
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(context.getApplicationId().getClusterTimestamp(),
                    context.getTaskVertexIndex(), context.getApplicationId().getId(), context.getTaskIndex(),
                    context.getTaskAttemptNumber(), isMapperOutput);

    config.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    config.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    config.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    config.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
}

From source file: cascading.platform.tez.Hadoop2TezPlatform.java

License: Open Source License

@Override
public synchronized void setUp() throws IOException {
    if (configuration != null)
        return;

    if (!isUseCluster()) {
        // Current usage requirements:
        // 1. Clients need to set "tez.local.mode" to true when creating a TezClient instance. (For the examples this can be done via -Dtez.local.mode=true)
        // 2. fs.defaultFS must be set to "file:///"
        // 2.1 If running examples - this must be set in tez-site.xml (so that it's picked up by the client, as well as the conf instances used to configure the Inputs / Outputs).
        // 2.2 If using programmatically (without a tez-site.xml present), all configuration instances used (to create the client / configure Inputs / Outputs) must have this property set.
        // 3. tez.runtime.optimize.local.fetch needs to be set to true (either via tez-site.xml or in all configurations used to create the job (similar to fs.defaultFS in step 2))
        // 4. tez.staging-dir must be set (either programmatically or via tez-site.xml).
        // Until TEZ-1337 goes in - the staging-dir for the job is effectively the root of the filesystem (and where inputs are read from / written to if relative paths are used).

        LOG.info("not using cluster");
        configuration = new Configuration();

        configuration.setInt(FlowRuntimeProps.GATHER_PARTITIONS, getNumGatherPartitions());
        //      configuration.setInt( FlowRuntimeProps.GATHER_PARTITIONS, 1 ); // deadlocks if larger than 1

        configuration.set(TezConfiguration.TEZ_LOCAL_MODE, "true");
        configuration.set("fs.defaultFS", "file:///");
        configuration.set("tez.runtime.optimize.local.fetch", "true");

        // hack to prevent deadlocks where downstream processors are scheduled before upstream
        configuration.setInt("tez.am.inline.task.execution.max-tasks", 3); // testHashJoinMergeIntoHashJoinAccumulatedAccumulatedMerge fails if set to 2

        configuration.set(TezConfiguration.TEZ_IGNORE_LIB_URIS, "true"); // in local mode, use local classpath
        configuration.setInt(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, -1);
        configuration.set(TezConfiguration.TEZ_GENERATE_DEBUG_ARTIFACTS, "true");

        configuration.set("tez.am.mode.session", "true"); // allows multiple TezClient instances to be used in a single jvm

        if (!Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            configuration.set("hadoop.tmp.dir", System.getProperty("hadoop.tmp.dir"));
        else
            configuration.set("hadoop.tmp.dir", "build/test/tmp");

        fileSys = FileSystem.get(configuration);
    } else {
        LOG.info("using cluster");

        if (Util.isEmpty(System.getProperty("hadoop.log.dir")))
            System.setProperty("hadoop.log.dir", "build/test/log");

        if (Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            System.setProperty("hadoop.tmp.dir", "build/test/tmp");

        new File(System.getProperty("hadoop.log.dir")).mkdirs(); // ignored
        new File(System.getProperty("hadoop.tmp.dir")).mkdirs(); // ignored

        Configuration defaultConf = new Configuration();

        defaultConf.setInt(FlowRuntimeProps.GATHER_PARTITIONS, getNumGatherPartitions());

        defaultConf.setInt(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, -1);

        //      defaultConf.set( TezConfiguration.TEZ_AM_LOG_LEVEL, "DEBUG" );
        //      defaultConf.set( TezConfiguration.TEZ_TASK_LOG_LEVEL, "DEBUG" );

        defaultConf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
        defaultConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
        defaultConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, System.getProperty("hadoop.tmp.dir"));

        miniDFSCluster = new MiniDFSCluster.Builder(defaultConf).numDataNodes(4).format(true).racks(null)
                .build();

        fileSys = miniDFSCluster.getFileSystem();

        Configuration tezConf = new Configuration(defaultConf);
        tezConf.set("fs.defaultFS", fileSys.getUri().toString()); // use HDFS
        tezConf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir");

        // see MiniTezClusterWithTimeline as alternate
        miniTezCluster = new MiniTezCluster(getClass().getName(), 4, 1, 1); // todo: set to 4
        miniTezCluster.init(tezConf);
        miniTezCluster.start();

        configuration = miniTezCluster.getConfig();

        // stats won't work after completion unless ATS is used
        if (setTimelineStore(configuration)) // true if ats can be loaded and configured for this hadoop version
        {
            configuration.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS,
                    ATSHistoryLoggingService.class.getName());
            configuration.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
            configuration.set(YarnConfiguration.TIMELINE_SERVICE_ADDRESS, "localhost:10200");
            configuration.set(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS, "localhost:8188");
            configuration.set(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_HTTPS_ADDRESS, "localhost:8190");

            yarnHistoryServer = new ApplicationHistoryServer();
            yarnHistoryServer.init(configuration);
            yarnHistoryServer.start();
        }
    }

    configuration.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 1);
    configuration.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1);
    configuration.setInt(TezConfiguration.TEZ_AM_MAX_TASK_FAILURES_PER_NODE, 1);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null)
        globalProperties.put("log4j.logger", logger);

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    Hadoop2TezPlanner.copyProperties(configuration, globalProperties); // copy any external properties

    Hadoop2TezPlanner.copyConfiguration(properties, configuration); // put all properties on the jobconf

    ExitUtil.disableSystemExit();

    //    forbidSystemExitCall();
}