List of usage examples for org.apache.hadoop.mapreduce.Job#setSpeculativeExecution

public void setSpeculativeExecution(boolean speculativeExecution)

Turns speculative execution on or off for this job (both the map and reduce phases).
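Most of the examples below call setSpeculativeExecution(false) because their tasks have side effects (writing to Accumulo, to a single HDFS file, or to an external store), and a speculative duplicate task attempt would repeat that work. The following is a minimal sketch of that pattern, not taken from any of the examples: the driver class, mapper, and input/output paths are hypothetical placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SideEffectJobDriver { // hypothetical driver class

  // Hypothetical map-only task with a side effect per input record.
  public static class SideEffectMapper extends Mapper<LongWritable, Text, NullWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws java.io.IOException, InterruptedException {
      // e.g. write to an external system here; emitting the line keeps the sketch runnable
      context.write(NullWritable.get(), value);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "side-effect-job");
    job.setJarByClass(SideEffectJobDriver.class);
    job.setMapperClass(SideEffectMapper.class);
    job.setNumReduceTasks(0); // map-only job

    // Disable speculative execution so no duplicate task attempt repeats the side effect.
    job.setSpeculativeExecution(false);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}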
From source file:org.apache.accumulo.examples.simple.mapreduce.NGramIngest.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs(getClass().getName(), args);

  Job job = JobUtil.getJob(getConf());
  job.setJobName(getClass().getSimpleName());
  job.setJarByClass(getClass());

  opts.setAccumuloConfigs(job);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(AccumuloOutputFormat.class);

  job.setMapperClass(NGramMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Mutation.class);

  job.setNumReduceTasks(0);
  job.setSpeculativeExecution(false);

  if (!opts.getConnector().tableOperations().exists(opts.getTableName())) {
    log.info("Creating table " + opts.getTableName());
    opts.getConnector().tableOperations().create(opts.getTableName());
    SortedSet<Text> splits = new TreeSet<Text>();

    String numbers[] = "1 2 3 4 5 6 7 8 9".split("\\s");
    String lower[] = "a b c d e f g h i j k l m n o p q r s t u v w x y z".split("\\s");
    String upper[] = "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z".split("\\s");

    for (String[] array : new String[][] { numbers, lower, upper }) {
      for (String s : array) {
        splits.add(new Text(s));
      }
    }
    opts.getConnector().tableOperations().addSplits(opts.getTableName(), splits);
  }

  TextInputFormat.addInputPath(job, new Path(opts.inputDirectory));
  job.waitForCompletion(true);

  return job.isSuccessful() ? 0 : 1;
}
From source file:org.apache.accumulo.test.mrit.IntegrationTestMapReduce.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  // read a list of tests from the input, and print out the results
  if (args.length != 2) {
    System.err.println("Wrong number of args: <input> <output>");
    return 1;
  }
  Configuration conf = getConf();
  Job job = Job.getInstance(conf, "accumulo integration test runner");
  conf = job.getConfiguration();
  // some tests take more than 10 minutes
  conf.setLong(MRJobConfig.TASK_TIMEOUT, 20 * 60 * 1000);
  // minicluster uses a lot of ram
  conf.setInt(MRJobConfig.MAP_MEMORY_MB, 4000);
  // hadoop puts an ancient version of jline on the classpath
  conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);
  // no need to run a test multiple times
  job.setSpeculativeExecution(false);
  // read one line at a time
  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.setNumLinesPerSplit(job, 1);
  // run the test
  job.setJarByClass(IntegrationTestMapReduce.class);
  job.setMapperClass(TestMapper.class);
  // group test by result code
  job.setReducerClass(TestReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  return job.waitForCompletion(true) ? 0 : 1;
}
From source file:org.apache.hadoop.examples.QuasiMonteCarlo.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
    throws IOException, ClassNotFoundException, InterruptedException {
  Job job = Job.getInstance(conf);
  //setup job conf
  job.setJobName(QuasiMonteCarlo.class.getSimpleName());
  job.setJarByClass(QuasiMonteCarlo.class);

  job.setInputFormatClass(SequenceFileInputFormat.class);

  job.setOutputKeyClass(BooleanWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapperClass(QmcMapper.class);

  job.setReducerClass(QmcReducer.class);
  job.setNumReduceTasks(1);

  // turn off speculative execution, because DFS doesn't handle
  // multiple writers to the same file.
  job.setSpeculativeExecution(false);

  //setup input/output directories
  final Path inDir = new Path(tmpDir, "in");
  final Path outDir = new Path(tmpDir, "out");
  FileInputFormat.setInputPaths(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);

  final FileSystem fs = FileSystem.get(conf);
  if (fs.exists(tmpDir)) {
    throw new IOException(
        "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Cannot create input directory " + inDir);
  }

  try {
    //generate an input file for each map task
    for (int i = 0; i < numMaps; ++i) {
      final Path file = new Path(inDir, "part" + i);
      final LongWritable offset = new LongWritable(i * numPoints);
      final LongWritable size = new LongWritable(numPoints);
      final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
          LongWritable.class, CompressionType.NONE);
      try {
        writer.append(offset, size);
      } finally {
        writer.close();
      }
      System.out.println("Wrote input for Map #" + i);
    }

    //start a map/reduce job
    System.out.println("Starting Job");
    final long startTime = Time.monotonicNow();
    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
      System.out.println("Job " + job.getJobID() + " failed!");
      System.exit(1);
    }
    final double duration = (Time.monotonicNow() - startTime) / 1000.0;
    System.out.println("Job Finished in " + duration + " seconds");

    //read outputs
    Path inFile = new Path(outDir, "reduce-out");
    LongWritable numInside = new LongWritable();
    LongWritable numOutside = new LongWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
    try {
      reader.next(numInside, numOutside);
    } finally {
      reader.close();
    }

    //compute estimated value
    final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
    return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())).divide(numTotal,
        RoundingMode.HALF_UP);
  } finally {
    fs.delete(tmpDir, true);
  }
}
From source file:org.apache.nutch.hostdb.UpdateHostDb.java
License:Apache License
private void updateHostDb(Path hostDb, Path crawlDb, Path topHosts, boolean checkFailed, boolean checkNew,
    boolean checkKnown, boolean force, boolean filter, boolean normalize) throws Exception {

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("UpdateHostDb: starting at " + sdf.format(start));

  Job job = NutchJob.getInstance(getConf());
  Configuration conf = job.getConfiguration();
  boolean preserveBackup = conf.getBoolean("db.preserve.backup", true);
  job.setJarByClass(UpdateHostDb.class);
  job.setJobName("UpdateHostDb");

  FileSystem fs = hostDb.getFileSystem(conf);
  Path old = new Path(hostDb, "old");
  Path current = new Path(hostDb, "current");
  Path tempHostDb = new Path(hostDb, "hostdb-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  // lock an existing hostdb to prevent multiple simultaneous updates
  Path lock = new Path(hostDb, LOCK_NAME);
  if (!fs.exists(current)) {
    fs.mkdirs(current);
  }
  LockUtil.createLockFile(fs, lock, false);

  MultipleInputs.addInputPath(job, current, SequenceFileInputFormat.class);

  if (topHosts != null) {
    MultipleInputs.addInputPath(job, topHosts, KeyValueTextInputFormat.class);
  }
  if (crawlDb != null) {
    // Tell the job we read from CrawlDB
    conf.setBoolean("hostdb.reading.crawldb", true);
    MultipleInputs.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME), SequenceFileInputFormat.class);
  }

  FileOutputFormat.setOutputPath(job, tempHostDb);

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NutchWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(HostDatum.class);
  job.setMapperClass(UpdateHostDbMapper.class);
  job.setReducerClass(UpdateHostDbReducer.class);
  job.setSpeculativeExecution(false);

  conf.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
  conf.setBoolean(HOSTDB_CHECK_FAILED, checkFailed);
  conf.setBoolean(HOSTDB_CHECK_NEW, checkNew);
  conf.setBoolean(HOSTDB_CHECK_KNOWN, checkKnown);
  conf.setBoolean(HOSTDB_FORCE_CHECK, force);
  conf.setBoolean(HOSTDB_URL_FILTERING, filter);
  conf.setBoolean(HOSTDB_URL_NORMALIZING, normalize);
  conf.setClassLoader(Thread.currentThread().getContextClassLoader());

  try {
    boolean success = job.waitForCompletion(true);
    if (!success) {
      String message = "UpdateHostDb job did not succeed, job status:" + job.getStatus().getState()
          + ", reason: " + job.getStatus().getFailureInfo();
      LOG.error(message);
      NutchJob.cleanupAfterFailure(tempHostDb, lock, fs);
      throw new RuntimeException(message);
    }

    FSUtils.replace(fs, old, current, true);
    FSUtils.replace(fs, current, tempHostDb, true);

    if (!preserveBackup && fs.exists(old))
      fs.delete(old, true);
  } catch (Exception e) {
    LOG.error("UpdateHostDb job failed: {}", e.getMessage());
    NutchJob.cleanupAfterFailure(tempHostDb, lock, fs);
    throw e;
  }

  LockUtil.removeLockFile(fs, lock);
  long end = System.currentTimeMillis();
  LOG.info("UpdateHostDb: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
}
From source file:org.apache.nutch.util.hostdb.HostDb.java
License:Apache License
private void hostDb(Path hostDb, Path crawlDb, Path topHosts, boolean checkFailed, boolean checkNew,
    boolean checkKnown, boolean force, boolean filter, boolean normalize) throws Exception {

  long start = System.currentTimeMillis();
  LOG.info("HostDb: starting at " + sdf.format(start));

  Configuration conf = getConf();
  conf.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
  conf.setBoolean(HOSTDB_CHECK_FAILED, checkFailed);
  conf.setBoolean(HOSTDB_CHECK_NEW, checkNew);
  conf.setBoolean(HOSTDB_CHECK_KNOWN, checkKnown);
  conf.setBoolean(HOSTDB_FORCE_CHECK, force);
  conf.setBoolean(HOSTDB_URL_FILTERING, filter);
  conf.setBoolean(HOSTDB_URL_NORMALIZING, normalize);

  // Check whether the urlfilter-domainblacklist plugin is loaded
  if (filter && "urlfilter-domainblacklist".matches(conf.get("plugin.includes"))) {
    throw new Exception("domainblacklist-urlfilter must not be enabled");
  }

  // Check whether the urlnormalizer-host plugin is loaded
  if (normalize && "urlnormalizer-host".matches(conf.get("plugin.includes"))) {
    throw new Exception("urlnormalizer-host must not be enabled");
  }

  FileSystem fs = FileSystem.get(conf);
  Path old = new Path(hostDb, "old");
  Path current = new Path(hostDb, CURRENT_NAME);
  Path tempHostDb = new Path(hostDb, "hostdb-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  // lock an existing hostdb to prevent multiple simultaneous updates
  Path lock = new Path(hostDb, LOCK_NAME);
  if (!fs.exists(current)) {
    fs.mkdirs(current);
  }
  LockUtil.createLockFile(fs, lock, false);

  Job job = new Job(conf, "HostDb " + hostDb);
  job.setJarByClass(HostDb.class);
  job.setSpeculativeExecution(false);

  MultipleInputs.addInputPath(job, current, SequenceFileInputFormat.class);

  if (topHosts != null) {
    MultipleInputs.addInputPath(job, topHosts, KeyValueTextInputFormat.class);
  }
  if (crawlDb != null) {
    // Tell the job we read from CrawlDB
    conf.setBoolean("hostdb.reading.crawldb", true);
    MultipleInputs.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME), SequenceFileInputFormat.class);
  }

  FileOutputFormat.setOutputPath(job, tempHostDb);

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NutchWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(HostDatum.class);
  job.setMapperClass(HostDbMapper.class);
  job.setReducerClass(HostDbReducer.class);

  try {
    job.waitForCompletion(true);

    FSUtils.replace(fs, old, current, true);
    FSUtils.replace(fs, current, tempHostDb, true);

    boolean preserveBackup = conf.getBoolean("db.preserve.backup", true);
    if (!preserveBackup && fs.exists(old))
      fs.delete(old, true);
  } catch (Exception e) {
    if (fs.exists(tempHostDb)) {
      fs.delete(tempHostDb, true);
    }
    LockUtil.removeLockFile(fs, lock);
    throw e;
  }

  LockUtil.removeLockFile(fs, lock);
  long end = System.currentTimeMillis();
  LOG.info("HostDb: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
}
From source file:org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool.java
License:Apache License
private boolean computeExpTable() throws IOException, ClassNotFoundException, InterruptedException {
  boolean success;

  logger.info("Creating expTable");

  // The split location for the interim calculations, delete upon completion
  Path splitDir = new Path("/tmp/splits-" + queryInfo.getIdentifier());
  if (fs.exists(splitDir)) {
    fs.delete(splitDir, true);
  }

  // Write the query hashes to the split files
  Map<Integer, BigInteger> queryElements = query.getQueryElements();
  List<Integer> keys = new ArrayList<>(queryElements.keySet());

  int numSplits = SystemConfiguration.getIntProperty("pir.expCreationSplits", 100);
  int elementsPerSplit = queryElements.size() / numSplits; // Integral division.
  logger.info("numSplits = " + numSplits + " elementsPerSplit = " + elementsPerSplit);

  for (int i = 0; i < numSplits; ++i) {
    // Grab the range of the thread
    int start = i * elementsPerSplit;
    int stop = start + elementsPerSplit - 1;
    if (i == (numSplits - 1)) {
      stop = queryElements.size() - 1;
    }
    HDFS.writeFileIntegers(keys.subList(start, stop), fs, new Path(splitDir, "split-" + i), false);
  }

  // Run the job to generate the expTable
  // Job jobExp = new Job(mrConfig.getConfig(), "pirExp-" + pirWL.getWatchlistNum());
  Job jobExp = Job.getInstance(conf, "pirExp-" + queryInfo.getIdentifier());

  jobExp.setSpeculativeExecution(false);
  jobExp.getConfiguration().set("mapreduce.map.speculative", "false");
  jobExp.getConfiguration().set("mapreduce.reduce.speculative", "false");

  // Set the memory and heap options
  jobExp.getConfiguration().set("mapreduce.map.memory.mb",
      SystemConfiguration.getProperty("mapreduce.map.memory.mb", "10000"));
  jobExp.getConfiguration().set("mapreduce.reduce.memory.mb",
      SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "10000"));
  jobExp.getConfiguration().set("mapreduce.map.java.opts",
      SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx9000m"));
  jobExp.getConfiguration().set("mapreduce.reduce.java.opts",
      SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx9000m"));
  jobExp.getConfiguration().set("mapreduce.reduce.shuffle.parallelcopies", "5");

  jobExp.getConfiguration().set("pirMR.queryInputDir", SystemConfiguration.getProperty("pir.queryInput"));
  jobExp.getConfiguration().setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);

  jobExp.setInputFormatClass(TextInputFormat.class);
  FileInputFormat.setInputPaths(jobExp, splitDir);

  jobExp.setJarByClass(ExpTableMapper.class);
  jobExp.setMapperClass(ExpTableMapper.class);

  jobExp.setMapOutputKeyClass(Text.class);
  jobExp.setMapOutputValueClass(Text.class);

  // Set the reducer and output params
  int numExpLookupPartitions = SystemConfiguration.getIntProperty("pir.numExpLookupPartitions", 100);
  jobExp.setNumReduceTasks(numExpLookupPartitions);
  jobExp.setReducerClass(ExpTableReducer.class);

  // Delete the output directory if it exists
  Path outPathExp = new Path(outputDirExp);
  if (fs.exists(outPathExp)) {
    fs.delete(outPathExp, true);
  }
  jobExp.setOutputKeyClass(Text.class);
  jobExp.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(jobExp, outPathExp);
  jobExp.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

  MultipleOutputs.addNamedOutput(jobExp, FileConst.PIR, TextOutputFormat.class, Text.class, Text.class);
  MultipleOutputs.addNamedOutput(jobExp, FileConst.EXP, TextOutputFormat.class, Text.class, Text.class);

  // Submit job, wait for completion
  success = jobExp.waitForCompletion(true);

  // Assemble the exp table from the output
  // element_index -> fileName
  Map<Integer, String> expFileTable = new HashMap<>();
  FileStatus[] status = fs.listStatus(outPathExp);
  for (FileStatus fstat : status) {
    if (fstat.getPath().getName().startsWith(FileConst.PIR)) {
      logger.info("fstat.getPath().getName().toString() = " + fstat.getPath().getName());
      try {
        try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(fstat.getPath())))) {
          String line;
          while ((line = br.readLine()) != null) {
            String[] rowValTokens = line.split(","); // form is element_index,reducerNumber
            String fileName = fstat.getPath().getParent() + "/" + FileConst.EXP + "-r-" + rowValTokens[1];
            logger.info("fileName = " + fileName);
            expFileTable.put(Integer.parseInt(rowValTokens[0]), fileName);
          }
        }
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
  }

  // Place exp table in query object
  query.setExpFileBasedLookup(expFileTable);
  new HadoopFileSystemStore(fs).store(queryInputDir, query);

  logger.info("Completed creation of expTable");

  return success;
}
From source file:org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool.java
License:Apache License
@SuppressWarnings("unchecked")
private boolean readDataEncRows(Path outPathInit) throws Exception {
  boolean success;

  Job job = Job.getInstance(conf, "pirMR");
  job.setSpeculativeExecution(false);

  // Set the data and query schema properties
  job.getConfiguration().set("dataSchemaName", qSchema.getDataSchemaName());
  job.getConfiguration().set("data.schemas", SystemConfiguration.getProperty("data.schemas"));
  job.getConfiguration().set("query.schemas", SystemConfiguration.getProperty("query.schemas"));

  // Set the memory and heap options
  job.getConfiguration().set("mapreduce.map.memory.mb",
      SystemConfiguration.getProperty("mapreduce.map.memory.mb", "2000"));
  job.getConfiguration().set("mapreduce.reduce.memory.mb",
      SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "2000"));
  job.getConfiguration().set("mapreduce.map.java.opts",
      SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx1800m"));
  job.getConfiguration().set("mapreduce.reduce.java.opts",
      SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx1800m"));

  // Set necessary files for Mapper setup
  job.getConfiguration().set("pirMR.queryInputDir", SystemConfiguration.getProperty("pir.queryInput"));
  job.getConfiguration().set("pirMR.stopListFile", SystemConfiguration.getProperty("pir.stopListFile"));

  job.getConfiguration().set("mapreduce.map.speculative", "false");
  job.getConfiguration().set("mapreduce.reduce.speculative", "false");

  job.getConfiguration().set("pirWL.useLocalCache",
      SystemConfiguration.getProperty("pir.useLocalCache", "true"));
  job.getConfiguration().set("pirWL.limitHitsPerSelector",
      SystemConfiguration.getProperty("pir.limitHitsPerSelector", "false"));
  job.getConfiguration().set("pirWL.maxHitsPerSelector",
      SystemConfiguration.getProperty("pir.maxHitsPerSelector", "100"));

  if (dataInputFormat.equals(InputFormatConst.ES)) {
    String jobName = "pirMR_es_" + esResource + "_" + esQuery + "_" + System.currentTimeMillis();
    job.setJobName(jobName);

    job.getConfiguration().set("es.nodes", SystemConfiguration.getProperty("es.nodes"));
    job.getConfiguration().set("es.port", SystemConfiguration.getProperty("es.port"));
    job.getConfiguration().set("es.resource", esResource);
    job.getConfiguration().set("es.query", esQuery);

    job.setInputFormatClass(EsInputFormat.class);
  } else if (dataInputFormat.equals(InputFormatConst.BASE_FORMAT)) {
    String baseQuery = SystemConfiguration.getProperty("pir.baseQuery");
    String jobName = "pirMR_base_" + baseQuery + "_" + System.currentTimeMillis();
    job.setJobName(jobName);

    job.getConfiguration().set("baseQuery", baseQuery);
    job.getConfiguration().set("query", baseQuery);
    job.getConfiguration().set("pir.allowAdHocQuerySchemas",
        SystemConfiguration.getProperty("pir.allowAdHocQuerySchemas", "false"));

    job.getConfiguration().setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);

    // Set the inputFormatClass based upon the baseInputFormat property
    String classString = SystemConfiguration.getProperty("pir.baseInputFormat");
    Class<BaseInputFormat> inputClass = (Class<BaseInputFormat>) Class.forName(classString);
    if (!Class.forName("org.apache.pirk.inputformat.hadoop.BaseInputFormat").isAssignableFrom(inputClass)) {
      throw new Exception("baseInputFormat class = " + classString + " does not extend BaseInputFormat");
    }
    job.setInputFormatClass(inputClass);

    FileInputFormat.setInputPaths(job, inputFile);
  }

  job.setJarByClass(HashSelectorsAndPartitionDataMapper.class);
  job.setMapperClass(HashSelectorsAndPartitionDataMapper.class);

  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(BytesArrayWritable.class);

  // Set the reducer and output params
  job.setNumReduceTasks(numReduceTasks);
  job.setReducerClass(RowCalcReducer.class);

  // Delete the output directory if it exists
  if (fs.exists(outPathInit)) {
    fs.delete(outPathInit, true);
  }
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, outPathInit);
  job.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

  MultipleOutputs.addNamedOutput(job, FileConst.PIR, TextOutputFormat.class, LongWritable.class, Text.class);

  // Submit job, wait for completion
  success = job.waitForCompletion(true);

  return success;
}
From source file:org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool.java
License:Apache License
private boolean multiplyColumns(Path outPathInit, Path outPathColumnMult)
    throws IOException, ClassNotFoundException, InterruptedException {
  boolean success;

  Job columnMultJob = Job.getInstance(conf, "pir_columnMult");
  columnMultJob.setSpeculativeExecution(false);

  String columnMultJobName = "pir_columnMult";

  // Set the same job configs as for the first iteration
  columnMultJob.getConfiguration().set("mapreduce.map.memory.mb",
      SystemConfiguration.getProperty("mapreduce.map.memory.mb", "2000"));
  columnMultJob.getConfiguration().set("mapreduce.reduce.memory.mb",
      SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "2000"));
  columnMultJob.getConfiguration().set("mapreduce.map.java.opts",
      SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx1800m"));
  columnMultJob.getConfiguration().set("mapreduce.reduce.java.opts",
      SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx1800m"));

  columnMultJob.getConfiguration().set("mapreduce.map.speculative", "false");
  columnMultJob.getConfiguration().set("mapreduce.reduce.speculative", "false");

  columnMultJob.getConfiguration().set("pirMR.queryInputDir",
      SystemConfiguration.getProperty("pir.queryInput"));

  columnMultJob.setJobName(columnMultJobName);
  columnMultJob.setJarByClass(ColumnMultMapper.class);
  columnMultJob.setNumReduceTasks(numReduceTasks);

  // Set the Mapper, InputFormat, and input path
  columnMultJob.setMapperClass(ColumnMultMapper.class);
  columnMultJob.setInputFormatClass(TextInputFormat.class);

  FileStatus[] status = fs.listStatus(outPathInit);
  for (FileStatus fstat : status) {
    if (fstat.getPath().getName().startsWith(FileConst.PIR)) {
      logger.info("fstat.getPath() = " + fstat.getPath().toString());
      FileInputFormat.addInputPath(columnMultJob, fstat.getPath());
    }
  }
  columnMultJob.setMapOutputKeyClass(LongWritable.class);
  columnMultJob.setMapOutputValueClass(Text.class);

  // Set the reducer and output options
  columnMultJob.setReducerClass(ColumnMultReducer.class);
  columnMultJob.setOutputKeyClass(LongWritable.class);
  columnMultJob.setOutputValueClass(Text.class);
  columnMultJob.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

  // Delete the output file, if it exists
  if (fs.exists(outPathColumnMult)) {
    fs.delete(outPathColumnMult, true);
  }
  FileOutputFormat.setOutputPath(columnMultJob, outPathColumnMult);

  MultipleOutputs.addNamedOutput(columnMultJob, FileConst.PIR_COLS, TextOutputFormat.class,
      LongWritable.class, Text.class);

  // Submit job, wait for completion
  success = columnMultJob.waitForCompletion(true);

  return success;
}
From source file:org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool.java
License:Apache License
private boolean computeFinalResponse(Path outPathFinal)
    throws ClassNotFoundException, IOException, InterruptedException {
  boolean success;

  Job finalResponseJob = Job.getInstance(conf, "pir_finalResponse");
  finalResponseJob.setSpeculativeExecution(false);

  String finalResponseJobName = "pir_finalResponse";

  // Set the same job configs as for the first iteration
  finalResponseJob.getConfiguration().set("mapreduce.map.memory.mb",
      SystemConfiguration.getProperty("mapreduce.map.memory.mb", "2000"));
  finalResponseJob.getConfiguration().set("mapreduce.reduce.memory.mb",
      SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "2000"));
  finalResponseJob.getConfiguration().set("mapreduce.map.java.opts",
      SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx1800m"));
  finalResponseJob.getConfiguration().set("mapreduce.reduce.java.opts",
      SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx1800m"));

  finalResponseJob.getConfiguration().set("pirMR.queryInputDir",
      SystemConfiguration.getProperty("pir.queryInput"));
  finalResponseJob.getConfiguration().set("pirMR.outputFile", outputFile);

  finalResponseJob.getConfiguration().set("mapreduce.map.speculative", "false");
  finalResponseJob.getConfiguration().set("mapreduce.reduce.speculative", "false");

  finalResponseJob.setJobName(finalResponseJobName);
  finalResponseJob.setJarByClass(ColumnMultMapper.class);
  finalResponseJob.setNumReduceTasks(1);

  // Set the Mapper, InputFormat, and input path
  finalResponseJob.setMapperClass(ColumnMultMapper.class);
  finalResponseJob.setInputFormatClass(TextInputFormat.class);

  FileStatus[] status = fs.listStatus(new Path(outputDirColumnMult));
  for (FileStatus fstat : status) {
    if (fstat.getPath().getName().startsWith(FileConst.PIR_COLS)) {
      logger.info("fstat.getPath() = " + fstat.getPath().toString());
      FileInputFormat.addInputPath(finalResponseJob, fstat.getPath());
    }
  }
  finalResponseJob.setMapOutputKeyClass(LongWritable.class);
  finalResponseJob.setMapOutputValueClass(Text.class);

  // Set the reducer and output options
  finalResponseJob.setReducerClass(FinalResponseReducer.class);
  finalResponseJob.setOutputKeyClass(LongWritable.class);
  finalResponseJob.setOutputValueClass(Text.class);
  finalResponseJob.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

  // Delete the output file, if it exists
  if (fs.exists(outPathFinal)) {
    fs.delete(outPathFinal, true);
  }
  FileOutputFormat.setOutputPath(finalResponseJob, outPathFinal);
  MultipleOutputs.addNamedOutput(finalResponseJob, FileConst.PIR_FINAL, TextOutputFormat.class,
      LongWritable.class, Text.class);

  // Submit job, wait for completion
  success = finalResponseJob.waitForCompletion(true);

  return success;
}
From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java
License:Apache License
@VisibleForTesting
public Job createJob(int numMapper, int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime,
    int mapSleepCount, long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime,
    int iReduceSleepCount) throws IOException {
  Configuration conf = getConf();
  conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
  conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
  conf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
  conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
  conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
  conf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
  conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
  conf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
  conf.setInt(IREDUCE_TASKS_COUNT, numIReducer);

  // Configure intermediate reduces
  conf.setInt(org.apache.tez.mapreduce.hadoop.MRJobConfig.MRR_INTERMEDIATE_STAGES, iReduceStagesCount);
  LOG.info("Running MRR with " + iReduceStagesCount + " IR stages");

  for (int i = 1; i <= iReduceStagesCount; ++i) {
    // Set reducer class for intermediate reduce
    conf.setClass(
        MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduce.class"),
        ISleepReducer.class, Reducer.class);
    // Set reducer output key class
    conf.setClass(
        MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.map.output.key.class"),
        IntWritable.class, Object.class);
    // Set reducer output value class
    conf.setClass(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i,
        "mapreduce.map.output.value.class"), IntWritable.class, Object.class);
    conf.setInt(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduces"),
        numIReducer);
  }

  Job job = Job.getInstance(conf, "sleep");
  job.setNumReduceTasks(numReducer);
  job.setJarByClass(MRRSleepJob.class);
  job.setNumReduceTasks(numReducer);
  job.setMapperClass(SleepMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setReducerClass(SleepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(SleepInputFormat.class);
  job.setPartitionerClass(MRRSleepJobPartitioner.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Sleep job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}