Example usage for the org.apache.hadoop.mapreduce.Job constructor

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce.Job constructor, drawn from open-source projects.

Prototype

Job(Configuration conf, String jobName) throws IOException 
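
Every example below invokes this constructor as new Job(conf, "job name"). Note that in Hadoop 2.x the Job constructors are deprecated in favor of the static factory Job.getInstance(Configuration, String). The following is a minimal, self-contained driver sketch built around that factory; it uses Hadoop's default identity Mapper and Reducer so it compiles on its own, and takes input and output paths from the command line:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalJobExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Equivalent to the deprecated "new Job(conf, "minimal example")"
        Job job = Job.getInstance(conf, "minimal example");
        job.setJarByClass(MinimalJobExample.class);
        // Identity mapper/reducer: records pass through unchanged
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        // TextInputFormat (the default) produces <LongWritable, Text> pairs
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}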

Usage

From source file:ar.edu.ungs.garules.CensusJob.java

License:Apache License

/**
 * Main - runs the whole process
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {

    long time = System.currentTimeMillis();
    Individual<BitSet> bestInd = null;
    if (args.length != 2)
        args = DEFAULT_ARGS;

    // GA setup
    // --------------------------------------------------------------------------------------------------------------
    Set<Individual<BitSet>> bestIndividuals = new HashSet<Individual<BitSet>>();
    List<Gene> genes = new ArrayList<Gene>();
    genes.add(genCondicionACampo);
    genes.add(genCondicionAOperador);
    genes.add(genCondicionAValor);
    genes.add(genCondicionBPresente);
    genes.add(genCondicionBCampo);
    genes.add(genCondicionBOperador);
    genes.add(genCondicionBValor);
    genes.add(genCondicionCPresente);
    genes.add(genCondicionCCampo);
    genes.add(genCondicionCOperador);
    genes.add(genCondicionCValor);
    genes.add(genPrediccionCampo);
    genes.add(genPrediccionValor);

    Map<Gene, Ribosome<BitSet>> translators = new HashMap<Gene, Ribosome<BitSet>>();
    for (Gene gene : genes)
        translators.put(gene, new BitSetToIntegerRibosome(0));

    Genome<BitSet> genome = new BitSetGenome("Chromosome 1", genes, translators);

    Parameter<BitSet> par = new Parameter<BitSet>(0.035, 0.9, 200, new DescendantAcceptEvaluator<BitSet>(),
            new CensusFitnessEvaluator(), new BitSetOnePointCrossover(), new BitSetFlipMutator(), null,
            new BitSetRandomPopulationInitializer(), null, new ProbabilisticRouletteSelector(),
            new GlobalSinglePopulation<BitSet>(genome), 500, 100d, new BitSetMorphogenesisAgent(), genome);

    ParallelFitnessEvaluationGA<BitSet> ga = new ParallelFitnessEvaluationGA<BitSet>(par);
    ga.init();
    // --------------------------------------------------------------------------------------------------------------
    // End of GA setup

    // Iterate up to the maximum number of generations
    for (int i = 0; i < par.getMaxGenerations(); i++) {
        ga.initGeneration();
        Configuration conf = new Configuration();

        // Debug
        //showPopulation(ga.getPopulation());
        //System.out.println((System.currentTimeMillis()-time)/1000 + "s elapsed since start");

        // Pass the unique conditions to evaluate as job parameters
        Iterator<Individual<BitSet>> ite = ga.getPopulation().iterator();
        int contador = 0;
        Set<String> expUnicas = new HashSet<String>();
        while (ite.hasNext()) {
            Individual<BitSet> ind = ite.next();
            String rep = RuleStringAdaptor.adapt(RuleAdaptor.adapt(ind));
            expUnicas.add(rep);
        }
        for (String rep : expUnicas) {
            if (ocurrencias.get(rep) == null) {
                conf.set(String.valueOf(contador), rep);
                contador++;
            }
        }

        // Configure job i
        Job job = new Job(conf, "GA rules - Generation " + i);
        job.setJarByClass(CensusJob.class);
        job.setMapperClass(CensusMapper.class);
        job.setCombinerClass(CensusReducer.class);
        job.setReducerClass(CensusReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1] + "g" + i));

        // Run the MapReduce job representing generation i
        job.waitForCompletion(true);

        // Compute fitness from the job's output; if a better individual appears, add it to the set of best individuals
        llenarOcurrencias(conf, args[1] + "g" + i);

        // Run the GA for this generation
        Individual<BitSet> winnerGen = ga.run(new CensusFitnessEvaluator(ocurrencias));

        // Keep the best individuals
        if (bestInd == null || winnerGen.getFitness() > bestInd.getFitness()) {
            bestInd = winnerGen;
            bestIndividuals.add(winnerGen);
        }

        // Debug
        System.out.println("Mejor Individuo Generacion " + i + " => " + RuleAdaptor.adapt(bestInd)
                + " => Fitness = " + bestInd.getFitness());

    }

    // Sort and display the best individuals
    List<Individual<BitSet>> bestIndList = new ArrayList<Individual<BitSet>>(bestIndividuals);
    Collections.sort(bestIndList, new Comparator<Individual<BitSet>>() {
        public int compare(Individual<BitSet> o1, Individual<BitSet> o2) {
            // Descending by fitness; Double.compare avoids comparing doubles with ==
            return Double.compare(o2.getFitness(), o1.getFitness());
        }
    });
    showPopulation(bestIndList);
    System.out.println("Tiempo total de corrida " + (System.currentTimeMillis() - time) / 1000 + "s");

}

From source file:AshleyIngram.FYP.Hadoop.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:assignment1.WordCount.LinkedSort.LinkedSort.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: hadoop jar This.jar <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(LinkedSort.class);
    job.setMapperClass(TokenizerMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setPartitionerClass(SortPartitioner.class);
    job.setOutputKeyClass(WordAndLength.class);
    job.setOutputValueClass(IntWritable.class);

    job.setNumReduceTasks(2);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:assignment1.WordCount.WordCountInMap.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: hadoop jar This.jar <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(assignment1.WordCount.WordCountInMap.class);
    job.setMapperClass(TokenizerMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:assignment1.WordCount.WordSort.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: hadoop jar This.jar <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordSort.class);
    job.setMapperClass(TokenizerMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setPartitionerClass(SortPartitioner.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setNumReduceTasks(2);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:AverageProj.AveragePrice.java

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "Avg");
    job.setJarByClass(AveragePrice.class);
    //final File f = new File(AveragePrice.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setMapperClass(AvMapper.class);
    job.setMapOutputKeyClass(YearPrice.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setGroupingComparatorClass(YearSymComparator.class);
    // job.setCombinerClass(AvReducer.class);
    job.setReducerClass(AvReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(YearPrice.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setPartitionerClass(AvgPartitioner.class);
    job.setNumReduceTasks(7);
    // Return the job's status to ToolRunner rather than calling System.exit() inside run()
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:azkaban.jobtype.SecurePigWrapper.java

License:Apache License

public static void main(final String[] args) throws IOException, InterruptedException {
    final Logger logger = Logger.getRootLogger();
    final Properties p = System.getProperties();
    final Configuration conf = new Configuration();

    SecurityUtils.getProxiedUser(p, logger, conf).doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws Exception {
            prefetchToken();
            org.apache.pig.Main.main(args);
            return null;
        }

        // For Pig jobs that need to do extra communication with the
        // JobTracker, it's necessary to pre-fetch a token and include it in
        // the credentials cache
        private void prefetchToken() throws InterruptedException, IOException {
            String shouldPrefetch = p.getProperty(OBTAIN_BINARY_TOKEN);
            if (shouldPrefetch != null && shouldPrefetch.equals("true")) {
                logger.info("Pre-fetching token");
                Job job = new Job(conf, "totally phony, extremely fake, not real job");

                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                logger.info("Pre-fetching: Got new JobClient: " + jc);
                Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(new Text("hi"));
                job.getCredentials().addToken(new Text("howdy"), mrdt);

                File temp = File.createTempFile("mr-azkaban", ".token");
                temp.deleteOnExit();

                FileOutputStream fos = null;
                DataOutputStream dos = null;
                try {
                    fos = new FileOutputStream(temp);
                    dos = new DataOutputStream(fos);
                    job.getCredentials().writeTokenStorageToStream(dos);
                } finally {
                    if (dos != null) {
                        dos.close();
                    }
                    if (fos != null) {
                        fos.close();
                    }
                }
                logger.info("Setting " + MAPREDUCE_JOB_CREDENTIALS_BINARY + " to " + temp.getAbsolutePath());
                System.setProperty(MAPREDUCE_JOB_CREDENTIALS_BINARY, temp.getAbsolutePath());
            } else {
                logger.info("Not pre-fetching token");
            }
        }
    });
}

From source file:azkaban.security.commons.SecurityUtils.java

License:Apache License

public static synchronized void prefetchToken(final File tokenFile, final Props p, final Logger logger)
        throws InterruptedException, IOException {

    final Configuration conf = new Configuration();
    logger.info("Getting proxy user for " + p.getString(TO_PROXY));
    logger.info("Getting proxy user for " + p.toString());

    getProxiedUser(p.toProperties(), logger, conf).doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws Exception {
            getToken(p);
            return null;
        }

        private void getToken(Props p) throws InterruptedException, IOException {
            String shouldPrefetch = p.getString(OBTAIN_BINARY_TOKEN);
            if (shouldPrefetch != null && shouldPrefetch.equals("true")) {
                logger.info("Pre-fetching token");

                logger.info("Pre-fetching fs token");
                FileSystem fs = FileSystem.get(conf);
                Token<?> fsToken = fs.getDelegationToken(p.getString("user.to.proxy"));
                logger.info("Created token: " + fsToken.toString());

                Job job = new Job(conf, "totally phony, extremely fake, not real job");
                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                logger.info("Pre-fetching job token: Got new JobClient: " + jc);
                Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(new Text("hi"));
                logger.info("Created token: " + mrdt.toString());

                job.getCredentials().addToken(new Text("howdy"), mrdt);
                job.getCredentials().addToken(fsToken.getService(), fsToken);

                FileOutputStream fos = null;
                DataOutputStream dos = null;
                try {
                    fos = new FileOutputStream(tokenFile);
                    dos = new DataOutputStream(fos);
                    job.getCredentials().writeTokenStorageToStream(dos);
                } finally {
                    if (dos != null) {
                        dos.close();
                    }
                    if (fos != null) {
                        fos.close();
                    }
                }
                logger.info("Loading hadoop tokens into " + tokenFile.getAbsolutePath());
                p.put("HadoopTokenFileLoc", tokenFile.getAbsolutePath());
            } else {
                logger.info("Not pre-fetching token");
            }
        }
    });
}

From source file:bb.BranchAndBound.java

License:Apache License

static Job getJob(String input, String output, String dataDir, int iteration) throws Exception {
    Configuration conf = new Configuration();

    FileSystem hdfs = FileSystem.get(conf);
    FileStatus[] fileStatus = hdfs.listStatus(new Path(input));
    for (int i = 0; i < fileStatus.length; ++i) {
        if (fileStatus[i].getLen() == 0) {
            // delete(Path) is deprecated; delete(Path, true) preserves its recursive semantics
            hdfs.delete(fileStatus[i].getPath(), true);
        }
    }
    DistributedCache.addCacheFile(new URI(dataDir + "/data"), conf);
    Job ret = new Job(conf, dataDir + "_iteration_" + iteration);
    ret.setJarByClass(BranchAndBound.class);
    ret.setMapperClass(BBMapper1.class);
    ret.setReducerClass(BBReducer.class);
    //ret.setReducerClass(MergeReducer.class);
    FileInputFormat.setInputPaths(ret, new Path(input));
    //if( iteration > 7 ) FileInputFormat.setMinInputSplitSize(ret, 67108864);
    FileOutputFormat.setOutputPath(ret, new Path(output));
    ret.setOutputKeyClass(NullWritable.class);
    ret.setOutputValueClass(Text.class);
    return ret;
}

From source file:binningbycategories.BinningbyCategories.java

/**
 * @param args the command line arguments
 * @throws java.lang.Exception
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Binning");
    job.setJarByClass(BinningbyCategories.class);
    job.setMapperClass(YouTubeBinMapper.class);
    job.setNumReduceTasks(0);

    TextInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Configure the MultipleOutputs by adding an output called "bins"
    // With the proper output format and mapper key/value pairs
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class);

    // Enable the counters for the job
    // If there is a significant number of different named outputs, this
    // should be disabled
    MultipleOutputs.setCountersEnabled(job, true);

    System.exit(job.waitForCompletion(true) ? 0 : 2);
}