Java tutorial: running a Hadoop MapReduce WordCount job
A WordCount driver written against the classic org.apache.hadoop.mapred API:

package hadoopProcesses;

import hdfsIO.fileInteractions;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * @author madhatter
 */
public class testJob {

    public static void start(String[] args) {
        try {
            // The class passed to JobConf identifies the jar that Hadoop
            // ships to the cluster, so it should be a class from this project.
            JobConf conf = new JobConf(testJob.class);
            conf.setJobName("wordcount");

            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(IntWritable.class);

            // map and reducer are this package's Mapper/Reducer implementations.
            conf.setMapperClass(map.class);
            conf.setCombinerClass(reducer.class);
            conf.setReducerClass(reducer.class);

            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            FileInputFormat.setInputPaths(conf, new Path(args[1]));

            // Delete any stale output directory; the job fails on startup
            // if the output path already exists.
            FileSystem fs = FileSystem.get(conf);
            Path outputDir = new Path(args[2]);
            fs.delete(outputDir, true);
            FileOutputFormat.setOutputPath(conf, outputDir);

            JobClient.runJob(conf);

            // Print the results from HDFS once the job has finished.
            Path src = new Path(fs.getWorkingDirectory() + "/output/part-00000");
            if (fs.exists(src)) {
                System.out.println("\t\t------ Results ------ ");
                // (Alternatively, stream the file line by line with a
                // BufferedReader wrapped around fs.open(src).)
                List<String> lines = new fileInteractions().readLines(src, conf);
                for (String line : lines) {
                    System.out.println(line);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
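The driver above refers to map and reducer classes from the same package that are not shown in this tutorial. Here is a minimal sketch of what they could look like with the classic API, assuming the standard WordCount logic; the class names follow the driver, but the bodies are illustrative, not the project's actual code, and each class would live in its own source file:

package hadoopProcesses;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// Emits (word, 1) for every token in each input line.
public class map extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        StringTokenizer tokens = new StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            output.collect(word, ONE);
        }
    }
}

// Sums the counts for each word; this is why the driver can reuse it
// as both the combiner and the reducer.
public class reducer extends MapReduceBase
        implements Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        int sum = 0;
        while (values.hasNext()) {
            sum += values.next().get();
        }
        output.collect(key, new IntWritable(sum));
    }
}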
Automatic execution: the same kind of job can also be set up with the newer org.apache.hadoop.mapreduce API, for example when submitting it programmatically from an application:

// create a configuration
Configuration conf = new Configuration();

// create a new job based on the configuration
Job job = new Job(conf);

// here you have to put your mapper class
job.setMapperClass(Mapper.class);

// here you have to put your reducer class
job.setReducerClass(Reducer.class);

// here you have to set the jar which contains your map/reduce classes,
// so the cluster can load the mapper class
job.setJarByClass(Mapper.class);

// key/value types of your reducer output
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

// this sets the format of your input; it can also be TextInputFormat
job.setInputFormatClass(SequenceFileInputFormat.class);

// same for the output
job.setOutputFormatClass(TextOutputFormat.class);

// here you can set the path of your input
SequenceFileInputFormat.addInputPath(job, new Path("files/toMap/"));

// this deletes a possible existing output path to prevent job failures
FileSystem fs = FileSystem.get(conf);
Path out = new Path("files/out/processed/");
fs.delete(out, true);

// finally set the (now empty) output path
TextOutputFormat.setOutputPath(job, out);

// this waits until the job completes and prints debug output to STDOUT,
// or to whatever has been configured in your log4j properties
job.waitForCompletion(true);

To submit against a remote cluster, the configuration must also point at the JobTracker and the NameNode:

// as defined in your mapred-site.xml
conf.set("mapred.job.tracker", "jobtracker.com:50001");

// as defined in your hdfs-site.xml
conf.set("fs.default.name", "hdfs://namenode.com:9000");

This works as long as hadoop-core.jar is on your application container's classpath. You should add some kind of progress indicator to your web page, though, because a Hadoop job may take minutes to hours to complete. ;)

For YARN (Hadoop 2 and later), the following configurations need to be set instead:

// as defined in your yarn-site.xml
conf.set("yarn.resourcemanager.address", "yarn-manager.com:50001");

// the framework is now "yarn", as defined in your mapred-site.xml
conf.set("mapreduce.framework.name", "yarn");

// as defined in your hdfs-site.xml
conf.set("fs.default.name", "hdfs://namenode.com:9000");
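Putting those pieces together, here is a minimal, self-contained sketch of a new-API WordCount driver aimed at a YARN cluster. The hostnames are carried over from the snippets above, while the class names (yarnWordCount, WordMapper, SumReducer) and the use of args[0]/args[1] for the input and output paths are illustrative assumptions:

package hadoopProcesses;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class yarnWordCount {

    // Hypothetical mapper: emits (word, 1) per token.
    public static class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Hypothetical reducer: sums the counts per word.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Cluster addresses are assumptions; use the values from your own
        // yarn-site.xml, mapred-site.xml and hdfs-site.xml.
        conf.set("yarn.resourcemanager.address", "yarn-manager.com:50001");
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("fs.default.name", "hdfs://namenode.com:9000");

        Job job = Job.getInstance(conf, "wordcount");
        job.setJarByClass(yarnWordCount.class);
        job.setMapperClass(WordMapper.class);
        job.setCombinerClass(SumReducer.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        // Clear a stale output directory, as in the examples above.
        Path out = new Path(args[1]);
        FileSystem.get(conf).delete(out, true);
        FileOutputFormat.setOutputPath(job, out);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}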