List of usage examples for org.apache.hadoop.mapreduce.Job.getJar()
public String getJar()
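getJar() returns the path of the job's JAR file, or null if no jar has been set. A recurring pattern in the examples below is to check getJar() for null and only then call setJarByClass(), so that a jar already supplied (for example through GenericOptionsParser or the mapreduce.job.jar property) is not overwritten. A minimal sketch of that pattern, using a hypothetical MyDriver class:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class MyDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "getJar example");

        // Respect a jar that was already configured; only derive one
        // from the driver class when none has been set yet.
        if (job.getJar() == null) {
            job.setJarByClass(MyDriver.class);
        }

        // May still print null if MyDriver was not loaded from a jar
        // (e.g. when running from a classes directory).
        System.out.println("job jar: " + job.getJar());
    }
}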
From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.CsvBulkLoadTool.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    HBaseConfiguration.addHbaseResources(getConf());
    Configuration conf = getConf();
    String quorum = conf.get("hbase.zookeeper.quorum");
    String clientPort = conf.get("hbase.zookeeper.property.clientPort");
    LOG.info("hbase.zookeeper.quorum=" + quorum);
    LOG.info("hbase.zookeeper.property.clientPort=" + clientPort);
    LOG.info("phoenix.query.dateFormat=" + conf.get("phoenix.query.dateFormat"));

    CommandLine cmdLine = null;
    try {
        cmdLine = parseOptions(args);
        LOG.info("JdbcUrl=" + getJdbcUrl(quorum + ":" + clientPort));
    } catch (IllegalStateException e) {
        printHelpAndExit(e.getMessage(), getOptions());
    }
    Class.forName(DriverManager.class.getName());
    Connection conn = DriverManager.getConnection(getJdbcUrl(quorum + ":" + clientPort));

    String tableName = cmdLine.getOptionValue(TABLE_NAME_OPT.getOpt());
    String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPT.getOpt());
    String qualifiedTableName = getQualifiedTableName(schemaName, tableName);
    List<ColumnInfo> importColumns = buildImportColumns(conn, cmdLine, qualifiedTableName);
    LOG.info("tableName=" + tableName);
    LOG.info("schemaName=" + schemaName);
    LOG.info("qualifiedTableName=" + qualifiedTableName);

    configureOptions(cmdLine, importColumns, getConf());

    try {
        validateTable(conn, schemaName, tableName);
    } finally {
        conn.close();
    }

    Path inputPath = new Path(cmdLine.getOptionValue(INPUT_PATH_OPT.getOpt()));
    Path outputPath = null;
    if (cmdLine.hasOption(OUTPUT_PATH_OPT.getOpt())) {
        outputPath = new Path(cmdLine.getOptionValue(OUTPUT_PATH_OPT.getOpt()));
    } else {
        outputPath = new Path("/tmp/" + UUID.randomUUID());
    }
    LOG.info("Configuring HFile output path to {}", outputPath);

    Job job = new Job(getConf(),
            "Phoenix MapReduce import for " + getConf().get(PhoenixCsvToKeyValueMapper.TABLE_NAME_CONFKEY));

    // Allow overriding the job jar setting by using a -D system property at startup
    if (job.getJar() == null) {
        job.setJarByClass(PhoenixCsvToKeyValueMapper.class);
    }

    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, inputPath);

    FileSystem.get(getConf());
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(PhoenixCsvToKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    HTable htable = new HTable(getConf(), qualifiedTableName);

    // Auto configure partitioner and reducer according to the Main Data table
    HFileOutputFormat.configureIncrementalLoad(job, htable);

    LOG.info("Running MapReduce import job from {} to {}", inputPath, outputPath);
    boolean success = job.waitForCompletion(true);
    if (!success) {
        LOG.error("Import job failed, check JobTracker for details");
        return 1;
    }

    LOG.info("Loading HFiles from {}", outputPath);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(getConf());
    loader.doBulkLoad(outputPath, htable);
    htable.close();

    LOG.info("Incremental load complete");
    LOG.info("Removing output directory {}", outputPath);
    if (!FileSystem.get(getConf()).delete(outputPath, true)) {
        LOG.error("Removing output directory {} failed", outputPath);
    }

    return 0;
}
From source file:com.example.Driver.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");

    job.setJarByClass(Driver.class);
    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("to run this jar are necessary at 2 parameters \"" + job.getJar()
                + " input_files output_directory");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());

    // When you are not running any Reducer:
    // job.setNumReduceTasks(0);
    logger.info("number of reduce task is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());
    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
    return 0;
}
From source file:com.facebook.hiveio.mapreduce.output.WritingTool.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    handleCommandLine(args, conf);
    HadoopUtils.setMapAttempts(conf, 1);
    adjustConfigurationForHive(conf);
    HiveTools.setupJob(conf);

    Job job = new Job(conf, "hive-io-writing");
    if (job.getJar() == null) {
        job.setJarByClass(getClass());
    }
    job.setMapperClass(SampleMapper.class);
    job.setInputFormatClass(SampleInputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(HiveWritableRecord.class);
    job.setOutputFormatClass(SampleOutputFormat.class);
    job.setNumReduceTasks(0);

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.nnapz.hbaseexplorer.mr.TableStats.java
License:Apache License
/**
 * M/R Job setup. No reduce.
 *
 * @param conf      a suitable hadoop+hbase configuration
 * @param tableName the table we want to get stats from
 * @return the Job object, to be started
 * @throws java.io.IOException any hadoop IO problem
 */
public static Job createSubmittableJob(Configuration conf, String tableName) throws IOException {
    Job job = new Job(conf, NAME + "_" + tableName);
    if (job.getJar() == null) {
        job.setJarByClass(TableStats.class); // otherwise set in conf already
    }

    Scan scan = new Scan();
    scan.setMaxVersions(10000); // todo fixme

    TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCountMapper.class, Text.class, Result.class, job);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    return job;
}
From source file:de.bankmark.bigbench.queries.q18.MRlinearRegression.java
License:Apache License
private void usage(Job job) {
    String jar = job.getJar() != null ? job.getJar() : " jarName.jar ";
    System.err.println("Usage:\n hadoop jar " + jar + " " + this.getClass().getName()
            + " <inDir> <outDir> \n<inDir> file format: <double x> <double y> "
            + "\n<outDir> file format: <double intercept> <double slope>");
}
From source file:de.bankmark.bigbench.queries.q28.ToSequenceFile.java
License:Apache License
private void usage(Job job) {
    String jar = job.getJar() != null ? job.getJar() : " jarName.jar ";
    System.err.println("Usage:\n hadoop jar " + jar + " " + this.getClass().getName()
            + " <inDir> <outDir> \n<inDir> file format: tab separated csv: <documentID> <text> "
            + "\n<outDir> file format Sequencefile: <Text documentID> <Text content>");
}
From source file:io.apigee.lembos.mapreduce.LembosMapReduceRunner.java
License:Apache License
/**
 * Returns a properly configured, ready to run Hadoop {@link Job}.
 *
 * @param args the command line arguments as supported by {@link GenericOptionsParser}
 *
 * @return the configured job
 *
 * @throws IOException if there is a problem creating the job
 * @throws ExecutionException if there is an issue running the Node.js module
 * @throws InterruptedException if the execution of the Node.js module gets interrupted
 * @throws NodeException if there is an issue with the Node.js module
 */
public Job initJob(final String[] args)
        throws ExecutionException, InterruptedException, IOException, NodeException {
    final GenericOptionsParser gop = new GenericOptionsParser(args);

    // If ran from ToolRunner, conf should already be set but if not, set it manually
    if (conf == null) {
        setConf(gop.getConfiguration());
    }

    // Load the Hadoop FS URL handler
    RunnerUtils.loadFsUrlStreamHandler(getConf());

    // Persist the non-Runner CLI arguments
    conf.setStrings(LembosConstants.MR_MODULE_ARGS, gop.getRemainingArgs());

    // Package the Node.js module and prepare it to be submitted with the Job
    RunnerUtils.prepareModuleForJob(conf);

    // Add "-libjars" to the current ClassLoader if necessary
    RunnerUtils.addLibJarsToClassLoader(conf);

    // Create Node.js environment for local use
    mrEnv = LembosMapReduceEnvironment.fromConf(conf);

    if (JavaScriptUtils.isDefined(mrEnv.getConfiguration())) {
        for (final Map.Entry<Object, Object> propertyEntry : mrEnv.getConfiguration().entrySet()) {
            final String key = propertyEntry.getKey().toString();
            final Writable value = ConversionUtils.jsToWritable(propertyEntry.getValue(), mrEnv.getModule());

            // Do not set these as we'll be setting them later from values we were passed from the CLI
            if (key.equals(LembosConstants.MR_MODULE_NAME)) {
                continue;
            }

            if (value instanceof BooleanWritable) {
                conf.setBoolean(key, ((BooleanWritable) value).get());
            } else if (value instanceof DoubleWritable || value instanceof FloatWritable) {
                conf.setFloat(key, Float.valueOf(value.toString()));
            } else if (value instanceof IntWritable) {
                conf.setInt(key, ((IntWritable) value).get());
            } else if (value instanceof LongWritable) {
                conf.setLong(key, ((LongWritable) value).get());
            } else if (value instanceof Text) {
                conf.set(key, value.toString());
            } else {
                System.err.println("Cannot convert JavaScript (" + value.getClass().getName()
                        + ") to Configuration, using String");
                conf.set(key, value.toString());
            }
        }
    }

    // Create Job
    final String jobName = "LembosMapReduceJob-" + mrEnv.getModuleName();
    final Job job = new Job(conf, jobName);

    jobWrapper = JobWrap.getInstance(mrEnv.getRuntime(), job);

    if (JavaScriptUtils.isDefined(mrEnv.getJobSetupFunction())) {
        mrEnv.callFunctionSync(mrEnv.getJobSetupFunction(), new Object[] { jobWrapper });
    }

    // Always set the mapper
    job.setMapperClass(LembosMapper.class);

    // Conditionally set the combiner
    if (JavaScriptUtils.isDefined(mrEnv.getCombineFunction())) {
        job.setCombinerClass(LembosCombiner.class);
    }

    // Conditionally set the group comparator
    if (JavaScriptUtils.isDefined(mrEnv.getGroupFunction())) {
        job.setGroupingComparatorClass(LembosGroupComparator.class);
    }

    // Conditionally set the partitioner
    if (JavaScriptUtils.isDefined(mrEnv.getPartitionFunction())) {
        job.setPartitionerClass(LembosPartitioner.class);
    }

    // Conditionally set the reducer
    if (JavaScriptUtils.isDefined(mrEnv.getReduceFunction())) {
        job.setReducerClass(LembosReducer.class);
    } else {
        job.setNumReduceTasks(0);
    }

    // Conditionally set the sort comparator
    if (JavaScriptUtils.isDefined(mrEnv.getSortFunction())) {
        job.setSortComparatorClass(LembosSortComparator.class);
    }

    // This could potentially be unsafe but for testing, we need to set this based on the path to the built JAR
    if (job.getJar() == null) {
        job.setJarByClass(LembosMapReduceRunner.class);
    }

    // MapReduce configuration reference:
    //
    // http://hadoop.apache.org/docs/stable/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml
    // org.apache.hadoop.mapreduce.MRConfig
    // org.apache.hadoop.mapreduce.MRJobConfig

    return job;
}
From source file:it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java
License:Apache License
public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }

    try {
        Job job = new Job(new Configuration());
        job.setJobName(getClass().getName());
        Configuration conf = job.getConfiguration();
        CommandLine results = cli.parse(conf, args);

        if (results.hasOption("input")) {
            Path path = new Path(results.getOptionValue("input"));
            FileInputFormat.setInputPaths(job, path);
        }
        if (results.hasOption("output")) {
            Path path = new Path(results.getOptionValue("output"));
            FileOutputFormat.setOutputPath(job, path);
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            explicitInputFormat = true;
            setIsJavaRecordReader(conf, true);
            job.setInputFormatClass(getClass(results, "inputformat", conf, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(conf, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(conf, true);
            job.setMapperClass(getClass(results, "map", conf, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", conf, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(conf, true);
            job.setReducerClass(getClass(results, "reduce", conf, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            explicitOutputFormat = true;
            setIsJavaRecordWriter(conf, true);
            job.setOutputFormatClass(getClass(results, "writer", conf, OutputFormat.class));
        }
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormatClass());
            }
        }
        if (results.hasOption("avroInput")) {
            avroInput = AvroIO.valueOf(results.getOptionValue("avroInput").toUpperCase());
        }
        if (results.hasOption("avroOutput")) {
            avroOutput = AvroIO.valueOf(results.getOptionValue("avroOutput").toUpperCase());
        }
        if (results.hasOption("program")) {
            setExecutable(conf, results.getOptionValue("program"));
        }

        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] { FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL() };
            // FindBugs complains that creating a URLClassLoader should be
            // in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            conf.setClassLoader(loader);
        }

        setupPipesJob(job);
        return job.waitForCompletion(true) ? 0 : 1;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }
}
From source file:org.apache.accumulo.server.test.randomwalk.multitable.CopyTool.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());

    if (job.getJar() == null) {
        log.error("M/R requires a jar file! Run mvn package.");
        return 1;
    }

    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[0], args[1].getBytes(), args[2],
            new Authorizations());
    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[3], args[4]);

    job.setMapperClass(SeqMapClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[0], args[1].getBytes(), true, args[5]);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[3], args[4]);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file:org.apache.accumulo.server.test.randomwalk.sequential.MapRedVerifyTool.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());

    if (job.getJar() == null) {
        log.error("M/R requires a jar file! Run mvn package.");
        return 1;
    }

    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[0], args[1].getBytes(), args[2],
            new Authorizations());
    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[3], args[4]);

    job.setMapperClass(SeqMapClass.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(SeqReduceClass.class);
    job.setNumReduceTasks(1);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[0], args[1].getBytes(), true, args[5]);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[3], args[4]);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}