List of usage examples for org.apache.hadoop.mapreduce.Job#getWorkingDirectory()
public Path getWorkingDirectory() throws IOException
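Before the full examples below, here is a minimal, self-contained sketch of the call itself. The class name WorkingDirectoryDemo is hypothetical; it uses the same pre-2.x Job constructor that the examples on this page use.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public class WorkingDirectoryDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Job job = new Job(conf); // deprecated in Hadoop 2.x; Job.getInstance(conf) is the newer form

        // getWorkingDirectory() returns the directory against which relative
        // Paths submitted with the job are resolved - typically the user's
        // home directory on the default FileSystem.
        Path workingDir = job.getWorkingDirectory();
        System.out.println("Job working directory: " + workingDir);
    }
}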
From source file:smile.wide.InferenceDriver.java
License:Apache License
/**
 * Main per-instance inference driver.
 * Takes 2 arguments on the command line: input and output path.
 *
 * Relevant configuration parameters:
 *  - xdata.bayesnets.smile.library.path
 *  - xdata.bayesnets.networkfile
 *  - xdata.bayesnets.datasetreader.class
 *  - xdata.bayesnets.datasetreader.filter
 *  - xdata.bayesnets.datasetreader.variablenames
 *  - xdata.bayesnets.datasetreader.instid
 *  - xdata.bayesnets.queryvariable
 *
 * TODO: update the driver to answer a set of queries, not just one
 */
@Override
public int run(String[] filteredargs) throws Exception {
    try {
        // retrieve the input and output paths
        if (filteredargs.length != 2) {
            System.err.println("Usage: InferenceDriver <input-path> <output-dir>");
            return -1;
        }
        inPath_ = filteredargs[filteredargs.length - 2];
        outPath_ = filteredargs[filteredargs.length - 1];

        // locate the native libraries
        Configuration conf = getConf();
        String configuredLibHDFSPath = conf.get("xdata.bayesnets.smile.library.path");
        if (configuredLibHDFSPath == null || configuredLibHDFSPath.isEmpty()) {
            s_logger.warn("SMILE library path defaulting to " + libHDFSPath_);
            s_logger.warn("Set xdata.bayesnets.smile.library.path to change.");
        } else {
            libHDFSPath_ = configuredLibHDFSPath;
        }

        // put the libraries in the job working dir
        DistributedCache.createSymlink(conf);
        try {
            DistributedCache.addCacheFile(new URI(libHDFSPath_ + "/smile.jar#smile.jar"), conf);
            DistributedCache.addCacheFile(new URI(libHDFSPath_ + "/libjsmile.so#libjsmile.so"), conf);
            DistributedCache.addCacheFile(
                    new URI(networkFileHDFSPath_ + "#" + basename(networkFileHDFSPath_)), conf);
        } catch (URISyntaxException e) {
            s_logger.fatal("Bad URL for network file.");
            return -12;
        }

        // assume the network and data are configured already - for example:
        /*
        conf.set("xdata.bayesnets.networkfile", basename(modifiedNetwork_));
        conf.set("xdata.bayesnets.datasetreader.class", FacebookCSVReader.class.getName());
        conf.set("xdata.bayesnets.datasetreader.filter", "3,5,7,10,11,12");
        conf.set("xdata.bayesnets.datasetreader.variablenames",
                "FirstName,MiddleName,Sex,IsAppUser,LikesCount,FriendCount");
        conf.set("xdata.bayesnets.datasetreader.instid", "1");
        conf.set("xdata.bayesnets.queryvariable", "Age");
        */

        Job job = new Job(conf);
        job.setJarByClass(InferenceDriver.class);
        job.setJobName("SMILE-WIDE Inference");

        FileInputFormat.addInputPath(job, new Path(inPath_));
        FileOutputFormat.setOutputPath(job, new Path(outPath_));

        job.setMapperClass(PerInstanceInferenceMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(DoubleArrayWritable.class);

        s_logger.info("Job working directory is " + job.getWorkingDirectory());

        System.exit(job.waitForCompletion(true) ? 3 : 0);
    } catch (IOException e) {
        System.err.println("Something went badly wrong in IO.");
        System.exit(2);
    } catch (InterruptedException e) {
        System.err.println("Job interrupted.");
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        System.err.println("ClassNotFound exception.");
        e.printStackTrace();
    }
    return 0;
}
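Note that DistributedCache is deprecated in Hadoop 2.x in favor of methods on Job itself. A hedged sketch of the equivalent calls, assuming the same libHDFSPath_ field as above:

// Sketch only: Hadoop 2.x replacement for the deprecated DistributedCache calls,
// using the Job object directly (URISyntaxException handling elided as above).
Job job = Job.getInstance(conf);
job.addCacheFile(new URI(libHDFSPath_ + "/smile.jar#smile.jar"));
job.addCacheFile(new URI(libHDFSPath_ + "/libjsmile.so#libjsmile.so"));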
From source file:smile.wide.Network.java
License:Apache License
/**
 * Runs inference in parallel on a large dataset. The result is stored as a two-dimensional
 * array - the major dimension corresponding to the instance, and the minor to the query
 * variable value.
 *
 * The result can be retrieved by calling inferenceResult().
 *
 * Preconditions for correct use:
 *  - all variables in the evidence set are present in the BN
 *  - the query variable is present in the BN
 *  - the names of all variables in the evidence set are names of columns in the DataSet
 *    (if the variable is not in the evidence, why condition on it?)
 *
 * @param dataset The dataset to run inference on
 * @param reader  The reader used to parse the dataset
 * @param q       The query to ask of each instance
 */
public void infer(DataSet dataset, DataSetReader<?, ?> reader, BNQuery q) {
    InferenceDriver id = new InferenceDriver();
    conf_ = id.getConf(); // get the inference driver's config and set it up

    // find out the working location
    try {
        Job j = new Job(conf_);
        jobHDFSPath_ = j.getWorkingDirectory();
    } catch (IOException e1) {
        e1.printStackTrace();
        return;
    }
    int r = (new Random()).nextInt();
    tempDir_ = new Path(jobHDFSPath_ + "/tmp/infresult_" + r);

    // write the network somewhere into HDFS - relies on two subsequent jobs
    // starting in the same directory, usually user home
    // TODO: communicate the actual location to the subsequent job?
    String name = hiddenNetwork_.getName() + ".xdsl";
    try {
        hiddenNetwork_.writeFile("/tmp/" + name);
        FileSystem fs = FileSystem.get(conf_);
        fs.mkdirs(new Path(jobHDFSPath_ + "/tmp/"));
        // move the locally written file (under /tmp) into HDFS
        fs.moveFromLocalFile(new Path("/tmp/" + name), new Path(jobHDFSPath_ + "/tmp/" + name));
    } catch (IOException e) {
        logger_.error("I/O Error recording the Bayes network " + name + " to "
                + jobHDFSPath_ + "/tmp/" + name);
        e.printStackTrace();
    }
    conf_.set("xdata.bayesnets.networkfile", jobHDFSPath_ + "/tmp/" + name);

    // tell the driver the reader class
    conf_.set("xdata.bayesnets.datasetreader.class", reader.getClass().getName());

    // pull out the column indices that correspond to the query variables;
    // DataSet should know which columns are which
    ArrayList<String> evVars = q.getEvidenceVars();
    String qvar = q.getQueryVar();

    int[] colIndices = new int[evVars.size() + 1];
    String[] colNames = new String[evVars.size() + 1];
    colIndices[0] = dataset.indexOfColumn(qvar);
    colNames[0] = qvar; // keep the query variable's name alongside its index
    for (int i = 1; i <= evVars.size(); ++i) {
        colIndices[i] = dataset.indexOfColumn(evVars.get(i - 1));
        colNames[i] = evVars.get(i - 1);
    }
    Arrays.sort(colIndices);

    conf_.set("xdata.bayesnets.datasetreader.filter", concat(colIndices, ","));
    // name the variables to which the dataset columns map
    conf_.set("xdata.bayesnets.datasetreader.variablenames", concat(colNames, ","));

    int instID = dataset.instanceIDColumnIndex();
    if (instID == -1) {
        logger_.error("No instance ID column index in dataset " + dataset.getName()
                + ". Dataset must provide one for inference.");
        return;
    }
    conf_.set("xdata.bayesnets.datasetreader.instid", "" + instID);
    conf_.set("xdata.bayesnets.queryvariable", q.getQueryVar());

    // arguments for the inference driver are the location of the dataset
    // and where to write the result
    String[] args = new String[2];
    args[0] = dataset.location().toString();
    args[1] = tempDir_.toString();

    try {
        id.setConf(conf_); // make sure we are not setting up a side copy of the conf
        ToolRunner.run(id, args);
    } catch (Exception e) {
        logger_.error("Something went wrong in executing the inference job");
        e.printStackTrace();
    }
}
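The example above builds HDFS paths by string concatenation onto the working directory. Path also offers a parent/child constructor that avoids separator mistakes; a small sketch, assuming the same fields as the example:

// Sketch: composing paths against the job working directory with the
// Path(parent, child) constructor instead of string concatenation.
Path workingDir = new Job(conf_).getWorkingDirectory(); // e.g. hdfs://namenode/user/alice
Path tmpDir = new Path(workingDir, "tmp");              // hdfs://namenode/user/alice/tmp
Path result = new Path(tmpDir, "infresult_" + r);       // .../tmp/infresult_<r>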
From source file:smile.wide.obsolete.InferenceDriver.java
License:Apache License
@Override
public int run(String[] filteredargs) throws Exception {
    String inPath;
    String outPath;
    if (filteredargs.length != 2) {
        System.err.println("Usage: InferenceDriver <infile> <outfile>");
        String yousaid = "";
        for (String s : filteredargs) {
            yousaid += s + " ";
        }
        System.err.println("You said to the driver: " + yousaid);
        System.err.println("Are those generic arguments supposed to be there?");
    }
    inPath = filteredargs[filteredargs.length - 2];
    outPath = filteredargs[filteredargs.length - 1];

    try {
        Configuration conf = getConf();
        conf.set("keep.failed.task.files", "true");
        conf.set("keep.failed.task.pattern", "*");

        /* we'll do this through "-libjars" now
        DistributedCache.createSymlink(conf);
        DistributedCache.addCacheArchive(
                new URI("hdfs://130.42.96.139:9000/jParInf/lib/linux64/smile.jar#smile.jar"), conf);
        DistributedCache.addCacheFile(
                new URI("hdfs://130.42.96.139:9000/jParInf/lib/linux64/libjsmile.so#libjsmile.so"), conf);
        System.out.println("Create symlinks: " + DistributedCache.getSymlink(conf));
        */

        Job job = new Job(conf);
        job.setJarByClass(InferenceJob.class);
        job.setJobName("SMILE Inference test");

        FileInputFormat.addInputPath(job, new Path(inPath));
        FileOutputFormat.setOutputPath(job, new Path(outPath));

        job.setMapperClass(InferenceMapper.class);
        job.setReducerClass(InferenceReducer.class);

        // set both the map and reduce in/out classes
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        // but redefine them for the mapper
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(DoubleArrayWritable.class);

        s_logger.info("Job working directory is " + job.getWorkingDirectory());

        System.exit(job.waitForCompletion(true) ? 3 : 0);
    } catch (IOException e) {
        System.err.println("Something went badly wrong in IO.");
        System.exit(2);
    } catch (InterruptedException e) {
        System.err.println("Job interrupted.");
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        System.err.println("ClassNotFound exception.");
        e.printStackTrace();
    }
    return 0;
}
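The commented-out block above defers library distribution to "-libjars", which only takes effect when the driver runs under ToolRunner so that GenericOptionsParser can consume the generic options before run(String[]) sees the remaining arguments. A typical (hypothetical) entry point for such a driver:

// Hypothetical main for this Tool: ToolRunner strips generic options such as
// -libjars and -D key=value, then invokes run() with what is left.
public static void main(String[] args) throws Exception {
    int rc = ToolRunner.run(new Configuration(), new InferenceDriver(), args);
    System.exit(rc);
}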
From source file:smile.wide.obsolete.InferenceJob.java
License:Apache License
public static void main(String[] aaaarghs) throws URISyntaxException {
    if (aaaarghs.length != 2) {
        System.err.println("Usage: InferenceJob <infile> <outfile>");
        System.exit(1);
    }
    try {
        JobConf conf = new JobConf();
        DistributedCache.createSymlink(conf);
        DistributedCache.addCacheArchive(
                new URI("hdfs://130.42.96.139:9000/jParInf/lib/linux64/smile.jar#smile.jar"), conf);
        DistributedCache.addCacheFile(
                new URI("hdfs://130.42.96.139:9000/jParInf/lib/linux64/libjsmile.so#libjsmile.so"), conf);
        System.out.println("Create symlinks: " + DistributedCache.getSymlink(conf));

        Job job = new Job(conf);
        job.setJarByClass(InferenceJob.class);
        job.setJobName("SMILE Inference test");

        FileInputFormat.addInputPath(job, new Path(aaaarghs[0]));
        FileOutputFormat.setOutputPath(job, new Path(aaaarghs[1]));

        job.setMapperClass(InferenceMapper.class);
        job.setReducerClass(InferenceReducer.class);

        // set both the map and reduce in/out classes
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        // but redefine them for the mapper
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(ArrayWritable.class);

        System.out.println("The job working directory is " + job.getWorkingDirectory());

        System.exit(job.waitForCompletion(true) ? 3 : 0);
    } catch (IOException e) {
        System.err.println("Something went badly wrong in IO.");
        System.exit(2);
    } catch (InterruptedException e) {
        System.err.println("Job interrupted.");
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        System.err.println("ClassNotFound exception.");
        e.printStackTrace();
    }
}