Example usage for org.apache.hadoop.mapreduce Job getWorkingDirectory

List of usage examples for org.apache.hadoop.mapreduce Job getWorkingDirectory

Introduction

This page collects usage examples for the getWorkingDirectory method of org.apache.hadoop.mapreduce.Job.

Prototype

public Path getWorkingDirectory() throws IOException 

Source Link

Document

Get the current working directory for the default file system.

Usage

From source file:smile.wide.InferenceDriver.java

License:Apache License

/** Main per-instance inference driver.
 *  Takes 2 arguments "on the command line" - input and output path. 
 * /* ww  w . j  a  v  a 2s.  c  om*/
 * Relevant configuration parameters:
 * - xdata.bayesnets.smile.library.path
 * - xdata.bayesnets.networkfile
 * - xdata.bayesnets.datasetreader.class
 * - xdata.bayesnets.datasetreader.filter
 * - xdata.bayesnets.datasetreader.variablenames
 * - xdata.bayesnets.datasetreader.instid
 * - xdata.bayesnets.queryvariable
 * 
 * TODO: update the driver to answer a set of queries, not just one
 * 
 */
@Override
public int run(String[] filteredargs) throws Exception {

    try {
        // retrieve the input and output paths
        if (filteredargs.length != 2) {
            System.err.println("Usage: InferenceDriver <input-path> <output-dir>");
            return -1;
        }
        inPath_ = filteredargs[filteredargs.length - 2];
        outPath_ = filteredargs[filteredargs.length - 1];

        // locate the native libraries
        Configuration conf = getConf();

        String configuredLibHDFSPath_ = conf_.get("xdata.bayesnets.smile.library.path");
        if (configuredLibHDFSPath_ == null || configuredLibHDFSPath_.isEmpty()) {
            s_logger.warn("SMILE library path defaulting to " + libHDFSPath_);
            s_logger.warn("Set xdata.bayesnets.smile.library.path to change. ");
        } else {
            libHDFSPath_ = configuredLibHDFSPath_;
        }

        // put the libraries in the job working dir
        DistributedCache.createSymlink(conf_);
        try {
            DistributedCache.addCacheFile(new URI(libHDFSPath_ + "/smile.jar#smile.jar"), conf_);
            DistributedCache.addCacheFile(new URI(libHDFSPath_ + "/libjsmile.so#libjsmile.so"), conf_);
            DistributedCache.addCacheFile(new URI(networkFileHDFSPath_ + "#" + basename(networkFileHDFSPath_)),
                    conf_);
        } catch (URISyntaxException e) {
            s_logger.fatal("Bad URL for network file.");
            return -12;
        }

        // assume the network and data is configured already - for example
        /*
        conf_.set("xdata.bayesnets.networkfile", basename(modifiedNetwork_));         
        conf_.set("xdata.bayesnets.datasetreader.class", FacebookCSVReader.class.getName());
        conf_.set("xdata.bayesnets.datasetreader.filter", "3,5,7,10,11,12" );
        conf_.set("xdata.bayesnets.datasetreader.variablenames", 
              "FirstName,MiddleName,Sex,IsAppUser,LikesCount,FriendCount" );
        conf_.set("xdata.bayesnets.datasetreader.instid", "1");
        conf_.set("xdata.bayesnets.queryvariable", "Age");         
        */

        Job job = new Job(conf);
        job.setJarByClass(InferenceDriver.class);
        job.setJobName("SMILE-WIDE Inference");

        FileInputFormat.addInputPath(job, new Path(inPath_));
        FileOutputFormat.setOutputPath(job, new Path(outPath_));

        job.setMapperClass(PerInstanceInferenceMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(DoubleArrayWritable.class);

        s_logger.info("Job working directory is " + job.getWorkingDirectory());

        System.exit(job.waitForCompletion(true) ? 3 : 0);

    } catch (IOException e) {
        System.err.println("Something went badly wrong in IO.");
        System.exit(2);
    } catch (InterruptedException e) {
        System.err.println("Job interrupted.");
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        System.err.println("ClassNotFound exception.");
        e.printStackTrace();
    }

    return 0;
}

From source file:smile.wide.Network.java

License:Apache License

/**
 * Runs inference in parallel on a large dataset. The result is stored as a two dimensional array -
 * major dimension corresponding to the instance, and minor to the query variable value.
 * /*w  w  w  . j ava  2s.  co  m*/
 * Result can be retrieved by calling inferenceResult().
 * 
 * Preconditions for correct use:
 * - all variables in the evidence set are present in the BN
 * - the query variable is present in the BN
 * - the names of all variables in the evidence set are names of columns in the DataSet
 * (if the variable is not in the evidence, why condition on it?)
 * 
 * 
 * @param dataset   The dataset to run inference on
 * @param q         The query to ask of each instance
 */
public void infer(DataSet dataset, DataSetReader<?, ?> reader, BNQuery q) {
    InferenceDriver id = new InferenceDriver();
    conf_ = id.getConf(); // get the inference driver's config and set it up

    // find out the working location
    try {
        Job j = new Job(conf_);
        jobHDFSPath_ = j.getWorkingDirectory();
    } catch (IOException e1) {
        e1.printStackTrace();
        return;
    }

    int r = (new Random()).nextInt();
    tempDir_ = new Path(jobHDFSPath_ + "/tmp/infresult_" + r);

    // write the network somewhere into HDFS - relies on two subsequent jobs
    // starting in the same directory, usually user home
    // TODO: communicate the actual location to the subsequent job?
    String name = hiddenNetwork_.getName() + ".xdsl";
    try {

        hiddenNetwork_.writeFile("/tmp/" + name);
        FileSystem fs = FileSystem.get(conf_);
        fs.mkdirs(new Path(jobHDFSPath_ + "/tmp/"));
        fs.moveFromLocalFile(new Path(name), new Path(jobHDFSPath_ + "/tmp/" + name));
    } catch (IOException e) {
        logger_.error("I/O Error recording the Bayes network " + name + " to " + jobHDFSPath_ + "/tmp/" + name);
        e.printStackTrace();
    }
    conf_.set("xdata.bayesnets.networkfile", jobHDFSPath_ + "/tmp/" + name);

    // tell the driver the reader class
    conf_.set("xdata.bayesnets.datasetreader.class", reader.getClass().getName());

    // pull out the column indices that correspond to the query variables
    // DataSet should know which columns are which

    ArrayList<String> evVars = q.getEvidenceVars();
    String qvar = q.getQueryVar();
    int[] colIndices = new int[evVars.size() + 1];
    String[] colNames = new String[evVars.size() + 1];

    colIndices[0] = dataset.indexOfColumn(qvar);

    for (int i = 1; i <= evVars.size(); ++i) {
        colIndices[i] = dataset.indexOfColumn(evVars.get(i - 1));
        colNames[i] = evVars.get(i - 1);
    }
    Arrays.sort(colIndices);

    conf_.set("xdata.bayesnets.datasetreader.filter", concat(colIndices, ","));

    // name the variables to which the dataset columns map
    conf_.set("xdata.bayesnets.datasetreader.variablenames", concat(colNames, ","));

    int instID = dataset.instanceIDColumnIndex();
    if (instID == -1) {
        logger_.error("No instance ID column index in dataset " + dataset.getName()
                + ". Dataset must provide one for inference.");
        return;
    }
    conf_.set("xdata.bayesnets.datasetreader.instid", "" + instID);

    conf_.set("xdata.bayesnets.queryvariable", q.getQueryVar());

    // arguments for the inference driver are the location of the dataset and where
    // to write
    String[] args = new String[2]; // 2 arguments
    args[0] = dataset.location().toString();
    args[1] = tempDir_.toString();

    try {
        id.setConf(conf_); // make sure we are not setting up a side copy of the conf...
        ToolRunner.run(id, args);
    } catch (Exception e) {
        logger_.error("Something went wrong in executing the inference job");
        e.printStackTrace();
    }

}

From source file:smile.wide.obsolete.InferenceDriver.java

License:Apache License

/**
 * Configures and runs the "SMILE Inference test" map/reduce job.
 *
 * The last two entries of the argument array are taken as input and output path.
 * When more than two arguments are present a warning is printed and the job is
 * still started; with fewer than two there is nothing to run.
 *
 * @param filteredargs the arguments; the last two are the input and output path
 * @return -1 when fewer than two arguments are given, 1 when the job is interrupted
 *         or a job class cannot be found; in every other case the JVM exits
 *         (0 = job succeeded, 1 = job failed, 2 = I/O error)
 * @throws Exception declared by the overridden signature
 */
@Override
public int run(String[] filteredargs) throws Exception {

    if (filteredargs.length != 2) {
        System.err.println("Usage: InferenceDriver <infile> <outfile>");

        StringBuilder yousaid = new StringBuilder();
        for (String s : filteredargs) {
            yousaid.append(s).append(" ");
        }
        System.err.println("You said to the driver: " + yousaid);
        System.err.println("Are those generic arguments supposed to be there?");

        // the path lookup below needs at least two entries
        if (filteredargs.length < 2) {
            return -1;
        }
    }

    String inPath = filteredargs[filteredargs.length - 2];
    String outPath = filteredargs[filteredargs.length - 1];

    try {
        Configuration conf = getConf();

        conf.set("keep.failed.task.files", "true");
        conf.set("keep.failed.task.pattern", "*");

        /* we'll do this through "-libjars" now
        DistributedCache.createSymlink(conf);
        DistributedCache.addCacheArchive(
              new URI("hdfs://130.42.96.139:9000/jParInf/lib/linux64/smile.jar#smile.jar"), conf);
        DistributedCache.addCacheFile(
              new URI("hdfs://130.42.96.139:9000/jParInf/lib/linux64/libjsmile.so#libjsmile.so"), conf);


        System.out.println("Create symlinks: " + DistributedCache.getSymlink(conf));
        */

        Job job = new Job(conf);
        job.setJarByClass(InferenceJob.class);
        job.setJobName("SMILE Inference test");

        FileInputFormat.addInputPath(job, new Path(inPath));
        FileOutputFormat.setOutputPath(job, new Path(outPath));

        job.setMapperClass(InferenceMapper.class);
        job.setReducerClass(InferenceReducer.class);

        // set both the map and reduce in/out classes
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        // but redefine them for the mapper
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(DoubleArrayWritable.class);

        s_logger.info("Job working directory is " + job.getWorkingDirectory());

        // waitForCompletion returns true on success, which maps to exit status 0
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    } catch (IOException e) {
        System.err.println("Something went badly wrong in IO.");
        e.printStackTrace();
        System.exit(2);
    } catch (InterruptedException e) {
        // restore the interrupt flag for whoever owns this thread
        Thread.currentThread().interrupt();
        System.err.println("Job interrupted.");
        e.printStackTrace();
        return 1;
    } catch (ClassNotFoundException e) {
        System.err.println("ClassNotFound exception.");
        e.printStackTrace();
        return 1;
    }

    // Not reached in practice: every path that falls through to here has already
    // called System.exit. The statement only satisfies the compiler.
    return 0;
}

From source file:smile.wide.obsolete.InferenceJob.java

License:Apache License

/**
 * Command-line entry point: runs the "SMILE Inference test" job and exits with its status.
 *
 * Exit status: 0 when the job succeeds, 1 when it fails, is interrupted, or a job
 * class cannot be found or the arguments are wrong, 2 on an I/O error.
 *
 * @param aaaarghs exactly two entries: the input file followed by the output location
 * @throws URISyntaxException if one of the cache-file URIs cannot be parsed
 */
public static void main(String[] aaaarghs) throws URISyntaxException {
    if (aaaarghs.length != 2) {
        System.err.println("Usage: InferenceJob <infile> <outfile>");
        System.exit(1);
    }

    try {
        JobConf conf = new JobConf();
        DistributedCache.createSymlink(conf);
        DistributedCache.addCacheArchive(
                new URI("hdfs://130.42.96.139:9000/jParInf/lib/linux64/smile.jar#smile.jar"), conf);
        DistributedCache.addCacheFile(
                new URI("hdfs://130.42.96.139:9000/jParInf/lib/linux64/libjsmile.so#libjsmile.so"), conf);

        System.out.println("Create symlinks: " + DistributedCache.getSymlink(conf));

        Job job = new Job(conf);
        job.setJarByClass(InferenceJob.class);
        job.setJobName("SMILE Inference test");

        FileInputFormat.addInputPath(job, new Path(aaaarghs[0]));
        FileOutputFormat.setOutputPath(job, new Path(aaaarghs[1]));

        job.setMapperClass(InferenceMapper.class);
        job.setReducerClass(InferenceReducer.class);

        // set both the map and reduce in/out classes
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        // but redefine them for the mapper
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(ArrayWritable.class);

        System.out.println("The job working directory is " + job.getWorkingDirectory());

        // waitForCompletion returns true on success, which maps to exit status 0
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    } catch (IOException e) {
        System.err.println("Something went badly wrong in IO.");
        e.printStackTrace();
        System.exit(2);
    } catch (InterruptedException e) {
        // restore the interrupt flag before leaving
        Thread.currentThread().interrupt();
        System.err.println("Job interrupted.");
        e.printStackTrace();
        System.exit(1);
    } catch (ClassNotFoundException e) {
        System.err.println("ClassNotFound exception.");
        e.printStackTrace();
        System.exit(1);
    }

}