Example usage for org.apache.hadoop.fs FileSystem deleteOnExit

List of usage examples for org.apache.hadoop.fs FileSystem deleteOnExit

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem deleteOnExit.

Prototype

public boolean deleteOnExit(Path f)

To view the source code for org.apache.hadoop.fs FileSystem deleteOnExit, click the Source Link below.

Click Source Link

Document

A cache of files that should be deleted when the FileSystem is closed or the JVM is exited.

Usage

From source file:smile.wide.AttributeValueHistogram.java

License:Apache License

/**
 * Runs the attribute-value-histogram MapReduce job.
 *
 * <p>Expects {@code arg[0]} to be the input path; the remaining arguments are
 * {@code key=value} pairs configuring the dataset reader. The job writes its
 * output to a random temp directory under the job working directory, then the
 * part files are read back into {@code attributeValues_} (attribute name ->
 * value -> count) and the temp directory is scheduled for deletion on exit.
 *
 * @param arg command-line arguments: input path followed by key=value options
 * @return 0 on success, 16 if the job failed, 2 on bad usage
 * @throws Exception if job setup, submission, or result collection fails
 */
@Override
public int run(String[] arg) throws Exception {

    if (arg.length < 2) {
        s_logger.fatal("Usage: AttributeValueHistogram <infile> <outfile>");
        // Bail out with an error code instead of falling through and
        // dereferencing missing arguments / unset fields below.
        return 2;
    }

    s_logger.debug("Got " + arg.length + " arguments");

    inPath_ = arg[0];
    s_logger.info("Input path is " + inPath_);

    // parse the key-value arguments passed - by now these are the arguments
    // specific to AttributeValueHistogram
    for (int i = 1; i < arg.length; ++i) {
        String[] tokens = arg[i].split("=");
        if (tokens.length != 2) {
            s_logger.fatal("Can't parse argument " + arg[i]);
            // Skip malformed arguments: indexing tokens[1] below would throw
            // ArrayIndexOutOfBoundsException when there is no '=' present.
            continue;
        }

        if (tokens[0].equals("xdata.bayesnets.datasetreader.class")) {
            readerClass_ = tokens[1].trim();
            s_logger.debug("Set reader class to " + readerClass_);
        } else if (tokens[0].equals("xdata.bayesnets.datasetreader.filter")) {
            readerFilter_ = tokens[1].trim();
            s_logger.debug("Set reader filter to " + readerFilter_);
        } else if (tokens[0].equals("xdata.bayesnets.datasetreader.instid")) {
            readerInstID_ = tokens[1].trim();
            s_logger.debug("Set reader's instance ID column to " + readerInstID_);
        } else if (tokens[0].equals("xdata.bayesnets.datasetreader.variablenames")) {
            variableNames_ = tokens[1].trim();
            s_logger.debug("Set reader's variable names to " + variableNames_);
        } else {
            s_logger.warn("Unknown argument " + arg[i]);
        }
    }

    conf_ = getConf();

    // pass the reader class to the mapper, in jobconf
    // TODO: use setClass here - fails early if wrong, not in the mapper
    conf_.set("xdata.bayesnets.datasetreader.class", readerClass_);
    conf_.set("xdata.bayesnets.datasetreader.filter", readerFilter_);
    // conf_.set("xdata.bayesnets.datasetreader.instid", readerInstID_); // not used
    conf_.set("xdata.bayesnets.datasetreader.variablenames", variableNames_);

    conf_.setBoolean("mapred.compress.map.output", true); // compress intermediate data
    conf_.set("mapred.output.compression.type", CompressionType.BLOCK.toString()); // by block, to keep splittable
    conf_.setClass("mapred.map.output.compression.codec", GzipCodec.class, CompressionCodec.class);

    // for debugging
    conf_.set("keep.failed.task.files", "true");
    conf_.set("keep.failed.task.pattern", "*");

    Job job = new Job(conf_);

    job.setJarByClass(AttributeValueHistogram.class); // use this jar
    job.setJobName("Collect value histograms by attribute");

    FileInputFormat.addInputPath(job, new Path(inPath_));

    // Unique-ish temp output directory under the job working directory.
    int rnd = (new Random()).nextInt();
    lastWorkingDir_ = job.getWorkingDirectory().toUri();
    s_logger.info("Job working directory is " + lastWorkingDir_);
    String tempDirName = job.getWorkingDirectory() + "/tmp/attvalhist" + rnd + ".tmp";
    s_logger.info("Temp files in directory " + tempDirName);
    FileOutputFormat.setOutputPath(job, new Path(tempDirName));

    job.setMapperClass(AttributeValueHistogramMapper.class);
    job.setCombinerClass(AttributeValueHistogramReducer.class);
    job.setReducerClass(AttributeValueHistogramReducer.class);

    // set both the map and reduce in/out classes
    job.setOutputKeyClass(Text.class); // the name of the attribute
    job.setOutputValueClass(MapWritable.class); // Value -> count map
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // run'em
    int result = job.waitForCompletion(true) ? 0 : 16;

    // retain the temp file, collect the output
    attributeValues_ = new TreeMap<String, Map<String, Integer>>();

    FileSystem fs = FileSystem.get(conf_);
    SequenceFile.Reader reader = null;

    Path resPath = new Path(tempDirName);
    FileStatus[] stats = fs.listStatus(resPath);

    // read all reducer output files ("part-r-*") back into memory
    for (FileStatus stat : stats) {
        if (stat.getPath().toUri().toString().contains("part-r-"))
            try {
                s_logger.info("Reading results from " + stat.getPath());
                reader = new SequenceFile.Reader(fs, stat.getPath(), conf_);
                Text key = new Text();
                MapWritable value = new MapWritable();

                while (reader.next(key, value)) {
                    TreeMap<String, Integer> valueCounts = new TreeMap<String, Integer>();
                    for (Writable attValue : value.keySet()) {
                        valueCounts.put(((Text) attValue).toString(),
                                ((IntWritable) (value.get(attValue))).get());
                    }
                    attributeValues_.put(key.toString(), valueCounts);
                }
            } finally {
                // closeStream tolerates null, so a failed constructor is safe here
                IOUtils.closeStream(reader);
            }
    }

    // Schedule the temp output directory for cleanup when the FileSystem
    // is closed or the JVM exits.
    fs.deleteOnExit(resPath);

    return result;
}

From source file:yarnkit.utils.YarnUtils.java

License:Apache License

/**
 * Returns the per-application temp directory under the filesystem's home
 * directory, creating it if necessary.
 *
 * <p>A newly created directory is registered via
 * {@link FileSystem#deleteOnExit(Path)} so it is removed when the
 * {@code FileSystem} is closed or the JVM exits; a pre-existing directory
 * is returned untouched.
 *
 * @param fs    filesystem to create the directory on
 * @param appId application identifier used as the directory name
 * @return path of the application temp directory
 * @throws IOException if the filesystem cannot be queried or written
 */
public static Path createApplicationTempDir(@Nonnull FileSystem fs, @Nonnull String appId) throws IOException {
    final Path appDir = new Path(fs.getHomeDirectory(), appId);
    if (fs.exists(appDir)) {
        return appDir;
    }
    fs.mkdirs(appDir);
    fs.deleteOnExit(appDir);
    return appDir;
}