List of usage examples for org.apache.hadoop.fs FileSystem deleteOnExit
Set deleteOnExit
To view the source code for org.apache.hadoop.fs FileSystem deleteOnExit, click Source Link.
From source file:smile.wide.AttributeValueHistogram.java
License:Apache License
@Override public int run(String[] arg) throws Exception { if (arg.length < 2) { s_logger.fatal("Usage: AttributeValueHistogram <infile> <outfile>"); // TODO: return an error code? }//from ww w. ja v a2s .c o m s_logger.debug("Got " + arg.length + " arguments"); inPath_ = arg[0]; s_logger.info("Input path is " + inPath_); // parse the key-value arguments passed - by now these are the arguments // specific to AttributeValueHistogram for (int i = 1; i < arg.length; ++i) { String[] tokens = arg[i].split("="); if (tokens.length != 2) { s_logger.fatal("Can't parse argument" + arg[i]); } if (tokens[0].equals("xdata.bayesnets.datasetreader.class")) { readerClass_ = tokens[1].trim(); s_logger.debug("Set reader class to " + readerClass_); } else if (tokens[0].equals("xdata.bayesnets.datasetreader.filter")) { readerFilter_ = tokens[1].trim(); s_logger.debug("Set reader filter to " + readerFilter_); } else if (tokens[0].equals("xdata.bayesnets.datasetreader.instid")) { readerInstID_ = tokens[1].trim(); s_logger.debug("Set reader's instance ID column to " + readerInstID_); } else if (tokens[0].equals("xdata.bayesnets.datasetreader.variablenames")) { variableNames_ = tokens[1].trim(); s_logger.debug("Set reader's variable names to " + variableNames_); } else { s_logger.warn("Unknown argument " + arg[i]); } } conf_ = getConf(); // pass the reader class to the mapper, in jobconf // TODO: use setClass here - fails early if wrong, not in the mapper conf_.set("xdata.bayesnets.datasetreader.class", readerClass_); conf_.set("xdata.bayesnets.datasetreader.filter", readerFilter_); // conf_.set("xdata.bayesnets.datasetreader.instid", readerInstID_); // not used conf_.set("xdata.bayesnets.datasetreader.variablenames", variableNames_); conf_.setBoolean("mapred.compress.map.output", true); // compress intermediate data conf_.set("mapred.output.compression.type", CompressionType.BLOCK.toString()); // by block, to keep splittable conf_.setClass("mapred.map.output.compression.codec", 
GzipCodec.class, CompressionCodec.class); // for debugging conf_.set("keep.failed.task.files", "true"); conf_.set("keep.failed.task.pattern", "*"); Job job = new Job(conf_); job.setJarByClass(AttributeValueHistogram.class); // use this jar job.setJobName("Collect value histograms by attribute"); FileInputFormat.addInputPath(job, new Path(inPath_)); int rnd = (new Random()).nextInt(); lastWorkingDir_ = job.getWorkingDirectory().toUri(); s_logger.info("Job working directory is " + lastWorkingDir_); String tempDirName = job.getWorkingDirectory() + "/tmp/attvalhist" + rnd + ".tmp"; s_logger.info("Temp files in directory " + tempDirName); FileOutputFormat.setOutputPath(job, new Path(tempDirName)); job.setMapperClass(AttributeValueHistogramMapper.class); job.setCombinerClass(AttributeValueHistogramReducer.class); job.setReducerClass(AttributeValueHistogramReducer.class); // set both the map and reduce in/out classes job.setOutputKeyClass(Text.class); // the name of the attribute job.setOutputValueClass(MapWritable.class); // Value -> count map job.setOutputFormatClass(SequenceFileOutputFormat.class); // run'em int result = job.waitForCompletion(true) ? 
0 : 16; // retain the temp file, collect the output attributeValues_ = new TreeMap<String, Map<String, Integer>>(); FileSystem fs = FileSystem.get(conf_); SequenceFile.Reader reader = null; Path resPath = new Path(tempDirName); FileStatus[] stats = fs.listStatus(resPath); // read all output files for (FileStatus stat : stats) { if (stat.getPath().toUri().toString().contains("part-r-")) try { s_logger.info("Reading results from " + stat.getPath()); reader = new SequenceFile.Reader(fs, stat.getPath(), conf_); // Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf_); // MapWritable value = (MapWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf_); Text key = new Text(); MapWritable value = new MapWritable(); while (reader.next(key, value)) { TreeMap<String, Integer> valueCounts = new TreeMap<String, Integer>(); for (Writable attValue : value.keySet()) { valueCounts.put(((Text) attValue).toString(), ((IntWritable) (value.get(attValue))).get()); } attributeValues_.put(key.toString(), valueCounts); } } finally { IOUtils.closeStream(reader); } } fs.deleteOnExit(resPath); return result; }
From source file:yarnkit.utils.YarnUtils.java
License:Apache License
public static Path createApplicationTempDir(@Nonnull FileSystem fs, @Nonnull String appId) throws IOException { Path dir = new Path(fs.getHomeDirectory(), appId); if (!fs.exists(dir)) { fs.mkdirs(dir);//from w w w. j a v a 2 s .c o m fs.deleteOnExit(dir); } return dir; }