List of usage examples for org.apache.hadoop.fs FileSystem deleteOnExit
FileSystem.deleteOnExit(Path) marks a path to be deleted when the FileSystem is closed; because cached FileSystem instances are closed automatically at JVM shutdown, it is a convenient way to clean up temporary files and directories. The examples below show how several open-source projects use it.
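Before the project-specific examples, here is a minimal sketch of the common pattern: create a scratch directory, register it with deleteOnExit, and let Hadoop remove it when the FileSystem is closed. The /tmp path and class name are placeholders, not taken from any example on this page.

// Minimal sketch: mark a temporary working directory for automatic cleanup.
// The path below is a placeholder; real code would derive it from its own config.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteOnExitSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path workDir = new Path("/tmp/work-" + System.currentTimeMillis());
        FileSystem fs = FileSystem.get(workDir.toUri(), conf);
        fs.mkdirs(workDir);
        // deleteOnExit returns true if the path was registered; the actual delete
        // happens when this FileSystem instance is closed (e.g. at JVM shutdown).
        boolean registered = fs.deleteOnExit(workDir);
        System.out.println("registered for cleanup: " + registered);
        // ... use workDir as scratch space ...
    }
}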
From source file:org.qcri.algebra.MultiplicationTest.java
License:Apache License
@Before
public void setup() throws Exception {
    NMFCommon.DEFAULT_REDUCESPLOTS = 2;
    dot(inputVectorsA, inputVectorsA2, dotVectors);
    composite(inputVectorsA, inputVectorsA2, inputVectorsAsquare, compositeVectors);
    product(inputVectorsA, inputVectorsB, productVectors);
    ata(inputVectorsA, ataVectors);
    conf = new Configuration();
    conf.set("mapreduce.job.tracker", "local");
    conf.set("fs.default.name", "file:///");
    long currTime = System.currentTimeMillis();
    Path testbed = new Path("/tmp/" + currTime);
    output = new Path(testbed, "output");
    tmp = new Path(testbed, "tmp");
    FileSystem fs;
    try {
        fs = FileSystem.get(output.toUri(), conf);
        fs.mkdirs(output);
        fs.mkdirs(tmp);
        fs.deleteOnExit(testbed);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in creating output direcoty " + output);
        return;
    }
    aTranspose = transpose(inputVectorsA);
    aDensePath = AlgebraCommon.toDenseMapDir(inputVectorsA, tmp, tmp, "matrixADense").getRowPath();
    aSquareDensePath = AlgebraCommon.toDenseMapDir(inputVectorsAsquare, tmp, tmp, "matrixASqaureDense").getRowPath();
    a2DensePath = AlgebraCommon.toDenseMapDir(inputVectorsA2, tmp, tmp, "matrixA2Dense").getRowPath();
    bDensePath = AlgebraCommon.toDenseMapDir(inputVectorsB, tmp, tmp, "matrixBDense").getRowPath();
    atDensePath = AlgebraCommon.toDenseMapDir(aTranspose, tmp, tmp, "matrixAtDense").getRowPath();
    aSparsePath = AlgebraCommon.toSparseMapDir(inputVectorsA, tmp, tmp, "matrixASparse").getRowPath();
    aSquareSparsePath = AlgebraCommon.toSparseMapDir(inputVectorsAsquare, tmp, tmp, "matrixASqaureSparse").getRowPath();
    a2SparsePath = AlgebraCommon.toSparseMapDir(inputVectorsA2, tmp, tmp, "matrixA2Sparse").getRowPath();
    bSparsePath = AlgebraCommon.toSparseMapDir(inputVectorsB, tmp, tmp, "matrixBSparse").getRowPath();
    atSparsePath = AlgebraCommon.toSparseMapDir(aTranspose, tmp, tmp, "matrixAtSparse").getRowPath();
}
From source file:org.qcri.pca.CompositeJobTest.java
License:Apache License
@Before
public void setup() throws Exception {
    conf = new Configuration();
    long currTime = System.currentTimeMillis();
    Path outputDir = new Path("/tmp/" + currTime);
    FileSystem fs;
    try {
        fs = FileSystem.get(outputDir.toUri(), conf);
        fs.mkdirs(outputDir);
        fs.deleteOnExit(outputDir);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in creating output direcoty " + outputDir);
        return;
    }
    ym = computeMean(inputVectors);
    double[] xm = new double[xsize];
    times(ym, inMemMatrix, xm);
    ymPath = PCACommon.toDistributedVector(new DenseVector(ym), outputDir, "ym", conf);
    xmPath = PCACommon.toDistributedVector(new DenseVector(xm), outputDir, "xm", conf);
    DistributedRowMatrix distMatrix = PCACommon.toDistributedRowMatrix(new DenseMatrix(inMemMatrix), outputDir, outputDir, "inMemMatrix");
    inMemMatrixPath = distMatrix.getRowPath();
    for (double[] row : xtx)
        for (int c = 0; c < row.length; c++)
            row[c] = 0;
    for (double[] row : ytx)
        for (int c = 0; c < row.length; c++)
            row[c] = 0;
    computeXtXandYtX(inputVectors);
}
From source file:org.qcri.pca.Norm2JobTest.java
License:Apache License
@Before
public void setup() throws Exception {
    conf = new Configuration();
    long currTime = System.currentTimeMillis();
    Path meanSpanDirPath = new Path("/tmp/" + currTime + "/meanSpan");
    meanSpanFilePath = new MeanAndSpanJob().getMeanSpanPath(meanSpanDirPath);
    FileSystem fs;
    try {
        fs = FileSystem.get(meanSpanDirPath.toUri(), conf);
        fs.mkdirs(meanSpanDirPath);
        fs.deleteOnExit(meanSpanDirPath);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in creating meanSpan direcoty " + meanSpanDirPath);
        return;
    }
    prepareTheMeanSpanFile(fs);
}
From source file:org.qcri.pca.PCATest.java
License:Apache License
@Before
public void setup() {
    ppcaDriver = new SPCADriver() {
        public Path getTempPath() {
            return tmp;
        }
    };
    N = 527;
    D = 38;
    d = 8;
    conf = new Configuration();
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");
    URL inputURL = this.getClass().getResource("/input.water");
    input = new Path(inputURL.toString());
    long currTime = System.currentTimeMillis();
    output = new Path("/tmp/" + currTime + "/output");
    tmp = new Path("/tmp/" + currTime + "/tmp");
    FileSystem fs;
    try {
        fs = FileSystem.get(output.toUri(), conf);
        fs.mkdirs(output);
        fs.mkdirs(tmp);
        fs.deleteOnExit(output);
        fs.deleteOnExit(tmp);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in creating output direcoty " + output);
        return;
    }
}
From source file:org.qcri.pca.ReconstructionErrJobTest.java
License:Apache License
@Before
public void setup() throws Exception {
    conf = new Configuration();
    long currTime = System.currentTimeMillis();
    Path outputDir = new Path("/tmp/" + currTime);
    FileSystem fs;
    try {
        fs = FileSystem.get(outputDir.toUri(), conf);
        fs.mkdirs(outputDir);
        fs.deleteOnExit(outputDir);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in creating output direcoty " + outputDir);
        return;
    }
    ym = computeMean(inputVectors);
    double[] xm = new double[xsize];
    times(ym, y2xVectors, xm);
    double[] zm = new double[cols];
    timesTranspose(xm, cVectors, zm);
    for (int c = 0; c < cols; c++)
        zm[c] -= ym[c];
    ymPath = PCACommon.toDistributedVector(new DenseVector(ym), outputDir, "ym", conf);
    zmPath = PCACommon.toDistributedVector(new DenseVector(zm), outputDir, "zm", conf);
    DistributedRowMatrix distMatrix = PCACommon.toDistributedRowMatrix(new DenseMatrix(y2xVectors), outputDir, outputDir, "y2xMatrix");
    y2xMatrixPath = distMatrix.getRowPath();
    distMatrix = PCACommon.toDistributedRowMatrix(new DenseMatrix(cVectors), outputDir, outputDir, "cMatrix");
    cMatrixPath = distMatrix.getRowPath();
    computeError(inputVectors);
}
From source file:org.qcri.pca.VarianceJobTest.java
License:Apache License
@Before
public void setup() throws Exception {
    conf = new Configuration();
    long currTime = System.currentTimeMillis();
    Path outputDir = new Path("/tmp/" + currTime);
    FileSystem fs;
    try {
        fs = FileSystem.get(outputDir.toUri(), conf);
        fs.mkdirs(outputDir);
        fs.deleteOnExit(outputDir);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in creating output direcoty " + outputDir);
        return;
    }
    ym = computeMean(inputVectors);
    double[] xm = new double[xsize];
    times(ym, y2xVectors, xm);
    ymPath = PCACommon.toDistributedVector(new DenseVector(ym), outputDir, "ym", conf);
    xmPath = PCACommon.toDistributedVector(new DenseVector(xm), outputDir, "xm", conf);
    DistributedRowMatrix distMatrix = PCACommon.toDistributedRowMatrix(new DenseMatrix(y2xVectors), outputDir, outputDir, "y2xMatrix");
    y2xMatrixPath = distMatrix.getRowPath();
    distMatrix = PCACommon.toDistributedRowMatrix(new DenseMatrix(cVectors), outputDir, outputDir, "cMatrix");
    cMatrixPath = distMatrix.getRowPath();
}
From source file:org.terrier.utility.io.HadoopPlugin.java
License:Mozilla Public License
/** Initialises the Plugin, by connecting to the distributed file system */
public void initialise() throws Exception {
    config = getGlobalConfiguration();
    final org.apache.hadoop.fs.FileSystem DFS = hadoopFS = org.apache.hadoop.fs.FileSystem.get(config);
    FileSystem terrierDFS = new FileSystem() {
        public String name() {
            return "hdfs";
        }

        /** capabilities of the filesystem */
        public byte capabilities() {
            return FSCapability.READ | FSCapability.WRITE | FSCapability.RANDOM_READ | FSCapability.STAT
                    | FSCapability.DEL_ON_EXIT | FSCapability.LS_DIR;
        }

        public String[] schemes() {
            return new String[] { "dfs", "hdfs" };
        }

        /** returns true if the path exists */
        public boolean exists(String filename) throws IOException {
            if (logger.isDebugEnabled())
                logger.debug("Checking that " + filename + " exists answer=" + DFS.exists(new Path(filename)));
            return DFS.exists(new Path(filename));
        }

        /** open a file of given filename for reading */
        public InputStream openFileStream(String filename) throws IOException {
            if (logger.isDebugEnabled())
                logger.debug("Opening " + filename);
            return DFS.open(new Path(filename));
        }

        /** open a file of given filename for writing */
        public OutputStream writeFileStream(String filename) throws IOException {
            if (logger.isDebugEnabled())
                logger.debug("Creating " + filename);
            return DFS.create(new Path(filename));
        }

        public boolean mkdir(String filename) throws IOException {
            return DFS.mkdirs(new Path(filename));
        }

        public RandomDataOutput writeFileRandom(String filename) throws IOException {
            throw new IOException("HDFS does not support random writing");
        }

        public RandomDataInput openFileRandom(String filename) throws IOException {
            return new HadoopFSRandomAccessFile(DFS, filename);
        }

        public boolean delete(String filename) throws IOException {
            return DFS.delete(new Path(filename), true);
        }

        public boolean deleteOnExit(String filename) throws IOException {
            return DFS.deleteOnExit(new Path(filename));
        }

        public String[] list(String path) throws IOException {
            final FileStatus[] contents = DFS.listStatus(new Path(path));
            if (contents == null)
                throw new FileNotFoundException("Cannot list path " + path);
            final String[] names = new String[contents.length];
            for (int i = 0; i < contents.length; i++) {
                names[i] = contents[i].getPath().getName();
            }
            return names;
        }

        public String getParent(String path) throws IOException {
            return new Path(path).getParent().getName();
        }

        public boolean rename(String source, String destination) throws IOException {
            return DFS.rename(new Path(source), new Path(destination));
        }

        public boolean isDirectory(String path) throws IOException {
            return DFS.getFileStatus(new Path(path)).isDir();
        }

        public long length(String path) throws IOException {
            return DFS.getFileStatus(new Path(path)).getLen();
        }

        public boolean canWrite(String path) throws IOException {
            return DFS.getFileStatus(new Path(path)).getPermission().getUserAction().implies(FsAction.WRITE);
        }

        public boolean canRead(String path) throws IOException {
            return DFS.getFileStatus(new Path(path)).getPermission().getUserAction().implies(FsAction.READ);
        }
    };
    Files.addFileSystemCapability(terrierDFS);
}
From source file:org.terrier.utility.io.HadoopUtility.java
License:Mozilla Public License
protected static Path makeTemporaryFile(JobConf jobConf, String filename) throws IOException {
    final int randomKey = jobConf.getInt("terrier.tempfile.id", random.nextInt());
    jobConf.setInt("terrier.tempfile.id", randomKey);
    FileSystem defFS = FileSystem.get(jobConf);
    final Path tempFile = new Path(HADOOP_TMP_PATH + "/" + (randomKey) + "-" + filename);
    defFS.deleteOnExit(tempFile);
    return tempFile;
}
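A note on the design: the helper registers each temporary job file with deleteOnExit at the moment it allocates the path, so callers write to it and never delete it themselves. Below is a minimal, self-contained sketch of the same idea; the path, file name, and payload are placeholders, not Terrier code.

// Illustrative only: allocate a unique temp path, register it for cleanup,
// then write side data into it. Cleanup happens when the FileSystem is closed.
import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class TempFileSketch {
    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf();
        FileSystem fs = FileSystem.get(jobConf);
        // mirror the helper above: unique path under a tmp area, cleaned up on exit
        Path tempFile = new Path("/tmp/terrier/" + System.nanoTime() + "-side-data");
        fs.deleteOnExit(tempFile);
        try (FSDataOutputStream out = fs.create(tempFile)) {
            out.writeUTF("payload"); // write whatever the job needs to ship
        }
        // no explicit delete: the path is removed when this FileSystem is closed
    }
}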
From source file:org.terrier.utility.io.HadoopUtility.java
License:Mozilla Public License
protected static void saveApplicationSetupToJob(JobConf jobConf, boolean getFreshProperties) throws Exception { // Do we load a fresh properties File? //TODO fix, if necessary //if (getFreshProperties) // loadApplicationSetup(new Path(ApplicationSetup.TERRIER_HOME)); FileSystem remoteFS = FileSystem.get(jobConf); URI remoteFSURI = remoteFS.getUri(); //make a copy of the current application setup properties, these may be amended //as some files are more globally accessible final Properties propertiesDuringJob = new Properties(); Properties appProperties = ApplicationSetup.getProperties(); for (Object _key : appProperties.keySet()) { String key = (String) _key; propertiesDuringJob.put(key, appProperties.get(key)); }/*from w w w. j a va2 s .c o m*/ //the share folder is needed during indexing, save this on DFS if (Files.getFileSystemName(ApplicationSetup.TERRIER_SHARE).equals("local")) { Path tempTRShare = makeTemporaryFile(jobConf, "terrier.share"); propertiesDuringJob.setProperty("terrier.share", remoteFSURI.resolve(tempTRShare.toUri()).toString()); if (Files.exists(ApplicationSetup.TERRIER_SHARE)) { jobConf.set("terrier.share.copied", remoteFSURI.resolve(tempTRShare.toUri()).toString()); logger.info("Copying terrier share/ directory (" + ApplicationSetup.TERRIER_SHARE + ") to shared storage area (" + remoteFSURI.resolve(tempTRShare.toUri()).toString() + ")"); FileUtil.copy(FileSystem.getLocal(jobConf), new Path(ApplicationSetup.TERRIER_SHARE), remoteFS, tempTRShare, false, false, jobConf); } else { logger.warn( "No terrier.share folder found at " + ApplicationSetup.TERRIER_SHARE + ", copying skipped"); } } //copy the terrier.properties content over Path tempTRProperties = makeTemporaryFile(jobConf, "terrier.properties"); logger.debug("Writing terrier properties out to DFS " + tempTRProperties.toString()); OutputStream out = remoteFS.create(tempTRProperties); remoteFS.deleteOnExit(tempTRProperties); propertiesDuringJob.store(out, "Automatically generated by HadoopUtility.saveApplicationSetupToJob()"); out.close(); out = null; DistributedCache.addCacheFile(tempTRProperties.toUri().resolve(new URI("#terrier.properties")), jobConf); DistributedCache.createSymlink(jobConf); //copy the non-JVM system properties over as well Path tempSysProperties = makeTemporaryFile(jobConf, "system.properties"); DataOutputStream dos = FileSystem.get(jobConf).create(tempSysProperties); logger.debug("Writing system properties out to DFS " + tempSysProperties.toString()); for (Object _propertyKey : System.getProperties().keySet()) { String propertyKey = (String) _propertyKey; if (!startsWithAny(propertyKey, checkSystemProperties)) { dos.writeUTF(propertyKey); dos.writeUTF(System.getProperty(propertyKey)); } } dos.writeUTF("FIN"); dos.close(); dos = null; DistributedCache.addCacheFile(tempSysProperties.toUri().resolve(new URI("#system.properties")), jobConf); }
From source file:simsql.runtime.MRLoader.java
License:Apache License
public long run(String inputPath, String outputPath, short typeCode, Relation r, int sortAtt) {
    // make a directory for the relation
    Configuration conf = new Configuration();
    FileSystem dfs = null;
    try {
        dfs = FileSystem.get(conf);
    } catch (Exception e) {
        throw new RuntimeException("Cannot access HDFS!", e);
    }

    try {
        // if it exists, destroy it.
        Path path = new Path(outputPath);
        if (dfs.exists(path)) {
            dfs.delete(path, true);
        }
    } catch (Exception e) {
        throw new RuntimeException("Could not create the file to bulk load to!", e);
    }

    // find a file name
    String tempPath = null;
    if (inputPath.startsWith("hdfs:")) {
        tempPath = inputPath.replace("hdfs:", "");
    } else {
        tempPath = "/tempDataFile_" + r.getName();
        try {
            dfs.delete(new Path(tempPath), true);
        } catch (Exception e) {
            // ignore this.
        }

        // upload the text file
        try {
            dfs.copyFromLocalFile(false, true, new Path(inputPath), new Path(tempPath));
            dfs.deleteOnExit(new Path(tempPath));
        } catch (Exception e) {
            throw new RuntimeException("Failed to upload text file " + inputPath + " to HDFS!", e);
        }
    }

    // set up the new job's parameters.
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", RecordCompression.getCodecClass());
    conf.set("io.serializations",
            "simsql.runtime.RecordSerialization,simsql.runtime.RecordKeySerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInt("simsql.loader.numAtts", r.getAttributes().size());
    conf.setInt("simsql.loader.typeCode", (int) typeCode);
    conf.setInt("simsql.loader.sortAtt", sortAtt);

    String[] myStrings = new String[r.getAttributes().size()];
    int j = 0;
    for (simsql.compiler.Attribute a : r.getAttributes()) {
        myStrings[j++] = a.getPhysicalRealization().getClass().getName();
    }
    conf.setStrings("simsql.loader.types", myStrings);

    // create a job
    Job job;
    try {
        job = new Job(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create bulk loading job!", e);
    }

    // set the split size (number of mappers)
    long fSize = 0;
    if (inputPath.startsWith("hdfs")) {
        fSize = RelOp.getPathsTotalSize(new String[] { tempPath });
    } else {
        fSize = new File(inputPath).length();
    }
    FileInputFormat.setMinInputSplitSize(job, fSize / (long) numTasks);
    FileInputFormat.setMaxInputSplitSize(job, fSize / (long) numTasks);

    // and the number of reducers
    job.setNumReduceTasks(numTasks);

    // the mapper/reducer/jar
    job.setMapperClass(MRLoaderMapper.class);
    job.setReducerClass(MRLoaderReducer.class);
    job.setJarByClass(MRLoader.class);

    // I/O settings.
    job.setOutputFormatClass(RecordOutputFormat.class);
    job.setMapOutputKeyClass(RecordKey.class);
    job.setMapOutputValueClass(RecordWrapper.class);
    job.setOutputKeyClass(Nothing.class);
    job.setOutputValueClass(Record.class);

    try {
        FileInputFormat.setInputPaths(job, new Path(tempPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    } catch (Exception e) {
        throw new RuntimeException("Could not set job inputs/outputs", e);
    }
    job.setGroupingComparatorClass(RecordKeyGroupingComparator.class);
    job.setPartitionerClass(RecordPartitioner.class);
    job.setSortComparatorClass(RecordKeySortComparator.class);
    job.setJobName("MRLoader: " + inputPath + " ==> " + outputPath);

    // run it
    Counters counters;
    try {
        job.waitForCompletion(true);
        counters = job.getCounters();
    } catch (Exception e) {
        throw new RuntimeException("Could not set up bulk loader job!", e);
    }

    // now, delete all the empty part files
    try {
        // get a filesystem
        FileSystem ddfs = FileSystem.get(conf);
        Path outPath = new Path(outputPath);
        if (ddfs.exists(outPath) && ddfs.isDirectory(outPath)) {
            FileStatus fstatus[] = ddfs.listStatus(outPath, new TableFileFilter());
            for (FileStatus ff : fstatus) {
                if (ddfs.getContentSummary(ff.getPath()).getLength() <= 4) { // snappy leaves 4-byte long files around...
                    ddfs.delete(ff.getPath(), true);
                }
            }
        }
    } catch (Exception e) {
        // this isn't disastrous
    }

    // get the counter for the output of the mapper.
    Counter bytesCounter = counters.findCounter(OutputFileSerializer.Counters.BYTES_WRITTEN);
    return bytesCounter.getValue();
}
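The bulk-loader example above reduces to a small staging pattern: copy a local input file onto HDFS for the job, then mark the staged copy with deleteOnExit so it disappears once the loader's FileSystem is closed. A minimal sketch of that pattern with placeholder paths (not SimSQL code):

// Illustrative staging sketch: the local and HDFS paths below are placeholders.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class StagingSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem dfs = FileSystem.get(conf);
        Path local = new Path("/local/data.csv");        // placeholder local input
        Path staged = new Path("/tempDataFile_example");  // placeholder HDFS staging path
        dfs.copyFromLocalFile(false, true, local, staged); // keep source, overwrite target
        dfs.deleteOnExit(staged); // staged copy is removed when this FileSystem closes
    }
}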