List of usage examples for org.apache.hadoop.fs.FileSystem.delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
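Before the collected examples, here is a minimal, self-contained sketch of the basic call pattern (not taken from any of the source files below). It assumes a default Configuration and a hypothetical /tmp/example-output path, and passes recursive=true so that non-empty directories are removed as well.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemDeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // hypothetical path, used here only for illustration
        Path target = new Path("/tmp/example-output");

        // recursive=true deletes a directory and everything under it;
        // delete() returns false if nothing was removed
        if (fs.exists(target)) {
            boolean deleted = fs.delete(target, true);
            System.out.println("deleted: " + deleted);
        }
        fs.close();
    }
}

Most of the examples that follow use the same idiom: check exists(), delete the stale output (recursively for directories), then recreate or rewrite it.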
From source file:com.hp.hpl.jena.sparql.algebra.MyOpVisitor.java
License:Open Source License
public void execute() {
    Configuration conf = new Configuration();
    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        Path out = new Path("output");
        // remove any previous output directory before recreating it
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        fs.mkdirs(out);
    } catch (IOException e) {
        e.printStackTrace();
    }
    Triple[] Q = new Triple[0];
    Q = opBGP.getPattern().getList().toArray(Q);
    Set<Var> vars = PatternVars.vars(query.getQueryPattern());
    JoinPlaner.setid(id);
    JoinPlaner.newVaRS(vars);
    try {
        JoinPlaner.form(Q);
        JoinPlaner.removeNonJoiningVaribles(Q);
        int i = 0;
        while (!JoinPlaner.isEmpty()) {
            String v = JoinPlaner.getNextJoin();
            System.out.println(v);
            i++;
        }
        if (i == 0) {
            Path outFile = new Path("output/Join_" + id + "_" + 0);
            OutputBuffer out = new OutputBuffer(outFile, fs);
            //if (fs.exists(outFile)) {
            //    fs.delete(outFile, true);
            //}
            //fs.create(outFile);
            QueryProcessor.executeSelect(Q[0], out, "P0");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java
License:Open Source License
/**
 * @param srcFileName
 * @param fileName
 * @param rlen
 * @param clen
 * @param nnz
 * @throws IOException
 */
public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path src = new Path(srcFileName);
    Path merge = new Path(fileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(merge)) {
        hdfs.delete(merge, true);
    }
    OutputStream out = hdfs.create(merge, true);

    // write out the header first
    StringBuilder sb = new StringBuilder();
    sb.append("%%MatrixMarket matrix coordinate real general\n");

    // output number of rows, number of columns and number of nnz
    sb.append(rlen + " " + clen + " " + nnz + "\n");
    out.write(sb.toString().getBytes());

    // if the source is a directory
    if (hdfs.getFileStatus(src).isDirectory()) {
        try {
            FileStatus[] contents = hdfs.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    InputStream in = hdfs.open(contents[i].getPath());
                    try {
                        IOUtils.copyBytes(in, out, conf, false);
                    } finally {
                        IOUtilFunctions.closeSilently(in);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(src)) {
        InputStream in = null;
        try {
            in = hdfs.open(src);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/**
 * Method to merge multiple CSV part files on HDFS into a single CSV file on HDFS.
 * The part files are created by CSV_WRITE MR job.
 *
 * This method is invoked from CP-write instruction.
 *
 * @param srcFileName
 * @param destFileName
 * @param csvprop
 * @param rlen
 * @param clen
 * @throws IOException
 */
public void mergeCSVPartFiles(String srcFileName, String destFileName, CSVFileFormatProperties csvprop,
        long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path mergedFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(mergedFilePath)) {
        hdfs.delete(mergedFilePath, true);
    }
    OutputStream out = hdfs.create(mergedFilePath, true);

    // write out the header, if needed
    if (csvprop.hasHeader()) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < clen; i++) {
            sb.append("C" + (i + 1));
            if (i < clen - 1)
                sb.append(csvprop.getDelim());
        }
        sb.append('\n');
        out.write(sb.toString().getBytes());
        sb.setLength(0);
    }

    // if the source is a directory
    if (hdfs.isDirectory(srcFilePath)) {
        try {
            FileStatus[] contents = hdfs.listStatus(srcFilePath);
            Path[] partPaths = new Path[contents.length];
            int numPartFiles = 0;
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    partPaths[i] = contents[i].getPath();
                    numPartFiles++;
                }
            }
            Arrays.sort(partPaths);

            for (int i = 0; i < numPartFiles; i++) {
                InputStream in = hdfs.open(partPaths[i]);
                try {
                    IOUtils.copyBytes(in, out, conf, false);
                    if (i < numPartFiles - 1)
                        out.write('\n');
                } finally {
                    IOUtilFunctions.closeSilently(in);
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(srcFilePath)) {
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/**
 * @param srcFileName
 * @param destFileName
 * @param rlen
 * @param clen
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (!_props.hasHeader()) {
        // simply move srcFile to destFile
        /*
         * TODO: Remove this roundabout way!
         * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv
         * & the only path that exists already on HDFS is /user/biadmin/csv/.
         * In this case: the directory structure /user/biadmin/csv/temp/out must be created.
         * Simple hdfs.rename() does not seem to create this directory structure.
         */

        // delete the destination file, if exists already
        //boolean ret1 = hdfs.delete(destFilePath, true);

        // Create /user/biadmin/csv/temp/out/file.csv so that ..../temp/out/ is created.
        //boolean ret2 = hdfs.createNewFile(destFilePath);

        // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/
        //boolean ret3 = hdfs.delete(destFilePath, true);

        // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv
        //boolean ret4 = hdfs.rename(srcFilePath, destFilePath);

        //System.out.println("Return values = del:" + ret1 + ", createNew:" + ret2 + ", del:" + ret3 + ", rename:" + ret4);
        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');

    if (hdfs.isDirectory(srcFilePath)) {
        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<Path>();
        for (FileStatus stat : hdfs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = hdfs.create(tmp, true);
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy rest of the data from firstpart
        InputStream in = null;
        try {
            in = hdfs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        hdfs.delete(firstpart, true);
        hdfs.rename(tmp, firstpart);

        // rename srcfile to destFile
        hdfs.delete(destFilePath, true);
        hdfs.createNewFile(destFilePath); // force the creation of directory structure
        hdfs.delete(destFilePath, true);  // delete the file, but preserve the directory structure
        hdfs.rename(srcFilePath, destFilePath); // move the data
    } else if (hdfs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = hdfs.create(destFilePath, true);

        // write header
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy the data from srcFile
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
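The delete/createNewFile/delete/rename sequence in the TODO above works around the fact that rename() does not create missing parent directories. A minimal sketch of the more direct alternative, assuming fs is an open FileSystem handle and reusing the hypothetical paths from the comment:

// hypothetical source and destination paths, for illustration only
Path src = new Path("/user/biadmin/csv/srcfile");
Path dest = new Path("/user/biadmin/csv/temp/out/file.csv");

if (fs.exists(dest))
    fs.delete(dest, true);       // remove any stale destination
fs.mkdirs(dest.getParent());     // create /user/biadmin/csv/temp/out if it is missing
fs.rename(src, dest);            // move the data into place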
From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java
License:Open Source License
@Override
public void cleanupJob(JobContext context) throws IOException {
    JobConf conf = context.getJobConf();

    // do the clean up of temporary directory
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        FileSystem fs = outputPath.getFileSystem(conf);
        context.getProgressible().progress();
        if (fs.exists(outputPath))
            fs.delete(outputPath, true);
    }
}
From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java
License:Open Source License
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    JobConf conf = context.getJobConf();
    TaskAttemptID attemptId = context.getTaskAttemptID();

    // get the mapping between index to output filename
    outputs = MRJobConfiguration.getOutputs(conf);

    // get temp task output path (compatible with hadoop1 and hadoop2)
    Path taskOutPath = FileOutputFormat.getWorkOutputPath(conf);
    FileSystem fs = taskOutPath.getFileSystem(conf);
    if (!fs.exists(taskOutPath))
        throw new IOException("Task output path " + taskOutPath.toString() + " does not exist.");

    // Move the task outputs to their final places
    context.getProgressible().progress();
    moveFinalTaskOutputs(context, fs, taskOutPath);

    // Delete the temporary task-specific output directory
    if (!fs.delete(taskOutPath, true))
        LOG.debug("Failed to delete the temporary output directory of task: " + attemptId + " - " + taskOutPath);
}
From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java
License:Open Source License
/**
 * @param context
 * @param fs
 * @param file
 * @throws IOException
 */
private void moveFileToDestination(TaskAttemptContext context, FileSystem fs, Path file) throws IOException {
    JobConf conf = context.getJobConf();
    TaskAttemptID attemptId = context.getTaskAttemptID();

    // get output index and final destination
    String taskType = (conf.getBoolean(JobContext.TASK_ISMAP, true)) ? "m" : "r";
    String name = file.getName();
    int charIx = name.indexOf("-" + taskType + "-");
    int index = Integer.parseInt(name.substring(0, charIx));
    Path finalPath = new Path(outputs[index], file.getName());

    // move file from 'file' to 'finalPath'
    if (!fs.rename(file, finalPath)) {
        if (!fs.delete(finalPath, true))
            throw new IOException("Failed to delete earlier output " + finalPath + " for rename of " + file
                    + " in task " + attemptId);
        if (!fs.rename(file, finalPath))
            throw new IOException("Failed to save output " + finalPath + " for rename of " + file
                    + " in task: " + attemptId);
    }
}
From source file:com.ibm.bi.dml.runtime.matrix.sort.SamplingSortMRInputFormat.java
License:Open Source License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 * @throws IllegalAccessException
 * @throws InstantiationException
 */
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile)
        throws IOException, InstantiationException, IllegalAccessException {
    SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
    Sampler sampler = new Sampler();

    Class<? extends WritableComparable> targetKeyClass;
    targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS,
            WritableComparable.class);

    // get input converter information
    int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
    int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);

    // indicate whether the matrix value in this mapper is a matrix cell or a matrix block
    int partitions = conf.getNumReduceTasks();

    long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;

    // take N samples from different parts of the input
    int totalcount = 0;
    for (int i = 0; i < samples; ++i) {
        SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat
                .getRecordReader(splits[sampleStep * i], conf, null);
        int count = 0;
        WritableComparable key = (WritableComparable) reader.createKey();
        Writable value = (Writable) reader.createValue();
        while (reader.next(key, value) && count < recordsPerSample) {
            Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
            inputConverter.setBlockSize(brlen, bclen);
            inputConverter.convert(key, value);
            while (inputConverter.hasNext()) {
                Pair pair = inputConverter.next();
                if (pair.getKey() instanceof DoubleWritable) {
                    sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
                } else if (pair.getValue() instanceof MatrixCell) {
                    sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
                } else
                    throw new IOException("SamplingSortMRInputFormat unsupported key/value class: "
                            + pair.getKey().getClass() + ":" + pair.getValue().getClass());
                count++;
            }
            key = (WritableComparable) reader.createKey();
            value = (Writable) reader.createValue();
        }
        totalcount += count;
    }

    if (totalcount == 0) // empty input files
        sampler.addValue(new DoubleWritable(0));

    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }

    // note: key value always double/null as expected by partitioner
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class,
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    int index0 = -1, i = 0;
    boolean lessthan0 = true;
    for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
        writer.append(splitValue, nullValue);
        if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
            index0 = i;
            lessthan0 = false;
        }
        i++;
    }
    if (lessthan0)
        index0 = partitions - 1;
    writer.close();

    return index0;
}
From source file:com.ibm.bi.dml.runtime.transform.ApplyTfCSVMR.java
License:Open Source License
public static JobReturn runJob(String inputPath, String specPath, String mapsPath, String tmpPath,
        String outputPath, String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numCols,
        int replication, String headerLine) throws IOException, ClassNotFoundException, InterruptedException {
    JobConf job = new JobConf(ApplyTfCSVMR.class);
    job.setJobName("ApplyTfCSV");

    /* Setup MapReduce Job */
    job.setJarByClass(ApplyTfCSVMR.class);

    // set relevant classes
    job.setMapperClass(ApplyTfCSVMapper.class);
    job.setNumReduceTasks(0);

    // Add transformation metadata file as well as partOffsetsFile to Distributed cache
    DistributedCache.addCacheFile((new Path(mapsPath)).toUri(), job);
    DistributedCache.createSymlink(job);

    Path cachefile = new Path(partOffsetsFile);
    DistributedCache.addCacheFile(cachefile.toUri(), job);
    DistributedCache.createSymlink(job);

    // set input and output properties
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInt("dfs.replication", replication);

    FileInputFormat.addInputPath(job, new Path(inputPath));

    // delete outputPath, if exists already.
    Path outPath = new Path(outputPath);
    FileSystem fs = FileSystem.get(job);
    fs.delete(outPath, true);
    FileOutputFormat.setOutputPath(job, outPath);

    job.set(MRJobConfiguration.TF_HAS_HEADER, Boolean.toString(inputDataProperties.hasHeader()));
    job.set(MRJobConfiguration.TF_DELIM, inputDataProperties.getDelim());
    if (inputDataProperties.getNAStrings() != null)
        // Adding "dummy" string to handle the case of na_strings = ""
        job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()));
    job.set(MRJobConfiguration.TF_SPEC_FILE, specPath);
    job.set(MRJobConfiguration.TF_SMALLEST_FILE, CSVReblockMR.findSmallestFile(job, inputPath));
    job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, outputPath);
    job.setLong(MRJobConfiguration.TF_NUM_COLS, numCols);
    job.set(MRJobConfiguration.TF_TXMTD_PATH, mapsPath);
    job.set(MRJobConfiguration.TF_HEADER, headerLine);
    job.set(CSVReblockMR.ROWID_FILE_NAME, cachefile.toString());
    job.set(MRJobConfiguration.TF_TMP_LOC, tmpPath);

    // turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    // Run the job
    RunningJob runjob = JobClient.runJob(job);

    // Since transform CSV produces part files w/ prefix transform-part-*,
    // delete all the "default" part-..... files
    deletePartFiles(fs, outPath);

    MatrixCharacteristics mc = new MatrixCharacteristics();
    return new JobReturn(new MatrixCharacteristics[] { mc }, runjob.isSuccessful());
}
From source file:com.ibm.bi.dml.runtime.transform.ApplyTfCSVMR.java
License:Open Source License
private static void deletePartFiles(FileSystem fs, Path path) throws FileNotFoundException, IOException {
    PathFilter filter = new PathFilter() {
        public boolean accept(Path file) {
            return file.getName().startsWith("part-");
        }
    };
    FileStatus[] list = fs.listStatus(path, filter);
    for (FileStatus stat : list) {
        fs.delete(stat.getPath(), false);
    }
}