List of usage examples for org.apache.hadoop.fs.FileSystem#delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
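Before the project examples below, here is a minimal sketch of the two common call patterns: a non-recursive delete of a single file and a recursive delete of a directory tree. The configuration and path names are placeholders for illustration only, not taken from any of the projects listed here.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        // Placeholder configuration and paths; adjust to your cluster and layout.
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/example/data.txt");
        Path dir = new Path("/tmp/example/output");

        FileSystem fs = file.getFileSystem(conf);

        // delete(path, false): removes a single file; for a non-empty directory
        // this typically fails with an IOException.
        boolean fileDeleted = fs.delete(file, false);

        // delete(path, true): removes a directory and everything under it.
        boolean dirDeleted = fs.delete(dir, true);

        System.out.println("file deleted: " + fileDeleted + ", dir deleted: " + dirDeleted);
    }
}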
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java
License:Apache License
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
            .newTaskAttemptContext(job.getConfiguration(), progressable);

    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter = getFileWriter(
            tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position. However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {

        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(),
                            new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));

            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);

            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}
From source file:com.github.gaoyangthu.demo.mapred.dancing.DistributedPentomino.java
License:Apache License
public int run(String[] args) throws Exception {
    JobConf conf;
    int depth = 5;
    int width = 9;
    int height = 10;
    Class<? extends Pentomino> pentClass;
    if (args.length == 0) {
        System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    conf = new JobConf(getConf());

    // Pick up the parameters, should the user set these
    width = conf.getInt("pent.width", width);
    height = conf.getInt("pent.height", height);
    depth = conf.getInt("pent.depth", depth);
    pentClass = conf.getClass("pent.class", OneSidedPentomino.class, Pentomino.class);

    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-depth")) {
            depth = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-height")) {
            height = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-width")) {
            width = Integer.parseInt(args[++i].trim());
        }
    }

    // Set parameters for MR tasks to pick up either which way the user sets
    // them or not
    conf.setInt("pent.width", width);
    conf.setInt("pent.height", height);
    conf.setInt("pent.depth", depth);

    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        conf.setJarByClass(PentMap.class);

        conf.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        createInputDirectory(fileSys, input, pent, depth);

        // the keys are the prefix strings
        conf.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(PentMap.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(2000);
        conf.setNumReduceTasks(1);

        JobClient.runJob(conf);
    } finally {
        fileSys.delete(input, true);
    }

    return 0;
}
From source file:com.github.gaoyangthu.demo.mapred.PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static void mkdir(Path path, boolean overwrite) throws IOException {
    Configuration config = new Configuration();
    FileSystem fs = path.getFileSystem(config);
    if (fs.exists(path) && !overwrite) {
        throw new IllegalStateException("Mkdir exception:path=" + path.toString() + " exists");
    }
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
    fs.mkdirs(path);
    fs.close();
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static void delete(Configuration conf, Iterable<Path> paths) throws IOException {
    if (conf == null) {
        conf = new Configuration();
    }
    for (Path path : paths) {
        FileSystem fs = path.getFileSystem(conf);
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
    }
}
From source file:com.github.seqware.queryengine.plugins.hbasemr.MRHBasePluginRunner.java
License:Open Source License
public File handleFileResult(Path path) {
    FileSystem fs = null;
    try {
        Path outputPartPath = new Path(path, "part-r-00000");
        // copy file from HDFS to local temporary file
        Logger.getLogger(FeaturesByFilterPlugin.class.getName())
                .info("Source file is " + outputPartPath.toString());
        Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);
        fs = FileSystem.get(conf);
        File createTempFile = File.createTempFile("vcf", "out");
        createTempFile.delete();
        Path outPath = new Path(createTempFile.toURI());
        FileSystem localSystem = FileSystem.get(new Configuration());

        Logger.getLogger(FeaturesByFilterPlugin.class.getName())
                .info("Destination file is " + outPath.toString());
        if (!fs.exists(outputPartPath)) {
            Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Input file not found");
        }
        if (!fs.isFile(outputPartPath)) {
            Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Input should be a file");
        }
        if (localSystem.exists(outPath)) {
            Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Output already exists");
        }
        // doesn't quite work yet, no time to finish before poster, check results manually on hdfs
        FileUtil.copy(fs, outputPartPath, localSystem, outPath, true, true, conf);
        return new File(outPath.toUri());
    } catch (IOException ex) {
        Logger.getLogger(VCFDumperPlugin.class.getName()).fatal(null, ex);
    } finally {
        if (fs != null) {
            try {
                fs.delete(path, true);
            } catch (IOException ex) {
                Logger.getLogger(VCFDumperPlugin.class.getName())
                        .warn("IOException when clearing after text output", ex);
            }
        }
    }
    return null;
}
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
private void cleanPreviousIteration(int iter, Configuration conf, Path outputDir) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path prevIterDir = new Path(outputDir, "v" + (iter - 1));
    fs.delete(prevIterDir, true);
    fs.close();
}
From source file:com.google.mr4c.content.HDFSContentFactory.java
License:Open Source License
public boolean deleteContent(URI uri) throws IOException {
    Path path = toPath(uri);
    FileSystem fs = FileSystem.get(uri, m_config);
    return fs.delete(path, false);
}
From source file:com.google.mr4c.hadoop.HadoopTestUtils.java
License:Open Source License
private static void startDFSCluster() throws IOException {

    // wipe out the space if it exists
    FileSystem localFS = FileSystem.get(new Configuration());
    Path dfsRoot = new Path(DFS_DIR);
    if (localFS.exists(dfsRoot)) {
        if (localFS.exists(dfsRoot) && !localFS.delete(dfsRoot, true)) {
            throw new IOException(String.format("Couldn't delete DFS root [%s]", dfsRoot));
        }
    }

    // create the name and data directories
    File nameDir = createDFSDir("name");
    File dataDir = createDFSDir("data");
    File tmpDir = createDFSDir("tmp");

    Configuration conf = new Configuration();
    conf.set("dfs.name.dir", nameDir.getAbsolutePath());
    conf.set("dfs.data.dir", dataDir.getAbsolutePath());
    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());
    // Namespace ID is stored in the "tmp" directory.
    // If we don't get it inside our DFS, we'll get an error on restart when the id doesn't match.
    // See //www.michael-noll.com/tutorials/running-hadoop-on-ubuntu-linux-multi-node-cluster/#caveats

    if (System.getProperty("hadoop.log.dir") == null) {
        System.setProperty("hadoop.log.dir", "/tmp/mr4c/hadoop/log");
    }

    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    builder.manageNameDfsDirs(false); // we created the name node directory
    builder.manageDataDfsDirs(false); // we created the data node directory
    builder.startupOption(StartupOption.FORMAT); // might not matter, just in case
    // take defaults on everything else

    s_dfsCluster = builder.build();
}
From source file:com.hadoop.compression.lzo.LzoIndex.java
License:Open Source License
/**
 * Index an lzo file to allow the input format to split them into separate map
 * jobs.
 *
 * @param fs File system that contains the file.
 * @param lzoFile the lzo file to index. For filename.lzo, the created index file will be
 *        filename.lzo.index.
 * @throws IOException
 */
public static void createIndex(FileSystem fs, Path lzoFile) throws IOException {

    Configuration conf = fs.getConf();
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(lzoFile);
    if (null == codec) {
        throw new IOException("Could not find codec for file " + lzoFile
                + " - you may need to add the LZO codec to your io.compression.codecs "
                + "configuration in core-site.xml");
    }
    ((Configurable) codec).setConf(conf);

    FSDataInputStream is = null;
    FSDataOutputStream os = null;
    Path outputFile = lzoFile.suffix(LZO_INDEX_SUFFIX);
    Path tmpOutputFile = lzoFile.suffix(LZO_TMP_INDEX_SUFFIX);

    // Track whether an exception was thrown or not, so we know to either
    // delete the tmp index file on failure, or rename it to the new index file on success.
    boolean indexingSucceeded = false;
    try {
        is = fs.open(lzoFile);
        os = fs.create(tmpOutputFile);
        LzopDecompressor decompressor = (LzopDecompressor) codec.createDecompressor();
        // Solely for reading the header
        codec.createInputStream(is, decompressor);
        int numCompressedChecksums = decompressor.getCompressedChecksumsCount();
        int numDecompressedChecksums = decompressor.getDecompressedChecksumsCount();

        while (true) {
            // read and ignore, we just want to get to the next int
            int uncompressedBlockSize = is.readInt();
            if (uncompressedBlockSize == 0) {
                break;
            } else if (uncompressedBlockSize < 0) {
                throw new EOFException();
            }

            int compressedBlockSize = is.readInt();
            if (compressedBlockSize <= 0) {
                throw new IOException("Could not read compressed block size");
            }

            // See LzopInputStream.getCompressedData
            boolean isUncompressedBlock = (uncompressedBlockSize == compressedBlockSize);
            int numChecksumsToSkip = isUncompressedBlock ? numDecompressedChecksums
                    : numDecompressedChecksums + numCompressedChecksums;
            long pos = is.getPos();
            // write the pos of the block start
            os.writeLong(pos - 8);
            // seek to the start of the next block, skip any checksums
            is.seek(pos + compressedBlockSize + (4 * numChecksumsToSkip));
        }
        // If we're here, indexing was successful.
        indexingSucceeded = true;
    } finally {
        // Close any open streams.
        if (is != null) {
            is.close();
        }
        if (os != null) {
            os.close();
        }

        if (!indexingSucceeded) {
            // If indexing didn't succeed (i.e. an exception was thrown), clean up after ourselves.
            fs.delete(tmpOutputFile, false);
        } else {
            // Otherwise, rename filename.lzo.index.tmp to filename.lzo.index.
            fs.rename(tmpOutputFile, outputFile);
        }
    }
}