List of usage examples for org.apache.hadoop.fs.FileSystem.delete()
public abstract boolean delete(Path f, boolean recursive) throws IOException;
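Before the per-project examples, here is a minimal, self-contained sketch of the call itself (not taken from any of the sources below); the path /tmp/example-output and the class name DeleteExample are placeholders chosen for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path target = new Path("/tmp/example-output"); // placeholder path
        FileSystem fs = target.getFileSystem(conf);

        // Guard with exists() to distinguish "nothing to delete" from a failed delete;
        // delete() itself returns false when the path does not exist.
        if (fs.exists(target)) {
            // recursive = true removes a directory together with everything beneath it.
            boolean deleted = fs.delete(target, true);
            System.out.println("Deleted " + target + ": " + deleted);
        }
    }
}

With recursive set to false, deleting a non-empty directory fails with an IOException, which is why the snippets below pass true when clearing whole output directories and false when removing a single probe file.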
From source file:com.splout.db.hadoop.engine.SploutSQLProxyOutputFormat.java
License:Apache License
@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    long waitTimeHeartBeater = context.getConfiguration().getLong(HeartBeater.WAIT_TIME_CONF, 5000);
    heartBeater = new HeartBeater(context, waitTimeHeartBeater);
    heartBeater.needHeartBeat();
    conf = context.getConfiguration();
    this.context = context;
    outputFormat.setConf(context.getConfiguration());

    return new RecordWriter<ITuple, NullWritable>() {

        // Temporary and permanent Paths for properly writing Hadoop output files
        private Map<Integer, Path> permPool = new HashMap<Integer, Path>();
        private Map<Integer, Path> tempPool = new HashMap<Integer, Path>();

        private void initSql(int partition) throws IOException, InterruptedException {
            // HDFS final location of the generated partition file. It will be
            // loaded to the temporary folder in the HDFS, then finally will be
            // committed by the OutputCommitter to the proper location.
            FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(
                    SploutSQLProxyOutputFormat.this.context);
            Path perm = new Path(committer.getWorkPath(), partition + ".db");
            FileSystem fs = perm.getFileSystem(conf);

            // Make a task unique name that contains the actual index output name to
            // make debugging simpler
            // Note: if using JVM reuse, the sequence number will not be reset for a
            // new task using the jvm
            Path temp = conf.getLocalPath("mapred.local.dir",
                    "splout_task_" + SploutSQLProxyOutputFormat.this.context.getTaskAttemptID() + '.'
                            + FILE_SEQUENCE.incrementAndGet());

            FileSystem localFileSystem = FileSystem.getLocal(conf);
            if (localFileSystem.exists(temp)) {
                localFileSystem.delete(temp, true);
            }
            localFileSystem.mkdirs(temp);

            Path local = fs.startLocalOutput(perm, new Path(temp, partition + ".db"));

            permPool.put(partition, perm);
            tempPool.put(partition, new Path(temp, partition + ".db"));

            outputFormat.initPartition(partition, local);
        }

        @Override
        public void close(TaskAttemptContext ctx) throws IOException, InterruptedException {
            FileSystem fs = FileSystem.get(ctx.getConfiguration());
            try {
                if (ctx != null) {
                    heartBeater.setProgress(ctx);
                }
                outputFormat.close();
                for (Map.Entry<Integer, Path> entry : permPool.entrySet()) {
                    // Hadoop - completeLocalOutput()
                    fs.completeLocalOutput(entry.getValue(), tempPool.get(entry.getKey()));
                }
            } finally {
                // in any case, destroy the HeartBeater
                heartBeater.cancelHeartBeat();
            }
        }

        @Override
        public void write(ITuple tuple, NullWritable ignore) throws IOException, InterruptedException {
            int partition = (Integer) tuple.get(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD);
            if (tempPool.get(partition) == null) {
                initSql(partition);
            }
            outputFormat.write(tuple);
        }
    };
}
From source file:com.splout.db.hadoop.TablespaceGenerator.java
License:Apache License
/**
 * Samples the input, if needed.
 */
protected PartitionMap sample(int nPartitions, Configuration conf, TupleSampler.SamplingType samplingType,
        TupleSampler.SamplingOptions samplingOptions) throws TupleSamplerException, IOException {

    FileSystem fileSystem = outputPath.getFileSystem(conf);

    // Number of records to sample
    long recordsToSample = conf.getLong("splout.sampling.records.to.sample", 100000);

    // The sampler will generate a file with samples to use to create the
    // partition map
    Path sampledInput = new Path(outputPath, OUT_SAMPLED_INPUT);
    Path sampledInputSorted = new Path(outputPath, OUT_SAMPLED_INPUT_SORTED);

    TupleSampler sampler = new TupleSampler(samplingType, samplingOptions, callingClass);
    long retrivedSamples = sampler.sample(tablespace, conf, recordsToSample, sampledInput);

    // 1.1 Sorting sampled keys on disk
    fileSystem.delete(sampledInputSorted, true);
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(fileSystem, Text.class, NullWritable.class, conf);
    sorter.sort(sampledInput, sampledInputSorted);

    // Start the reader
    @SuppressWarnings("deprecation")
    final SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, sampledInputSorted, conf);

    Log.info(retrivedSamples + " total keys sampled.");

    /*
     * 2: Calculate partition map
     */
    Nextable nextable = new Nextable() {
        @Override
        public boolean next(Writable writable) throws IOException {
            return reader.next(writable);
        }
    };
    List<PartitionEntry> partitionEntries = calculatePartitions(nPartitions, retrivedSamples, nextable);

    reader.close();
    fileSystem.delete(sampledInput, true);
    fileSystem.delete(sampledInputSorted, true);

    // 2.2 Create the partition map
    return new PartitionMap(partitionEntries);
}
From source file:com.splout.db.hadoop.TupleSampler.java
License:Apache License
@SuppressWarnings("deprecation") private long fullScanSampling(TablespaceSpec tablespace, final long sampleSize, Configuration hadoopConf, Path outputPath, final int nSplits) throws TupleSamplerException { MapOnlyJobBuilder builder = new MapOnlyJobBuilder(hadoopConf, "Reservoir Sampling to path " + outputPath); for (Table table : tablespace.getPartitionedTables()) { final TableSpec tableSpec = table.getTableSpec(); final String getPartitionByJavaScript = tableSpec.getPartitionByJavaScript(); for (TableInput inputFile : table.getFiles()) { final RecordProcessor processor = inputFile.getRecordProcessor(); for (Path path : inputFile.getPaths()) { builder.addInput(path, inputFile.getFormat(), new MapOnlyMapper<ITuple, NullWritable, Text, NullWritable>() { final int nSamples = (int) (sampleSize / nSplits); final String[] samples = new String[nSamples]; CounterInterface counterInterface; long recordCounter = 0; JavascriptEngine jsEngine = null; @Override protected void setup(Context context, MultipleOutputsCollector coll) throws IOException, InterruptedException { counterInterface = new CounterInterface(context); // Initialize JavaScript engine if needed if (getPartitionByJavaScript != null) { try { jsEngine = new JavascriptEngine(getPartitionByJavaScript); } catch (Throwable e) { throw new RuntimeException(e); }//from w ww .ja va2s . c o m } } ; // Collect Tuples with decreasing probability // (http://en.wikipedia.org/wiki/Reservoir_sampling) protected void map(ITuple key, NullWritable value, Context context) throws IOException, InterruptedException { ITuple uTuple; try { uTuple = processor.process(key, key.getSchema().getName(), counterInterface); } catch (Throwable e) { throw new RuntimeException(e); } if (uTuple == null) { // user may have filtered the record return; } long reservoirIndex; if (recordCounter < nSamples) { reservoirIndex = recordCounter; } else { reservoirIndex = (long) (Math.random() * recordCounter); } if (reservoirIndex < nSamples) { String pkey = null; try { pkey = TablespaceGenerator.getPartitionByKey(uTuple, tableSpec, jsEngine); } catch (Throwable e) { throw new RuntimeException("Error when determining partition key.", e); } samples[(int) reservoirIndex] = pkey; } recordCounter++; } // Write the in-memory sampled Tuples protected void cleanup(Context context, MultipleOutputsCollector coll) throws IOException, InterruptedException { Text key = new Text(); for (String keyStr : samples) { if (keyStr != null) { key.set(keyStr); context.write(key, NullWritable.get()); } } } }, inputFile.getSpecificHadoopInputFormatContext()); } } } // Set output path Path outReservoirPath = new Path(outputPath + "-reservoir"); builder.setOutput(outReservoirPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class, NullWritable.class); builder.setJarByClass(callingClass); try { Job job = null; job = builder.createJob(); if (!job.waitForCompletion(true)) { throw new TupleSamplerException("Reservoir Sampling failed!"); } } catch (Exception e) { throw new TupleSamplerException("Error creating or launching the sampling job.", e); } finally { try { builder.cleanUpInstanceFiles(); } catch (IOException e) { throw new TupleSamplerException("Error cleaning up the sampling job.", e); } } long retrievedSamples = 0; try { FileSystem outFs = outReservoirPath.getFileSystem(hadoopConf); if (outFs.listStatus(outReservoirPath) == null) { throw new IOException("Output folder not created: the Job failed!"); } retrievedSamples = 0; // Instantiate the writer we will write samples to 
SequenceFile.Writer writer = new SequenceFile.Writer(outFs, hadoopConf, outputPath, Text.class, NullWritable.class); // Aggregate the output into a single file for being consistent with the other sampling methods for (FileStatus fileStatus : outFs.listStatus(outReservoirPath)) { Path thisPath = fileStatus.getPath(); if (thisPath.getName().startsWith("part-m-")) { SequenceFile.Reader reader = new SequenceFile.Reader(outFs, thisPath, hadoopConf); Text key = new Text(); while (reader.next(key)) { writer.append(key, NullWritable.get()); retrievedSamples++; } reader.close(); } } writer.close(); outFs.delete(outReservoirPath, true); } catch (IOException e) { throw new TupleSamplerException("Error consolidating the sample job results into one file.", e); } return retrievedSamples; }
From source file:com.splunk.shuttl.archiver.endtoend.ArchiverEndToEndTest.java
License:Apache License
private void deleteArchivingTmpPath(FileSystem hadoopFileSystem) {
    try {
        URI configuredTmp = archiveConfiguration.getTmpDirectory();
        hadoopFileSystem.delete(new Path(configuredTmp), true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.splunk.shuttl.archiver.endtoend.ArchiverEndToEndTest.java
License:Apache License
private void deleteArchivingRoot(FileSystem hadoopFileSystem) {
    try {
        URI configuredRoot = archiveConfiguration.getArchivingRoot();
        hadoopFileSystem.delete(new Path(configuredRoot), true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.splunk.shuttl.archiver.filesystem.hadoop.HadoopArchiveFileSystemSlowTest.java
License:Apache License
@Test(groups = { "end-to-end" }) @Parameters(value = { "hadoop.host", "hadoop.port" }) public void rename_dirWithMultipleLevelsOfNonExistingFiles_renamesDirectory(String hadoopHost, String hadoopPort) throws IOException { FileSystem hadoopFileSystem = TUtilsFunctional.getHadoopFileSystem(hadoopHost, hadoopPort); String simpleClassName = getClass().getSimpleName(); Path path = new Path(simpleClassName + "/1/foo/dir/").makeQualified(hadoopFileSystem); Path otherRoot = new Path(simpleClassName + "/2/foo/dir").makeQualified(hadoopFileSystem); HadoopArchiveFileSystem realFileStructure = new HadoopArchiveFileSystem(hadoopFileSystem); try {//from ww w .j av a2 s . co m hadoopFileSystem.mkdirs(path); assertTrue(hadoopFileSystem.exists(path)); hadoopFileSystem.delete(otherRoot, true); assertFalse(hadoopFileSystem.exists(otherRoot)); // Test realFileStructure.rename(path.toUri().getPath(), otherRoot.toUri().getPath()); assertTrue(hadoopFileSystem.exists(otherRoot)); assertFalse(hadoopFileSystem.exists(path)); } finally { hadoopFileSystem.delete(new Path("/1"), true); hadoopFileSystem.delete(new Path("/2"), true); } }
From source file:com.splunk.shuttl.integration.hadoop.hbase.JobRunner.java
License:Apache License
/**
 * @throws IOException
 */
private void DeleteOutputPathIfExists(FileSystem fs, Path outputPath) throws IOException {
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTarget.java
License:Apache License
boolean validateHadoopDir(String configName, String dirPathTemplate, List<ConfigIssue> issues) {
    boolean ok;
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_40));
        ok = false;
    } else {
        int firstEL = dirPathTemplate.indexOf("$");
        if (firstEL > -1) {
            int lastDir = dirPathTemplate.lastIndexOf("/", firstEL);
            dirPathTemplate = dirPathTemplate.substring(0, lastDir);
        }
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            Path dir = new Path(dirPathTemplate);
            FileSystem fs = getFileSystemForInitDestroy();
            if (!fs.exists(dir)) {
                try {
                    if (fs.mkdirs(dir)) {
                        ok = true;
                    } else {
                        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                                Errors.HADOOPFS_41));
                        ok = false;
                    }
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                            Errors.HADOOPFS_42, ex.toString()));
                    ok = false;
                }
            } else {
                try {
                    Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                    fs.create(dummy).close();
                    fs.delete(dummy, false);
                    ok = true;
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                            Errors.HADOOPFS_43, ex.toString()));
                    ok = false;
                }
            }
        } catch (Exception ex) {
            LOG.info("Validation Error: " + Errors.HADOOPFS_44.getMessage(), ex.toString(), ex);
            issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_44,
                    ex.toString()));
            ok = false;
        }
    }
    return ok;
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTargetConfigBean.java
License:Apache License
private boolean validateHadoopDir(final Stage.Context context, final String configName,
        final String configGroup, String dirPathTemplate, final List<Stage.ConfigIssue> issues) {
    final AtomicBoolean ok = new AtomicBoolean(true);
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_40));
        ok.set(false);
    } else {
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            final Path dir = new Path(dirPathTemplate);
            final FileSystem fs = getFileSystemForInitDestroy();
            getUGI().doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    if (!fs.exists(dir)) {
                        try {
                            if (fs.mkdirs(dir)) {
                                ok.set(true);
                            } else {
                                issues.add(context.createConfigIssue(configGroup, configName,
                                        Errors.HADOOPFS_41));
                                ok.set(false);
                            }
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_42,
                                    ex.toString()));
                            ok.set(false);
                        }
                    } else {
                        try {
                            Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                            fs.create(dummy).close();
                            fs.delete(dummy, false);
                            ok.set(true);
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_43,
                                    ex.toString()));
                            ok.set(false);
                        }
                    }
                    return null;
                }
            });
        } catch (Exception ex) {
            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_44, ex.toString()));
            ok.set(false);
        }
    }
    return ok.get();
}