List of usage examples for org.apache.hadoop.fs.FileSystem.delete()
public abstract boolean delete(Path f, boolean recursive) throws IOException;
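Before the per-project examples, here is a minimal, self-contained sketch of the call itself (not taken from any of the sources below); the path /tmp/example-output and the class name DeleteExample are placeholders chosen for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path target = new Path("/tmp/example-output"); // placeholder path
        FileSystem fs = target.getFileSystem(conf);

        // Guard with exists() to distinguish "nothing to delete" from a failed delete;
        // delete() itself returns false when the path does not exist.
        if (fs.exists(target)) {
            // recursive = true removes a directory together with everything beneath it.
            boolean deleted = fs.delete(target, true);
            System.out.println("Deleted " + target + ": " + deleted);
        }
    }
}

With recursive set to false, deleting a non-empty directory fails with an IOException, which is why the snippets below pass true when clearing whole output directories and false when removing a single probe file.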
From source file:com.splout.db.hadoop.engine.SploutSQLProxyOutputFormat.java
License:Apache License
@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    long waitTimeHeartBeater = context.getConfiguration().getLong(HeartBeater.WAIT_TIME_CONF, 5000);
    heartBeater = new HeartBeater(context, waitTimeHeartBeater);
    heartBeater.needHeartBeat();
    conf = context.getConfiguration();
    this.context = context;
    outputFormat.setConf(context.getConfiguration());

    return new RecordWriter<ITuple, NullWritable>() {

        // Temporary and permanent Paths for properly writing Hadoop output files
        private Map<Integer, Path> permPool = new HashMap<Integer, Path>();
        private Map<Integer, Path> tempPool = new HashMap<Integer, Path>();

        private void initSql(int partition) throws IOException, InterruptedException {
            // HDFS final location of the generated partition file. It will be
            // loaded to the temporary folder in the HDFS, then finally will be
            // committed by the OutputCommitter to the proper location.
            FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(
                    SploutSQLProxyOutputFormat.this.context);
            Path perm = new Path(committer.getWorkPath(), partition + ".db");
            FileSystem fs = perm.getFileSystem(conf);

            // Make a task unique name that contains the actual index output name to
            // make debugging simpler
            // Note: if using JVM reuse, the sequence number will not be reset for a
            // new task using the jvm
            Path temp = conf.getLocalPath("mapred.local.dir",
                    "splout_task_" + SploutSQLProxyOutputFormat.this.context.getTaskAttemptID() + '.'
                            + FILE_SEQUENCE.incrementAndGet());

            FileSystem localFileSystem = FileSystem.getLocal(conf);
            if (localFileSystem.exists(temp)) {
                localFileSystem.delete(temp, true);
            }
            localFileSystem.mkdirs(temp);

            Path local = fs.startLocalOutput(perm, new Path(temp, partition + ".db"));

            permPool.put(partition, perm);
            tempPool.put(partition, new Path(temp, partition + ".db"));

            outputFormat.initPartition(partition, local);
        }

        @Override
        public void close(TaskAttemptContext ctx) throws IOException, InterruptedException {
            FileSystem fs = FileSystem.get(ctx.getConfiguration());
            try {
                if (ctx != null) {
                    heartBeater.setProgress(ctx);
                }
                outputFormat.close();
                for (Map.Entry<Integer, Path> entry : permPool.entrySet()) {
                    // Hadoop - completeLocalOutput()
                    fs.completeLocalOutput(entry.getValue(), tempPool.get(entry.getKey()));
                }
            } finally {
                // in any case, destroy the HeartBeater
                heartBeater.cancelHeartBeat();
            }
        }

        @Override
        public void write(ITuple tuple, NullWritable ignore) throws IOException, InterruptedException {
            int partition = (Integer) tuple.get(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD);
            if (tempPool.get(partition) == null) {
                initSql(partition);
            }
            outputFormat.write(tuple);
        }
    };
}
From source file:com.splout.db.hadoop.TablespaceGenerator.java
License:Apache License
/**
 * Samples the input, if needed.
 */
protected PartitionMap sample(int nPartitions, Configuration conf, TupleSampler.SamplingType samplingType,
        TupleSampler.SamplingOptions samplingOptions) throws TupleSamplerException, IOException {

    FileSystem fileSystem = outputPath.getFileSystem(conf);

    // Number of records to sample
    long recordsToSample = conf.getLong("splout.sampling.records.to.sample", 100000);

    // The sampler will generate a file with samples to use to create the
    // partition map
    Path sampledInput = new Path(outputPath, OUT_SAMPLED_INPUT);
    Path sampledInputSorted = new Path(outputPath, OUT_SAMPLED_INPUT_SORTED);

    TupleSampler sampler = new TupleSampler(samplingType, samplingOptions, callingClass);
    long retrivedSamples = sampler.sample(tablespace, conf, recordsToSample, sampledInput);

    // 1.1 Sorting sampled keys on disk
    fileSystem.delete(sampledInputSorted, true);
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(fileSystem, Text.class, NullWritable.class, conf);
    sorter.sort(sampledInput, sampledInputSorted);

    // Start the reader
    @SuppressWarnings("deprecation")
    final SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, sampledInputSorted, conf);

    Log.info(retrivedSamples + " total keys sampled.");

    /*
     * 2: Calculate partition map
     */
    Nextable nextable = new Nextable() {
        @Override
        public boolean next(Writable writable) throws IOException {
            return reader.next(writable);
        }
    };
    List<PartitionEntry> partitionEntries = calculatePartitions(nPartitions, retrivedSamples, nextable);

    reader.close();
    fileSystem.delete(sampledInput, true);
    fileSystem.delete(sampledInputSorted, true);

    // 2.2 Create the partition map
    return new PartitionMap(partitionEntries);
}
From source file:com.splout.db.hadoop.TupleSampler.java
License:Apache License
@SuppressWarnings("deprecation") private long fullScanSampling(TablespaceSpec tablespace, final long sampleSize, Configuration hadoopConf, Path outputPath, final int nSplits) throws TupleSamplerException { MapOnlyJobBuilder builder = new MapOnlyJobBuilder(hadoopConf, "Reservoir Sampling to path " + outputPath); for (Table table : tablespace.getPartitionedTables()) { final TableSpec tableSpec = table.getTableSpec(); final String getPartitionByJavaScript = tableSpec.getPartitionByJavaScript(); for (TableInput inputFile : table.getFiles()) { final RecordProcessor processor = inputFile.getRecordProcessor(); for (Path path : inputFile.getPaths()) { builder.addInput(path, inputFile.getFormat(), new MapOnlyMapper<ITuple, NullWritable, Text, NullWritable>() { final int nSamples = (int) (sampleSize / nSplits); final String[] samples = new String[nSamples]; CounterInterface counterInterface; long recordCounter = 0; JavascriptEngine jsEngine = null; @Override protected void setup(Context context, MultipleOutputsCollector coll) throws IOException, InterruptedException { counterInterface = new CounterInterface(context); // Initialize JavaScript engine if needed if (getPartitionByJavaScript != null) { try { jsEngine = new JavascriptEngine(getPartitionByJavaScript); } catch (Throwable e) { throw new RuntimeException(e); }//from w ww .ja va2s . c o m } } ; // Collect Tuples with decreasing probability // (http://en.wikipedia.org/wiki/Reservoir_sampling) protected void map(ITuple key, NullWritable value, Context context) throws IOException, InterruptedException { ITuple uTuple; try { uTuple = processor.process(key, key.getSchema().getName(), counterInterface); } catch (Throwable e) { throw new RuntimeException(e); } if (uTuple == null) { // user may have filtered the record return; } long reservoirIndex; if (recordCounter < nSamples) { reservoirIndex = recordCounter; } else { reservoirIndex = (long) (Math.random() * recordCounter); } if (reservoirIndex < nSamples) { String pkey = null; try { pkey = TablespaceGenerator.getPartitionByKey(uTuple, tableSpec, jsEngine); } catch (Throwable e) { throw new RuntimeException("Error when determining partition key.", e); } samples[(int) reservoirIndex] = pkey; } recordCounter++; } // Write the in-memory sampled Tuples protected void cleanup(Context context, MultipleOutputsCollector coll) throws IOException, InterruptedException { Text key = new Text(); for (String keyStr : samples) { if (keyStr != null) { key.set(keyStr); context.write(key, NullWritable.get()); } } } }, inputFile.getSpecificHadoopInputFormatContext()); } } } // Set output path Path outReservoirPath = new Path(outputPath + "-reservoir"); builder.setOutput(outReservoirPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class, NullWritable.class); builder.setJarByClass(callingClass); try { Job job = null; job = builder.createJob(); if (!job.waitForCompletion(true)) { throw new TupleSamplerException("Reservoir Sampling failed!"); } } catch (Exception e) { throw new TupleSamplerException("Error creating or launching the sampling job.", e); } finally { try { builder.cleanUpInstanceFiles(); } catch (IOException e) { throw new TupleSamplerException("Error cleaning up the sampling job.", e); } } long retrievedSamples = 0; try { FileSystem outFs = outReservoirPath.getFileSystem(hadoopConf); if (outFs.listStatus(outReservoirPath) == null) { throw new IOException("Output folder not created: the Job failed!"); } retrievedSamples = 0; // Instantiate the writer we will write samples to 
SequenceFile.Writer writer = new SequenceFile.Writer(outFs, hadoopConf, outputPath, Text.class, NullWritable.class); // Aggregate the output into a single file for being consistent with the other sampling methods for (FileStatus fileStatus : outFs.listStatus(outReservoirPath)) { Path thisPath = fileStatus.getPath(); if (thisPath.getName().startsWith("part-m-")) { SequenceFile.Reader reader = new SequenceFile.Reader(outFs, thisPath, hadoopConf); Text key = new Text(); while (reader.next(key)) { writer.append(key, NullWritable.get()); retrievedSamples++; } reader.close(); } } writer.close(); outFs.delete(outReservoirPath, true); } catch (IOException e) { throw new TupleSamplerException("Error consolidating the sample job results into one file.", e); } return retrievedSamples; }
From source file:com.splunk.shuttl.archiver.endtoend.ArchiverEndToEndTest.java
License:Apache License
private void deleteArchivingTmpPath(FileSystem hadoopFileSystem) {
    try {
        URI configuredTmp = archiveConfiguration.getTmpDirectory();
        hadoopFileSystem.delete(new Path(configuredTmp), true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.splunk.shuttl.archiver.endtoend.ArchiverEndToEndTest.java
License:Apache License
private void deleteArchivingRoot(FileSystem hadoopFileSystem) {
    try {
        URI configuredRoot = archiveConfiguration.getArchivingRoot();
        hadoopFileSystem.delete(new Path(configuredRoot), true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.splunk.shuttl.archiver.filesystem.hadoop.HadoopArchiveFileSystemSlowTest.java
License:Apache License
@Test(groups = { "end-to-end" }) @Parameters(value = { "hadoop.host", "hadoop.port" }) public void rename_dirWithMultipleLevelsOfNonExistingFiles_renamesDirectory(String hadoopHost, String hadoopPort) throws IOException { FileSystem hadoopFileSystem = TUtilsFunctional.getHadoopFileSystem(hadoopHost, hadoopPort); String simpleClassName = getClass().getSimpleName(); Path path = new Path(simpleClassName + "/1/foo/dir/").makeQualified(hadoopFileSystem); Path otherRoot = new Path(simpleClassName + "/2/foo/dir").makeQualified(hadoopFileSystem); HadoopArchiveFileSystem realFileStructure = new HadoopArchiveFileSystem(hadoopFileSystem); try {//from ww w .j av a2 s . co m hadoopFileSystem.mkdirs(path); assertTrue(hadoopFileSystem.exists(path)); hadoopFileSystem.delete(otherRoot, true); assertFalse(hadoopFileSystem.exists(otherRoot)); // Test realFileStructure.rename(path.toUri().getPath(), otherRoot.toUri().getPath()); assertTrue(hadoopFileSystem.exists(otherRoot)); assertFalse(hadoopFileSystem.exists(path)); } finally { hadoopFileSystem.delete(new Path("/1"), true); hadoopFileSystem.delete(new Path("/2"), true); } }
From source file:com.splunk.shuttl.integration.hadoop.hbase.JobRunner.java
License:Apache License
/**
 * @throws IOException
 */
private void DeleteOutputPathIfExists(FileSystem fs, Path outputPath) throws IOException {
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTarget.java
License:Apache License
boolean validateHadoopDir(String configName, String dirPathTemplate, List<ConfigIssue> issues) {
    boolean ok;
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_40));
        ok = false;
    } else {
        int firstEL = dirPathTemplate.indexOf("$");
        if (firstEL > -1) {
            int lastDir = dirPathTemplate.lastIndexOf("/", firstEL);
            dirPathTemplate = dirPathTemplate.substring(0, lastDir);
        }
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            Path dir = new Path(dirPathTemplate);
            FileSystem fs = getFileSystemForInitDestroy();
            if (!fs.exists(dir)) {
                try {
                    if (fs.mkdirs(dir)) {
                        ok = true;
                    } else {
                        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                                Errors.HADOOPFS_41));
                        ok = false;
                    }
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                            Errors.HADOOPFS_42, ex.toString()));
                    ok = false;
                }
            } else {
                try {
                    Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                    fs.create(dummy).close();
                    fs.delete(dummy, false);
                    ok = true;
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                            Errors.HADOOPFS_43, ex.toString()));
                    ok = false;
                }
            }
        } catch (Exception ex) {
            LOG.info("Validation Error: " + Errors.HADOOPFS_44.getMessage(), ex.toString(), ex);
            issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_44,
                    ex.toString()));
            ok = false;
        }
    }
    return ok;
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTargetConfigBean.java
License:Apache License
private boolean validateHadoopDir(final Stage.Context context, final String configName,
        final String configGroup, String dirPathTemplate, final List<Stage.ConfigIssue> issues) {
    final AtomicBoolean ok = new AtomicBoolean(true);
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_40));
        ok.set(false);
    } else {
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            final Path dir = new Path(dirPathTemplate);
            final FileSystem fs = getFileSystemForInitDestroy();
            getUGI().doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    if (!fs.exists(dir)) {
                        try {
                            if (fs.mkdirs(dir)) {
                                ok.set(true);
                            } else {
                                issues.add(context.createConfigIssue(configGroup, configName,
                                        Errors.HADOOPFS_41));
                                ok.set(false);
                            }
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_42,
                                    ex.toString()));
                            ok.set(false);
                        }
                    } else {
                        try {
                            Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                            fs.create(dummy).close();
                            fs.delete(dummy, false);
                            ok.set(true);
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_43,
                                    ex.toString()));
                            ok.set(false);
                        }
                    }
                    return null;
                }
            });
        } catch (Exception ex) {
            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_44, ex.toString()));
            ok.set(false);
        }
    }
    return ok.get();
}