Example usage for org.apache.hadoop.fs FileSystem delete

Introduction

This page collects usage examples of org.apache.hadoop.fs.FileSystem#delete(Path, boolean), taken from the source files listed below.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Document

Delete a file or directory. If f refers to a directory, recursive must be true to remove the directory together with its contents; deleting a non-empty directory with recursive set to false results in an IOException. Returns true if the delete succeeded and false otherwise.
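
A minimal, self-contained sketch of a typical call is shown below as orientation before the project examples; the class name and the path are illustrative assumptions rather than code from any of the projects on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemDeleteExample {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        // Hypothetical output directory; replace with a real HDFS or local path.
        Path target = new Path("/tmp/example-output");

        // Resolve the FileSystem instance that owns this path (HDFS, local, ...).
        FileSystem fs = target.getFileSystem(conf);

        if (fs.exists(target)) {
            // recursive = true deletes a directory together with all of its contents;
            // with recursive = false, a non-empty directory cannot be deleted.
            boolean deleted = fs.delete(target, true);
            System.out.println("Deleted " + target + ": " + deleted);
        }
    }
}

The examples that follow use the same pattern both for cleaning up whole temporary directories (recursive set to true) and for removing single marker files (recursive set to false).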

Usage

From source file:com.splout.db.hadoop.engine.SploutSQLProxyOutputFormat.java

License:Apache License

@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    long waitTimeHeartBeater = context.getConfiguration().getLong(HeartBeater.WAIT_TIME_CONF, 5000);
    heartBeater = new HeartBeater(context, waitTimeHeartBeater);
    heartBeater.needHeartBeat();
    conf = context.getConfiguration();
    this.context = context;

    outputFormat.setConf(context.getConfiguration());

    return new RecordWriter<ITuple, NullWritable>() {

        // Temporary and permanent Paths for properly writing Hadoop output files
        private Map<Integer, Path> permPool = new HashMap<Integer, Path>();
        private Map<Integer, Path> tempPool = new HashMap<Integer, Path>();

        private void initSql(int partition) throws IOException, InterruptedException {
            // Final HDFS location of the generated partition file. The file is first
            // written to a temporary folder in HDFS and then committed by the
            // OutputCommitter to its proper location.
            FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(
                    SploutSQLProxyOutputFormat.this.context);
            Path perm = new Path(committer.getWorkPath(), partition + ".db");
            FileSystem fs = perm.getFileSystem(conf);

            // Build a task-unique name that contains the actual index output name
            // to make debugging simpler.
            // Note: if JVM reuse is enabled, the sequence number is not reset for a
            // new task running in the same JVM.
            Path temp = conf.getLocalPath("mapred.local.dir",
                    "splout_task_" + SploutSQLProxyOutputFormat.this.context.getTaskAttemptID() + '.'
                            + FILE_SEQUENCE.incrementAndGet());

            FileSystem localFileSystem = FileSystem.getLocal(conf);
            if (localFileSystem.exists(temp)) {
                localFileSystem.delete(temp, true);
            }
            localFileSystem.mkdirs(temp);

            Path local = fs.startLocalOutput(perm, new Path(temp, partition + ".db"));

            //
            permPool.put(partition, perm);
            tempPool.put(partition, new Path(temp, partition + ".db"));

            outputFormat.initPartition(partition, local);
        }

        @Override
        public void close(TaskAttemptContext ctx) throws IOException, InterruptedException {
            FileSystem fs = FileSystem.get(ctx.getConfiguration());
            try {
                if (ctx != null) {
                    heartBeater.setProgress(ctx);
                }
                outputFormat.close();
                for (Map.Entry<Integer, Path> entry : permPool.entrySet()) {
                    // Hadoop - completeLocalOutput()
                    fs.completeLocalOutput(entry.getValue(), tempPool.get(entry.getKey()));
                }
            } finally { // in any case, destroy the HeartBeater
                heartBeater.cancelHeartBeat();
            }
        }

        @Override
        public void write(ITuple tuple, NullWritable ignore) throws IOException, InterruptedException {
            int partition = (Integer) tuple.get(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD);
            if (tempPool.get(partition) == null) {
                initSql(partition);
            }
            outputFormat.write(tuple);
        }

    };
}

From source file:com.splout.db.hadoop.TablespaceGenerator.java

License:Apache License

/**
 * Samples the input, if needed.
 */
protected PartitionMap sample(int nPartitions, Configuration conf, TupleSampler.SamplingType samplingType,
        TupleSampler.SamplingOptions samplingOptions) throws TupleSamplerException, IOException {

    FileSystem fileSystem = outputPath.getFileSystem(conf);

    // Number of records to sample
    long recordsToSample = conf.getLong("splout.sampling.records.to.sample", 100000);

    // The sampler will generate a file with samples to use to create the
    // partition map
    Path sampledInput = new Path(outputPath, OUT_SAMPLED_INPUT);
    Path sampledInputSorted = new Path(outputPath, OUT_SAMPLED_INPUT_SORTED);
    TupleSampler sampler = new TupleSampler(samplingType, samplingOptions, callingClass);
    long retrivedSamples = sampler.sample(tablespace, conf, recordsToSample, sampledInput);

    // 1.1 Sorting sampled keys on disk
    fileSystem.delete(sampledInputSorted, true);
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(fileSystem, Text.class, NullWritable.class, conf);
    sorter.sort(sampledInput, sampledInputSorted);

    // Start the reader
    @SuppressWarnings("deprecation")
    final SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, sampledInputSorted, conf);

    Log.info(retrivedSamples + " total keys sampled.");

    /*
     * 2: Calculate partition map
     */
    Nextable nextable = new Nextable() {
        @Override
        public boolean next(Writable writable) throws IOException {
            return reader.next(writable);
        }
    };
    List<PartitionEntry> partitionEntries = calculatePartitions(nPartitions, retrivedSamples, nextable);

    reader.close();
    fileSystem.delete(sampledInput, true);
    fileSystem.delete(sampledInputSorted, true);

    // 2.2 Create the partition map
    return new PartitionMap(partitionEntries);
}

From source file:com.splout.db.hadoop.TupleSampler.java

License:Apache License

@SuppressWarnings("deprecation")
private long fullScanSampling(TablespaceSpec tablespace, final long sampleSize, Configuration hadoopConf,
        Path outputPath, final int nSplits) throws TupleSamplerException {

    MapOnlyJobBuilder builder = new MapOnlyJobBuilder(hadoopConf, "Reservoir Sampling to path " + outputPath);

    for (Table table : tablespace.getPartitionedTables()) {
        final TableSpec tableSpec = table.getTableSpec();
        final String getPartitionByJavaScript = tableSpec.getPartitionByJavaScript();
        for (TableInput inputFile : table.getFiles()) {
            final RecordProcessor processor = inputFile.getRecordProcessor();
            for (Path path : inputFile.getPaths()) {
                builder.addInput(path, inputFile.getFormat(),
                        new MapOnlyMapper<ITuple, NullWritable, Text, NullWritable>() {

                            final int nSamples = (int) (sampleSize / nSplits);
                            final String[] samples = new String[nSamples];

                            CounterInterface counterInterface;
                            long recordCounter = 0;

                            JavascriptEngine jsEngine = null;

                            @Override
                            protected void setup(Context context, MultipleOutputsCollector coll)
                                    throws IOException, InterruptedException {
                                counterInterface = new CounterInterface(context);
                                // Initialize JavaScript engine if needed
                                if (getPartitionByJavaScript != null) {
                                    try {
                                        jsEngine = new JavascriptEngine(getPartitionByJavaScript);
                                    } catch (Throwable e) {
                                        throw new RuntimeException(e);
                                    }
                                }
                            }

                            // Collect Tuples with decreasing probability
                            // (http://en.wikipedia.org/wiki/Reservoir_sampling)
                            protected void map(ITuple key, NullWritable value, Context context)
                                    throws IOException, InterruptedException {
                                ITuple uTuple;
                                try {
                                    uTuple = processor.process(key, key.getSchema().getName(),
                                            counterInterface);
                                } catch (Throwable e) {
                                    throw new RuntimeException(e);
                                }
                                if (uTuple == null) { // user may have filtered the record
                                    return;
                                }

                                long reservoirIndex;
                                if (recordCounter < nSamples) {
                                    reservoirIndex = recordCounter;
                                } else {
                                    reservoirIndex = (long) (Math.random() * recordCounter);
                                }

                                if (reservoirIndex < nSamples) {
                                    String pkey = null;
                                    try {
                                        pkey = TablespaceGenerator.getPartitionByKey(uTuple, tableSpec,
                                                jsEngine);
                                    } catch (Throwable e) {
                                        throw new RuntimeException("Error when determining partition key.", e);
                                    }
                                    samples[(int) reservoirIndex] = pkey;
                                }

                                recordCounter++;
                            }

                            // Write the in-memory sampled Tuples
                            protected void cleanup(Context context, MultipleOutputsCollector coll)
                                    throws IOException, InterruptedException {
                                Text key = new Text();
                                for (String keyStr : samples) {
                                    if (keyStr != null) {
                                        key.set(keyStr);
                                        context.write(key, NullWritable.get());
                                    }
                                }
                            }
                        }, inputFile.getSpecificHadoopInputFormatContext());
            }
        }
    }
    // Set output path
    Path outReservoirPath = new Path(outputPath + "-reservoir");
    builder.setOutput(outReservoirPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class,
            NullWritable.class);
    builder.setJarByClass(callingClass);

    try {
        Job job = builder.createJob();

        if (!job.waitForCompletion(true)) {
            throw new TupleSamplerException("Reservoir Sampling failed!");
        }
    } catch (Exception e) {
        throw new TupleSamplerException("Error creating or launching the sampling job.", e);
    } finally {
        try {
            builder.cleanUpInstanceFiles();
        } catch (IOException e) {
            throw new TupleSamplerException("Error cleaning up the sampling job.", e);
        }
    }

    long retrievedSamples = 0;
    try {
        FileSystem outFs = outReservoirPath.getFileSystem(hadoopConf);
        if (outFs.listStatus(outReservoirPath) == null) {
            throw new IOException("Output folder not created: the Job failed!");
        }

        retrievedSamples = 0;
        // Instantiate the writer we will write samples to
        SequenceFile.Writer writer = new SequenceFile.Writer(outFs, hadoopConf, outputPath, Text.class,
                NullWritable.class);

        // Aggregate the output into a single file for being consistent with the other sampling methods
        for (FileStatus fileStatus : outFs.listStatus(outReservoirPath)) {
            Path thisPath = fileStatus.getPath();
            if (thisPath.getName().startsWith("part-m-")) {
                SequenceFile.Reader reader = new SequenceFile.Reader(outFs, thisPath, hadoopConf);
                Text key = new Text();
                while (reader.next(key)) {
                    writer.append(key, NullWritable.get());
                    retrievedSamples++;
                }
                reader.close();
            }
        }

        writer.close();
        outFs.delete(outReservoirPath, true);
    } catch (IOException e) {
        throw new TupleSamplerException("Error consolidating the sample job results into one file.", e);
    }

    return retrievedSamples;
}

From source file:com.splunk.shuttl.archiver.endtoend.ArchiverEndToEndTest.java

License:Apache License

private void deleteArchivingTmpPath(FileSystem hadoopFileSystem) {
    try {
        URI configuredTmp = archiveConfiguration.getTmpDirectory();
        hadoopFileSystem.delete(new Path(configuredTmp), true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.splunk.shuttl.archiver.endtoend.ArchiverEndToEndTest.java

License:Apache License

private void deleteArchivingRoot(FileSystem hadoopFileSystem) {
    try {
        URI configuredRoot = archiveConfiguration.getArchivingRoot();
        hadoopFileSystem.delete(new Path(configuredRoot), true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.splunk.shuttl.archiver.filesystem.hadoop.HadoopArchiveFileSystemSlowTest.java

License:Apache License

@Test(groups = { "end-to-end" })
@Parameters(value = { "hadoop.host", "hadoop.port" })
public void rename_dirWithMultipleLevelsOfNonExistingFiles_renamesDirectory(String hadoopHost,
        String hadoopPort) throws IOException {
    FileSystem hadoopFileSystem = TUtilsFunctional.getHadoopFileSystem(hadoopHost, hadoopPort);
    String simpleClassName = getClass().getSimpleName();
    Path path = new Path(simpleClassName + "/1/foo/dir/").makeQualified(hadoopFileSystem);
    Path otherRoot = new Path(simpleClassName + "/2/foo/dir").makeQualified(hadoopFileSystem);

    HadoopArchiveFileSystem realFileStructure = new HadoopArchiveFileSystem(hadoopFileSystem);
    try {
        hadoopFileSystem.mkdirs(path);
        assertTrue(hadoopFileSystem.exists(path));
        hadoopFileSystem.delete(otherRoot, true);
        assertFalse(hadoopFileSystem.exists(otherRoot));

        // Test
        realFileStructure.rename(path.toUri().getPath(), otherRoot.toUri().getPath());

        assertTrue(hadoopFileSystem.exists(otherRoot));
        assertFalse(hadoopFileSystem.exists(path));
    } finally {
        hadoopFileSystem.delete(new Path("/1"), true);
        hadoopFileSystem.delete(new Path("/2"), true);
    }
}

From source file:com.splunk.shuttl.integration.hadoop.hbase.JobRunner.java

License:Apache License

/**
 * @throws IOException
 * 
 */
private void DeleteOutputPathIfExists(FileSystem fs, Path outputPath) throws IOException {
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
}

From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTarget.java

License:Apache License

boolean validateHadoopDir(String configName, String dirPathTemplate, List<ConfigIssue> issues) {
    boolean ok;
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_40));
        ok = false;
    } else {
        int firstEL = dirPathTemplate.indexOf("$");
        if (firstEL > -1) {
            int lastDir = dirPathTemplate.lastIndexOf("/", firstEL);
            dirPathTemplate = dirPathTemplate.substring(0, lastDir);
        }
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            Path dir = new Path(dirPathTemplate);
            FileSystem fs = getFileSystemForInitDestroy();
            if (!fs.exists(dir)) {
                try {
                    if (fs.mkdirs(dir)) {
                        ok = true;
                    } else {
                        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                                Errors.HADOOPFS_41));
                        ok = false;
                    }
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                            Errors.HADOOPFS_42, ex.toString()));
                    ok = false;
                }
            } else {
                try {
                    Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                    fs.create(dummy).close();
                    fs.delete(dummy, false);
                    ok = true;
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                            Errors.HADOOPFS_43, ex.toString()));
                    ok = false;
                }
            }
        } catch (Exception ex) {
            LOG.info("Validation Error: " + Errors.HADOOPFS_44.getMessage(), ex.toString(), ex);
            issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_44,
                    ex.toString()));
            ok = false;
        }
    }
    return ok;
}

From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTargetConfigBean.java

License:Apache License

private boolean validateHadoopDir(final Stage.Context context, final String configName,
        final String configGroup, String dirPathTemplate, final List<Stage.ConfigIssue> issues) {
    final AtomicBoolean ok = new AtomicBoolean(true);
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_40));
        ok.set(false);
    } else {
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            final Path dir = new Path(dirPathTemplate);
            final FileSystem fs = getFileSystemForInitDestroy();
            getUGI().doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    if (!fs.exists(dir)) {
                        try {
                            if (fs.mkdirs(dir)) {
                                ok.set(true);
                            } else {
                                issues.add(
                                        context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_41));
                                ok.set(false);
                            }
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_42,
                                    ex.toString()));
                            ok.set(false);
                        }
                    } else {
                        try {
                            Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                            fs.create(dummy).close();
                            fs.delete(dummy, false);
                            ok.set(true);
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_43,
                                    ex.toString()));
                            ok.set(false);
                        }
                    }
                    return null;
                }
            });
        } catch (Exception ex) {
            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_44, ex.toString()));
            ok.set(false);
        }
    }
    return ok.get();
}