Example usage for org.apache.hadoop.fs FileSystem delete

List of usage examples for org.apache.hadoop.fs FileSystem delete

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem delete.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Source Link

Document

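Delete a file or directory. When the path is a non-empty directory, `recursive` must be true for it to be deleted together with its contents; for a plain file the flag may be either value. Returns true if the delete succeeded.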

Usage

From source file:com.ibm.stocator.fs.swift2d.systemtests.StreamingSwiftTest.java

License:Open Source License

/**
 * Streams generated CSV lines into five objects, one after another, and
 * deletes the last written object when done.
 *
 * The test is skipped when the {@code fs.swift2d.test.uri} property is not set.
 * Each object is written until the {@code objectExpired} field becomes true;
 * presumably {@code createObjectTimer} arms a timer that sets it - confirm
 * against that helper.
 *
 * @throws Exception on URI, filesystem initialisation or cleanup failures
 */
@Test
public void accessObjectWithSpaceTest() throws Exception {
    FileSystem fs = new ObjectStoreFileSystem();
    Configuration conf = new Configuration();
    String uriString = conf.get("fs.swift2d.test.uri");
    Assume.assumeNotNull(uriString);
    String scheme = "swift2d";
    String objectName = "/a/testObject.txt";
    URI publicContainerURI = new URI(uriString + objectName);
    // initialize file system
    fs.initialize(publicContainerURI, conf);
    // Never assigned: the assertion in the catch block therefore always fails,
    // which turns any exception below into a test failure.
    FileStatus objectFS = null;
    Path f = null;
    // Declared outside the try so the finally block can close a stream that
    // was left open by a failure in the middle of the write loop.
    FSDataOutputStream fsDataOutputStream = null;
    try {
        for (int i = 0; i < 5; i++) {
            String currObjName = objectName + String.valueOf(i);
            // create timer
            createObjectTimer(90000.0, currObjName);
            publicContainerURI = new URI(scheme + "://" + getHost(URI.create(uriString)) + "/" + currObjName);
            f = new Path(publicContainerURI.toString());
            fsDataOutputStream = fs.create(f);
            while (!objectExpired) {
                // generates input
                String line = "\"2017-7-15 3:6:43\"," + String.valueOf(Math.random()) + ",6,18" + "\n";

                // writes to output
                fsDataOutputStream.write(line.getBytes());

                // simulate delays in input
                Thread.sleep(50);
            }
            fsDataOutputStream.close();
            fsDataOutputStream = null;
            objectExpired = false;
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.assertNotNull("Unable to access public object.", objectFS);
    } finally {
        if (fsDataOutputStream != null) {
            try {
                fsDataOutputStream.close();
            } catch (Exception ignored) {
                // Best-effort close on the failure path; the original failure
                // has already been reported above and must not be masked.
            }
        }
        // f is still null when the failure happened before the first object
        // was created; there is nothing to delete in that case.
        if (f != null) {
            fs.delete(f, true);
        }
    }
}

From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java

License:Open Source License

/**
 * Deletes a path and asserts both that it existed beforehand and that it is
 * gone afterwards.
 *
 * @param fs filesystem to operate on
 * @param file path to delete
 * @param recursive passed straight through to {@code FileSystem.delete}
 * @throws IOException IO problems
 */
public static void assertDeleted(FileSystem fs, Path file, boolean recursive) throws IOException {
    // Precondition: the path must be present before the delete is attempted.
    assertPathExists(fs, "about to be deleted file", file);
    // The boolean result of delete() is not inspected; the assertion below is the verdict.
    fs.delete(file, recursive);
    assertPathDoesNotExist(fs, "Deleted file", file);
}

From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java

License:Open Source License

/**
 * Touch a file: whatever currently sits at the path is deleted (recursively)
 * and the file is then written through {@code writeTextFile} with a null text
 * argument.
 * NOTE(review): the earlier summary claimed this "fails if it is already
 * there", but the path is deleted first. Presumably the trailing {@code false}
 * is an overwrite flag, in which case a failure would only occur if the delete
 * did not actually remove the path - confirm against {@code writeTextFile}.
 * @param fs filesystem
 * @param path path
 * @throws IOException IO problems
 */
public static void touch(FileSystem fs, Path path) throws IOException {
    fs.delete(path, true);
    writeTextFile(fs, path, null, false);
}

From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java

License:Open Source License

/**
 * Best-effort recursive delete of a path, intended for test teardown.
 *
 * The action is always recorded through {@code noteAction}. Any exception
 * raised while qualifying or deleting the path is logged and swallowed, so
 * this method never throws because of a failed delete.
 *
 * @param action label recorded via {@code noteAction} and used in the error log
 * @param fileSystem filesystem to delete from; when null only the action is noted
 * @param cleanupPath path to delete, qualified against {@code fileSystem}
 */
public static void cleanup(String action, FileSystem fileSystem, String cleanupPath) {
    noteAction(action);
    if (fileSystem == null) {
        return;
    }
    try {
        Path qualified = new Path(cleanupPath).makeQualified(fileSystem);
        fileSystem.delete(qualified, true);
    } catch (Exception e) {
        LOG.error("Error deleting in " + action + " - " + cleanupPath + ": " + e, e);
    }
}

From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java

License:Open Source License

/**
 * Deletes all files in a container/*from  w ww .j  a v a 2s . com*/
 * @param fileSystem
 * @param BaseUri
 * @throws IOException
   */
public static void cleanupAllFiles(FileSystem fileSystem, String BaseUri) throws IOException {
    try {
        if (fileSystem != null) {
            // Clean up generated files
            Path rootDir = new Path(BaseUri);
            FileStatus[] files = fileSystem.listStatus(rootDir);
            for (FileStatus file : files) {
                fileSystem.delete(file.getPath(), false);
            }
        }
    } catch (Exception e) {
        LOG.error("Error in deleting all files.");
    }
}

From source file:com.iflytek.spider.crawl.CrawlDb.java

License:Apache License

/**
 * Merges the fetch and parse data of the given segments into the crawl
 * database and installs the result as the new current database.
 *
 * A lock file is created under {@code crawlDb} before the job is configured.
 * If the merge job throws or reports failure, the lock is removed and the
 * job's output directory is deleted before the error is propagated, so a
 * failed job is never installed.
 *
 * @param crawlDb crawl database directory
 * @param segments segment directories whose fetch/parse sub-directories are
 *          added as job input when they exist
 * @param additionsAllowed stored in the job configuration under
 *          {@code CRAWLDB_ADDITIONS_ALLOWED}
 * @param force passed to {@code LockUtil.createLockFile}; presumably allows
 *          taking over an existing lock - confirm against LockUtil
 * @throws IOException on filesystem errors or when the merge job fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public void update(Path crawlDb, Path[] segments, boolean additionsAllowed, boolean force)
        throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem fs = FileSystem.get(getConf());
    Path lock = new Path(crawlDb, LOCK_NAME);
    LockUtil.createLockFile(fs, lock, force);
    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb update: starting");
        LOG.info("CrawlDb update: db: " + crawlDb);
        LOG.info("CrawlDb update: segments: " + Arrays.asList(segments));
        LOG.info("CrawlDb update: additions allowed: " + additionsAllowed);
    }

    Job job = CrawlDb.createJob(getConf(), crawlDb);
    job.getConfiguration().setBoolean(CRAWLDB_ADDITIONS_ALLOWED, additionsAllowed);
    for (int i = 0; i < segments.length; i++) {
        Path fetch = new Path(segments[i], CrawlDatum.FETCH_DIR_NAME);
        Path parse = new Path(segments[i], CrawlDatum.PARSE_DIR_NAME);
        if (fs.exists(fetch)) {
            FileInputFormat.addInputPath(job, fetch);
        }
        if (fs.exists(parse)) {
            FileInputFormat.addInputPath(job, parse);
        } else {
            LOG.info(" - skipping invalid segment " + segments[i]);
        }
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb update: Merging segment data into db.");
    }

    // waitForCompletion() reports job failure through its return value, not
    // only through exceptions, so both paths share one cleanup.
    boolean merged = false;
    try {
        merged = job.waitForCompletion(true);
    } finally {
        if (!merged) {
            LockUtil.removeLockFile(fs, lock);
            Path outPath = FileOutputFormat.getOutputPath(job);
            if (fs.exists(outPath)) {
                fs.delete(outPath, true);
            }
        }
    }
    if (!merged) {
        // Installing the output of a failed job would replace the current db
        // with missing or partial data.
        throw new IOException("CrawlDb update: job failed for " + crawlDb);
    }

    CrawlDb.install(job, crawlDb);
    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb update: done");
    }
}

From source file:com.iflytek.spider.crawl.CrawlDb.java

License:Apache License

/**
 * Installs a job's output directory as the current crawl database.
 *
 * The existing current directory, if any, is first moved aside to
 * {@code old}; the job output is then renamed to current, {@code old} is
 * deleted and the lock file is removed. The backup is only deleted after
 * both renames have been confirmed, so a failed rename leaves the previous
 * data on disk (under {@code old}) instead of discarding it. On failure the
 * lock file is left in place.
 *
 * @param job finished job whose output path holds the new database
 * @param crawlDb crawl database directory
 * @throws IOException on filesystem errors or when a rename is refused
 */
public static void install(Job job, Path crawlDb) throws IOException {
    Path newCrawlDb = FileOutputFormat.getOutputPath(job);
    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path old = new Path(crawlDb, "old");
    Path current = new Path(crawlDb, CURRENT_NAME);
    if (fs.exists(current)) {
        if (fs.exists(old)) {
            fs.delete(old, true);
        }
        // rename() signals failure by returning false rather than throwing.
        if (!fs.rename(current, old)) {
            throw new IOException("Could not rename " + current + " to " + old);
        }
    }
    fs.mkdirs(crawlDb);
    if (!fs.rename(newCrawlDb, current)) {
        // Keep 'old' untouched: it is now the only copy of the previous db.
        throw new IOException("Could not rename " + newCrawlDb + " to " + current);
    }
    if (fs.exists(old)) {
        fs.delete(old, true);
    }
    Path lock = new Path(crawlDb, LOCK_NAME);
    LockUtil.removeLockFile(fs, lock);
}

From source file:com.iflytek.spider.crawl.GeneratorSmart.java

License:Apache License

/**
 * Generate fetchlists in one or more segments. Whether to filter URLs or not
 * is read from the crawl.generate.filter property in the configuration files.
 * If the property is not found, the URLs are filtered. Same for the
 * normalisation./*from  ww  w  . j a  v a 2s  .  co  m*/
 * 
 * @param dbDir
 *          Crawl database directory
 * @param segments
 *          Segments directory
 * @param numLists
 *          Number of reduce tasks
 * @param curTime
 *          Current time in milliseconds
 * 
 * @return Path to generated segment or null if no entries were selected
 * 
 * @throws IOException
 *           When an I/O error occurs
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public Path[] generate(Path dbDir, Path segments, int numLists, long curTime, boolean force)
        throws IOException, InterruptedException, ClassNotFoundException {
    //getConf().set("mapred.temp.dir", "d:/tmp");
    Path tempDir = new Path(
            getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

    Path lock = new Path(dbDir, CrawlDb.LOCK_NAME);
    FileSystem fs = FileSystem.get(getConf());
    LockUtil.createLockFile(fs, lock, force);

    LOG.info("Generator: Selecting best-scoring urls due for fetch.");
    LOG.info("Generator: starting");

    Job job = AvroJob.getAvroJob(getConf());
    if (numLists == -1) { // for politeness make
        numLists = job.getNumReduceTasks(); // a partition per fetch task
    }
    if ("local".equals(job.getConfiguration().get("mapred.job.tracker")) && numLists != 1) {
        // override
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        numLists = 1;
    }
    LOG.info("Generator: with " + numLists + " partition.");
    job.getConfiguration().setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.getConfiguration().setLong(Spider.GENERATE_TIME_KEY, generateTime);

    FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
    job.setInputFormatClass(AvroPairInputFormat.class);

    job.setMapperClass(SelectorMapper.class);
    job.setReducerClass(SelectorReducer.class);

    FileOutputFormat.setOutputPath(job, tempDir);
    //job.setOutputFormatClass(AvroPairOutputFormat.class);
    job.setOutputFormatClass(GeneratorOutputFormat.class);
    job.setOutputKeyClass(Float.class);
    job.setOutputValueClass(SelectorEntry.class);
    // AvroMultipleOutputs.addNamedOutput(job, "seq",
    // AvroPairOutputFormat.class, Float.class, SelectorEntry.class);
    try {
        job.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }

    // read the subdirectories generated in the temp
    // output and turn them into segments
    List<Path> generatedSegments = new ArrayList<Path>();

    FileStatus[] status = fs.listStatus(tempDir);
    try {
        for (FileStatus stat : status) {
            Path subfetchlist = stat.getPath();
            if (!subfetchlist.getName().startsWith("fetchlist-"))
                continue;
            // start a new partition job for this segment
            Path newSeg = partitionSegment(fs, segments, subfetchlist, numLists);

            fs.createNewFile(new Path(newSeg, "generatored"));
            generatedSegments.add(newSeg);
        }
    } catch (Exception e) {
        LOG.warn("Generator: exception while partitioning segments, exiting ...");
        fs.delete(tempDir, true);
        return null;
    }

    if (generatedSegments.size() == 0) {
        LOG.warn("Generator: 0 records selected for fetching, exiting ...");
        LockUtil.removeLockFile(fs, lock);
        fs.delete(tempDir, true);
        return null;
    }

    if (getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) {
        // update the db from tempDir
        Path tempDir2 = new Path(
                getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

        job = AvroJob.getAvroJob(getConf());
        job.setJobName("generate: updatedb " + dbDir);
        job.getConfiguration().setLong(Spider.GENERATE_TIME_KEY, generateTime);
        for (Path segmpaths : generatedSegments) {
            Path subGenDir = new Path(segmpaths, CrawlDatum.GENERATE_DIR_NAME);
            FileInputFormat.addInputPath(job, subGenDir);
        }
        FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
        job.setInputFormatClass(AvroPairInputFormat.class);
        job.setMapperClass(CrawlDbUpdateMapper.class);
        // job.setReducerClass(CrawlDbUpdater.class);
        job.setOutputFormatClass(AvroMapOutputFormat.class);
        job.setOutputKeyClass(String.class);
        job.setOutputValueClass(CrawlDatum.class);
        FileOutputFormat.setOutputPath(job, tempDir2);
        try {
            job.waitForCompletion(true);
            CrawlDb.install(job, dbDir);
        } catch (IOException e) {
            LockUtil.removeLockFile(fs, lock);
            fs.delete(tempDir, true);
            fs.delete(tempDir2, true);
            throw e;
        }
        fs.delete(tempDir2, true);
    }

    LockUtil.removeLockFile(fs, lock);
    fs.delete(tempDir, true);

    if (LOG.isInfoEnabled()) {
        LOG.info("Generator: done.");
    }
    Path[] patharray = new Path[generatedSegments.size()];
    return generatedSegments.toArray(patharray);
}

From source file:com.iflytek.spider.parse.ParseSegment.java

License:Apache License

/**
 * Re-parses every entry found under the directory named by the first argument.
 *
 * For each listed entry, any existing {@code crawl_parse} and
 * {@code parse_data} sub-directories are deleted recursively before
 * {@code parse} is invoked on the entry. When no arguments are given the
 * usage string is printed to stderr and the JVM exits with status -1.
 *
 * @param args command-line arguments; {@code args[0]} is the directory to list
 * @return 0 once all entries have been processed
 * @throws Exception on filesystem or parse failures
 */
public int run(String[] args) throws Exception {

    final String usage = "Usage: ParseSegment segments";

    if (args.length == 0) {
        System.err.println(usage);
        System.exit(-1);
    }

    FileSystem fs = FileSystem.get(getConf());
    FileStatus[] entries = fs.listStatus(new Path(args[0]));
    for (FileStatus entry : entries) {
        Path segment = entry.getPath();

        // Remove output left over from an earlier parse of this entry.
        Path crawlParse = new Path(segment, "crawl_parse");
        if (fs.exists(crawlParse)) {
            fs.delete(crawlParse, true);
        }
        Path parseData = new Path(segment, "parse_data");
        if (fs.exists(parseData)) {
            fs.delete(parseData, true);
        }

        parse(segment);
    }
    return 0;
}

From source file:com.iflytek.spider.util.FSUtils.java

License:Apache License

/**
 * Replaces the current path with the new path and, if requested, removes the
 * old path. The existing current path is first renamed to
 * {@code current + ".old"}; when removeOld is false it is left there.
 *
 * Both renames are checked: if either is refused an IOException is thrown
 * and the {@code .old} path is not deleted, so the previous data stays
 * recoverable.
 *
 * @param fs The FileSystem.
 * @param current The end path, the one being replaced.
 * @param replacement The path to replace with.
 * @param removeOld True if the renamed-aside {@code .old} path should be deleted.
 *
 * @throws IOException If an error occurs during replacement, including a
 *           rename that the filesystem reports as unsuccessful.
 */
public static void replace(FileSystem fs, Path current, Path replacement, boolean removeOld)
        throws IOException {

    // rename any current path to old
    Path old = new Path(current + ".old");
    // rename() signals failure by returning false rather than throwing.
    if (fs.exists(current) && !fs.rename(current, old)) {
        throw new IOException("Could not rename " + current + " to " + old);
    }

    // rename the new path to current and remove the old path if needed
    if (!fs.rename(replacement, current)) {
        throw new IOException("Could not rename " + replacement + " to " + current);
    }
    if (removeOld && fs.exists(old)) {
        fs.delete(old, true);
    }
}