Example usage for org.apache.hadoop.fs FileSystem delete

Introduction

This page collects example usages of the delete method of org.apache.hadoop.fs.FileSystem.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Source Link

Document

Delete a file.

Usage

From source file:com.ibm.stocator.fs.swift2d.systemtests.StreamingSwiftTest.java

License:Open Source License

/**
 * Streams generated CSV lines into five objects, one after another, and
 * deletes the last written object when done.
 *
 * For each object a timer is started through {@code createObjectTimer}
 * (presumably it sets the {@code objectExpired} field once the given delay
 * has passed - confirm against the helper) and lines are written until
 * {@code objectExpired} becomes true.
 *
 * The test is skipped when {@code fs.swift2d.test.uri} is not configured and
 * fails with "Unable to access public object." when any step throws.
 *
 * @throws Exception if set-up or the final delete fails
 */
@Test
public void accessObjectWithSpaceTest() throws Exception {
    FileSystem fs = new ObjectStoreFileSystem();
    Configuration conf = new Configuration();
    String uriString = conf.get("fs.swift2d.test.uri");
    Assume.assumeNotNull(uriString);
    String scheme = "swift2d";
    String objectName = "/a/testObject.txt";
    URI publicContainerURI = new URI(uriString + objectName);
    // initialize file system
    fs.initialize(publicContainerURI, conf);
    Path f = null;
    try {
        for (int i = 0; i < 5; i++) {
            String currObjName = objectName + String.valueOf(i);
            // create timer
            createObjectTimer(90000.0, currObjName);
            publicContainerURI = new URI(scheme + "://" + getHost(URI.create(uriString)) + "/" + currObjName);
            f = new Path(publicContainerURI.toString());
            FSDataOutputStream fsDataOutputStream = fs.create(f);
            try {
                while (!objectExpired) {
                    // generates input
                    String line = "\"2017-7-15 3:6:43\"," + String.valueOf(Math.random()) + ",6,18" + "\n";
                    byte[] bytes = line.getBytes();

                    // writes to output
                    fsDataOutputStream.write(bytes);

                    // simulate delays in input
                    Thread.sleep(50);
                }
            } finally {
                // close the stream even when a write or the sleep fails
                fsDataOutputStream.close();
            }
            objectExpired = false;
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail("Unable to access public object.");
    } finally {
        // f is still null when the failure happened before the first path was built
        if (f != null) {
            fs.delete(f, true);
        }
    }
}

From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java

License:Open Source License

/**
 * Deletes a path and asserts the before/after state around the call.
 *
 * The path must exist beforehand and must be gone afterwards; the boolean
 * returned by {@code delete} itself is not inspected.
 *
 * @param fs filesystem to operate on
 * @param file path to delete
 * @param recursive passed straight through to {@code FileSystem.delete}
 * @throws IOException IO problems
 */
public static void assertDeleted(FileSystem fs, Path file, boolean recursive) throws IOException {
    final String beforeMessage = "about to be deleted file";
    final String afterMessage = "Deleted file";

    // precondition: there has to be something to delete
    assertPathExists(fs, beforeMessage, file);

    fs.delete(file, recursive);

    // postcondition: the path is no longer visible
    assertPathDoesNotExist(fs, afterMessage, file);
}

From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java

License:Open Source License

/**
 * Touch a file: whatever is at the path is deleted (recursively) first, then
 * the file is written again through {@code writeTextFile} with {@code null}
 * text.
 *
 * The trailing {@code false} passed to {@code writeTextFile} is presumably
 * its overwrite flag - confirm against that helper.
 *
 * @param fs filesystem
 * @param path path
 * @throws IOException IO problems
 */
public static void touch(FileSystem fs, Path path) throws IOException {
    final boolean recursive = true;
    // clear out anything already stored under this path
    fs.delete(path, recursive);
    // recreate the file with no text
    writeTextFile(fs, path, null, false);
}

From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java

License:Open Source License

/**
 * Best-effort recursive delete of a path; never throws.
 *
 * The action is always passed to {@code noteAction}. Nothing else happens
 * when the filesystem is null. Any exception raised while qualifying or
 * deleting the path is logged and swallowed.
 *
 * @param action label used in the log output
 * @param fileSystem filesystem to delete from; may be null
 * @param cleanupPath path to delete
 */
public static void cleanup(String action, FileSystem fileSystem, String cleanupPath) {
    noteAction(action);
    if (fileSystem == null) {
        return;
    }
    try {
        Path target = new Path(cleanupPath).makeQualified(fileSystem);
        fileSystem.delete(target, true);
    } catch (Exception e) {
        LOG.error("Error deleting in " + action + " - " + cleanupPath + ": " + e, e);
    }
}

From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java

License:Open Source License

/**
 * Deletes all files in a container/*from  w ww .j  a v a 2s . com*/
 * @param fileSystem
 * @param BaseUri
 * @throws IOException
   */
public static void cleanupAllFiles(FileSystem fileSystem, String BaseUri) throws IOException {
    try {
        if (fileSystem != null) {
            // Clean up generated files
            Path rootDir = new Path(BaseUri);
            FileStatus[] files = fileSystem.listStatus(rootDir);
            for (FileStatus file : files) {
                fileSystem.delete(file.getPath(), false);
            }
        }
    } catch (Exception e) {
        LOG.error("Error in deleting all files.");
    }
}

From source file:com.iflytek.spider.crawl.CrawlDb.java

License:Apache License

/**
 * Merges the fetch and parse data of the given segments into the crawl db.
 *
 * A lock file is created under the crawl db before the job is set up. When
 * the job throws or reports failure, the lock and any job output are removed
 * and the crawl db is left untouched; otherwise the output is installed with
 * {@link #install(Job, Path)}.
 *
 * @param crawlDb crawl db directory
 * @param segments segments whose fetch/parse directories are used as input
 * @param additionsAllowed stored in the job configuration under
 *        {@code CRAWLDB_ADDITIONS_ALLOWED}
 * @param force passed to {@code LockUtil.createLockFile}
 * @throws IOException on I/O errors, or when the job completes unsuccessfully
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if the job cannot resolve a class
 */
public void update(Path crawlDb, Path[] segments, boolean additionsAllowed, boolean force)
        throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem fs = FileSystem.get(getConf());
    Path lock = new Path(crawlDb, LOCK_NAME);
    LockUtil.createLockFile(fs, lock, force);
    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb update: starting");
        LOG.info("CrawlDb update: db: " + crawlDb);
        LOG.info("CrawlDb update: segments: " + Arrays.asList(segments));
        LOG.info("CrawlDb update: additions allowed: " + additionsAllowed);
    }

    Job job = CrawlDb.createJob(getConf(), crawlDb);
    job.getConfiguration().setBoolean(CRAWLDB_ADDITIONS_ALLOWED, additionsAllowed);
    for (int i = 0; i < segments.length; i++) {
        Path fetch = new Path(segments[i], CrawlDatum.FETCH_DIR_NAME);
        Path parse = new Path(segments[i], CrawlDatum.PARSE_DIR_NAME);
        if (fs.exists(fetch)) {
            FileInputFormat.addInputPath(job, fetch);
        }
        if (fs.exists(parse)) {
            FileInputFormat.addInputPath(job, parse);
        } else {
            LOG.info(" - skipping invalid segment " + segments[i]);
        }
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb update: Merging segment data into db.");
    }
    boolean succeeded;
    try {
        succeeded = job.waitForCompletion(true);
    } catch (IOException e) {
        cleanupFailedUpdate(fs, lock, job);
        throw e;
    } catch (InterruptedException e) {
        cleanupFailedUpdate(fs, lock, job);
        throw e;
    } catch (ClassNotFoundException e) {
        cleanupFailedUpdate(fs, lock, job);
        throw e;
    }
    if (!succeeded) {
        // A failed job must not be installed: install() would rotate the
        // current db away and try to rename missing or partial output into place.
        cleanupFailedUpdate(fs, lock, job);
        throw new IOException("CrawlDb update: job failed, db left unchanged: " + crawlDb);
    }

    CrawlDb.install(job, crawlDb);
    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb update: done");
    }
}

/** Releases the crawl db lock and removes whatever output the failed job left behind. */
private static void cleanupFailedUpdate(FileSystem fs, Path lock, Job job) throws IOException {
    LockUtil.removeLockFile(fs, lock);
    Path outPath = FileOutputFormat.getOutputPath(job);
    if (fs.exists(outPath)) {
        fs.delete(outPath, true);
    }
}

From source file:com.iflytek.spider.crawl.CrawlDb.java

License:Apache License

/**
 * Swaps the output of a finished job in as the current crawl db.
 *
 * An existing current db is first renamed to "old" (replacing any earlier
 * "old"), the job output is renamed to the current name, "old" is deleted,
 * and finally the crawl db lock file is removed.
 *
 * @param job job whose output path holds the new crawl db
 * @param crawlDb crawl db directory
 * @throws IOException on filesystem errors
 */
public static void install(Job job, Path crawlDb) throws IOException {
    Path freshDb = FileOutputFormat.getOutputPath(job);
    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path previous = new Path(crawlDb, "old");
    Path live = new Path(crawlDb, CURRENT_NAME);

    // rotate the live db out of the way, dropping any stale backup first
    if (fs.exists(live)) {
        if (fs.exists(previous)) {
            fs.delete(previous, true);
        }
        fs.rename(live, previous);
    }

    // move the job output into place
    fs.mkdirs(crawlDb);
    fs.rename(freshDb, live);

    // the backup is no longer needed once the new db is in place
    if (fs.exists(previous)) {
        fs.delete(previous, true);
    }

    LockUtil.removeLockFile(fs, new Path(crawlDb, LOCK_NAME));
}

From source file:com.iflytek.spider.crawl.GeneratorSmart.java

License:Apache License

/**
 * Generate fetchlists in one or more segments. Whether to filter URLs or not
 * is read from the crawl.generate.filter property in the configuration files.
 * If the property is not found, the URLs are filtered. Same for the
 * normalisation./*from  ww  w  . j a  v a 2s  .  co  m*/
 * 
 * @param dbDir
 *          Crawl database directory
 * @param segments
 *          Segments directory
 * @param numLists
 *          Number of reduce tasks
 * @param curTime
 *          Current time in milliseconds
 * 
 * @return Path to generated segment or null if no entries were selected
 * 
 * @throws IOException
 *           When an I/O error occurs
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public Path[] generate(Path dbDir, Path segments, int numLists, long curTime, boolean force)
        throws IOException, InterruptedException, ClassNotFoundException {
    //getConf().set("mapred.temp.dir", "d:/tmp");
    Path tempDir = new Path(
            getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

    Path lock = new Path(dbDir, CrawlDb.LOCK_NAME);
    FileSystem fs = FileSystem.get(getConf());
    LockUtil.createLockFile(fs, lock, force);

    LOG.info("Generator: Selecting best-scoring urls due for fetch.");
    LOG.info("Generator: starting");

    Job job = AvroJob.getAvroJob(getConf());
    if (numLists == -1) { // for politeness make
        numLists = job.getNumReduceTasks(); // a partition per fetch task
    }
    if ("local".equals(job.getConfiguration().get("mapred.job.tracker")) && numLists != 1) {
        // override
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        numLists = 1;
    }
    LOG.info("Generator: with " + numLists + " partition.");
    job.getConfiguration().setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.getConfiguration().setLong(Spider.GENERATE_TIME_KEY, generateTime);

    FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
    job.setInputFormatClass(AvroPairInputFormat.class);

    job.setMapperClass(SelectorMapper.class);
    job.setReducerClass(SelectorReducer.class);

    FileOutputFormat.setOutputPath(job, tempDir);
    //job.setOutputFormatClass(AvroPairOutputFormat.class);
    job.setOutputFormatClass(GeneratorOutputFormat.class);
    job.setOutputKeyClass(Float.class);
    job.setOutputValueClass(SelectorEntry.class);
    // AvroMultipleOutputs.addNamedOutput(job, "seq",
    // AvroPairOutputFormat.class, Float.class, SelectorEntry.class);
    try {
        job.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }

    // read the subdirectories generated in the temp
    // output and turn them into segments
    List<Path> generatedSegments = new ArrayList<Path>();

    FileStatus[] status = fs.listStatus(tempDir);
    try {
        for (FileStatus stat : status) {
            Path subfetchlist = stat.getPath();
            if (!subfetchlist.getName().startsWith("fetchlist-"))
                continue;
            // start a new partition job for this segment
            Path newSeg = partitionSegment(fs, segments, subfetchlist, numLists);

            fs.createNewFile(new Path(newSeg, "generatored"));
            generatedSegments.add(newSeg);
        }
    } catch (Exception e) {
        LOG.warn("Generator: exception while partitioning segments, exiting ...");
        fs.delete(tempDir, true);
        return null;
    }

    if (generatedSegments.size() == 0) {
        LOG.warn("Generator: 0 records selected for fetching, exiting ...");
        LockUtil.removeLockFile(fs, lock);
        fs.delete(tempDir, true);
        return null;
    }

    if (getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) {
        // update the db from tempDir
        Path tempDir2 = new Path(
                getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

        job = AvroJob.getAvroJob(getConf());
        job.setJobName("generate: updatedb " + dbDir);
        job.getConfiguration().setLong(Spider.GENERATE_TIME_KEY, generateTime);
        for (Path segmpaths : generatedSegments) {
            Path subGenDir = new Path(segmpaths, CrawlDatum.GENERATE_DIR_NAME);
            FileInputFormat.addInputPath(job, subGenDir);
        }
        FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
        job.setInputFormatClass(AvroPairInputFormat.class);
        job.setMapperClass(CrawlDbUpdateMapper.class);
        // job.setReducerClass(CrawlDbUpdater.class);
        job.setOutputFormatClass(AvroMapOutputFormat.class);
        job.setOutputKeyClass(String.class);
        job.setOutputValueClass(CrawlDatum.class);
        FileOutputFormat.setOutputPath(job, tempDir2);
        try {
            job.waitForCompletion(true);
            CrawlDb.install(job, dbDir);
        } catch (IOException e) {
            LockUtil.removeLockFile(fs, lock);
            fs.delete(tempDir, true);
            fs.delete(tempDir2, true);
            throw e;
        }
        fs.delete(tempDir2, true);
    }

    LockUtil.removeLockFile(fs, lock);
    fs.delete(tempDir, true);

    if (LOG.isInfoEnabled()) {
        LOG.info("Generator: done.");
    }
    Path[] patharray = new Path[generatedSegments.size()];
    return generatedSegments.toArray(patharray);
}

From source file:com.iflytek.spider.parse.ParseSegment.java

License:Apache License

/**
 * Re-parses every entry found under the directory given as the first
 * argument.
 *
 * Existing "crawl_parse" and "parse_data" sub-directories of an entry are
 * deleted recursively before {@code parse} is called on it. With no
 * arguments the usage line is printed to stderr and the JVM exits with -1.
 *
 * @param args command line; args[0] is the directory to list
 * @return 0 once all entries have been handed to {@code parse}
 * @throws Exception on filesystem or parse errors
 */
public int run(String[] args) throws Exception {

    final String usage = "Usage: ParseSegment segments";

    if (args.length == 0) {
        System.err.println(usage);
        System.exit(-1);
    }

    FileSystem fs = FileSystem.get(getConf());
    Path segmentsDir = new Path(args[0]);
    // output directories of an earlier parse, cleared in this order
    String[] staleOutputs = { "crawl_parse", "parse_data" };

    for (FileStatus segment : fs.listStatus(segmentsDir)) {
        Path segmentPath = segment.getPath();
        for (String outputName : staleOutputs) {
            Path staleDir = new Path(segmentPath, outputName);
            if (fs.exists(staleDir)) {
                fs.delete(staleDir, true);
            }
        }
        parse(segmentPath);
    }
    return 0;
}

From source file:com.iflytek.spider.util.FSUtils.java

License:Apache License

/**
 * Swaps a replacement path in under the name of the current path.
 *
 * An existing current path is first renamed to the same name with ".old"
 * appended; the replacement is then renamed to the current name. When
 * removeOld is true the ".old" path is deleted afterwards, otherwise it is
 * left in place.
 *
 * @param fs The FileSystem.
 * @param current The end path, the one being replaced.
 * @param replacement The path to replace with.
 * @param removeOld True if the ".old" copy should be deleted afterwards.
 *
 * @throws IOException If an error occurs during replacement.
 */
public static void replace(FileSystem fs, Path current, Path replacement, boolean removeOld)
        throws IOException {

    Path backup = new Path(current + ".old");

    // move whatever is currently there out of the way
    boolean currentPresent = fs.exists(current);
    if (currentPresent) {
        fs.rename(current, backup);
    }

    // put the replacement in place
    fs.rename(replacement, current);

    // drop the backup only when the caller asked for it
    boolean backupPresent = fs.exists(backup);
    if (backupPresent && removeOld) {
        fs.delete(backup, true);
    }
}