List of usage examples for org.apache.hadoop.fs FileSystem delete
/**
 * Deletes the entry at the given path.
 *
 * NOTE(review): the semantics below come from the Hadoop FileSystem API
 * documentation, not from this declaration - confirm against the Hadoop
 * version in use.
 *
 * @param f the path to delete
 * @param recursive presumably whether a directory is deleted together with its contents
 * @return presumably {@code true} when the delete succeeded
 * @throws IOException on I/O failure
 */
public abstract boolean delete(Path f, boolean recursive) throws IOException;
From source file:com.ibm.stocator.fs.swift2d.systemtests.StreamingSwiftTest.java
License:Open Source License
@Test public void accessObjectWithSpaceTest() throws Exception { FileSystem fs = new ObjectStoreFileSystem(); Configuration conf = new Configuration(); String uriString = conf.get("fs.swift2d.test.uri"); Assume.assumeNotNull(uriString);/*from ww w .j a v a2s . com*/ // adding suffix with space to the container name String scheme = "swift2d"; String objectName = "/a/testObject.txt"; URI publicContainerURI = new URI(uriString + objectName); // initialize file system fs.initialize(publicContainerURI, conf); FileStatus objectFS = null; Path f = null; try { FSDataOutputStream fsDataOutputStream = null; String currObjName = null; for (int i = 0; i < 5; i++) { currObjName = objectName + String.valueOf(i); // create timer createObjectTimer(90000.0, currObjName); publicContainerURI = new URI(scheme + "://" + getHost(URI.create(uriString)) + "/" + currObjName); f = new Path(publicContainerURI.toString()); fsDataOutputStream = fs.create(f); String line = null; while (!objectExpired) { // generates input byte[] bytes = new byte[0]; line = "\"2017-7-15 3:6:43\"," + String.valueOf(Math.random()) + ",6,18" + "\n"; ByteBuffer linesBB = ByteBuffer.wrap(line.getBytes()); bytes = new byte[linesBB.limit()]; linesBB.get(bytes); // writes to output fsDataOutputStream.write(bytes); // simulate delays in input Thread.sleep(50); } fsDataOutputStream.close(); objectExpired = false; } } catch (Exception e) { e.printStackTrace(); Assert.assertNotNull("Unable to access public object.", objectFS); } finally { fs.delete(f, true); } }
From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java
License:Open Source License
/**
 * Asserts that a path exists, deletes it, and asserts that it is gone.
 *
 * The boolean result of the delete call is not inspected; only the
 * follow-up existence check decides the outcome.
 *
 * @param fs filesystem to operate on
 * @param file path to delete
 * @param recursive passed through to {@code FileSystem.delete}
 * @throws IOException IO problems
 */
public static void assertDeleted(FileSystem fs, Path file, boolean recursive)
        throws IOException {
    // Precondition: the target must be present before we try to remove it.
    assertPathExists(fs, "about to be deleted file", file);

    fs.delete(file, recursive);

    // Postcondition: nothing may remain at the path.
    assertPathDoesNotExist(fs, "Deleted file", file);
}
From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java
License:Open Source License
/**
 * Touch a file: whatever is currently at the path is deleted (recursively)
 * and {@code writeTextFile} is then called with {@code null} text.
 *
 * NOTE(review): the final {@code false} argument is presumably the overwrite
 * flag of {@code writeTextFile} - confirm against that helper.
 *
 * @param fs filesystem
 * @param path path
 * @throws IOException IO problems
 */
public static void touch(FileSystem fs, Path path)
        throws IOException {
    // Clear the target first, so an existing entry does not survive the touch.
    fs.delete(path, true);

    writeTextFile(fs, path, null, false);
}
From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java
License:Open Source License
public static void cleanup(String action, FileSystem fileSystem, String cleanupPath) { noteAction(action);//w ww .j a v a2 s. c o m try { if (fileSystem != null) { fileSystem.delete(new Path(cleanupPath).makeQualified(fileSystem), true); } } catch (Exception e) { LOG.error("Error deleting in " + action + " - " + cleanupPath + ": " + e, e); } }
From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java
License:Open Source License
/** * Deletes all files in a container/*from w ww .j a v a 2s . com*/ * @param fileSystem * @param BaseUri * @throws IOException */ public static void cleanupAllFiles(FileSystem fileSystem, String BaseUri) throws IOException { try { if (fileSystem != null) { // Clean up generated files Path rootDir = new Path(BaseUri); FileStatus[] files = fileSystem.listStatus(rootDir); for (FileStatus file : files) { fileSystem.delete(file.getPath(), false); } } } catch (Exception e) { LOG.error("Error in deleting all files."); } }
From source file:com.iflytek.spider.crawl.CrawlDb.java
License:Apache License
public void update(Path crawlDb, Path[] segments, boolean additionsAllowed, boolean force) throws IOException, InterruptedException, ClassNotFoundException { FileSystem fs = FileSystem.get(getConf()); Path lock = new Path(crawlDb, LOCK_NAME); LockUtil.createLockFile(fs, lock, force); if (LOG.isInfoEnabled()) { LOG.info("CrawlDb update: starting"); LOG.info("CrawlDb update: db: " + crawlDb); LOG.info("CrawlDb update: segments: " + Arrays.asList(segments)); LOG.info("CrawlDb update: additions allowed: " + additionsAllowed); }//from w w w . j ava2 s . c o m Job job = CrawlDb.createJob(getConf(), crawlDb); job.getConfiguration().setBoolean(CRAWLDB_ADDITIONS_ALLOWED, additionsAllowed); for (int i = 0; i < segments.length; i++) { Path fetch = new Path(segments[i], CrawlDatum.FETCH_DIR_NAME); Path parse = new Path(segments[i], CrawlDatum.PARSE_DIR_NAME); if (fs.exists(fetch)) { FileInputFormat.addInputPath(job, fetch); } if (fs.exists(parse)) { FileInputFormat.addInputPath(job, parse); } else { LOG.info(" - skipping invalid segment " + segments[i]); } } if (LOG.isInfoEnabled()) { LOG.info("CrawlDb update: Merging segment data into db."); } try { job.waitForCompletion(true); } catch (IOException e) { LockUtil.removeLockFile(fs, lock); Path outPath = FileOutputFormat.getOutputPath(job); if (fs.exists(outPath)) fs.delete(outPath, true); throw e; } catch (InterruptedException e) { LockUtil.removeLockFile(fs, lock); Path outPath = FileOutputFormat.getOutputPath(job); if (fs.exists(outPath)) fs.delete(outPath, true); throw e; } catch (ClassNotFoundException e) { LockUtil.removeLockFile(fs, lock); Path outPath = FileOutputFormat.getOutputPath(job); if (fs.exists(outPath)) fs.delete(outPath, true); throw e; } CrawlDb.install(job, crawlDb); if (LOG.isInfoEnabled()) { LOG.info("CrawlDb update: done"); } }
From source file:com.iflytek.spider.crawl.CrawlDb.java
License:Apache License
public static void install(Job job, Path crawlDb) throws IOException { Path newCrawlDb = FileOutputFormat.getOutputPath(job); FileSystem fs = FileSystem.get(job.getConfiguration()); Path old = new Path(crawlDb, "old"); Path current = new Path(crawlDb, CURRENT_NAME); if (fs.exists(current)) { if (fs.exists(old)) fs.delete(old, true); fs.rename(current, old);//from w w w . j a v a 2 s .c o m } fs.mkdirs(crawlDb); fs.rename(newCrawlDb, current); if (fs.exists(old)) fs.delete(old, true); Path lock = new Path(crawlDb, LOCK_NAME); LockUtil.removeLockFile(fs, lock); }
From source file:com.iflytek.spider.crawl.GeneratorSmart.java
License:Apache License
/** * Generate fetchlists in one or more segments. Whether to filter URLs or not * is read from the crawl.generate.filter property in the configuration files. * If the property is not found, the URLs are filtered. Same for the * normalisation./*from ww w . j a v a 2s . co m*/ * * @param dbDir * Crawl database directory * @param segments * Segments directory * @param numLists * Number of reduce tasks * @param curTime * Current time in milliseconds * * @return Path to generated segment or null if no entries were selected * * @throws IOException * When an I/O error occurs * @throws ClassNotFoundException * @throws InterruptedException */ public Path[] generate(Path dbDir, Path segments, int numLists, long curTime, boolean force) throws IOException, InterruptedException, ClassNotFoundException { //getConf().set("mapred.temp.dir", "d:/tmp"); Path tempDir = new Path( getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis()); Path lock = new Path(dbDir, CrawlDb.LOCK_NAME); FileSystem fs = FileSystem.get(getConf()); LockUtil.createLockFile(fs, lock, force); LOG.info("Generator: Selecting best-scoring urls due for fetch."); LOG.info("Generator: starting"); Job job = AvroJob.getAvroJob(getConf()); if (numLists == -1) { // for politeness make numLists = job.getNumReduceTasks(); // a partition per fetch task } if ("local".equals(job.getConfiguration().get("mapred.job.tracker")) && numLists != 1) { // override LOG.info("Generator: jobtracker is 'local', generating exactly one partition."); numLists = 1; } LOG.info("Generator: with " + numLists + " partition."); job.getConfiguration().setLong(GENERATOR_CUR_TIME, curTime); // record real generation time long generateTime = System.currentTimeMillis(); job.getConfiguration().setLong(Spider.GENERATE_TIME_KEY, generateTime); FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME)); job.setInputFormatClass(AvroPairInputFormat.class); job.setMapperClass(SelectorMapper.class); 
job.setReducerClass(SelectorReducer.class); FileOutputFormat.setOutputPath(job, tempDir); //job.setOutputFormatClass(AvroPairOutputFormat.class); job.setOutputFormatClass(GeneratorOutputFormat.class); job.setOutputKeyClass(Float.class); job.setOutputValueClass(SelectorEntry.class); // AvroMultipleOutputs.addNamedOutput(job, "seq", // AvroPairOutputFormat.class, Float.class, SelectorEntry.class); try { job.waitForCompletion(true); } catch (IOException e) { e.printStackTrace(); return null; } // read the subdirectories generated in the temp // output and turn them into segments List<Path> generatedSegments = new ArrayList<Path>(); FileStatus[] status = fs.listStatus(tempDir); try { for (FileStatus stat : status) { Path subfetchlist = stat.getPath(); if (!subfetchlist.getName().startsWith("fetchlist-")) continue; // start a new partition job for this segment Path newSeg = partitionSegment(fs, segments, subfetchlist, numLists); fs.createNewFile(new Path(newSeg, "generatored")); generatedSegments.add(newSeg); } } catch (Exception e) { LOG.warn("Generator: exception while partitioning segments, exiting ..."); fs.delete(tempDir, true); return null; } if (generatedSegments.size() == 0) { LOG.warn("Generator: 0 records selected for fetching, exiting ..."); LockUtil.removeLockFile(fs, lock); fs.delete(tempDir, true); return null; } if (getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) { // update the db from tempDir Path tempDir2 = new Path( getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis()); job = AvroJob.getAvroJob(getConf()); job.setJobName("generate: updatedb " + dbDir); job.getConfiguration().setLong(Spider.GENERATE_TIME_KEY, generateTime); for (Path segmpaths : generatedSegments) { Path subGenDir = new Path(segmpaths, CrawlDatum.GENERATE_DIR_NAME); FileInputFormat.addInputPath(job, subGenDir); } FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME)); job.setInputFormatClass(AvroPairInputFormat.class); 
job.setMapperClass(CrawlDbUpdateMapper.class); // job.setReducerClass(CrawlDbUpdater.class); job.setOutputFormatClass(AvroMapOutputFormat.class); job.setOutputKeyClass(String.class); job.setOutputValueClass(CrawlDatum.class); FileOutputFormat.setOutputPath(job, tempDir2); try { job.waitForCompletion(true); CrawlDb.install(job, dbDir); } catch (IOException e) { LockUtil.removeLockFile(fs, lock); fs.delete(tempDir, true); fs.delete(tempDir2, true); throw e; } fs.delete(tempDir2, true); } LockUtil.removeLockFile(fs, lock); fs.delete(tempDir, true); if (LOG.isInfoEnabled()) { LOG.info("Generator: done."); } Path[] patharray = new Path[generatedSegments.size()]; return generatedSegments.toArray(patharray); }
From source file:com.iflytek.spider.parse.ParseSegment.java
License:Apache License
/**
 * Re-parses every segment found under the directory given as first argument.
 *
 * For each entry listed under that directory, existing {@code crawl_parse}
 * and {@code parse_data} sub-paths are deleted recursively before
 * {@code parse} is invoked on the entry. With no arguments the usage text is
 * printed to stderr and the JVM exits with status -1.
 *
 * @param args command line; {@code args[0]} is the segments directory
 * @return 0 once all segments were processed
 * @throws Exception if listing, deleting or parsing fails
 */
public int run(String[] args) throws Exception {
    final String usage = "Usage: ParseSegment segments";

    if (args.length == 0) {
        System.err.println(usage);
        System.exit(-1);
    }

    final FileSystem fs = FileSystem.get(getConf());
    // Outputs of an earlier parse that must go before parsing again.
    final String[] staleOutputs = { "crawl_parse", "parse_data" };

    FileStatus[] segmentEntries = fs.listStatus(new Path(args[0]));
    for (FileStatus entry : segmentEntries) {
        Path segment = entry.getPath();
        for (String outputName : staleOutputs) {
            Path stale = new Path(segment, outputName);
            if (fs.exists(stale)) {
                fs.delete(stale, true);
            }
        }
        parse(segment);
    }
    return 0;
}
From source file:com.iflytek.spider.util.FSUtils.java
License:Apache License
/** * Replaces the current path with the new path and if set removes the old * path. If removeOld is set to false then the old path will be set to the * name current.old.//ww w . j a v a 2s .c o m * * @param fs The FileSystem. * @param current The end path, the one being replaced. * @param replacement The path to replace with. * @param removeOld True if we are removing the current path. * * @throws IOException If an error occurs during replacement. */ public static void replace(FileSystem fs, Path current, Path replacement, boolean removeOld) throws IOException { // rename any current path to old Path old = new Path(current + ".old"); if (fs.exists(current)) { fs.rename(current, old); } // rename the new path to current and remove the old path if needed fs.rename(replacement, current); if (fs.exists(old) && removeOld) { fs.delete(old, true); } }