List of usage examples for org.apache.hadoop.fs FileSystem close
@Override public void close() throws IOException
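Every example below obtains a FileSystem handle, uses it, and finally calls close() on it. As a point of reference, here is a minimal sketch of that pattern (not taken from any of the projects below; the class name and the path /tmp/example.txt are placeholders for illustration). Note that FileSystem.get(Configuration) may return a cached instance shared with other callers, so closing it also closes it for them; FileSystem.newInstance(Configuration) returns an uncached handle that is safe to close independently.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCloseSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // FileSystem.get() may hand back a cached, shared instance.
    FileSystem fs = FileSystem.get(conf);
    try {
      // "/tmp/example.txt" is a placeholder path used only for illustration.
      boolean exists = fs.exists(new Path("/tmp/example.txt"));
      System.out.println("exists = " + exists);
    } finally {
      fs.close();
    }

    // FileSystem implements java.io.Closeable, so try-with-resources also works;
    // newInstance() avoids the shared cache entirely.
    try (FileSystem fs2 = FileSystem.newInstance(conf)) {
      System.out.println("home directory = " + fs2.getHomeDirectory());
    }
  }
}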
From source file:org.apache.nutch.protocol.Content.java
License:Apache License
public static void main(String args[]) throws Exception {
  String usage = "Content (-local | -dfs <namenode:port>) recno batchId";

  if (args.length < 3) {
    System.out.println("usage:" + usage);
    return;
  }

  GenericOptionsParser optParser = new GenericOptionsParser(NutchConfiguration.create(), args);
  String[] argv = optParser.getRemainingArgs();
  Configuration conf = optParser.getConfiguration();

  FileSystem fs = FileSystem.get(conf);
  try {
    int recno = Integer.parseInt(argv[0]);
    String batchId = argv[1];

    Path file = new Path(batchId, DIR_NAME);
    System.out.println("Reading from file: " + file);

    ArrayFile.Reader contents = new ArrayFile.Reader(fs, file.toString(), conf);

    Content content = new Content();
    contents.get(recno, content);
    System.out.println("Retrieved " + recno + " from file " + file);
    System.out.println(content);

    contents.close();
  } finally {
    fs.close();
  }
}
From source file:org.apache.nutch.tools.CommonCrawlDataDumper.java
License:Apache License
/**
 * Dumps the reverse engineered CBOR content from the provided segment
 * directories if a parent directory contains more than one segment,
 * otherwise a single segment can be passed as an argument. If the boolean
 * argument is provided then the CBOR is also zipped.
 *
 * @param outputDir the directory you wish to dump the raw content to. This
 *          directory will be created.
 * @param segmentRootDir a directory containing one or more segments.
 * @param linkdb Path to linkdb.
 * @param gzip a boolean flag indicating whether the CBOR content should also
 *          be gzipped.
 * @param epochFilename if {@code true}, output files will be named using the epoch time (in milliseconds).
 * @param extension a file extension to use with output documents.
 * @throws Exception if any exception occurs.
 */
public void dump(File outputDir, File segmentRootDir, File linkdb, boolean gzip, String[] mimeTypes,
    boolean epochFilename, String extension, boolean warc) throws Exception {
  if (gzip) {
    LOG.info("Gzipping CBOR data has been skipped");
  }
  // total file counts
  Map<String, Integer> typeCounts = new HashMap<>();
  // filtered file counters
  Map<String, Integer> filteredCounts = new HashMap<>();

  Configuration nutchConfig = NutchConfiguration.create();
  Path segmentRootPath = new Path(segmentRootDir.toString());
  FileSystem fs = segmentRootPath.getFileSystem(nutchConfig);

  // get all paths
  List<Path> parts = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> files = fs.listFiles(segmentRootPath, true);
  String partPattern = ".*" + File.separator + Content.DIR_NAME + File.separator + "part-[0-9]{5}"
      + File.separator + "data";
  while (files.hasNext()) {
    LocatedFileStatus next = files.next();
    if (next.isFile()) {
      Path path = next.getPath();
      if (path.toString().matches(partPattern)) {
        parts.add(path);
      }
    }
  }

  LinkDbReader linkDbReader = null;
  if (linkdb != null) {
    linkDbReader = new LinkDbReader(nutchConfig, new Path(linkdb.toString()));
  }
  if (parts == null || parts.size() == 0) {
    LOG.error("No segment directories found in {} ", segmentRootDir.getAbsolutePath());
    System.exit(1);
  }
  LOG.info("Found {} segment parts", parts.size());

  if (gzip && !warc) {
    fileList = new ArrayList<>();
    constructNewStream(outputDir);
  }

  for (Path segmentPart : parts) {
    LOG.info("Processing segment Part : [ {} ]", segmentPart);
    try {
      SequenceFile.Reader reader = new SequenceFile.Reader(nutchConfig,
          SequenceFile.Reader.file(segmentPart));

      Writable key = (Writable) reader.getKeyClass().getConstructor().newInstance();

      Content content = null;
      while (reader.next(key)) {
        content = new Content();
        reader.getCurrentValue(content);
        Metadata metadata = content.getMetadata();
        String url = key.toString();
        String baseName = FilenameUtils.getBaseName(url);
        String extensionName = FilenameUtils.getExtension(url);

        if (!extension.isEmpty()) {
          extensionName = extension;
        } else if ((extensionName == null) || extensionName.isEmpty()) {
          extensionName = "html";
        }

        String outputFullPath = null;
        String outputRelativePath = null;
        String filename = null;
        String timestamp = null;
        String reverseKey = null;

        if (epochFilename || config.getReverseKey()) {
          try {
            long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z")
                .parse(getDate(metadata.get("Date"))).getTime();
            timestamp = String.valueOf(epoch);
          } catch (ParseException pe) {
            LOG.warn(pe.getMessage());
          }

          reverseKey = reverseUrl(url);
          config.setReverseKeyValue(
              reverseKey.replace("/", "_") + "_" + DigestUtils.sha1Hex(url) + "_" + timestamp);
        }

        if (!warc) {
          if (epochFilename) {
            outputFullPath = DumpFileUtil.createFileNameFromUrl(outputDir.getAbsolutePath(), reverseKey,
                url, timestamp, extensionName, !gzip);
            outputRelativePath = outputFullPath.substring(0,
                outputFullPath.lastIndexOf(File.separator) - 1);
            filename = content.getMetadata().get(Metadata.DATE) + "." + extensionName;
          } else {
            String md5Ofurl = DumpFileUtil.getUrlMD5(url);
            String fullDir = DumpFileUtil.createTwoLevelsDirectory(outputDir.getAbsolutePath(), md5Ofurl,
                !gzip);
            filename = DumpFileUtil.createFileName(md5Ofurl, baseName, extensionName);
            outputFullPath = String.format("%s/%s", fullDir, filename);

            String[] fullPathLevels = fullDir.split(Pattern.quote(File.separator));
            String firstLevelDirName = fullPathLevels[fullPathLevels.length - 2];
            String secondLevelDirName = fullPathLevels[fullPathLevels.length - 1];
            outputRelativePath = firstLevelDirName + secondLevelDirName;
          }
        }

        // Encode all filetypes if no mimetypes have been given
        Boolean filter = (mimeTypes == null);

        String jsonData = "";
        try {
          String mimeType = new Tika().detect(content.getContent());
          // Maps file to JSON-based structure

          Set<String> inUrls = null; // there may be duplicates, so using set
          if (linkDbReader != null) {
            Inlinks inlinks = linkDbReader.getInlinks((Text) key);
            if (inlinks != null) {
              Iterator<Inlink> iterator = inlinks.iterator();
              inUrls = new LinkedHashSet<>();
              while (inUrls.size() <= MAX_INLINKS && iterator.hasNext()) {
                inUrls.add(iterator.next().getFromUrl());
              }
            }
          }
          // TODO: Make this Jackson Format implementation reusable
          try (CommonCrawlFormat format = CommonCrawlFormatFactory
              .getCommonCrawlFormat(warc ? "WARC" : "JACKSON", nutchConfig, config)) {
            if (inUrls != null) {
              format.setInLinks(new ArrayList<>(inUrls));
            }
            jsonData = format.getJsonData(url, content, metadata);
          }

          collectStats(typeCounts, mimeType);
          // collects statistics for the given mimetypes
          if ((mimeType != null) && (mimeTypes != null) && Arrays.asList(mimeTypes).contains(mimeType)) {
            collectStats(filteredCounts, mimeType);
            filter = true;
          }
        } catch (IOException ioe) {
          LOG.error("Fatal error in creating JSON data: " + ioe.getMessage());
          return;
        }

        if (!warc) {
          if (filter) {
            byte[] byteData = serializeCBORData(jsonData);

            if (!gzip) {
              File outputFile = new File(outputFullPath);
              if (outputFile.exists()) {
                LOG.info("Skipping writing: [" + outputFullPath + "]: file already exists");
              } else {
                LOG.info("Writing: [" + outputFullPath + "]");
                IOUtils.copy(new ByteArrayInputStream(byteData), new FileOutputStream(outputFile));
              }
            } else {
              if (fileList.contains(outputFullPath)) {
                LOG.info("Skipping compressing: [" + outputFullPath + "]: file already exists");
              } else {
                fileList.add(outputFullPath);
                LOG.info("Compressing: [" + outputFullPath + "]");
                // TarArchiveEntry tarEntry = new TarArchiveEntry(firstLevelDirName + File.separator + secondLevelDirName + File.separator + filename);
                TarArchiveEntry tarEntry = new TarArchiveEntry(
                    outputRelativePath + File.separator + filename);
                tarEntry.setSize(byteData.length);
                tarOutput.putArchiveEntry(tarEntry);
                tarOutput.write(byteData);
                tarOutput.closeArchiveEntry();
              }
            }
          }
        }
      }
      reader.close();
    } catch (Exception e) {
      LOG.warn("SKIPPED: {} Because : {}", segmentPart, e.getMessage());
    } finally {
      fs.close();
    }
  }

  if (gzip && !warc) {
    closeStream();
  }

  if (!typeCounts.isEmpty()) {
    LOG.info("CommonsCrawlDataDumper File Stats: "
        + DumpFileUtil.displayFileTypes(typeCounts, filteredCounts));
  }
}
From source file:org.apache.nutch.tools.DmozParser.java
License:Apache License
/**
 * Command-line access. User may add URLs via a flat text file or the
 * structured DMOZ file. By default, we ignore Adult material (as categorized
 * by DMOZ).
 */
public static void main(String argv[]) throws Exception {
  if (argv.length < 1) {
    System.err.println(
        "Usage: DmozParser <dmoz_file> [-subset <subsetDenominator>] [-includeAdultMaterial] [-skew skew] [-snippet] [-topicFile <topic list file>] [-topic <topic> [-topic <topic> [...]]]");
    return;
  }

  //
  // Parse the command line, figure out what kind of
  // URL file we need to load
  //
  int subsetDenom = 1;
  int skew = 0;
  String dmozFile = argv[0];
  boolean includeAdult = false;
  boolean snippet = false;
  Pattern topicPattern = null;
  Vector<String> topics = new Vector<String>();

  Configuration conf = NutchConfiguration.create();
  store = StorageUtils.createWebStore(conf, String.class, WebPage.class);
  FileSystem fs = FileSystem.get(conf);

  try {
    for (int i = 1; i < argv.length; i++) {
      if ("-includeAdultMaterial".equals(argv[i])) {
        includeAdult = true;
      } else if ("-subset".equals(argv[i])) {
        subsetDenom = Integer.parseInt(argv[i + 1]);
        i++;
      } else if ("-topic".equals(argv[i])) {
        topics.addElement(argv[i + 1]);
        i++;
      } else if ("-topicFile".equals(argv[i])) {
        addTopicsFromFile(argv[i + 1], topics);
        i++;
      } else if ("-skew".equals(argv[i])) {
        skew = Integer.parseInt(argv[i + 1]);
        i++;
      } else if ("-snippet".equals(argv[i])) {
        snippet = true;
      }
    }

    DmozParser parser = new DmozParser();

    if (!topics.isEmpty()) {
      String regExp = new String("^(");
      int j = 0;
      for (; j < topics.size() - 1; ++j) {
        regExp = regExp.concat(topics.get(j));
        regExp = regExp.concat("|");
      }
      regExp = regExp.concat(topics.get(j));
      regExp = regExp.concat(").*");
      LOG.info("Topic selection pattern = " + regExp);
      topicPattern = Pattern.compile(regExp);
    }

    parser.parseDmozFile(new File(dmozFile), subsetDenom, includeAdult, skew, topicPattern, snippet);
  } finally {
    fs.close();
  }
}
From source file:org.apache.oozie.command.XLogPurgeXCommand.java
License:Apache License
@Override
protected Void execute() throws CommandException {
  XLogCopyService xls = Services.get().get(XLogCopyService.class);
  String hdfsDir = xls.getConfHdfsLogDir();

  XLog.Info.get().clear();
  XLog log = XLog.getLog(getClass());

  HadoopAccessorService has = Services.get().get(HadoopAccessorService.class);
  URI uri = new Path(hdfsDir).toUri();
  Configuration fsConf = has.createJobConf(uri.getAuthority());
  FileSystem fs = null;

  try {
    fs = has.createFileSystem(System.getProperty("user.name"), uri, fsConf);
  } catch (Exception ex) {
    log.error("user has to be specified to access hdfs",
        new HadoopAccessorException(ErrorCode.E0902, "user has to be specified to access FileSystem"));
  }

  deleteJobLogs(fs, hdfsDir, wfList);
  deleteJobLogs(fs, hdfsDir, coordList);
  deleteJobLogs(fs, hdfsDir, bundleList);

  try {
    fs.close();
  } catch (IOException ex) {
    LOG.error("cannot close filesystem");
  }

  return null;
}
From source file:org.apache.pirk.test.distributed.DistributedTestDriver.java
License:Apache License
/**
 * Delete all necessary inputs, clean up
 */
public static void cleanup(FileSystem fs, String dataSchemasProp, String querySchemasProp, String stopListProp)
    throws Exception {
  Inputs.deleteESInput();
  fs.close();

  SystemConfiguration.setProperty("pir.stopListFile", stopListProp);

  // Force the query and data schemas to load their original values
  if (!dataSchemasProp.equals("none")) {
    DataSchemaLoader.initialize();
  }
  if (!querySchemasProp.equals("none")) {
    QuerySchemaLoader.initialize();
  }
}
From source file:org.apache.ranger.plugin.store.file.BaseFileStore.java
License:Apache License
protected void close(FileSystem fs) {
  if (fs != null) {
    try {
      fs.close();
    } catch (IOException excp) {
      // ignore
    }
  }
}
From source file:org.apache.ranger.services.hdfs.HDFSRangerTest.java
License:Apache License
@org.junit.Test
public void readTest() throws Exception {
  FileSystem fileSystem = hdfsCluster.getFileSystem();

  // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
  final Path file = new Path("/tmp/tmpdir/data-file2");
  FSDataOutputStream out = fileSystem.create(file);
  for (int i = 0; i < 1024; ++i) {
    out.write(("data" + i + "\n").getBytes("UTF-8"));
    out.flush();
  }
  out.close();

  // Change permissions to read-only
  fileSystem.setPermission(file, new FsPermission(FsAction.READ, FsAction.NONE, FsAction.NONE));

  // Now try to read the file as "bob" - this should be allowed (by the policy - user)
  UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[] {});
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // Read the file
      FSDataInputStream in = fs.open(file);
      ByteArrayOutputStream output = new ByteArrayOutputStream();
      IOUtils.copy(in, output);
      String content = new String(output.toByteArray());
      Assert.assertTrue(content.startsWith("data0"));

      fs.close();
      return null;
    }
  });

  // Now try to read the file as "alice" - this should be allowed (by the policy - group)
  ugi = UserGroupInformation.createUserForTesting("alice", new String[] { "IT" });
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // Read the file
      FSDataInputStream in = fs.open(file);
      ByteArrayOutputStream output = new ByteArrayOutputStream();
      IOUtils.copy(in, output);
      String content = new String(output.toByteArray());
      Assert.assertTrue(content.startsWith("data0"));

      fs.close();
      return null;
    }
  });

  // Now try to read the file as unknown user "eve" - this should not be allowed
  ugi = UserGroupInformation.createUserForTesting("eve", new String[] {});
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // Read the file
      try {
        fs.open(file);
        Assert.fail("Failure expected on an incorrect permission");
      } catch (RemoteException ex) {
        // expected
        Assert.assertTrue(RangerAccessControlException.class.getName().equals(ex.getClassName()));
      }

      fs.close();
      return null;
    }
  });
}
From source file:org.apache.ranger.services.hdfs.HDFSRangerTest.java
License:Apache License
@org.junit.Test
public void writeTest() throws Exception {
  FileSystem fileSystem = hdfsCluster.getFileSystem();

  // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
  final Path file = new Path("/tmp/tmpdir2/data-file3");
  FSDataOutputStream out = fileSystem.create(file);
  for (int i = 0; i < 1024; ++i) {
    out.write(("data" + i + "\n").getBytes("UTF-8"));
    out.flush();
  }
  out.close();

  // Now try to write to the file as "bob" - this should be allowed (by the policy - user)
  UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[] {});
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // Write to the file
      fs.append(file);

      fs.close();
      return null;
    }
  });

  // Now try to write to the file as "alice" - this should be allowed (by the policy - group)
  ugi = UserGroupInformation.createUserForTesting("alice", new String[] { "IT" });
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // Write to the file
      fs.append(file);

      fs.close();
      return null;
    }
  });

  // Now try to read the file as unknown user "eve" - this should not be allowed
  ugi = UserGroupInformation.createUserForTesting("eve", new String[] {});
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // Write to the file
      try {
        fs.append(file);
        Assert.fail("Failure expected on an incorrect permission");
      } catch (RemoteException ex) {
        // expected
        Assert.assertTrue(RangerAccessControlException.class.getName().equals(ex.getClassName()));
      }

      fs.close();
      return null;
    }
  });
}
From source file:org.apache.ranger.services.hdfs.HDFSRangerTest.java
License:Apache License
@org.junit.Test
public void executeTest() throws Exception {
  FileSystem fileSystem = hdfsCluster.getFileSystem();

  // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
  final Path file = new Path("/tmp/tmpdir3/data-file2");
  FSDataOutputStream out = fileSystem.create(file);
  for (int i = 0; i < 1024; ++i) {
    out.write(("data" + i + "\n").getBytes("UTF-8"));
    out.flush();
  }
  out.close();

  // Change permissions to read-only
  fileSystem.setPermission(file, new FsPermission(FsAction.READ, FsAction.NONE, FsAction.NONE));

  // Change the parent directory permissions to be execute only for the owner
  Path parentDir = new Path("/tmp/tmpdir3");
  fileSystem.setPermission(parentDir, new FsPermission(FsAction.EXECUTE, FsAction.NONE, FsAction.NONE));

  // Try to read the directory as "bob" - this should be allowed (by the policy - user)
  UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[] {});
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
      Assert.assertTrue(iter.hasNext());

      fs.close();
      return null;
    }
  });

  // Try to read the directory as "alice" - this should be allowed (by the policy - group)
  ugi = UserGroupInformation.createUserForTesting("alice", new String[] { "IT" });
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
      Assert.assertTrue(iter.hasNext());

      fs.close();
      return null;
    }
  });

  // Now try to read the directory as unknown user "eve" - this should not be allowed
  ugi = UserGroupInformation.createUserForTesting("eve", new String[] {});
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // List the directory
      try {
        RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
        Assert.assertTrue(iter.hasNext());
        Assert.fail("Failure expected on an incorrect permission");
      } catch (RemoteException ex) {
        // expected
        Assert.assertTrue(RangerAccessControlException.class.getName().equals(ex.getClassName()));
      }

      fs.close();
      return null;
    }
  });
}
From source file:org.apache.reef.runtime.mesos.evaluator.REEFExecutor.java
License:Apache License
public final void onEvaluatorLaunch(final EvaluatorLaunch evaluatorLaunch) {
  LOG.log(Level.INFO, "Launch!!!! {0}", evaluatorLaunch.toString());
  assert (evaluatorLaunch.getIdentifier().toString().equals(this.mesosExecutorId));

  final ExecutorService evaluatorLaunchExecutorService = Executors.newSingleThreadExecutor();
  evaluatorLaunchExecutorService.submit(new Thread() {
    public void run() {
      try {
        final List<String> command = Arrays.asList(evaluatorLaunch.getCommand().toString().split(" "));
        LOG.log(Level.INFO, "Command!!!! {0}", command);

        final FileSystem fileSystem = FileSystem.get(new Configuration());
        final Path hdfsFolder = new Path(fileSystem.getUri() + "/" + mesosExecutorId);
        final File localFolder = new File(fileNames.getREEFFolderName(), fileNames.getLocalFolderName());
        FileUtil.copy(fileSystem, hdfsFolder, localFolder, true, new Configuration());

        evaluatorProcess = new ProcessBuilder().command(command)
            .redirectError(new File(fileNames.getEvaluatorStderrFileName()))
            .redirectOutput(new File(fileNames.getEvaluatorStdoutFileName())).start();
        evaluatorProcessExitValue = evaluatorProcess.waitFor();

        fileSystem.close();
      } catch (IOException | InterruptedException e) {
        throw new RuntimeException(e);
      }
    }
  });
  evaluatorLaunchExecutorService.shutdown();
}