List of usage examples for org.apache.hadoop.fs FileSystem close
@Override public void close() throws IOException
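Every example below obtains a FileSystem handle, uses it, and finally calls close() on it. As a point of reference, here is a minimal sketch of that pattern (not taken from any of the projects below; the class name and the path /tmp/example.txt are placeholders for illustration). Note that FileSystem.get(Configuration) may return a cached instance shared with other callers, so closing it also closes it for them; FileSystem.newInstance(Configuration) returns an uncached handle that is safe to close independently.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCloseSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // FileSystem.get() may hand back a cached, shared instance.
    FileSystem fs = FileSystem.get(conf);
    try {
      // "/tmp/example.txt" is a placeholder path used only for illustration.
      boolean exists = fs.exists(new Path("/tmp/example.txt"));
      System.out.println("exists = " + exists);
    } finally {
      fs.close();
    }

    // FileSystem implements java.io.Closeable, so try-with-resources also works;
    // newInstance() avoids the shared cache entirely.
    try (FileSystem fs2 = FileSystem.newInstance(conf)) {
      System.out.println("home directory = " + fs2.getHomeDirectory());
    }
  }
}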
From source file:org.apache.nutch.protocol.Content.java
License:Apache License
public static void main(String args[]) throws Exception {
  String usage = "Content (-local | -dfs <namenode:port>) recno batchId";

  if (args.length < 3) {
    System.out.println("usage:" + usage);
    return;
  }

  GenericOptionsParser optParser = new GenericOptionsParser(NutchConfiguration.create(), args);
  String[] argv = optParser.getRemainingArgs();
  Configuration conf = optParser.getConfiguration();

  FileSystem fs = FileSystem.get(conf);
  try {
    int recno = Integer.parseInt(argv[0]);
    String batchId = argv[1];

    Path file = new Path(batchId, DIR_NAME);
    System.out.println("Reading from file: " + file);

    ArrayFile.Reader contents = new ArrayFile.Reader(fs, file.toString(), conf);

    Content content = new Content();
    contents.get(recno, content);
    System.out.println("Retrieved " + recno + " from file " + file);
    System.out.println(content);

    contents.close();
  } finally {
    fs.close();
  }
}
From source file:org.apache.nutch.tools.CommonCrawlDataDumper.java
License:Apache License
/**
 * Dumps the reverse engineered CBOR content from the provided segment
 * directories if a parent directory contains more than one segment,
 * otherwise a single segment can be passed as an argument. If the boolean
 * argument is provided then the CBOR is also zipped.
 *
 * @param outputDir the directory you wish to dump the raw content to. This
 *          directory will be created.
 * @param segmentRootDir a directory containing one or more segments.
 * @param linkdb Path to linkdb.
 * @param gzip a boolean flag indicating whether the CBOR content should also
 *          be gzipped.
 * @param epochFilename if {@code true}, output files will be named using the epoch time (in milliseconds).
 * @param extension a file extension to use with output documents.
 * @throws Exception if any exception occurs.
 */
public void dump(File outputDir, File segmentRootDir, File linkdb, boolean gzip, String[] mimeTypes,
    boolean epochFilename, String extension, boolean warc) throws Exception {
  if (gzip) {
    LOG.info("Gzipping CBOR data has been skipped");
  }
  // total file counts
  Map<String, Integer> typeCounts = new HashMap<>();
  // filtered file counters
  Map<String, Integer> filteredCounts = new HashMap<>();

  Configuration nutchConfig = NutchConfiguration.create();
  Path segmentRootPath = new Path(segmentRootDir.toString());
  FileSystem fs = segmentRootPath.getFileSystem(nutchConfig);

  // get all paths
  List<Path> parts = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> files = fs.listFiles(segmentRootPath, true);
  String partPattern = ".*" + File.separator + Content.DIR_NAME + File.separator + "part-[0-9]{5}"
      + File.separator + "data";
  while (files.hasNext()) {
    LocatedFileStatus next = files.next();
    if (next.isFile()) {
      Path path = next.getPath();
      if (path.toString().matches(partPattern)) {
        parts.add(path);
      }
    }
  }

  LinkDbReader linkDbReader = null;
  if (linkdb != null) {
    linkDbReader = new LinkDbReader(nutchConfig, new Path(linkdb.toString()));
  }
  if (parts == null || parts.size() == 0) {
    LOG.error("No segment directories found in {} ", segmentRootDir.getAbsolutePath());
    System.exit(1);
  }
  LOG.info("Found {} segment parts", parts.size());

  if (gzip && !warc) {
    fileList = new ArrayList<>();
    constructNewStream(outputDir);
  }

  for (Path segmentPart : parts) {
    LOG.info("Processing segment Part : [ {} ]", segmentPart);
    try {
      SequenceFile.Reader reader = new SequenceFile.Reader(nutchConfig,
          SequenceFile.Reader.file(segmentPart));

      Writable key = (Writable) reader.getKeyClass().getConstructor().newInstance();

      Content content = null;
      while (reader.next(key)) {
        content = new Content();
        reader.getCurrentValue(content);
        Metadata metadata = content.getMetadata();
        String url = key.toString();
        String baseName = FilenameUtils.getBaseName(url);
        String extensionName = FilenameUtils.getExtension(url);

        if (!extension.isEmpty()) {
          extensionName = extension;
        } else if ((extensionName == null) || extensionName.isEmpty()) {
          extensionName = "html";
        }

        String outputFullPath = null;
        String outputRelativePath = null;
        String filename = null;
        String timestamp = null;
        String reverseKey = null;

        if (epochFilename || config.getReverseKey()) {
          try {
            long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z")
                .parse(getDate(metadata.get("Date"))).getTime();
            timestamp = String.valueOf(epoch);
          } catch (ParseException pe) {
            LOG.warn(pe.getMessage());
          }

          reverseKey = reverseUrl(url);
          config.setReverseKeyValue(
              reverseKey.replace("/", "_") + "_" + DigestUtils.sha1Hex(url) + "_" + timestamp);
        }

        if (!warc) {
          if (epochFilename) {
            outputFullPath = DumpFileUtil.createFileNameFromUrl(outputDir.getAbsolutePath(), reverseKey,
                url, timestamp, extensionName, !gzip);
            outputRelativePath = outputFullPath.substring(0,
                outputFullPath.lastIndexOf(File.separator) - 1);
            filename = content.getMetadata().get(Metadata.DATE) + "." + extensionName;
          } else {
            String md5Ofurl = DumpFileUtil.getUrlMD5(url);
            String fullDir = DumpFileUtil.createTwoLevelsDirectory(outputDir.getAbsolutePath(), md5Ofurl,
                !gzip);
            filename = DumpFileUtil.createFileName(md5Ofurl, baseName, extensionName);
            outputFullPath = String.format("%s/%s", fullDir, filename);

            String[] fullPathLevels = fullDir.split(Pattern.quote(File.separator));
            String firstLevelDirName = fullPathLevels[fullPathLevels.length - 2];
            String secondLevelDirName = fullPathLevels[fullPathLevels.length - 1];
            outputRelativePath = firstLevelDirName + secondLevelDirName;
          }
        }

        // Encode all filetypes if no mimetypes have been given
        Boolean filter = (mimeTypes == null);

        String jsonData = "";
        try {
          String mimeType = new Tika().detect(content.getContent());
          // Maps file to JSON-based structure

          Set<String> inUrls = null; // there may be duplicates, so using set
          if (linkDbReader != null) {
            Inlinks inlinks = linkDbReader.getInlinks((Text) key);
            if (inlinks != null) {
              Iterator<Inlink> iterator = inlinks.iterator();
              inUrls = new LinkedHashSet<>();
              while (inUrls.size() <= MAX_INLINKS && iterator.hasNext()) {
                inUrls.add(iterator.next().getFromUrl());
              }
            }
          }
          // TODO: Make this Jackson Format implementation reusable
          try (CommonCrawlFormat format = CommonCrawlFormatFactory
              .getCommonCrawlFormat(warc ? "WARC" : "JACKSON", nutchConfig, config)) {
            if (inUrls != null) {
              format.setInLinks(new ArrayList<>(inUrls));
            }
            jsonData = format.getJsonData(url, content, metadata);
          }

          collectStats(typeCounts, mimeType);
          // collects statistics for the given mimetypes
          if ((mimeType != null) && (mimeTypes != null) && Arrays.asList(mimeTypes).contains(mimeType)) {
            collectStats(filteredCounts, mimeType);
            filter = true;
          }
        } catch (IOException ioe) {
          LOG.error("Fatal error in creating JSON data: " + ioe.getMessage());
          return;
        }

        if (!warc) {
          if (filter) {
            byte[] byteData = serializeCBORData(jsonData);

            if (!gzip) {
              File outputFile = new File(outputFullPath);
              if (outputFile.exists()) {
                LOG.info("Skipping writing: [" + outputFullPath + "]: file already exists");
              } else {
                LOG.info("Writing: [" + outputFullPath + "]");
                IOUtils.copy(new ByteArrayInputStream(byteData), new FileOutputStream(outputFile));
              }
            } else {
              if (fileList.contains(outputFullPath)) {
                LOG.info("Skipping compressing: [" + outputFullPath + "]: file already exists");
              } else {
                fileList.add(outputFullPath);
                LOG.info("Compressing: [" + outputFullPath + "]");
                // TarArchiveEntry tarEntry = new TarArchiveEntry(firstLevelDirName + File.separator + secondLevelDirName + File.separator + filename);
                TarArchiveEntry tarEntry = new TarArchiveEntry(
                    outputRelativePath + File.separator + filename);
                tarEntry.setSize(byteData.length);
                tarOutput.putArchiveEntry(tarEntry);
                tarOutput.write(byteData);
                tarOutput.closeArchiveEntry();
              }
            }
          }
        }
      }
      reader.close();
    } catch (Exception e) {
      LOG.warn("SKIPPED: {} Because : {}", segmentPart, e.getMessage());
    } finally {
      fs.close();
    }
  }

  if (gzip && !warc) {
    closeStream();
  }

  if (!typeCounts.isEmpty()) {
    LOG.info("CommonsCrawlDataDumper File Stats: "
        + DumpFileUtil.displayFileTypes(typeCounts, filteredCounts));
  }
}
From source file:org.apache.nutch.tools.DmozParser.java
License:Apache License
/**
 * Command-line access. User may add URLs via a flat text file or the
 * structured DMOZ file. By default, we ignore Adult material (as categorized
 * by DMOZ).
 */
public static void main(String argv[]) throws Exception {
  if (argv.length < 1) {
    System.err.println(
        "Usage: DmozParser <dmoz_file> [-subset <subsetDenominator>] [-includeAdultMaterial] [-skew skew] [-snippet] [-topicFile <topic list file>] [-topic <topic> [-topic <topic> [...]]]");
    return;
  }

  //
  // Parse the command line, figure out what kind of
  // URL file we need to load
  //
  int subsetDenom = 1;
  int skew = 0;
  String dmozFile = argv[0];
  boolean includeAdult = false;
  boolean snippet = false;
  Pattern topicPattern = null;
  Vector<String> topics = new Vector<String>();

  Configuration conf = NutchConfiguration.create();
  store = StorageUtils.createWebStore(conf, String.class, WebPage.class);
  FileSystem fs = FileSystem.get(conf);

  try {
    for (int i = 1; i < argv.length; i++) {
      if ("-includeAdultMaterial".equals(argv[i])) {
        includeAdult = true;
      } else if ("-subset".equals(argv[i])) {
        subsetDenom = Integer.parseInt(argv[i + 1]);
        i++;
      } else if ("-topic".equals(argv[i])) {
        topics.addElement(argv[i + 1]);
        i++;
      } else if ("-topicFile".equals(argv[i])) {
        addTopicsFromFile(argv[i + 1], topics);
        i++;
      } else if ("-skew".equals(argv[i])) {
        skew = Integer.parseInt(argv[i + 1]);
        i++;
      } else if ("-snippet".equals(argv[i])) {
        snippet = true;
      }
    }

    DmozParser parser = new DmozParser();

    if (!topics.isEmpty()) {
      String regExp = new String("^(");
      int j = 0;
      for (; j < topics.size() - 1; ++j) {
        regExp = regExp.concat(topics.get(j));
        regExp = regExp.concat("|");
      }
      regExp = regExp.concat(topics.get(j));
      regExp = regExp.concat(").*");
      LOG.info("Topic selection pattern = " + regExp);
      topicPattern = Pattern.compile(regExp);
    }

    parser.parseDmozFile(new File(dmozFile), subsetDenom, includeAdult, skew, topicPattern, snippet);
  } finally {
    fs.close();
  }
}
From source file:org.apache.oozie.command.XLogPurgeXCommand.java
License:Apache License
@Override
protected Void execute() throws CommandException {
  XLogCopyService xls = Services.get().get(XLogCopyService.class);
  String hdfsDir = xls.getConfHdfsLogDir();

  XLog.Info.get().clear();
  XLog log = XLog.getLog(getClass());

  HadoopAccessorService has = Services.get().get(HadoopAccessorService.class);
  URI uri = new Path(hdfsDir).toUri();
  Configuration fsConf = has.createJobConf(uri.getAuthority());
  FileSystem fs = null;

  try {
    fs = has.createFileSystem(System.getProperty("user.name"), uri, fsConf);
  } catch (Exception ex) {
    log.error("user has to be specified to access hdfs",
        new HadoopAccessorException(ErrorCode.E0902, "user has to be specified to access FileSystem"));
  }

  deleteJobLogs(fs, hdfsDir, wfList);
  deleteJobLogs(fs, hdfsDir, coordList);
  deleteJobLogs(fs, hdfsDir, bundleList);

  try {
    fs.close();
  } catch (IOException ex) {
    LOG.error("cannot close filesystem");
  }

  return null;
}
From source file:org.apache.pirk.test.distributed.DistributedTestDriver.java
License:Apache License
/**
 * Delete all necessary inputs, clean up
 */
public static void cleanup(FileSystem fs, String dataSchemasProp, String querySchemasProp, String stopListProp)
    throws Exception {
  Inputs.deleteESInput();
  fs.close();

  SystemConfiguration.setProperty("pir.stopListFile", stopListProp);

  // Force the query and data schemas to load their original values
  if (!dataSchemasProp.equals("none")) {
    DataSchemaLoader.initialize();
  }
  if (!querySchemasProp.equals("none")) {
    QuerySchemaLoader.initialize();
  }
}
From source file:org.apache.ranger.plugin.store.file.BaseFileStore.java
License:Apache License
protected void close(FileSystem fs) {
  if (fs != null) {
    try {
      fs.close();
    } catch (IOException excp) {
      // ignore
    }
  }
}
From source file:org.apache.ranger.services.hdfs.HDFSRangerTest.java
License:Apache License
@org.junit.Test
public void readTest() throws Exception {
  FileSystem fileSystem = hdfsCluster.getFileSystem();

  // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
  final Path file = new Path("/tmp/tmpdir/data-file2");
  FSDataOutputStream out = fileSystem.create(file);
  for (int i = 0; i < 1024; ++i) {
    out.write(("data" + i + "\n").getBytes("UTF-8"));
    out.flush();
  }
  out.close();

  // Change permissions to read-only
  fileSystem.setPermission(file, new FsPermission(FsAction.READ, FsAction.NONE, FsAction.NONE));

  // Now try to read the file as "bob" - this should be allowed (by the policy - user)
  UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[] {});
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // Read the file
      FSDataInputStream in = fs.open(file);
      ByteArrayOutputStream output = new ByteArrayOutputStream();
      IOUtils.copy(in, output);
      String content = new String(output.toByteArray());
      Assert.assertTrue(content.startsWith("data0"));

      fs.close();
      return null;
    }
  });

  // Now try to read the file as "alice" - this should be allowed (by the policy - group)
  ugi = UserGroupInformation.createUserForTesting("alice", new String[] { "IT" });
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // Read the file
      FSDataInputStream in = fs.open(file);
      ByteArrayOutputStream output = new ByteArrayOutputStream();
      IOUtils.copy(in, output);
      String content = new String(output.toByteArray());
      Assert.assertTrue(content.startsWith("data0"));

      fs.close();
      return null;
    }
  });

  // Now try to read the file as unknown user "eve" - this should not be allowed
  ugi = UserGroupInformation.createUserForTesting("eve", new String[] {});
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // Read the file
      try {
        fs.open(file);
        Assert.fail("Failure expected on an incorrect permission");
      } catch (RemoteException ex) {
        // expected
        Assert.assertTrue(RangerAccessControlException.class.getName().equals(ex.getClassName()));
      }

      fs.close();
      return null;
    }
  });
}
From source file:org.apache.ranger.services.hdfs.HDFSRangerTest.java
License:Apache License
@org.junit.Test
public void writeTest() throws Exception {
  FileSystem fileSystem = hdfsCluster.getFileSystem();

  // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
  final Path file = new Path("/tmp/tmpdir2/data-file3");
  FSDataOutputStream out = fileSystem.create(file);
  for (int i = 0; i < 1024; ++i) {
    out.write(("data" + i + "\n").getBytes("UTF-8"));
    out.flush();
  }
  out.close();

  // Now try to write to the file as "bob" - this should be allowed (by the policy - user)
  UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[] {});
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // Write to the file
      fs.append(file);

      fs.close();
      return null;
    }
  });

  // Now try to write to the file as "alice" - this should be allowed (by the policy - group)
  ugi = UserGroupInformation.createUserForTesting("alice", new String[] { "IT" });
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // Write to the file
      fs.append(file);

      fs.close();
      return null;
    }
  });

  // Now try to read the file as unknown user "eve" - this should not be allowed
  ugi = UserGroupInformation.createUserForTesting("eve", new String[] {});
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // Write to the file
      try {
        fs.append(file);
        Assert.fail("Failure expected on an incorrect permission");
      } catch (RemoteException ex) {
        // expected
        Assert.assertTrue(RangerAccessControlException.class.getName().equals(ex.getClassName()));
      }

      fs.close();
      return null;
    }
  });
}
From source file:org.apache.ranger.services.hdfs.HDFSRangerTest.java
License:Apache License
@org.junit.Test
public void executeTest() throws Exception {
  FileSystem fileSystem = hdfsCluster.getFileSystem();

  // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
  final Path file = new Path("/tmp/tmpdir3/data-file2");
  FSDataOutputStream out = fileSystem.create(file);
  for (int i = 0; i < 1024; ++i) {
    out.write(("data" + i + "\n").getBytes("UTF-8"));
    out.flush();
  }
  out.close();

  // Change permissions to read-only
  fileSystem.setPermission(file, new FsPermission(FsAction.READ, FsAction.NONE, FsAction.NONE));

  // Change the parent directory permissions to be execute only for the owner
  Path parentDir = new Path("/tmp/tmpdir3");
  fileSystem.setPermission(parentDir, new FsPermission(FsAction.EXECUTE, FsAction.NONE, FsAction.NONE));

  // Try to read the directory as "bob" - this should be allowed (by the policy - user)
  UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[] {});
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
      Assert.assertTrue(iter.hasNext());

      fs.close();
      return null;
    }
  });

  // Try to read the directory as "alice" - this should be allowed (by the policy - group)
  ugi = UserGroupInformation.createUserForTesting("alice", new String[] { "IT" });
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
      Assert.assertTrue(iter.hasNext());

      fs.close();
      return null;
    }
  });

  // Now try to read the directory as unknown user "eve" - this should not be allowed
  ugi = UserGroupInformation.createUserForTesting("eve", new String[] {});
  ugi.doAs(new PrivilegedExceptionAction<Void>() {

    public Void run() throws Exception {
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", defaultFs);

      FileSystem fs = FileSystem.get(conf);

      // List the directory
      try {
        RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
        Assert.assertTrue(iter.hasNext());
        Assert.fail("Failure expected on an incorrect permission");
      } catch (RemoteException ex) {
        // expected
        Assert.assertTrue(RangerAccessControlException.class.getName().equals(ex.getClassName()));
      }

      fs.close();
      return null;
    }
  });
}
From source file:org.apache.reef.runtime.mesos.evaluator.REEFExecutor.java
License:Apache License
public final void onEvaluatorLaunch(final EvaluatorLaunch evaluatorLaunch) {
  LOG.log(Level.INFO, "Launch!!!! {0}", evaluatorLaunch.toString());
  assert (evaluatorLaunch.getIdentifier().toString().equals(this.mesosExecutorId));

  final ExecutorService evaluatorLaunchExecutorService = Executors.newSingleThreadExecutor();
  evaluatorLaunchExecutorService.submit(new Thread() {
    public void run() {
      try {
        final List<String> command = Arrays.asList(evaluatorLaunch.getCommand().toString().split(" "));
        LOG.log(Level.INFO, "Command!!!! {0}", command);

        final FileSystem fileSystem = FileSystem.get(new Configuration());
        final Path hdfsFolder = new Path(fileSystem.getUri() + "/" + mesosExecutorId);
        final File localFolder = new File(fileNames.getREEFFolderName(), fileNames.getLocalFolderName());
        FileUtil.copy(fileSystem, hdfsFolder, localFolder, true, new Configuration());

        evaluatorProcess = new ProcessBuilder().command(command)
            .redirectError(new File(fileNames.getEvaluatorStderrFileName()))
            .redirectOutput(new File(fileNames.getEvaluatorStdoutFileName())).start();
        evaluatorProcessExitValue = evaluatorProcess.waitFor();

        fileSystem.close();
      } catch (IOException | InterruptedException e) {
        throw new RuntimeException(e);
      }
    }
  });
  evaluatorLaunchExecutorService.shutdown();
}