Example usage for org.apache.hadoop.fs FileSystem isDirectory

Introduction

This page collects usage examples for the org.apache.hadoop.fs FileSystem method isDirectory.

Prototype

@Deprecated
public boolean isDirectory(Path f) throws IOException 

Document

True iff the named path is a directory. Note that the method is deprecated; the Hadoop javadoc directs callers to getFileStatus(Path) instead.
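A minimal sketch of the non-deprecated equivalent, using a helper class and method name of our own (FsPaths#isDir): like isDirectory(Path), it returns false for a nonexistent path instead of propagating the FileNotFoundException.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class FsPaths {

    private FsPaths() {
    }

    // Non-deprecated equivalent of FileSystem#isDirectory(Path).
    public static boolean isDir(FileSystem fs, Path path) throws IOException {
        try {
            FileStatus status = fs.getFileStatus(path);
            return status.isDirectory();
        } catch (FileNotFoundException notFound) {
            // isDirectory(Path) likewise reports false when the path is absent.
            return false;
        }
    }
}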

Usage

From source file: org.apache.nutch.admin.management.FileUtil.java

License: Apache License

public static long size(Path folder, Configuration configuration) throws IOException {

    FileSystem fileSystem = FileSystem.get(configuration);
    // Path[] files = fileSystem.listPaths(folder);
    FileStatus[] filestatuses = fileSystem.listStatus(folder);
    int len = filestatuses.length;
    Path[] files = new Path[len];
    for (int i = 0; i < len; i++) {
        files[i] = filestatuses[i].getPath();
    }

    long size = 0;
    for (int i = 0; files != null && i < files.length; i++) {
        Path file = files[i];
        if (fileSystem.isDirectory(file)) {
            size = size + size(file, configuration);
        }
        size = size + fileSystem.getLength(file);
    }
    return size + fileSystem.getLength(folder);
}
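As a design note, the recursion above re-implements an aggregate that Hadoop already provides: FileSystem#getContentSummary(Path) sums the length of every file beneath a path in a single call. A minimal sketch of the same computation, with a method name (totalBytes) of our own:

public static long totalBytes(Path folder, Configuration configuration) throws IOException {
    FileSystem fileSystem = FileSystem.get(configuration);
    // ContentSummary#getLength() aggregates the lengths of all files under the path.
    return fileSystem.getContentSummary(folder).getLength();
}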

From source file: org.apache.nutch.admin.management.FileUtil.java

License: Apache License

/**
 * @return true if index.done exists
 */
public static boolean isIndexed(Path segment, Configuration configuration) throws IOException {

    FileSystem system = FileSystem.get(configuration);
    // Path[] files = system.listPaths(new Path(segment, "index"));
    FileStatus[] filestatuses = system.listStatus(new Path(segment, "index"));
    int len = filestatuses.length;
    Path[] files = new Path[len];
    for (int i = 0; i < len; i++) {
        files[i] = filestatuses[i].getPath();
    }

    boolean ret = false;
    for (int i = 0; i < files.length; i++) {
        //e.g. file = part-00000
        Path file = files[i];
        if (system.isDirectory(file) && file.getName().startsWith("part-")) {
            ret = exists(configuration, file, "index.done");
            if (!ret) {
                break;
            }
        }
    }
    return ret;
}

From source file: org.apache.nutch.indexer.FsDirectory.java

License: Apache License

public FsDirectory(FileSystem fs, Path directory, boolean create, Configuration conf) throws IOException {

    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
        create();
    }

    if (!fs.isDirectory(directory))
        throw new IOException(directory + " not a directory");
}
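Because isDirectory(Path) returns false for a path that does not exist, this single check rejects both a missing directory and a plain file at that location.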

From source file: org.apache.oozie.action.hadoop.TestHive2ActionExecutor.java

License: Apache License

@SuppressWarnings("deprecation")
public void testHive2Action() throws Exception {
    setupHiveServer2();
    Path inputDir = new Path(getFsTestCaseDir(), INPUT_DIRNAME);
    Path outputDir = new Path(getFsTestCaseDir(), OUTPUT_DIRNAME);
    FileSystem fs = getFileSystem();

    {
        String query = getHive2Script(inputDir.toString(), outputDir.toString());
        Writer dataWriter = new OutputStreamWriter(fs.create(new Path(inputDir, DATA_FILENAME)));
        dataWriter.write(SAMPLE_DATA_TEXT);
        dataWriter.close();
        Context context = createContext(getQueryActionXml(query));
        final RunningJob launcherJob = submitAction(context,
                Namespace.getNamespace("uri:oozie:hive2-action:0.2"));
        String launcherId = context.getAction().getExternalId();
        waitFor(200 * 1000, new Predicate() {
            @Override
            public boolean evaluate() throws Exception {
                return launcherJob.isComplete();
            }
        });
        assertTrue(launcherJob.isSuccessful());
        Configuration conf = new XConfiguration();
        conf.set("user.name", getTestUser());
        Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(),
                context.getActionDir(), conf);
        assertFalse(LauncherMapperHelper.hasIdSwap(actionData));
        Hive2ActionExecutor ae = new Hive2ActionExecutor();
        ae.check(context, context.getAction());
        assertTrue(launcherId.equals(context.getAction().getExternalId()));
        assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
        ae.end(context, context.getAction());
        assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());
        assertNotNull(context.getAction().getData());
        Properties outputData = new Properties();
        outputData.load(new StringReader(context.getAction().getData()));
        assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
        assertEquals(outputData.get(LauncherMain.HADOOP_JOBS), context.getExternalChildIDs());
        assertTrue(fs.exists(outputDir));
        assertTrue(fs.isDirectory(outputDir));
    }
    {
        Path script = new Path(getAppPath(), HIVE_SCRIPT_FILENAME);
        Writer scriptWriter = new OutputStreamWriter(fs.create(script));
        scriptWriter.write(getHive2Script(inputDir.toString(), outputDir.toString()));
        scriptWriter.close();

        Writer dataWriter = new OutputStreamWriter(fs.create(new Path(inputDir, DATA_FILENAME)));
        dataWriter.write(SAMPLE_DATA_TEXT);
        dataWriter.close();
        Context context = createContext(getScriptActionXml());
        final RunningJob launcherJob = submitAction(context,
                Namespace.getNamespace("uri:oozie:hive2-action:0.1"));
        String launcherId = context.getAction().getExternalId();
        waitFor(200 * 1000, new Predicate() {
            @Override
            public boolean evaluate() throws Exception {
                return launcherJob.isComplete();
            }
        });
        assertTrue(launcherJob.isSuccessful());
        Configuration conf = new XConfiguration();
        conf.set("user.name", getTestUser());
        Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(),
                context.getActionDir(), conf);
        assertFalse(LauncherMapperHelper.hasIdSwap(actionData));
        Hive2ActionExecutor ae = new Hive2ActionExecutor();
        ae.check(context, context.getAction());
        assertTrue(launcherId.equals(context.getAction().getExternalId()));
        assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
        ae.end(context, context.getAction());
        assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());
        assertNotNull(context.getAction().getData());
        Properties outputData = new Properties();
        outputData.load(new StringReader(context.getAction().getData()));
        assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
        assertEquals(outputData.get(LauncherMain.HADOOP_JOBS), context.getExternalChildIDs());
        assertTrue(fs.exists(outputDir));
        assertTrue(fs.isDirectory(outputDir));
    }
}

From source file: org.apache.oozie.action.hadoop.TestHiveActionExecutor.java

License: Apache License

public void testHiveAction() throws Exception {
    Path inputDir = new Path(getFsTestCaseDir(), INPUT_DIRNAME);
    Path outputDir = new Path(getFsTestCaseDir(), OUTPUT_DIRNAME);
    String hiveScript = getHiveScript(inputDir.toString(), outputDir.toString());
    FileSystem fs = getFileSystem();

    {
        Path script = new Path(getAppPath(), HIVE_SCRIPT_FILENAME);
        Writer scriptWriter = new OutputStreamWriter(fs.create(script));
        scriptWriter.write(hiveScript);
        scriptWriter.close();
        Writer dataWriter = new OutputStreamWriter(fs.create(new Path(inputDir, DATA_FILENAME)));
        dataWriter.write(SAMPLE_DATA_TEXT);
        dataWriter.close();
        Context context = createContext(getActionScriptXml());
        Namespace ns = Namespace.getNamespace("uri:oozie:hive-action:0.2");
        final RunningJob launcherJob = submitAction(context, ns);
        String launcherId = context.getAction().getExternalId();
        waitFor(200 * 1000, new Predicate() {
            public boolean evaluate() throws Exception {
                return launcherJob.isComplete();
            }
        });
        assertTrue(launcherJob.isSuccessful());
        Configuration conf = new XConfiguration();
        conf.set("user.name", getTestUser());
        Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(),
                context.getActionDir(), conf);
        assertFalse(LauncherMapperHelper.hasIdSwap(actionData));
        HiveActionExecutor ae = new HiveActionExecutor();
        ae.check(context, context.getAction());
        assertTrue(launcherId.equals(context.getAction().getExternalId()));
        assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
        assertNotNull(context.getAction().getData());
        ae.end(context, context.getAction());
        assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());
        assertNotNull(context.getAction().getData());
        Properties outputData = new Properties();
        outputData.load(new StringReader(context.getAction().getData()));
        assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
        assertEquals(outputData.get(LauncherMain.HADOOP_JOBS), context.getExternalChildIDs());
        //while this works in a real cluster, it does not with miniMR
        //assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 0);
        //assertTrue(!actionData.get(LauncherMapper.ACTION_DATA_EXTERNAL_CHILD_IDS).isEmpty());
        assertTrue(fs.exists(outputDir));
        assertTrue(fs.isDirectory(outputDir));
    }
    {
        Context context = createContext(getActionQueryXml(hiveScript));
        Namespace ns = Namespace.getNamespace("uri:oozie:hive-action:0.6");
        final RunningJob launcherJob = submitAction(context, ns);
        String launcherId = context.getAction().getExternalId();
        waitFor(200 * 1000, new Predicate() {
            public boolean evaluate() throws Exception {
                return launcherJob.isComplete();
            }
        });
        assertTrue(launcherJob.isSuccessful());
        Configuration conf = new XConfiguration();
        conf.set("user.name", getTestUser());
        Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(),
                context.getActionDir(), conf);
        assertFalse(LauncherMapperHelper.hasIdSwap(actionData));
        HiveActionExecutor ae = new HiveActionExecutor();
        ae.check(context, context.getAction());
        assertTrue(launcherId.equals(context.getAction().getExternalId()));
        assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
        assertNotNull(context.getAction().getData());
        ae.end(context, context.getAction());
        assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());
        assertNotNull(context.getAction().getData());
        Properties outputData = new Properties();
        outputData.load(new StringReader(context.getAction().getData()));
        assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
        assertEquals(outputData.get(LauncherMain.HADOOP_JOBS), context.getExternalChildIDs());
        //while this works in a real cluster, it does not with miniMR
        //assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 0);
        //assertTrue(!actionData.get(LauncherMapper.ACTION_DATA_EXTERNAL_CHILD_IDS).isEmpty());
        assertTrue(fs.exists(outputDir));
        assertTrue(fs.isDirectory(outputDir));
    }
}

From source file: org.apache.parquet.tools.command.ShowSchemaCommand.java

License: Apache License

@Override
public void execute(CommandLine options) throws Exception {
    super.execute(options);

    String[] args = options.getArgs();
    String input = args[0];

    Configuration conf = new Configuration();
    ParquetMetadata metaData;

    Path path = new Path(input);
    FileSystem fs = path.getFileSystem(conf);
    Path file;
    if (fs.isDirectory(path)) {
        FileStatus[] statuses = fs.listStatus(path, HiddenFileFilter.INSTANCE);
        if (statuses.length == 0) {
            throw new RuntimeException("Directory " + path.toString() + " is empty");
        }
        file = statuses[0].getPath();
    } else {
        file = path;
    }
    metaData = ParquetFileReader.readFooter(conf, file, NO_FILTER);
    MessageType schema = metaData.getFileMetaData().getSchema();

    Main.out.println(schema);
    if (options.hasOption('d')) {
        PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter().build();
        MetadataUtils.showDetails(out, metaData);
    }
}

From source file: org.apache.ranger.plugin.store.file.BaseFileStore.java

License: Apache License

protected <T> List<T> loadFromDir(Path dirPath, final String filePrefix, Class<T> cls) throws Exception {
    if (LOG.isDebugEnabled()) {
        LOG.debug("==> BaseFileStore.loadFromDir()");
    }

    List<T> ret = new ArrayList<T>();

    try {
        FileSystem fileSystem = getFileSystem(dirPath);

        if (fileSystem.exists(dirPath) && fileSystem.isDirectory(dirPath)) {
            PathFilter filter = new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    return path.getName().startsWith(filePrefix) && path.getName().endsWith(FILE_SUFFIX_JSON);
                }
            };

            FileStatus[] sdFiles = fileSystem.listStatus(dirPath, filter);

            if (sdFiles != null) {
                for (FileStatus sdFile : sdFiles) {
                    T obj = loadFromFile(sdFile.getPath(), cls);

                    if (obj != null) {
                        ret.add(obj);
                    }
                }
            }
        } else {
            LOG.error(dirPath + ": does not exist or is not a directory");
        }
    } catch (IOException excp) {
        LOG.warn("error loading service-def in directory " + dirPath, excp);
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("<== BaseFileStore.loadFromDir(): count=" + (ret == null ? 0 : ret.size()));
    }

    return ret;
}

From source file: org.apache.ranger.plugin.store.file.FileStoreUtil.java

License: Apache License

public <T> List<T> loadFromDir(Path dirPath, final String filePrefix, Class<T> cls) throws Exception {
    if (LOG.isDebugEnabled()) {
        LOG.debug("==> FileStoreUtil.loadFromDir()");
    }

    List<T> ret = new ArrayList<T>();

    try {
        FileSystem fileSystem = getFileSystem(dirPath);

        if (fileSystem.exists(dirPath) && fileSystem.isDirectory(dirPath)) {
            PathFilter filter = new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    return path.getName().startsWith(filePrefix) && path.getName().endsWith(FILE_SUFFIX_JSON);
                }
            };

            FileStatus[] sdFiles = fileSystem.listStatus(dirPath, filter);

            if (sdFiles != null) {
                for (FileStatus sdFile : sdFiles) {
                    T obj = loadFromFile(sdFile.getPath(), cls);

                    if (obj != null) {
                        ret.add(obj);
                    }
                }
            }
        } else {
            LOG.error(dirPath + ": does not exist or is not a directory");
        }
    } catch (IOException excp) {
        LOG.warn("error loading service-def in directory " + dirPath, excp);
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("<== FileStoreUtil.loadFromDir(): count=" + (ret == null ? 0 : ret.size()));
    }

    return ret;
}

From source file: org.apache.rya.reasoning.mr.AbstractReasoningTool.java

License: Apache License

/**
 * Set up the MapReduce job to use file inputs from previous iterations.
 * @param   fileMapper  Mapper class for generated triples
 * @param   incMapper   Mapper class for generated inconsistencies
 * @param   filter      Exclude facts that aren't helpful for inference
 */
protected void configureFileInput(Class<? extends Mapper<Fact, NullWritable, ?, ?>> fileMapper,
        Class<? extends Mapper<Derivation, NullWritable, ?, ?>> incMapper, final boolean filter)
        throws IOException {
    // Set up file input for all iterations up to this one
    Configuration conf = job.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Path inputPath;
    int iteration = MRReasoningUtils.getCurrentIteration(conf);
    // Set min/max split, if not already provided:
    long blocksize = Long.parseLong(conf.get("dfs.blocksize"));
    String minSplitProp = "mapreduce.input.fileinputformat.split.minsize";
    String maxSplitProp = "mapreduce.input.fileinputformat.split.maxsize";
    conf.set(minSplitProp, conf.get(minSplitProp, String.valueOf(blocksize)));
    conf.set(maxSplitProp, conf.get(maxSplitProp, String.valueOf(blocksize * 8)));
    for (int i = 1; i <= iteration; i++) {
        // Prefer cleaned output...
        inputPath = MRReasoningUtils.getOutputPath(conf, MRReasoningUtils.OUTPUT_BASE + i);
        // But if there isn't any, try intermediate data:
        if (!fs.isDirectory(inputPath)) {
            inputPath = MRReasoningUtils.getOutputPath(conf,
                    MRReasoningUtils.OUTPUT_BASE + i + MRReasoningUtils.TEMP_SUFFIX);
        }
        // And only proceed if we found one or the other.
        if (fs.isDirectory(inputPath)) {
            // Never include debug output. If filter is true, select only
            // intermediate and schema data, otherwise include everything.
            PathFilter f = new PathFilter() {
                public boolean accept(Path path) {
                    String s = path.getName();
                    if (s.startsWith(MRReasoningUtils.DEBUG_OUT)) {
                        return false;
                    } else {
                        return !filter || s.startsWith(MRReasoningUtils.INTERMEDIATE_OUT)
                                || s.startsWith(MRReasoningUtils.SCHEMA_OUT);
                    }
                }
            };
            for (FileStatus status : fs.listStatus(inputPath, f)) {
                if (status.getLen() > 0) {
                    Path p = status.getPath();
                    String s = p.getName();
                    if (s.startsWith(MRReasoningUtils.INCONSISTENT_OUT)) {
                        if (incMapper != null) {
                            MultipleInputs.addInputPath(job, p, CombineSequenceFileInputFormat.class,
                                    incMapper);
                        }
                    } else {
                        MultipleInputs.addInputPath(job, status.getPath(), CombineSequenceFileInputFormat.class,
                                fileMapper);
                    }
                }
            }
        }
    }
}
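The probing above relies on isDirectory(Path) returning false, rather than throwing, for a nonexistent path, which lets the method test for cleaned output and quietly fall back to the intermediate data.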

From source file: org.apache.rya.reasoning.mr.ConformanceTest.java

License: Apache License

/**
 * Verify that we can infer the correct triples or detect an inconsistency.
 * @param   conf    Specifies working directory, etc.
 * @param   test      Contains premise/conclusion graphs, will store result
 * @return  Return value of the MapReduce job
 */
int runTest(final Configuration conf, final String[] args, final OwlTest test) throws Exception {
    conf.setInt(MRReasoningUtils.STEP_PROP, 0);
    conf.setInt(MRReasoningUtils.SCHEMA_UPDATE_PROP, 0);
    conf.setBoolean(MRReasoningUtils.DEBUG_FLAG, true);
    conf.setBoolean(MRReasoningUtils.OUTPUT_FLAG, true);
    // Connect to MiniAccumulo and load the test
    final Repository repo = MRReasoningUtils.getRepository(conf);
    repo.initialize();
    final RepositoryConnection conn = repo.getConnection();
    conn.clear();
    conn.add(new StringReader(test.premise), "", RDFFormat.RDFXML);
    conn.close();
    repo.shutDown();
    // Run the reasoner
    final ReasoningDriver reasoner = new ReasoningDriver();
    final int result = ToolRunner.run(conf, reasoner, args);
    test.success = (result == 0);
    // Inconsistency test: successful if determined inconsistent
    if (test.types.contains(TEST_INCONSISTENCY)) {
        test.success = test.success && reasoner.hasInconsistencies();
    }
    // Consistency test: successful if determined consistent
    if (test.types.contains(TEST_CONSISTENCY)) {
        test.success = test.success && !reasoner.hasInconsistencies();
    }
    // Other types: we'll need to look at the inferred triples/schema
    if (test.types.contains(TEST_NONENTAILMENT) || test.types.contains(TEST_ENTAILMENT)) {
        System.out.println("Reading inferred triples...");
        // Read in the inferred triples from HDFS:
        final Schema schema = MRReasoningUtils.loadSchema(conf);
        final FileSystem fs = FileSystem.get(conf);
        final Path path = MRReasoningUtils.getOutputPath(conf, "final");
        final OutputCollector inferred = new OutputCollector();
        final NTriplesParser parser = new NTriplesParser();
        parser.setRDFHandler(inferred);
        if (fs.isDirectory(path)) {
            for (final FileStatus status : fs.listStatus(path)) {
                final String s = status.getPath().getName();
                if (s.startsWith(MRReasoningUtils.INCONSISTENT_OUT)
                        || s.startsWith(MRReasoningUtils.DEBUG_OUT)) {
                    continue;
                }
                final BufferedReader br = new BufferedReader(
                        new InputStreamReader(fs.open(status.getPath()), StandardCharsets.UTF_8));
                parser.parse(br, "");
                br.close();
            }
        }
        MRReasoningUtils.deleteIfExists(conf, "final");
        test.inferred.addAll(inferred.triples);
        // Entailment test: successful if expected triples were inferred
        if (test.types.contains(TEST_ENTAILMENT)) {
            // Check expected inferences against the inferred triples and
            // the schema reasoner
            for (final Statement st : test.expected) {
                final Fact fact = new Fact(st);
                if (!test.inferred.contains(st) && !triviallyTrue(fact.getTriple(), schema)
                        && !schema.containsTriple(fact.getTriple())) {
                    test.error.add(st);
                }
            }
        }
        // Non-entailment test: failure if non-expected triples inferred
        if (test.types.contains(TEST_NONENTAILMENT)) {
            for (final Statement st : test.unexpected) {
                final Fact fact = new Fact(st);
                if (test.inferred.contains(st) || schema.containsTriple(fact.getTriple())) {
                    test.error.add(st);
                }
            }
        }
        test.success = test.success && test.error.isEmpty();
    }
    conf.setBoolean(MRReasoningUtils.DEBUG_FLAG, false);
    MRReasoningUtils.clean(conf);
    return result;
}