List of usage examples for org.apache.hadoop.fs FileSystem isDirectory
@Deprecated public boolean isDirectory(Path f) throws IOException
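Since the overload above is deprecated, a minimal sketch of the equivalent non-deprecated check via getFileStatus(Path).isDirectory() may be useful before the examples below; the class name and the command-line path are illustrative only.

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class IsDirectoryExample {
        // Equivalent to the deprecated fs.isDirectory(path), but makes the
        // "path does not exist" case explicit instead of folding it into false.
        public static boolean isDir(FileSystem fs, Path path) throws IOException {
            if (!fs.exists(path)) {
                return false;                                 // absent path
            }
            return fs.getFileStatus(path).isDirectory();      // non-deprecated check
        }

        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(conf);
            Path p = new Path(args[0]);                       // hypothetical input path
            System.out.println(p + " is a directory: " + isDir(fs, p));
        }
    }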
From source file:org.apache.nutch.admin.management.FileUtil.java
License:Apache License
public static long size(Path folder, Configuration configuration) throws IOException {
    FileSystem fileSystem = FileSystem.get(configuration);
    // Path[] files = fileSystem.listPaths(folder);
    FileStatus[] filestatuses = fileSystem.listStatus(folder);
    int len = filestatuses.length;
    Path[] files = new Path[len];
    for (int i = 0; i < len; i++) {
        files[i] = filestatuses[i].getPath();
    }
    long size = 0;
    for (int i = 0; files != null && i < files.length; i++) {
        Path file = files[i];
        if (fileSystem.isDirectory(file)) {
            size = size + size(file, configuration);
        }
        size = size + fileSystem.getLength(file);
    }
    return size + fileSystem.getLength(folder);
}
From source file:org.apache.nutch.admin.management.FileUtil.java
License:Apache License
/**
 * @return true if index.done exists for every index part
 */
public static boolean isIndexed(Path segment, Configuration configuration) throws IOException {
    FileSystem system = FileSystem.get(configuration);
    // Path[] files = system.listPaths(new Path(segment, "index"));
    FileStatus[] filestatuses = system.listStatus(new Path(segment, "index"));
    int len = filestatuses.length;
    Path[] files = new Path[len];
    for (int i = 0; i < len; i++) {
        files[i] = filestatuses[i].getPath();
    }
    boolean ret = false;
    for (int i = 0; i < files.length; i++) {
        // e.g. file = part-00000
        Path file = files[i];
        if (system.isDirectory(file) && file.getName().startsWith("part-")) {
            ret = exists(configuration, file, "index.done");
            if (!ret) {
                break;
            }
        }
    }
    return ret;
}
From source file:org.apache.nutch.indexer.FsDirectory.java
License:Apache License
public FsDirectory(FileSystem fs, Path directory, boolean create, Configuration conf) throws IOException {
    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);
    if (create) {
        create();
    }
    if (!fs.isDirectory(directory))
        throw new IOException(directory + " not a directory");
}
From source file:org.apache.oozie.action.hadoop.TestHive2ActionExecutor.java
License:Apache License
@SuppressWarnings("deprecation") public void testHive2Action() throws Exception { setupHiveServer2();/*w w w . j av a 2 s . c o m*/ Path inputDir = new Path(getFsTestCaseDir(), INPUT_DIRNAME); Path outputDir = new Path(getFsTestCaseDir(), OUTPUT_DIRNAME); FileSystem fs = getFileSystem(); { String query = getHive2Script(inputDir.toString(), outputDir.toString()); Writer dataWriter = new OutputStreamWriter(fs.create(new Path(inputDir, DATA_FILENAME))); dataWriter.write(SAMPLE_DATA_TEXT); dataWriter.close(); Context context = createContext(getQueryActionXml(query)); final RunningJob launcherJob = submitAction(context, Namespace.getNamespace("uri:oozie:hive2-action:0.2")); String launcherId = context.getAction().getExternalId(); waitFor(200 * 1000, new Predicate() { @Override public boolean evaluate() throws Exception { return launcherJob.isComplete(); } }); assertTrue(launcherJob.isSuccessful()); Configuration conf = new XConfiguration(); conf.set("user.name", getTestUser()); Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf); assertFalse(LauncherMapperHelper.hasIdSwap(actionData)); Hive2ActionExecutor ae = new Hive2ActionExecutor(); ae.check(context, context.getAction()); assertTrue(launcherId.equals(context.getAction().getExternalId())); assertEquals("SUCCEEDED", context.getAction().getExternalStatus()); ae.end(context, context.getAction()); assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus()); assertNotNull(context.getAction().getData()); Properties outputData = new Properties(); outputData.load(new StringReader(context.getAction().getData())); assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS)); assertEquals(outputData.get(LauncherMain.HADOOP_JOBS), context.getExternalChildIDs()); assertTrue(fs.exists(outputDir)); assertTrue(fs.isDirectory(outputDir)); } { Path script = new Path(getAppPath(), HIVE_SCRIPT_FILENAME); Writer scriptWriter = new OutputStreamWriter(fs.create(script)); scriptWriter.write(getHive2Script(inputDir.toString(), outputDir.toString())); scriptWriter.close(); Writer dataWriter = new OutputStreamWriter(fs.create(new Path(inputDir, DATA_FILENAME))); dataWriter.write(SAMPLE_DATA_TEXT); dataWriter.close(); Context context = createContext(getScriptActionXml()); final RunningJob launcherJob = submitAction(context, Namespace.getNamespace("uri:oozie:hive2-action:0.1")); String launcherId = context.getAction().getExternalId(); waitFor(200 * 1000, new Predicate() { @Override public boolean evaluate() throws Exception { return launcherJob.isComplete(); } }); assertTrue(launcherJob.isSuccessful()); Configuration conf = new XConfiguration(); conf.set("user.name", getTestUser()); Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf); assertFalse(LauncherMapperHelper.hasIdSwap(actionData)); Hive2ActionExecutor ae = new Hive2ActionExecutor(); ae.check(context, context.getAction()); assertTrue(launcherId.equals(context.getAction().getExternalId())); assertEquals("SUCCEEDED", context.getAction().getExternalStatus()); ae.end(context, context.getAction()); assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus()); assertNotNull(context.getAction().getData()); Properties outputData = new Properties(); outputData.load(new StringReader(context.getAction().getData())); assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS)); assertEquals(outputData.get(LauncherMain.HADOOP_JOBS), context.getExternalChildIDs()); 
assertTrue(fs.exists(outputDir)); assertTrue(fs.isDirectory(outputDir)); } }
From source file:org.apache.oozie.action.hadoop.TestHiveActionExecutor.java
License:Apache License
public void testHiveAction() throws Exception {
    Path inputDir = new Path(getFsTestCaseDir(), INPUT_DIRNAME);
    Path outputDir = new Path(getFsTestCaseDir(), OUTPUT_DIRNAME);
    String hiveScript = getHiveScript(inputDir.toString(), outputDir.toString());
    FileSystem fs = getFileSystem();
    {
        Path script = new Path(getAppPath(), HIVE_SCRIPT_FILENAME);
        Writer scriptWriter = new OutputStreamWriter(fs.create(script));
        scriptWriter.write(hiveScript);
        scriptWriter.close();
        Writer dataWriter = new OutputStreamWriter(fs.create(new Path(inputDir, DATA_FILENAME)));
        dataWriter.write(SAMPLE_DATA_TEXT);
        dataWriter.close();
        Context context = createContext(getActionScriptXml());
        Namespace ns = Namespace.getNamespace("uri:oozie:hive-action:0.2");
        final RunningJob launcherJob = submitAction(context, ns);
        String launcherId = context.getAction().getExternalId();
        waitFor(200 * 1000, new Predicate() {
            public boolean evaluate() throws Exception {
                return launcherJob.isComplete();
            }
        });
        assertTrue(launcherJob.isSuccessful());
        Configuration conf = new XConfiguration();
        conf.set("user.name", getTestUser());
        Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(),
                context.getActionDir(), conf);
        assertFalse(LauncherMapperHelper.hasIdSwap(actionData));
        HiveActionExecutor ae = new HiveActionExecutor();
        ae.check(context, context.getAction());
        assertTrue(launcherId.equals(context.getAction().getExternalId()));
        assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
        assertNotNull(context.getAction().getData());
        ae.end(context, context.getAction());
        assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());
        assertNotNull(context.getAction().getData());
        Properties outputData = new Properties();
        outputData.load(new StringReader(context.getAction().getData()));
        assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
        assertEquals(outputData.get(LauncherMain.HADOOP_JOBS), context.getExternalChildIDs());
        // while this works in a real cluster, it does not with miniMR
        // assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 0);
        // assertTrue(!actionData.get(LauncherMapper.ACTION_DATA_EXTERNAL_CHILD_IDS).isEmpty());
        assertTrue(fs.exists(outputDir));
        assertTrue(fs.isDirectory(outputDir));
    }
    {
        Context context = createContext(getActionQueryXml(hiveScript));
        Namespace ns = Namespace.getNamespace("uri:oozie:hive-action:0.6");
        final RunningJob launcherJob = submitAction(context, ns);
        String launcherId = context.getAction().getExternalId();
        waitFor(200 * 1000, new Predicate() {
            public boolean evaluate() throws Exception {
                return launcherJob.isComplete();
            }
        });
        assertTrue(launcherJob.isSuccessful());
        Configuration conf = new XConfiguration();
        conf.set("user.name", getTestUser());
        Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(),
                context.getActionDir(), conf);
        assertFalse(LauncherMapperHelper.hasIdSwap(actionData));
        HiveActionExecutor ae = new HiveActionExecutor();
        ae.check(context, context.getAction());
        assertTrue(launcherId.equals(context.getAction().getExternalId()));
        assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
        assertNotNull(context.getAction().getData());
        ae.end(context, context.getAction());
        assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());
        assertNotNull(context.getAction().getData());
        Properties outputData = new Properties();
        outputData.load(new StringReader(context.getAction().getData()));
        assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
        assertEquals(outputData.get(LauncherMain.HADOOP_JOBS), context.getExternalChildIDs());
        // while this works in a real cluster, it does not with miniMR
        // assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 0);
        // assertTrue(!actionData.get(LauncherMapper.ACTION_DATA_EXTERNAL_CHILD_IDS).isEmpty());
        assertTrue(fs.exists(outputDir));
        assertTrue(fs.isDirectory(outputDir));
    }
}
From source file:org.apache.parquet.tools.command.ShowSchemaCommand.java
License:Apache License
@Override
public void execute(CommandLine options) throws Exception {
    super.execute(options);
    String[] args = options.getArgs();
    String input = args[0];
    Configuration conf = new Configuration();
    ParquetMetadata metaData;
    Path path = new Path(input);
    FileSystem fs = path.getFileSystem(conf);
    Path file;
    if (fs.isDirectory(path)) {
        FileStatus[] statuses = fs.listStatus(path, HiddenFileFilter.INSTANCE);
        if (statuses.length == 0) {
            throw new RuntimeException("Directory " + path.toString() + " is empty");
        }
        file = statuses[0].getPath();
    } else {
        file = path;
    }
    metaData = ParquetFileReader.readFooter(conf, file, NO_FILTER);
    MessageType schema = metaData.getFileMetaData().getSchema();
    Main.out.println(schema);
    if (options.hasOption('d')) {
        PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter().build();
        MetadataUtils.showDetails(out, metaData);
    }
}
From source file:org.apache.ranger.plugin.store.file.BaseFileStore.java
License:Apache License
protected <T> List<T> loadFromDir(Path dirPath, final String filePrefix, Class<T> cls) throws Exception {
    if (LOG.isDebugEnabled()) {
        LOG.debug("==> BaseFileStore.loadFromDir()");
    }
    List<T> ret = new ArrayList<T>();
    try {
        FileSystem fileSystem = getFileSystem(dirPath);
        if (fileSystem.exists(dirPath) && fileSystem.isDirectory(dirPath)) {
            PathFilter filter = new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    return path.getName().startsWith(filePrefix) && path.getName().endsWith(FILE_SUFFIX_JSON);
                }
            };
            FileStatus[] sdFiles = fileSystem.listStatus(dirPath, filter);
            if (sdFiles != null) {
                for (FileStatus sdFile : sdFiles) {
                    T obj = loadFromFile(sdFile.getPath(), cls);
                    if (obj != null) {
                        ret.add(obj);
                    }
                }
            }
        } else {
            LOG.error(dirPath + ": does not exists or not a directory");
        }
    } catch (IOException excp) {
        LOG.warn("error loading service-def in directory " + dirPath, excp);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("<== BaseFileStore.loadFromDir(): count=" + (ret == null ? 0 : ret.size()));
    }
    return ret;
}
From source file:org.apache.ranger.plugin.store.file.FileStoreUtil.java
License:Apache License
public <T> List<T> loadFromDir(Path dirPath, final String filePrefix, Class<T> cls) throws Exception {
    if (LOG.isDebugEnabled()) {
        LOG.debug("==> FileStoreUtil.loadFromDir()");
    }
    List<T> ret = new ArrayList<T>();
    try {
        FileSystem fileSystem = getFileSystem(dirPath);
        if (fileSystem.exists(dirPath) && fileSystem.isDirectory(dirPath)) {
            PathFilter filter = new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    return path.getName().startsWith(filePrefix) && path.getName().endsWith(FILE_SUFFIX_JSON);
                }
            };
            FileStatus[] sdFiles = fileSystem.listStatus(dirPath, filter);
            if (sdFiles != null) {
                for (FileStatus sdFile : sdFiles) {
                    T obj = loadFromFile(sdFile.getPath(), cls);
                    if (obj != null) {
                        ret.add(obj);
                    }
                }
            }
        } else {
            LOG.error(dirPath + ": does not exists or not a directory");
        }
    } catch (IOException excp) {
        LOG.warn("error loading service-def in directory " + dirPath, excp);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("<== FileStoreUtil.loadFromDir(): count=" + (ret == null ? 0 : ret.size()));
    }
    return ret;
}
From source file:org.apache.rya.reasoning.mr.AbstractReasoningTool.java
License:Apache License
/**
 * Set up the MapReduce job to use file inputs from previous iterations.
 * @param fileMapper Mapper class for generated triples
 * @param incMapper Mapper class for generated inconsistencies
 * @param filter Exclude facts that aren't helpful for inference
 */
protected void configureFileInput(Class<? extends Mapper<Fact, NullWritable, ?, ?>> fileMapper,
        Class<? extends Mapper<Derivation, NullWritable, ?, ?>> incMapper, final boolean filter)
        throws IOException {
    // Set up file input for all iterations up to this one
    Configuration conf = job.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Path inputPath;
    int iteration = MRReasoningUtils.getCurrentIteration(conf);
    // Set min/max split, if not already provided:
    long blocksize = Long.parseLong(conf.get("dfs.blocksize"));
    String minSplitProp = "mapreduce.input.fileinputformat.split.minsize";
    String maxSplitProp = "mapreduce.input.fileinputformat.split.maxsize";
    conf.set(minSplitProp, conf.get(minSplitProp, String.valueOf(blocksize)));
    conf.set(maxSplitProp, conf.get(maxSplitProp, String.valueOf(blocksize * 8)));
    for (int i = 1; i <= iteration; i++) {
        // Prefer cleaned output...
        inputPath = MRReasoningUtils.getOutputPath(conf, MRReasoningUtils.OUTPUT_BASE + i);
        // But if there isn't any, try intermediate data:
        if (!fs.isDirectory(inputPath)) {
            inputPath = MRReasoningUtils.getOutputPath(conf,
                    MRReasoningUtils.OUTPUT_BASE + i + MRReasoningUtils.TEMP_SUFFIX);
        }
        // And only proceed if we found one or the other.
        if (fs.isDirectory(inputPath)) {
            // Never include debug output. If filter is true, select only
            // intermediate and schema data, otherwise include everything.
            PathFilter f = new PathFilter() {
                public boolean accept(Path path) {
                    String s = path.getName();
                    if (s.startsWith(MRReasoningUtils.DEBUG_OUT)) {
                        return false;
                    } else {
                        return !filter || s.startsWith(MRReasoningUtils.INTERMEDIATE_OUT)
                                || s.startsWith(MRReasoningUtils.SCHEMA_OUT);
                    }
                }
            };
            for (FileStatus status : fs.listStatus(inputPath, f)) {
                if (status.getLen() > 0) {
                    Path p = status.getPath();
                    String s = p.getName();
                    if (s.startsWith(MRReasoningUtils.INCONSISTENT_OUT)) {
                        if (incMapper != null) {
                            MultipleInputs.addInputPath(job, p, CombineSequenceFileInputFormat.class, incMapper);
                        }
                    } else {
                        MultipleInputs.addInputPath(job, status.getPath(), CombineSequenceFileInputFormat.class,
                                fileMapper);
                    }
                }
            }
        }
    }
}
From source file:org.apache.rya.reasoning.mr.ConformanceTest.java
License:Apache License
/**
 * Verify that we can infer the correct triples or detect an inconsistency.
 * @param conf Specifies working directory, etc.
 * @param test Contains premise/conclusion graphs, will store result
 * @return Return value of the MapReduce job
 */
int runTest(final Configuration conf, final String[] args, final OwlTest test) throws Exception {
    conf.setInt(MRReasoningUtils.STEP_PROP, 0);
    conf.setInt(MRReasoningUtils.SCHEMA_UPDATE_PROP, 0);
    conf.setBoolean(MRReasoningUtils.DEBUG_FLAG, true);
    conf.setBoolean(MRReasoningUtils.OUTPUT_FLAG, true);
    // Connect to MiniAccumulo and load the test
    final Repository repo = MRReasoningUtils.getRepository(conf);
    repo.initialize();
    final RepositoryConnection conn = repo.getConnection();
    conn.clear();
    conn.add(new StringReader(test.premise), "", RDFFormat.RDFXML);
    conn.close();
    repo.shutDown();
    // Run the reasoner
    final ReasoningDriver reasoner = new ReasoningDriver();
    final int result = ToolRunner.run(conf, reasoner, args);
    test.success = (result == 0);
    // Inconsistency test: successful if determined inconsistent
    if (test.types.contains(TEST_INCONSISTENCY)) {
        test.success = test.success && reasoner.hasInconsistencies();
    }
    // Consistency test: successful if determined consistent
    if (test.types.contains(TEST_CONSISTENCY)) {
        test.success = test.success && !reasoner.hasInconsistencies();
    }
    // Other types: we'll need to look at the inferred triples/schema
    if (test.types.contains(TEST_NONENTAILMENT) || test.types.contains(TEST_ENTAILMENT)) {
        System.out.println("Reading inferred triples...");
        // Read in the inferred triples from HDFS:
        final Schema schema = MRReasoningUtils.loadSchema(conf);
        final FileSystem fs = FileSystem.get(conf);
        final Path path = MRReasoningUtils.getOutputPath(conf, "final");
        final OutputCollector inferred = new OutputCollector();
        final NTriplesParser parser = new NTriplesParser();
        parser.setRDFHandler(inferred);
        if (fs.isDirectory(path)) {
            for (final FileStatus status : fs.listStatus(path)) {
                final String s = status.getPath().getName();
                if (s.startsWith(MRReasoningUtils.INCONSISTENT_OUT)
                        || s.startsWith(MRReasoningUtils.DEBUG_OUT)) {
                    continue;
                }
                final BufferedReader br = new BufferedReader(
                        new InputStreamReader(fs.open(status.getPath()), StandardCharsets.UTF_8));
                parser.parse(br, "");
                br.close();
            }
        }
        MRReasoningUtils.deleteIfExists(conf, "final");
        test.inferred.addAll(inferred.triples);
        // Entailment test: successful if expected triples were inferred
        if (test.types.contains(TEST_ENTAILMENT)) {
            // Check expected inferences against the inferred triples and
            // the schema reasoner
            for (final Statement st : test.expected) {
                final Fact fact = new Fact(st);
                if (!test.inferred.contains(st) && !triviallyTrue(fact.getTriple(), schema)
                        && !schema.containsTriple(fact.getTriple())) {
                    test.error.add(st);
                }
            }
        }
        // Non-entailment test: failure if non-expected triples inferred
        if (test.types.contains(TEST_NONENTAILMENT)) {
            for (final Statement st : test.unexpected) {
                final Fact fact = new Fact(st);
                if (test.inferred.contains(st) || schema.containsTriple(fact.getTriple())) {
                    test.error.add(st);
                }
            }
        }
        test.success = test.success && test.error.isEmpty();
    }
    conf.setBoolean(MRReasoningUtils.DEBUG_FLAG, false);
    MRReasoningUtils.clean(conf);
    return result;
}