Example usage for org.apache.hadoop.fs FileSystem isDirectory

List of usage examples for org.apache.hadoop.fs FileSystem isDirectory

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem isDirectory.

Prototype

@Deprecated
public boolean isDirectory(Path f) throws IOException 

Document

True iff the named path is a directory.
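
Since this method carries the @Deprecated annotation in recent Hadoop releases, the usual replacement is to inspect the FileStatus returned by getFileStatus(). The following is a minimal, self-contained sketch showing both the deprecated call and the replacement; the class name IsDirectoryExample and the path /tmp/example are hypothetical placeholders, not part of the examples below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path("/tmp/example"); // hypothetical path, adjust as needed

        // Deprecated convenience method: true iff the named path is a directory.
        boolean viaDeprecated = fs.isDirectory(p);

        // Preferred replacement: reuse the FileStatus. getFileStatus() throws
        // FileNotFoundException when the path is absent, so guard with exists().
        boolean viaStatus = fs.exists(p) && fs.getFileStatus(p).isDirectory();

        System.out.println(viaDeprecated + " " + viaStatus);
    }
}

Most of the examples below follow the same pattern: check whether a path is a directory before deleting, listing, or concatenating its contents.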

Usage

From source file:org.lab41.mapreduce.BlueprintsGraphDriver.java

License:Apache License

private Job configureJob1(Configuration conf, FaunusGraph faunusGraph, Path intermediatePath,
        Configuration job1Config, FileSystem fs) throws IOException {
    /** Job 1 Configuration **/
    Job job1 = new Job(job1Config);
    job1.setJobName("BluePrintsGraphDriver Job1" + faunusGraph.getInputLocation());
    job1.setJarByClass(BlueprintsGraphDriver.class);
    job1.setMapperClass(BlueprintsGraphOutputMapReduce.VertexMap.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(Holder.class);
    job1.setReducerClass(BlueprintsGraphOutputMapReduce.Reduce.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(FaunusVertex.class);

    job1.setInputFormatClass(faunusGraph.getGraphInputFormat());
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);

    if (fs.isDirectory(intermediatePath)) {
        logger.info("Exists" + intermediatePath + " -- deleting!");
        fs.delete(intermediatePath, true);
    }

    FileOutputFormat.setOutputPath(job1, intermediatePath);
    Path inputPath = faunusGraph.getInputLocation();
    FileInputFormat.setInputPaths(job1, inputPath);
    /***** Figure out how many reducers we need ********/

    Path[] paths = SequenceFileInputFormat.getInputPaths(job1);
    long splits = HdfsUtil.getNumOfSplitsForInputs(paths, conf, MB);

    // The job is configured with 4 GB of memory; divide as a double so Math.ceil actually rounds up.
    job1.setNumReduceTasks((int) Math.ceil(splits / 48.0));
    return job1;
}

From source file:org.lab41.mapreduce.IdUsingBulkLoaderDriver.java

License:Apache License

private Job configureJob2(Configuration baseConfiguration, FaunusGraph faunusGraph, FileSystem fs)
        throws IOException {
    Configuration job2Config = new Configuration(baseConfiguration);
    /** Job 2 Configuration **/
    Job job2 = new Job(job2Config);
    job2.setInputFormatClass(SequenceFileInputFormat.class);
    job2.setOutputFormatClass(faunusGraph.getGraphOutputFormat());
    job2.setJobName("IdUsingBulkLoader Job2: " + faunusGraph.getInputLocation());
    job2.setJarByClass(IdUsingBulkLoaderDriver.class);
    job2.setMapperClass(IdUsingBulkLoaderMapReduce.EdgeMapper.class);
    job2.setMapOutputKeyClass(NullWritable.class);
    job2.setMapOutputValueClass(FaunusVertex.class);

    FileInputFormat.setInputPaths(job2, faunusGraph.getInputLocation());
    job2.setNumReduceTasks(0);

    String strJob2OutputPath = faunusGraph.getOutputLocation().toString();
    Path job2Path = new Path(strJob2OutputPath + "/job2");

    if (fs.isDirectory(job2Path)) {
        logger.info("Exists" + strJob2OutputPath + " --deleteing");
        fs.delete(job2Path, true);
    }

    FileOutputFormat.setOutputPath(job2, job2Path);

    return job2;
}

From source file:org.lab41.mapreduce.IdUsingBulkLoaderDriver.java

License:Apache License

private Job configureJob1(FaunusGraph faunusGraph, Path intermediatePath, Configuration baseConfiguration,
        FileSystem fs) throws IOException {

    Configuration job1Config = new Configuration(baseConfiguration);
    /** Job 1 Configuration **/

    Job job1 = new Job(job1Config);
    job1.setJobName("IdUsingBulkLoader Job1" + faunusGraph.getInputLocation());
    job1.setJarByClass(IdUsingBulkLoaderDriver.class);
    job1.setMapperClass(IdUsingBulkLoaderMapReduce.VertexMapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(Holder.class);

    job1.setNumReduceTasks(0);

    job1.setInputFormatClass(faunusGraph.getGraphInputFormat());
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);

    if (fs.isDirectory(intermediatePath)) {
        logger.info("Exists" + intermediatePath + " -- deleting!");
        fs.delete(intermediatePath, true);
    }

    FileOutputFormat.setOutputPath(job1, intermediatePath);
    Path inputPath = faunusGraph.getInputLocation();
    FileInputFormat.setInputPaths(job1, inputPath);
    return job1;
}

From source file:org.mrgeo.test.MapOpTestVectorUtils.java

License:Apache License

public void compareVectors(Configuration conf, String testName) throws IOException {

    Path output = new Path(outputHdfs, testName);
    FileSystem fs = HadoopFileUtils.getFileSystem(conf, output);

    Path[] srcFiles;
    if (fs.isDirectory(output)) {
        FileStatus[] files = fs.listStatus(output);
        if (files == null || files.length == 0) {
            Assert.fail("No files founds: " + output.toString());
        }
        srcFiles = new Path[files.length];

        int cnt = 0;
        for (FileStatus file : files) {
            srcFiles[cnt++] = file.getPath();
        }
    } else {
        srcFiles = new Path[] { output };
    }

    for (Path file : srcFiles) {
        // read in the output file
        final long l = fs.getFileStatus(file).getLen();
        final byte[] testBuffer = new byte[(int) l];
        final FSDataInputStream fdis = fs.open(file);
        fdis.readFully(0, testBuffer); // readFully guards against a partial read
        fdis.close();

        File baselineVector = new File(inputLocal + testName + "/" + file.getName());

        if (!baselineVector.exists()) {
            Assert.fail("Golden test file missing: " + baselineVector.toString());
        }

        // read in the baseline
        final byte[] baselineBuffer = new byte[(int) baselineVector.length()];

        final DataInputStream fis = new DataInputStream(new FileInputStream(baselineVector));
        fis.readFully(baselineBuffer); // readFully guards against a partial read
        fis.close();

        Assert.assertEquals("Output is different!", new String(baselineBuffer), new String(testBuffer));
    }

}

From source file:org.s1ck.ldbc.LDBCToFlink.java

License:Open Source License

private void initFromHDFS() {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(ldbcDirectory);
        if (!fs.exists(p) || !fs.isDirectory(p)) {
            throw new IllegalArgumentException(
                    String.format("%s does not exist or is not a directory", ldbcDirectory));
        }
        FileStatus[] fileStates = fs.listStatus(p);
        for (FileStatus fileStatus : fileStates) {
            String filePath = fileStatus.getPath().getName();
            if (isVertexFile(filePath)) {
                vertexFilePaths.add(ldbcDirectory + filePath);
            } else if (isEdgeFile(filePath)) {
                edgeFilePaths.add(ldbcDirectory + filePath);
            } else if (isPropertyFile(filePath)) {
                propertyFilePaths.add(ldbcDirectory + filePath);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:org.talend.components.test.MiniDfsResource.java

License:Open Source License

/**
 * Tests that a file on the HDFS cluster contains the given lines of text.
 *
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected lines in the file (not including terminating end-of-lines).
 */
public static void assertReadFile(FileSystem fs, String path, String... expected) throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(new Path(path))))) {
            for (String line : expected)
                assertThat(r.readLine(), is(line));
            assertThat(r.readLine(), nullValue());
        }
    } else if (fs.isDirectory(p)) {
        HashSet<String> expect = new HashSet<>(Arrays.asList(expected));
        for (FileStatus fstatus : fs.listStatus(p)) {
            try (BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(fstatus.getPath())))) {
                String line = null;
                while (null != (line = r.readLine()))
                    if (!expect.remove(line))
                        fail("Unexpected line: " + line);
            }
        }
        // Check before asserting for the message.
        if (expect.size() != 0)
            assertThat("Not all lines found: " + expect.iterator().next(), expect, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}

From source file:org.talend.components.test.MiniDfsResource.java

License:Open Source License

/**
 * Tests that a file on the HDFS cluster contains the given lines of text.
 *
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected lines in the file (not including terminating end-of-lines).
 */
public static void assertReadFile(String recordDelimiter, FileSystem fs, String path, String... expected)
        throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(new Path(path))))) {
            Scanner s = new Scanner(r).useDelimiter(recordDelimiter);
            for (String line : expected) {
                assertThat(s.next(), is(line));
            }
            assertThat(s.hasNext(), is(false));
        }
    } else if (fs.isDirectory(p)) {
        HashSet<String> expect = new HashSet<>(Arrays.asList(expected));
        for (FileStatus fstatus : fs.listStatus(p)) {
            try (BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(fstatus.getPath())))) {
                Scanner s = new Scanner(r).useDelimiter(recordDelimiter);
                String line = null;
                while (s.hasNext()) {
                    line = s.next();
                    if (!expect.remove(line))
                        fail("Unexpected line: " + line);
                }
            }
        }
        // Check before asserting for the message.
        if (expect.size() != 0)
            assertThat("Not all lines found: " + expect.iterator().next(), expect, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}

From source file:org.talend.components.test.MiniDfsResource.java

License:Open Source License

/**
 * Tests that a file on the HDFS cluster contains the given Avro records.
 *
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected Avro records in the file.
 */
public static void assertReadAvroFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part)
        throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
                new BufferedInputStream(fs.open(new Path(path))), new GenericDatumReader<GenericRecord>())) {
            IndexedRecord record = null;
            while (reader.hasNext()) {
                record = reader.iterator().next();
                IndexedRecord eqRecord = null;
                for (IndexedRecord indexedRecord : expected) {
                    if (indexedRecord.equals(record)) {
                        eqRecord = indexedRecord;
                        break;
                    }
                }
                expected.remove(eqRecord);
            }
        }
        // Check before asserting for the message.
        if (!part && expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else if (fs.isDirectory(p)) {
        for (FileStatus fstatus : FileSystemUtil.listSubFiles(fs, p)) {
            assertReadAvroFile(fs, fstatus.getPath().toString(), expected, true);
        }
        // Check before asserting for the message.
        if (expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}

From source file:org.talend.components.test.MiniDfsResource.java

License:Open Source License

/**
 * Tests that a file on the HDFS cluster contains the given Parquet records.
 *
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected Avro records in the file.
 */
public static void assertReadParquetFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part)
        throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(fs.getConf(),
                new Path(path))) {
            IndexedRecord record = null;
            while (null != (record = reader.read())) {
                IndexedRecord eqRecord = null;
                for (IndexedRecord indexedRecord : expected) {
                    if (indexedRecord.equals(record)) {
                        eqRecord = indexedRecord;
                        break;
                    }
                }
                expected.remove(eqRecord);
            }
        }
        // Check before asserting for the message.
        if (!part && expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else if (fs.isDirectory(p)) {
        for (FileStatus fstatus : FileSystemUtil.listSubFiles(fs, p)) {
            assertReadParquetFile(fs, fstatus.getPath().toString(), expected, true);
        }
        // Check before asserting for the message.
        if (expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}

From source file:org.trustedanalytics.resourceserver.data.InputStreamProvider.java

License:Apache License

/**
 * Gets an InputStream for a path on HDFS.
 *
 * If the given path is a directory, it will read the files inside that directory and create
 * a SequenceInputStream from them, which emulates reading from the directory just like from
 * a regular file. Note that this method is not meant to read huge datasets
 * (nor is the project as a whole).
 * @param path the file or directory on HDFS to read
 * @return an InputStream over the file, or over the concatenated files of the directory
 * @throws IOException if the path cannot be read
 */
public InputStream getInputStream(Path path) throws IOException {
    Objects.requireNonNull(path);

    FileSystem fs = hdfsConfig.getFileSystem();
    if (fs.isFile(path)) {
        return fs.open(path);
    } else if (fs.isDirectory(path)) {
        FileStatus[] files = fs.listStatus(path);
        List<InputStream> paths = Arrays.stream(files).map(f -> {
            try {
                return fs.open(f.getPath());
            } catch (IOException e) {
                LOGGER.log(Level.SEVERE, "Cannot read file " + f.getPath().toString(), e);
                return null;
            }
        }).filter(f -> f != null).collect(Collectors.toList());
        return new SequenceInputStream(Collections.enumeration(paths));
    } else {
        throw new IllegalArgumentException("Given path " + path.toString() + " is neither file nor directory");
    }
}