Example usage for org.apache.hadoop.fs FileSystem exists

List of usage examples for org.apache.hadoop.fs.FileSystem#exists

Introduction

This page lists example usages of org.apache.hadoop.fs.FileSystem#exists.

Prototype

public boolean exists(Path f) throws IOException 

Source Link

Document

Check if a path exists.
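
Before the examples, here is a minimal sketch of a typical call. The helper class, method name, configuration, and path below are illustrative assumptions and do not come from any of the source files on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsExample {
    // Returns true if the given location resolves to an existing file or directory.
    public static boolean pathExists(Configuration conf, String location) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path target = new Path(location); // hypothetical path, e.g. "/tmp/example.txt"
        return fs.exists(target);
    }
}

The examples that follow use the same call to guard reads, appends, and output-directory cleanup.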

Usage

From source file: co.nubetech.hiho.merge.TestMergeJob.java

License: Apache License

@Test
public void testMergeByKeyWithDelimitedTextInputFormat() throws Exception {

    final String inputData1 = "Macon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nMacaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "key",
            "-outputPath", "output", "-inputFormat", "co.nubetech.hiho.dedup.DelimitedTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text", "-inputValueClassName",
            "org.apache.hadoop.io.Text" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(4, count);

}

From source file: co.nubetech.hiho.merge.TestMergeJob.java

License: Apache License

@Test
public void testMergeByValueWithDelimitedTextInputFormat() throws Exception {

    final String inputData1 = "Macon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714"
            + "\nTimon Leonard,716 Ac Ave,1-857-935-3882,62240";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "value",
            "-outputPath", "output", "-inputFormat", "co.nubetech.hiho.dedup.DelimitedTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text", "-inputValueClassName",
            "org.apache.hadoop.io.Text", };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);

}

From source file: co.nubetech.hiho.merge.TestMergeJob.java

License: Apache License

@Test
public void testMergeByValueWithTextInputFormat() throws Exception {

    final String inputData1 = "Macon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "Timon Leonard,716 Ac Ave,1-857-935-3882,62240"
            + "\nMacaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "value",
            "-outputPath", "output", "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
            "-outputFormat", "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}

From source file: co.nubetech.hiho.merge.TestMergeJob.java

License: Apache License

@Test
public void testMergeByKeyWithKeyValueTextInputFormat() throws Exception {

    final String inputData1 = "A\tMacon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nB\tDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nC\tCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "A\tTimon Leonard,716 Ac Ave,1-857-935-3882,62240"
            + "\nD\tMacaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nB\tCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "key",
            "-outputPath", "output", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat", "-inputKeyClassName",
            "org.apache.hadoop.io.Text", "-inputValueClassName", "org.apache.hadoop.io.Text", "-outputFormat",
            "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(4, count);
}

From source file: co.nubetech.hiho.merge.TestMergeJob.java

License: Apache License

@Test
public void testMergeByValueWithSequenceFileAsTextInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(new IntWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(new IntWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    inputData2.put(new IntWritable(2), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(new IntWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "value",
            "-outputPath", "output", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat", "-inputKeyClassName",
            "org.apache.hadoop.io.Text", "-inputValueClassName", "org.apache.hadoop.io.Text", "-outputFormat",
            "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}

From source file: colossal.pipe.ColFile.java

License: Apache License

public boolean exists(Configuration conf) {
    Path dfsPath = new Path(path);
    try {
        FileSystem fs = dfsPath.getFileSystem(conf);
        return fs.exists(dfsPath);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file: colossal.pipe.ColFile.java

License: Apache License

public boolean isObsolete(Configuration conf) {
    Path dfsPath = new Path(path);
    try {
        FileSystem fs = dfsPath.getFileSystem(conf);
        // this needs to be smart - we should encode in the file metadata the dependents and their dates used
        // so we can verify that any existing antecedent is not newer and declare victory...
        if (fs.exists(dfsPath)) {
            FileStatus[] statuses = fs.listStatus(dfsPath);
            for (FileStatus status : statuses) {
                if (!status.isDir()) {
                    if (format != Formats.AVRO_FORMAT || status.getPath().toString().endsWith(".avro")) {
                        return false; // may check for extension for other types
                    }
                } else {
                    if (!status.getPath().toString().endsWith("/_logs")
                            && !status.getPath().toString().endsWith("/_temporary")) {
                        return false;
                    }
                }
            }
        }
        return true; // needs more work!
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file: colossal.pipe.ColFile.java

License: Apache License

public void clearAndPrepareOutput(Configuration conf) {
    try {
        Path dfsPath = new Path(path);
        FileSystem fs = dfsPath.getFileSystem(conf);
        if (fs.exists(dfsPath)) {
            FileStatus[] statuses = fs.listStatus(dfsPath);
            for (FileStatus status : statuses) {
                if (status.isDir()) {
                    if (!status.getPath().toString().endsWith("/_logs")
                            && !status.getPath().toString().endsWith("/_temporary")) {
                        throw new IllegalArgumentException(
                                "Trying to overwrite directory with child directories: " + path);
                    }
                }
            }
        } else {
            fs.mkdirs(dfsPath);
        }
        fs.delete(dfsPath, true);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file: com.adsame.samelogs.SameLogsSink.java

License: Apache License

@SuppressWarnings("rawtypes")
@Override
public void append(Event e) throws IOException {
    // append the event to the output
    byte[] fn = e.get(TailSource.A_TAILSRCFILE);
    byte[] bd = e.getBody();
    System.out.println("##" + new String(fn) + "##" + new String(bd));

    Map<String, byte[]> maps = e.getAttrs();

    Iterator iter = maps.entrySet().iterator();
    while (iter.hasNext()) {
        Map.Entry entry = (Map.Entry) iter.next();
        String key = (String) entry.getKey();
        System.out.println("key: " + key);
    }

    // here we are assuming the body is a string
    pw.println(new String(e.getBody()));
    pw.flush(); // so we can see it in the file right away

    Configuration configuration = new Configuration();
    FileSystem hdfsFileSystem = FileSystem.get(configuration);
    Path path = new Path("hdfs://nodie-Ubuntu4:9000/user/nodie/input/dfs/hello");
    FSDataOutputStream out;
    System.out.println("exists: " + hdfsFileSystem.exists(path));
    if (hdfsFileSystem.exists(path)) {
        out = hdfsFileSystem.append(path);
    } else {
        out = hdfsFileSystem.create(path);
    }

    out.write(e.getBody());
    out.writeChar('\n');
    out.flush();
    out.close();
}

From source file: com.alexholmes.hadooputils.sort.Sort.java

License: Apache License

/**
 * The driver for the sort MapReduce job.
 *
 * @param jobConf           sort configuration
 * @param numMapTasks       number of map tasks
 * @param numReduceTasks    number of reduce tasks
 * @param sampler           sampler, if required
 * @param codecClass        the compression codec for compressing final outputs
 * @param mapCodecClass     the compression codec for compressing intermediary map outputs
 * @param createLzopIndexes whether or not a MR job should be launched to create LZOP indexes
 *                          for the job output files
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws IOException        if something went wrong
 * @throws URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final JobConf jobConf, final Integer numMapTasks, final Integer numReduceTasks,
        final InputSampler.Sampler<K, V> sampler, final Class<? extends CompressionCodec> codecClass,
        final Class<? extends CompressionCodec> mapCodecClass, final boolean createLzopIndexes,
        final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException {

    jobConf.setJarByClass(Sort.class);
    jobConf.setJobName("sorter");

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();

    if (numMapTasks != null) {
        jobConf.setNumMapTasks(numMapTasks);
    }
    if (numReduceTasks != null) {
        jobConf.setNumReduceTasks(numReduceTasks);
    } else {
        int numReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
        String sortReduces = jobConf.get("test.sort.reduces_per_host");
        if (sortReduces != null) {
            numReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
        }

        // Set user-supplied (possibly default) job configs
        jobConf.setNumReduceTasks(numReduces);
    }

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(SortReduce.class);

    jobConf.setInputFormat(SortInputFormat.class);

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    if (mapCodecClass != null) {
        jobConf.setMapOutputCompressorClass(mapCodecClass);
    }

    if (codecClass != null) {
        jobConf.setBoolean("mapred.output.compress", true);
        jobConf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class);
    }

    FileInputFormat.setInputPaths(jobConf, inputDirAsString);
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDirAsString));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];

        FileSystem fileSystem = FileSystem.get(jobConf);

        if (fileSystem.exists(inputDir) && fileSystem.isFile(inputDir)) {
            inputDir = inputDir.getParent();
        }
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + jobConf.getNumReduceTasks() + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    if (jobResult.isSuccessful()) {
        if (createLzopIndexes && codecClass != null && LzopCodec.class.equals(codecClass)) {
            new LzoIndexer(jobConf).index(new Path(outputDirAsString));
        }
        return true;
    }
    return false;
}