Example usage for org.apache.hadoop.fs FileSystem listStatus

Introduction

This page presents example usages of org.apache.hadoop.fs.FileSystem listStatus, drawn from open source projects.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filter files/directories in the given list of paths using default path filter.
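A minimal sketch of calling this overload, assuming a file system reachable through the default configuration; the directory paths below are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws Exception {
        // Obtain the file system named by the default configuration
        // (local or HDFS, depending on fs.defaultFS).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // A single call returns the combined listings of every path in
        // the array, filtered by the FileSystem's default path filter.
        Path[] dirs = { new Path("/data/in"), new Path("/data/staging") };
        FileStatus[] statuses = fs.listStatus(dirs);
        for (FileStatus status : statuses) {
            System.out.println(status.getPath() + "\t" + status.getLen());
        }
    }
}

Compared with looping over listStatus(Path) one directory at a time, the array overload simply concatenates the per-path results into one FileStatus array.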

Usage

From source file: boa.datagen.SeqSort.java

License: Apache License

public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    String base = "hdfs://boa-njt/";
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);

    String inPath = "/tmprepcache/2015-07/";
    StringBuilder sb = new StringBuilder();
    FileStatus[] files = fs.listStatus(new Path(base + inPath));
    for (int i = 0; i < files.length; i++) {
        FileStatus file = files[i];
        String name = file.getPath().getName();
        if (name.startsWith("ast-") && name.endsWith(".seq")) {
            try {
                //ToolRunner.run(new Configuration(), new SeqSort(inPath + name, "/tmprepcache/2015-07-sorted/" + name), null);
                sb.append(name + "\n");
            } catch (Throwable t) {
                t.printStackTrace();
            }
        }
    }
    FileIO.writeFileContents(new File("files2sort.txt"), sb.toString());
}

From source file: boa.datagen.SeqSortMerge.java

License: Apache License

public static void main(String[] args) throws IOException {
    // 'conf' and 'base' are static fields in the original class; they are
    // declared locally here so the snippet stands alone. Helper methods
    // (getCandidates, read, min) are defined elsewhere in the class.
    Configuration conf = new Configuration();
    String base = "hdfs://boa-njt/";
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);

    String inPath = "/tmprepcache/2015-07-sorted/";
    while (true) {
        FileStatus[] files = fs.listStatus(new Path(inPath));
        if (files.length < 2)
            break;
        Path path = new Path(inPath + System.currentTimeMillis());
        fs.mkdirs(path);
        SequenceFile.Writer w = SequenceFile.createWriter(fs, conf,
                new Path(inPath + path.getName() + "/part-00000"), Text.class, BytesWritable.class);
        FileStatus[] candidates = getCandidates(files);
        System.out.println("Merging " + candidates.length + " from " + files.length);
        SequenceFile.Reader[] readers = new SequenceFile.Reader[candidates.length];
        for (int i = 0; i < candidates.length; i++)
            readers[i] = new SequenceFile.Reader(fs,
                    new Path(inPath + candidates[i].getPath().getName() + "/part-00000"), conf);
        Text[] keys = new Text[candidates.length];
        BytesWritable[] values = new BytesWritable[candidates.length];
        read(readers, keys, values);
        while (true) {
            int index = min(keys);
            if (keys[index].toString().isEmpty())
                break;
            w.append(keys[index], values[index]);
            read(readers[index], keys[index], values[index]);
        }
        for (int i = 0; i < readers.length; i++)
            readers[i].close();
        w.close();
        for (int i = 0; i < readers.length; i++)
            fs.delete(new Path(inPath + candidates[i].getPath().getName()), true);
    }
}

From source file: byte_import.HexastoreBulkImport.java

License: Open Source License

private void loadHFiles() throws Exception {
    conf = HBaseConfiguration.create(); // 'conf' is a field of the enclosing class
    HBaseAdmin hadmin = new HBaseAdmin(conf);
    Path hfofDir = new Path("out");
    FileSystem fs = hfofDir.getFileSystem(conf);
    //if (!fs.exists(hfofDir)) {
    //  throw new FileNotFoundException("HFileOutputFormat dir " +
    //      hfofDir + " not found");
    //}
    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    //if (familyDirStatuses == null) {
    //  throw new FileNotFoundException("No families found in " + hfofDir);
    //}
    int length = 0;
    byte[][] splits = new byte[18000][];
    for (FileStatus stat : familyDirStatuses) {
        if (!stat.isDir()) {
            continue;
        }
        Path familyDir = stat.getPath();
        // Skip _logs, etc
        if (familyDir.getName().startsWith("_"))
            continue;
        //byte[] family = familyDir.getName().getBytes();
        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
        for (Path hfile : hfiles) {
            if (hfile.getName().startsWith("_"))
                continue;

            HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf));
            //HFile.Reader hfr =    new HFile.Reader(fs, hfile, null, false);
            final byte[] first;
            try {
                hfr.loadFileInfo();
                first = hfr.getFirstRowKey();
            } finally {
                hfr.close();
            }
            splits[length] = first.clone();
            length++;
        }
    }
    //System.out.println(length);

    // Trim the oversized scratch array down to the number of keys found.
    byte[][] splits1 = Arrays.copyOf(splits, length);
    Arrays.sort(splits1, Bytes.BYTES_COMPARATOR);
    //HTableDescriptor desc = new HTableDescriptor("H2RDF");

    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);

    HColumnDescriptor family = new HColumnDescriptor("A");
    desc.addFamily(family);
    //for (int i = 0; i < splits.length; i++) {
    //   System.out.println(Bytes.toStringBinary(splits[i]));
    //}
    conf.setInt("zookeeper.session.timeout", 600000);
    if (hadmin.tableExists(TABLE_NAME)) {
        hadmin.disableTable(TABLE_NAME);
        hadmin.deleteTable(TABLE_NAME);
    }
    // Recreate the table with the computed split keys. The original snippet
    // only created the table when it did not already exist, which left
    // nothing to load into after a delete.
    hadmin.createTable(desc, splits1);
    //hadmin.createTable(desc);
    String[] args1 = new String[2];
    args1[0] = "out";
    args1[1] = TABLE_NAME;
    //args1[1]="new2";

    ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1);

}

From source file: ca.uwaterloo.iss4e.spark.pointperrow.CosineMain.java

License: Open Source License

public void fetch(JavaSparkContext sc, String source) {
    try {
        FileSystem fs = FileSystem.get(new URI(source), new Configuration());
        Path src = new Path(source);
        if (fs.exists(src)) {
            FileStatus[] lists = fs.listStatus(src);
            readFiles(sc, fs, lists);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (URISyntaxException e) {
        e.printStackTrace();
    }
}

From source file: ca.uwaterloo.iss4e.spark.pointperrow.CosineMain.java

License: Open Source License

public void readFiles(JavaSparkContext sc, FileSystem fs, FileStatus[] files) {
    for (int i = 0; i < files.length; i++) {
        if (files[i].isDirectory()) {
            try {
                readFiles(sc, fs, fs.listStatus(files[i].getPath()));
            } catch (IOException e) {
                e.printStackTrace();
            }
        } else {
            if (lines == null) { // 'lines' is a JavaRDD<String> field of the class
                Path p = files[i].getPath();
                lines = sc.textFile(p.toString());
            } else {
                JavaRDD<String> r = sc.textFile(files[i].getPath().toString());
                // union() returns a new RDD; the result must be reassigned,
                // otherwise this call has no effect.
                lines = lines.union(r);
            }
        }
    }
}

From source file: cascading.tap.hadoop.Hadoop18TapUtil.java

License: Open Source License

private static void moveTaskOutputs(JobConf conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id");

    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + taskId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + taskId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
            }
        }
    }
}

From source file: cascading.tap.hadoop.util.Hadoop18TapUtil.java

License: Open Source License

private static void moveTaskOutputs(Configuration conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true))
                throw new IOException("Failed to delete earlier output of task: " + taskId);

            if (!fs.rename(taskOutput, finalOutputPath))
                throw new IOException("Failed to save output of task: " + taskId);
        }

        LOG.debug("Moved {} to {}", taskOutput, finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths)
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
        }
    }
}

From source file: cc.solr.lucene.store.hdfs.ChangeFileExt.java

License: Apache License

public static void main(String[] args) throws IOException {
    Path p = new Path(args[0]);
    FileSystem fileSystem = FileSystem.get(p.toUri(), new Configuration());
    FileStatus[] listStatus = fileSystem.listStatus(p);
    for (FileStatus fileStatus : listStatus) {
        Path path = fileStatus.getPath();
        fileSystem.rename(path, new Path(path.toString() + ".lf"));
    }
}

From source file: cc.solr.lucene.store.hdfs.ConvertDirectory.java

License: Apache License

public static void convert(Path path) throws IOException {
    FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
    if (!fileSystem.exists(path)) {
        System.out.println(path + " does not exist.");
        return;
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (fileStatus.isDir()) {
        FileStatus[] listStatus = fileSystem.listStatus(path);
        for (FileStatus status : listStatus) {
            convert(status.getPath());
        }
    } else {
        System.out.println("Converting file [" + path + "]");
        HdfsMetaBlock block = new HdfsMetaBlock();
        block.realPosition = 0;
        block.logicalPosition = 0;
        block.length = fileStatus.getLen();
        FSDataOutputStream outputStream = fileSystem.append(path);
        block.write(outputStream);
        outputStream.writeInt(1);
        outputStream.writeLong(fileStatus.getLen());
        outputStream.writeInt(HdfsFileWriter.VERSION);
        outputStream.close();
    }
}

From source file: clone.ReadSequenceFile.java

License: Apache License

private static int readSequenceFilesInDir(Path path, FileSystem fs, int max) {
    int n = 0;
    try {
        FileStatus[] stat = fs.listStatus(path);
        for (int i = 0; i < stat.length; ++i) {
            n += readSequenceFile(stat[i].getPath(), fs, max);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    System.out.println(n + " records read in total.");
    return n;
}