Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filter files/directories in the given list of paths using default path filter.
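
Before the usage examples, here is a minimal, self-contained sketch of the Path[] overload shown in the prototype (the excerpts below mostly use the single-Path form). The directory names are hypothetical placeholders, and the default Configuration assumes fs.defaultFS is available on the classpath.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        // Assumes fs.defaultFS is configured, e.g. via core-site.xml on the classpath.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // The Path[] overload lists the combined contents of several paths in
        // one call, filtered by the default path filter.
        // These directory names are placeholders.
        Path[] dirs = { new Path("/data/in"), new Path("/data/archive") };
        for (FileStatus status : fs.listStatus(dirs)) {
            System.out.println(status.getPath() + "\t"
                    + (status.isDirectory() ? "dir" : status.getLen() + " bytes"));
        }
    }
}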

Usage

From source file:com.rim.logdriver.util.IndexLogs.java

License:Apache License

private static void findComponents(Map<String, Map<String, Map<String, Map<String, Component>>>> data,
        FileSystem fs, Path path) throws FileNotFoundException, IOException, ParseException {
    // Grab FileStatus for each file in the path  
    FileStatus[] allFiles = fs.listStatus(path);
    // For each file, try to match a pattern that indicates we have identified a component.
    // If we find a match, add or update the component and return.
    try {
        for (int i = 0; i < allFiles.length; i++) {
            if (dataPattern.matcher(allFiles[i].getPath().toString()).find()) {
                updateComponent(data, fs, allFiles[i], path);
                return;
            }
        }
        // If we got here no component was matched, so go one level deeper. 
        for (int i = 0; i < allFiles.length; i++) {
            if (allFiles[i].isDirectory()) {
                findComponents(data, fs, allFiles[i].getPath());
            }
        }
    }
    // It's possible that we don't have access to files in this path, or that the path is empty.
    catch (AccessControlException e) {
    } catch (FileNotFoundException e) {
    }
}

From source file:com.rim.logdriver.util.MultiSearch.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.rim.logdriver.util.QueryIndex.java

License:Apache License

private Path getLatestIndex(FileSystem fs) throws FileNotFoundException, IOException {
    // Get path of latest index file
    FileStatus[] indexFiles = fs.listStatus(new Path("/service/_index/"));
    long latestDate = 0;
    for (int i = 0; i < indexFiles.length; i++) {
        try {
            if (indexFiles[i].getPath().toString().matches(".*\\.json$")) {
                String[] pathPieces = indexFiles[i].getPath().toString().split("\\.");
                long currentDate = Long.parseLong(pathPieces[pathPieces.length - 2]);
                if (currentDate > latestDate) {
                    latestDate = currentDate;
                }
            }
        } catch (NumberFormatException e) {
            // File name did not contain a numeric timestamp; skip it.
        } catch (IndexOutOfBoundsException e) {
            // File name did not have the expected dotted form; skip it.
        }
    }
    if (latestDate > 0) {
        Date now = new Date();
        Long age = (now.getTime() - (latestDate * 1000)) / oneHour;
        if (age > 24) {
            System.out.println("\nWarning: Index is over 24 hours old!");
        }
        System.out.println("\nIndex is from " + outputFormat.format(latestDate * 1000) + " and is " + age
                + " hours old.\n");
        return new Path("/service/_index/logindex." + latestDate + ".json");
    } else {
        return null;
    }
}

From source file:com.rockstor.compact.mapreduce.CompactDirInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    List<InputSplit> inputSplits = new ArrayList<InputSplit>();
    Compactor.getInstance();
    Configuration conf = context.getConfiguration();
    Path rootPath = new Path(PathUtil.getInstance().getTaskRootDir());
    FileSystem dfs = FileSystem.get(conf);

    if (!dfs.exists(rootPath)) {
        return inputSplits;
    }

    FileStatus[] fs = dfs.listStatus(rootPath);
    if (fs == null || fs.length == 0) {
        return inputSplits;
    }

    InputSplit inputSplit = null;
    String taskIdName = null;
    for (FileStatus f : fs) {
        if (!f.isDir()) {
            continue;
        }
        taskIdName = f.getPath().getName();
        LOG.info("add task id name: " + taskIdName);
        inputSplit = new CompactDirInputSplit(taskIdName);
        inputSplits.add(inputSplit);
    }

    return inputSplits;
}

From source file:com.rockstor.compact.RockIndexReader.java

License:Apache License

/**
 * @param args
 */
public static void main(String[] args) {
    RockAccessor.connectHDFS();
    FileSystem dfs = RockAccessor.getFileSystem();

    try {
        FileStatus[] fs = dfs.listStatus(new Path("/rockstor/tmp/gb_meta/"));
        RockIndexReader rir = null;
        for (FileStatus fx : fs) {
            try {
                rir = new RockIndexReader();
                rir.open(fx.getPath().toString());
                Chunk c = null;
                while (rir.hasNext()) {
                    c = rir.next();
                    LOG.info(c);
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                if (rir != null) {
                    rir.close();
                    rir = null;
                }
            }

        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            dfs.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

From source file:com.rockstor.compact.TaskMetaReader.java

License:Apache License

public static void main(String[] argv) {
    RockAccessor.connectHDFS();
    FileSystem dfs = RockAccessor.getFileSystem();

    try {
        FileStatus[] fs = dfs.listStatus(new Path("/rockstor/tmp/task"));
        TaskMetaReader rir = null;
        for (FileStatus fx : fs) {
            try {
                rir = new TaskMetaReader();
                rir.open(fx.getPath().toString() + "/meta");
                Map<String, byte[]> s = rir.getRocks();
                LOG.info(fx.getPath().toString() + "/meta");
                for (Map.Entry<String, byte[]> kv : s.entrySet()) {
                    LOG.info(kv.getKey());
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                if (rir != null) {
                    rir.close();
                    rir = null;
                }
            }

        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } finally {
        try {
            dfs.close();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

}

From source file:com.scaleunlimited.cascading.DistCp.java

License:Apache License

/** Delete the dst files/dirs which do not exist in src */
static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs,
        Path jobdir, JobConf jobconf, Configuration conf) throws IOException {
    if (!dstroot.isDir()) {
        throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
    }

    //write dst lsr results
    final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class,
            FileStatus.class, SequenceFile.CompressionType.NONE);
    try {
        //do lsr to get all file statuses in dstroot
        final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
        for (lsrstack.push(dstroot); !lsrstack.isEmpty();) {
            final FileStatus status = lsrstack.pop();
            if (status.isDir()) {
                for (FileStatus child : dstfs.listStatus(status.getPath())) {
                    String relative = makeRelative(dstroot.getPath(), child.getPath());
                    writer.append(new Text(relative), child);
                    lsrstack.push(child);
                }
            }
        }
    } finally {
        checkAndClose(writer);
    }

    //sort lsr results
    final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
            FileStatus.class, jobconf);
    sorter.sort(dstlsr, sortedlsr);

    //compare lsr list and dst list  
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
        lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
        dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

        //compare sorted lsr list and sorted dst list
        final Text lsrpath = new Text();
        final FileStatus lsrstatus = new FileStatus();
        final Text dstpath = new Text();
        final Text dstfrom = new Text();
        final FsShell shell = new FsShell(conf);
        final String[] shellargs = { "-rmr", null };

        boolean hasnext = dstin.next(dstpath, dstfrom);
        for (; lsrin.next(lsrpath, lsrstatus);) {
            int dst_cmp_lsr = dstpath.compareTo(lsrpath);
            for (; hasnext && dst_cmp_lsr < 0;) {
                hasnext = dstin.next(dstpath, dstfrom);
                dst_cmp_lsr = dstpath.compareTo(lsrpath);
            }

            if (dst_cmp_lsr == 0) {
                //lsrpath exists in dst, skip it
                hasnext = dstin.next(dstpath, dstfrom);
            } else {
                //lsrpath does not exist, delete it
                String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
                if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
                    shellargs[1] = s;
                    int r = 0;
                    try {
                        r = shell.run(shellargs);
                    } catch (Exception e) {
                        throw new IOException("Exception from shell.", e);
                    }
                    if (r != 0) {
                        throw new IOException(
                                "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r);
                    }
                }
            }
        }
    } finally {
        checkAndClose(lsrin);
        checkAndClose(dstin);
    }
}

From source file:com.scaleunlimited.cascading.FsUtils.java

License:Apache License

/**
 * Protect against earlier versions of Hadoop returning null if there
 * are no sub-directories in <path>
 *
 * @param fs
 * @param path
 * @return
 * @throws IOException
 */
public static FileStatus[] listStatus(FileSystem fs, Path path) throws IOException {
    FileStatus[] result = fs.listStatus(path);
    if (result == null) {
        result = new FileStatus[0];
    }

    return result;
}

From source file:com.senseidb.util.HDFSIndexCopier.java

License:Apache License

public boolean copy(String src, String dest) {
    try {
        URI srcUri = new URI(src), destUri = new URI(dest);

        Configuration config = new Configuration();
        config.set("fs.default.name", srcUri.resolve("/").toString());

        FileSystem dfs = FileSystem.get(config);
        Path destPath = new Path(destUri.toString());
        FileStatus[] files = dfs.listStatus(new Path(srcUri.toString()));
        if (files == null || files.length == 0)
            return false;

        for (FileStatus f : files) {
            log.info("Copying " + f.getPath().toString());
            dfs.copyToLocalFile(f.getPath(), destPath);
        }

        return true;
    } catch (Exception e) {
        log.error(e.getMessage(), e);
        return false;
    }
}

From source file:com.sirius.hadoop.job.InputUpload.java

License:Apache License

public static void main(String[] args) throws Exception {
    hdfs = new URI("hdfs://hadoop1:8020");
    FileSystem fs = FileSystem.get(hdfs, configuration);

    if (fs.exists(input_path)) {
        fs.delete(input_path, true);
    }

    System.out.println(Arrays.toString(fs.listStatus(new Path("/"))));

    fs.copyFromLocalFile(false, true, new Path("/Users/pippo/Downloads/subscriber_status.statics.input"),
            input_path);
}