Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filter files/directories in the given list of paths using default path filter.
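
Before the usage examples, here is a minimal, self-contained sketch of the Path[] overload shown in the prototype (the excerpts below mostly use the single-Path form). The directory names are hypothetical placeholders, and the default Configuration assumes fs.defaultFS is available on the classpath.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        // Assumes fs.defaultFS is configured, e.g. via core-site.xml on the classpath.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // The Path[] overload lists the combined contents of several paths in
        // one call, filtered by the default path filter.
        // These directory names are placeholders.
        Path[] dirs = { new Path("/data/in"), new Path("/data/archive") };
        for (FileStatus status : fs.listStatus(dirs)) {
            System.out.println(status.getPath() + "\t"
                    + (status.isDirectory() ? "dir" : status.getLen() + " bytes"));
        }
    }
}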

Usage

From source file:com.rim.logdriver.util.IndexLogs.java

License:Apache License

private static void findComponents(Map<String, Map<String, Map<String, Map<String, Component>>>> data,
        FileSystem fs, Path path) throws FileNotFoundException, IOException, ParseException {
    // Grab FileStatus for each file in the path  
    FileStatus[] allFiles = fs.listStatus(path);
    // For each file, try to match a pattern that indicates we have identified a component.
    // If we find a match, add or update the component and return.
    try {
        for (int i = 0; i < allFiles.length; i++) {
            if (dataPattern.matcher(allFiles[i].getPath().toString()).find()) {
                updateComponent(data, fs, allFiles[i], path);
                return;
            }
        }
        // If we got here no component was matched, so go one level deeper. 
        for (int i = 0; i < allFiles.length; i++) {
            if (allFiles[i].isDirectory()) {
                findComponents(data, fs, allFiles[i].getPath());
            }
        }
    }
    // It's possible that we don't have access to files in this path, or that the path is empty.
    catch (AccessControlException e) {
    } catch (FileNotFoundException e) {
    }
}

From source file:com.rim.logdriver.util.MultiSearch.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.rim.logdriver.util.QueryIndex.java

License:Apache License

private Path getLatestIndex(FileSystem fs) throws FileNotFoundException, IOException {
    // Get path of latest index file
    FileStatus[] indexFiles = fs.listStatus(new Path("/service/_index/"));
    long latestDate = 0;
    for (int i = 0; i < indexFiles.length; i++) {
        try {
            if (indexFiles[i].getPath().toString().matches(".*\\.json$")) {
                String[] pathPieces = indexFiles[i].getPath().toString().split("\\.");
                long currentDate = Long.parseLong(pathPieces[pathPieces.length - 2]);
                if (currentDate > latestDate) {
                    latestDate = currentDate;
                }
            }
        } catch (NumberFormatException e) {
            // File name did not contain a numeric timestamp; skip it.
        } catch (IndexOutOfBoundsException e) {
            // File name did not have the expected dotted form; skip it.
        }
    }
    if (latestDate > 0) {
        Date now = new Date();
        Long age = (now.getTime() - (latestDate * 1000)) / oneHour;
        if (age > 24) {
            System.out.println("\nWarning: Index is over 24 hours old!");
        }
        System.out.println("\nIndex is from " + outputFormat.format(latestDate * 1000) + " and is " + age
                + " hours old.\n");
        return new Path("/service/_index/logindex." + latestDate + ".json");
    } else {
        return null;
    }
}

From source file:com.rockstor.compact.mapreduce.CompactDirInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    List<InputSplit> inputSplits = new ArrayList<InputSplit>();
    Compactor.getInstance();
    Configuration conf = context.getConfiguration();
    Path rootPath = new Path(PathUtil.getInstance().getTaskRootDir());
    FileSystem dfs = FileSystem.get(conf);

    if (!dfs.exists(rootPath)) {
        return inputSplits;
    }

    FileStatus[] fs = dfs.listStatus(rootPath);
    if (fs == null || fs.length == 0) {
        return inputSplits;
    }

    InputSplit inputSplit = null;
    String taskIdName = null;
    for (FileStatus f : fs) {
        if (!f.isDir()) {
            continue;
        }
        taskIdName = f.getPath().getName();
        LOG.info("add task id name: " + taskIdName);
        inputSplit = new CompactDirInputSplit(taskIdName);
        inputSplits.add(inputSplit);
    }

    return inputSplits;
}

From source file:com.rockstor.compact.RockIndexReader.java

License:Apache License

/**
 * @param args
 */
public static void main(String[] args) {
    RockAccessor.connectHDFS();
    FileSystem dfs = RockAccessor.getFileSystem();

    try {
        FileStatus[] fs = dfs.listStatus(new Path("/rockstor/tmp/gb_meta/"));
        RockIndexReader rir = null;
        for (FileStatus fx : fs) {
            try {
                rir = new RockIndexReader();
                rir.open(fx.getPath().toString());
                Chunk c = null;
                while (rir.hasNext()) {
                    c = rir.next();
                    LOG.info(c);
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                if (rir != null) {
                    rir.close();
                    rir = null;
                }
            }

        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            dfs.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

From source file:com.rockstor.compact.TaskMetaReader.java

License:Apache License

public static void main(String[] argv) {
    RockAccessor.connectHDFS();
    FileSystem dfs = RockAccessor.getFileSystem();

    try {
        FileStatus[] fs = dfs.listStatus(new Path("/rockstor/tmp/task"));
        TaskMetaReader rir = null;
        for (FileStatus fx : fs) {
            try {
                rir = new TaskMetaReader();
                rir.open(fx.getPath().toString() + "/meta");
                Map<String, byte[]> s = rir.getRocks();
                LOG.info(fx.getPath().toString() + "/meta");
                for (Map.Entry<String, byte[]> kv : s.entrySet()) {
                    LOG.info(kv.getKey());
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                if (rir != null) {
                    rir.close();
                    rir = null;
                }
            }

        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } finally {
        try {
            dfs.close();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

}

From source file:com.scaleunlimited.cascading.DistCp.java

License:Apache License

/** Delete the dst files/dirs which do not exist in src */
static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs,
        Path jobdir, JobConf jobconf, Configuration conf) throws IOException {
    if (!dstroot.isDir()) {
        throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
    }

    //write dst lsr results
    final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class,
            FileStatus.class, SequenceFile.CompressionType.NONE);
    try {
        //do lsr to get all file statuses in dstroot
        final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
        for (lsrstack.push(dstroot); !lsrstack.isEmpty();) {
            final FileStatus status = lsrstack.pop();
            if (status.isDir()) {
                for (FileStatus child : dstfs.listStatus(status.getPath())) {
                    String relative = makeRelative(dstroot.getPath(), child.getPath());
                    writer.append(new Text(relative), child);
                    lsrstack.push(child);
                }
            }
        }
    } finally {
        checkAndClose(writer);
    }

    //sort lsr results
    final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
            FileStatus.class, jobconf);
    sorter.sort(dstlsr, sortedlsr);

    //compare lsr list and dst list  
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
        lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
        dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

        //compare sorted lsr list and sorted dst list
        final Text lsrpath = new Text();
        final FileStatus lsrstatus = new FileStatus();
        final Text dstpath = new Text();
        final Text dstfrom = new Text();
        final FsShell shell = new FsShell(conf);
        final String[] shellargs = { "-rmr", null };

        boolean hasnext = dstin.next(dstpath, dstfrom);
        for (; lsrin.next(lsrpath, lsrstatus);) {
            int dst_cmp_lsr = dstpath.compareTo(lsrpath);
            for (; hasnext && dst_cmp_lsr < 0;) {
                hasnext = dstin.next(dstpath, dstfrom);
                dst_cmp_lsr = dstpath.compareTo(lsrpath);
            }

            if (dst_cmp_lsr == 0) {
                //lsrpath exists in dst, skip it
                hasnext = dstin.next(dstpath, dstfrom);
            } else {
                //lsrpath does not exist, delete it
                String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
                if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
                    shellargs[1] = s;
                    int r = 0;
                    try {
                        r = shell.run(shellargs);
                    } catch (Exception e) {
                        throw new IOException("Exception from shell.", e);
                    }
                    if (r != 0) {
                        throw new IOException(
                                "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r);
                    }
                }
            }
        }
    } finally {
        checkAndClose(lsrin);
        checkAndClose(dstin);
    }
}

From source file:com.scaleunlimited.cascading.FsUtils.java

License:Apache License

/**
 * Protect against earlier versions of Hadoop returning null if there
 * are no sub-directories in <path>
 *
 * @param fs
 * @param path
 * @return
 * @throws IOException
 */
public static FileStatus[] listStatus(FileSystem fs, Path path) throws IOException {
    FileStatus[] result = fs.listStatus(path);
    if (result == null) {
        result = new FileStatus[0];
    }

    return result;
}

From source file:com.senseidb.util.HDFSIndexCopier.java

License:Apache License

public boolean copy(String src, String dest) {
    try {
        URI srcUri = new URI(src), destUri = new URI(dest);

        Configuration config = new Configuration();
        config.set("fs.default.name", srcUri.resolve("/").toString());

        FileSystem dfs = FileSystem.get(config);
        Path destPath = new Path(destUri.toString());
        FileStatus[] files = dfs.listStatus(new Path(srcUri.toString()));
        if (files == null || files.length == 0)
            return false;

        for (FileStatus f : files) {
            log.info("Copying " + f.getPath().toString());
            dfs.copyToLocalFile(f.getPath(), destPath);
        }

        return true;
    } catch (Exception e) {
        log.error(e.getMessage(), e);
        return false;
    }
}

From source file:com.sirius.hadoop.job.InputUpload.java

License:Apache License

public static void main(String[] args) throws Exception {
    hdfs = new URI("hdfs://hadoop1:8020");
    FileSystem fs = FileSystem.get(hdfs, configuration);

    if (fs.exists(input_path)) {
        fs.delete(input_path, true);
    }

    System.out.println(Arrays.toString(fs.listStatus(new Path("/"))));

    fs.copyFromLocalFile(false, true, new Path("/Users/pippo/Downloads/subscriber_status.statics.input"),
            input_path);
}