Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filter files/directories in the given list of paths using default path filter.
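
A minimal, self-contained sketch of calling listStatus is shown below. It assumes fs.defaultFS in the Configuration points at a reachable filesystem (HDFS or file:///) and uses a hypothetical /tmp/data directory for illustration; note that most of the usage examples further down call the single-path overload listStatus(Path f) rather than the Path[] variant shown in the prototype.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        // Assumption: fs.defaultFS in the Configuration points at a reachable
        // filesystem; /tmp/data is a hypothetical directory used for illustration.
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(conf);

        // List the immediate children of the directory (non-recursive).
        FileStatus[] statuses = fs.listStatus(dir);
        for (FileStatus status : statuses) {
            System.out.println((status.isDirectory() ? "dir  " : "file ")
                    + status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}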

Usage

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.util.DistributedRowMatrix.java

License:Apache License

/**
 * This implements matrix multiplication A * B using MapReduce tasks on CPU or
 * GPU
 * 
 * @param other a DistributedRowMatrix
 * @param outPath path to write result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU,
        boolean transposeMatrixA) throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new CardinalityException(numCols, other.numRows());
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Transpose Matrix within a new MapReduce Job
    DistributedRowMatrix transposed = this;
    if (transposeMatrixA) {
        transposed = transposed.transpose();
    }
    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    } else { // use GPU
        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    }

    // Multiply Matrix with transposed one
    if (job.waitForCompletion(true)) {

        // Rename result file to output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }

        // Read resulting Matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);

        return out;
    }

    return null;
}

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.util.DistributedRowMatrix.java

License:Apache License

/**
 * This implements matrix multiplication A * B using MapReduce tasks on CPU or
 * GPU
 * 
 * @param other a DistributedRowMatrix
 * @param outPath path to write result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());

    } else { // use GPU

        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());
    }

    // Multiply Matrix
    if (job.waitForCompletion(true)) {

        // Rename result file to output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }

        // Read resulting Matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);

        return out;
    }

    return null;
}

From source file:azkaban.jobtype.hiveutils.azkaban.hive.actions.Utils.java

License:Apache License

static ArrayList<String> fetchDirectories(FileSystem fs, String location, boolean returnFullPath)
        throws IOException, HiveViaAzkabanException {
    LOG.info("Fetching directories in " + location);
    Path p = new Path(location);
    FileStatus[] statuses = fs.listStatus(p);

    if (statuses == null || statuses.length == 0) {
        throw new HiveViaAzkabanException("Couldn't find any directories in " + location);
    }

    ArrayList<String> files = new ArrayList<String>(statuses.length);
    for (FileStatus status : statuses) {
        if (!status.isDir())
            continue;
        if (status.getPath().getName().startsWith("."))
            continue;

        files.add(returnFullPath ? status.getPath().toString() : status.getPath().getName());
    }
    return files;
}

From source file:azkaban.jobtype.javautils.AbstractHadoopJob.java

License:Apache License

@SuppressWarnings("rawtypes")
public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass)
        throws IOException, URISyntaxException {
    JobConf conf = new JobConf();
    // set custom class loader with custom find resource strategy.

    conf.setJobName(getJobName());
    conf.setMapperClass(mapperClass);
    if (reducerClass != null) {
        conf.setReducerClass(reducerClass);
    }

    if (props.getBoolean("is.local", false)) {
        conf.set("mapred.job.tracker", "local");
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.local.dir", "/tmp/map-red");

        logger.info("Running locally, no hadoop jar set.");
    } else {
        HadoopUtils.setClassLoaderAndJar(conf, getClass());
        logger.info("Setting hadoop jar file for class:" + getClass() + "  to " + conf.getJar());
        logger.info("*************************************************************************");
        logger.info(
                "          Running on Real Hadoop Cluster(" + conf.get("mapred.job.tracker") + ")           ");
        logger.info("*************************************************************************");
    }

    // set JVM options if present
    if (props.containsKey("mapred.child.java.opts")) {
        conf.set("mapred.child.java.opts", props.getString("mapred.child.java.opts"));
        logger.info("mapred.child.java.opts set to " + props.getString("mapred.child.java.opts"));
    }

    // set input and output paths if they are present
    if (props.containsKey("input.paths")) {
        List<String> inputPaths = props.getStringList("input.paths");
        if (inputPaths.size() == 0)
            throw new IllegalArgumentException("Must specify at least one value for property 'input.paths'");
        for (String path : inputPaths) {
            HadoopUtils.addAllSubPaths(conf, new Path(path));
        }
    }

    if (props.containsKey("output.path")) {
        String location = props.get("output.path");
        FileOutputFormat.setOutputPath(conf, new Path(location));

        // For testing purpose only remove output file if exists
        if (props.getBoolean("force.output.overwrite", false)) {
            FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
            fs.delete(FileOutputFormat.getOutputPath(conf), true);
        }
    }

    // Adds External jars to hadoop classpath
    String externalJarList = props.getString("hadoop.external.jarFiles", null);
    if (externalJarList != null) {
        FileSystem fs = FileSystem.get(conf);
        String[] jarFiles = externalJarList.split(",");
        for (String jarFile : jarFiles) {
            logger.info("Adding extenral jar File:" + jarFile);
            DistributedCache.addFileToClassPath(new Path(jarFile), conf, fs);
        }
    }

    // Adds distributed cache files
    String cacheFileList = props.getString("hadoop.cache.files", null);
    if (cacheFileList != null) {
        String[] cacheFiles = cacheFileList.split(",");
        for (String cacheFile : cacheFiles) {
            logger.info("Adding Distributed Cache File:" + cacheFile);
            DistributedCache.addCacheFile(new URI(cacheFile), conf);
        }
    }

    // Adds distributed cache files
    String archiveFileList = props.getString("hadoop.cache.archives", null);
    if (archiveFileList != null) {
        String[] archiveFiles = archiveFileList.split(",");
        for (String archiveFile : archiveFiles) {
            logger.info("Adding Distributed Cache Archive File:" + archiveFile);
            DistributedCache.addCacheArchive(new URI(archiveFile), conf);
        }
    }

    String hadoopCacheJarDir = props.getString("hdfs.default.classpath.dir", null);
    if (hadoopCacheJarDir != null) {
        FileSystem fs = FileSystem.get(conf);
        if (fs != null) {
            FileStatus[] status = fs.listStatus(new Path(hadoopCacheJarDir));

            if (status != null) {
                for (int i = 0; i < status.length; ++i) {
                    if (!status[i].isDir()) {
                        Path path = new Path(hadoopCacheJarDir, status[i].getPath().getName());
                        logger.info("Adding Jar to Distributed Cache Archive File:" + path);

                        DistributedCache.addFileToClassPath(path, conf, fs);
                    }
                }
            } else {
                logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " is empty.");
            }
        } else {
            logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " filesystem doesn't exist");
        }
    }

    for (String key : getProps().getKeySet()) {
        String lowerCase = key.toLowerCase();
        if (lowerCase.startsWith(HADOOP_PREFIX)) {
            String newKey = key.substring(HADOOP_PREFIX.length());
            conf.set(newKey, getProps().get(key));
        }
    }

    HadoopUtils.setPropsInJob(conf, getProps());

    // put in tokens
    if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) {
        conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY, System.getenv(HADOOP_TOKEN_FILE_LOCATION));
    }

    return conf;
}

From source file:azkaban.jobtype.javautils.HadoopUtils.java

License:Apache License

public static JobConf addAllSubPaths(JobConf conf, Path path) throws IOException {
    if (shouldPathBeIgnored(path)) {
        throw new IllegalArgumentException(String.format("Path[%s] should be ignored.", path));
    }

    final FileSystem fs = path.getFileSystem(conf);

    if (fs.exists(path)) {
        for (FileStatus status : fs.listStatus(path)) {
            if (!shouldPathBeIgnored(status.getPath())) {
                if (status.isDir()) {
                    addAllSubPaths(conf, status.getPath());
                } else {
                    FileInputFormat.addInputPath(conf, status.getPath());
                }
            }
        }
    }
    return conf;
}

From source file:azkaban.viewer.hdfs.HdfsBrowserServlet.java

License:Apache License

private void displayDirPage(FileSystem fs, String user, HttpServletRequest req, HttpServletResponse resp,
        Session session, Path path) throws IOException {

    Page page = newPage(req, resp, session, "azkaban/viewer/hdfs/velocity/hdfs-browser.vm");
    page.add("allowproxy", allowGroupProxy);
    page.add("viewerPath", viewerPath);
    page.add("viewerName", viewerName);

    List<Path> paths = new ArrayList<Path>();
    List<String> segments = new ArrayList<String>();
    getPathSegments(path, paths, segments);
    page.add("paths", paths);
    page.add("segments", segments);
    page.add("user", user);
    page.add("homedir", getHomeDir(fs));

    try {
        FileStatus[] subdirs = fs.listStatus(path);
        page.add("subdirs", subdirs);
        long size = 0;
        for (int i = 0; i < subdirs.length; ++i) {
            if (subdirs[i].isDir()) {
                continue;
            }
            size += subdirs[i].getLen();
        }
        page.add("dirsize", size);
    } catch (AccessControlException e) {
        page.add("error_message", "Permission denied: " + e.getMessage());
        page.add("no_fs", "true");
    } catch (IOException e) {
        page.add("error_message", "Error: " + e.getMessage());
    }
    page.render();
}

From source file:bb.BranchAndBound.java

License:Apache License

static Job getJob(String input, String output, String dataDir, int iteration) throws Exception {
    Configuration conf = new Configuration();

    FileSystem hdfs = FileSystem.get(conf);
    FileStatus[] fileStatus = hdfs.listStatus(new Path(input));
    for (int i = 0; i < fileStatus.length; ++i) {
        if (fileStatus[i].getLen() == 0) {
            hdfs.delete(fileStatus[i].getPath());
        }
    }
    DistributedCache.addCacheFile(new URI(dataDir + "/data"), conf);
    Job ret = new Job(conf, dataDir + "_iteration_" + iteration);
    ret.setJarByClass(BranchAndBound.class);
    ret.setMapperClass(BBMapper1.class);
    ret.setReducerClass(BBReducer.class);
    //ret.setReducerClass(MergeReducer.class);
    FileInputFormat.setInputPaths(ret, new Path(input));
    //if( iteration > 7 ) FileInputFormat.setMinInputSplitSize(ret, 67108864);
    FileOutputFormat.setOutputPath(ret, new Path(output));
    ret.setOutputKeyClass(NullWritable.class);
    ret.setOutputValueClass(Text.class);
    return ret;
}

From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducer.java

License:Apache License

private void getPgStartIndex(Configuration conf) {
    try {
        Path path = new Path(basePGDir);
        FileSystem fs = path.getFileSystem(new Configuration());

        if (!fs.exists(path)) {
            pgStartIndex = 0;
            return;
        }

        int largestIx = 0;
        for (FileStatus file : fs.listStatus(path)) {
            String tmp = file.getPath().toString();
            if (!tmp.contains("bucket")) {
                continue;
            }
            tmp = tmp.substring(tmp.lastIndexOf('/'), tmp.length());
            int ix = Integer.parseInt(tmp.split("-")[1]);
            largestIx = Math.max(largestIx, ix);
            pgStartIndex += 1;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineReducer.java

License:Open Source License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    try {
        // read header from input
        outpFormat = new KeyIgnoringVCFOutputFormat(VCFFormat.VCF);
        String input = HalvadeConf.getInputDir(context.getConfiguration());
        String output = HalvadeConf.getOutDir(context.getConfiguration());
        reportBest = HalvadeConf.getReportAllVariant(context.getConfiguration());
        FileSystem fs = FileSystem.get(new URI(input), context.getConfiguration());
        Path firstVcfFile = null;
        if (fs.getFileStatus(new Path(input)).isDirectory()) {
            // get first file
            FileStatus[] files = fs.listStatus(new Path(input));
            int i = 0, l = files.length;
            while (i < l && !files[i].getPath().getName().endsWith(".vcf")) {
                i++;
            }
            if (i < l) {
                firstVcfFile = files[i].getPath();
            } else {
                throw new InterruptedException("VCFCombineReducer: No files in input folder.");
            }
        } else {
            throw new InterruptedException("VCFCombineReducer: Input directory is not a directory.");
        }
        Logger.DEBUG("first file: " + firstVcfFile);
        outpFormat.readHeaderFrom(firstVcfFile, fs);
        recordWriter = outpFormat.getRecordWriter(context, new Path(output + "HalvadeCombined.vcf"));
    } catch (URISyntaxException ex) {
        Logger.EXCEPTION(ex);
        throw new InterruptedException("URI for input directory is invalid.");
    }
}

From source file:be.ugent.intec.halvade.HalvadeOptions.java

License:Open Source License

protected double getInputSize(String input, Configuration conf) throws URISyntaxException, IOException {
    double size = 0;
    FileSystem fs = FileSystem.get(new URI(input), conf);
    if (fs.getFileStatus(new Path(input)).isDirectory()) {
        // add every file in directory
        FileStatus[] files = fs.listStatus(new Path(input));
        for (FileStatus file : files) {
            if (!file.isDirectory()) {
                size += file.getLen();
            }
        }
    } else {
        size += fs.getFileStatus(new Path(input)).getLen();
    }
    return (size / (1024 * 1024 * 1024));
}