Example usage for org.apache.hadoop.fs FileSystem getFileStatus

List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
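
A minimal, self-contained sketch of calling getFileStatus directly is shown below. The default Configuration and the /tmp/example.txt path are placeholders for illustration; getFileStatus throws FileNotFoundException when the path does not exist.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws IOException {
        // Obtain the FileSystem backing the default (or configured) URI.
        FileSystem fs = FileSystem.get(new Configuration());
        // Placeholder path; getFileStatus throws FileNotFoundException
        // if the path does not exist.
        FileStatus status = fs.getFileStatus(new Path("/tmp/example.txt"));
        System.out.println("length:   " + status.getLen());
        System.out.println("modified: " + status.getModificationTime());
        System.out.println("isDir:    " + status.isDirectory());
    }
}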

Usage

From source file:edu.ncku.ikdd.DataMining.java

public static void main(String[] argv) throws Exception {
    int candidateLength = 1;
    FileSystem dfs = FileSystem.get(new Configuration());
    do {
        JobConf countConf = new JobConf(DataMining.class);

        countConf.setOutputKeyClass(Text.class);
        countConf.setOutputValueClass(IntWritable.class);

        countConf.setMapperClass(CountMap.class);
        countConf.setCombinerClass(CountCombine.class);
        countConf.setReducerClass(CountReduce.class);

        countConf.setInputFormat(TextInputFormat.class);
        countConf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(countConf, new Path(argv[0]));
        FileOutputFormat.setOutputPath(countConf, new Path(count_path + String.valueOf(candidateLength)));
        countConf.setInt("minSupport", Integer.valueOf(argv[2]));
        countConf.setInt("candidateLength", candidateLength);
        JobClient.runJob(countConf);

        ++candidateLength;

        JobConf candidateConf = new JobConf(DataMining.class);

        candidateConf.setOutputKeyClass(Text.class);
        candidateConf.setOutputValueClass(Text.class);

        candidateConf.setMapperClass(CandidateMap.class);
        candidateConf.setReducerClass(CandidateReduce.class);

        candidateConf.setInputFormat(TextInputFormat.class);
        candidateConf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(candidateConf,
                new Path(count_path + String.valueOf(candidateLength - 1) + "/part-00000"));
        FileOutputFormat.setOutputPath(candidateConf,
                new Path(candidate_path + String.valueOf(candidateLength)));
        candidateConf.setInt("candidateLength", candidateLength);

        JobClient.runJob(candidateConf);

    } while (dfs.getFileStatus(new Path(candidate_path + String.valueOf(candidateLength) + "/part-00000"))
            .getLen() > 0);

    BufferedReader br;
    BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(dfs.create(new Path(argv[1] + "/part-00000"))));
    String line;
    for (int i = 1; i < candidateLength; ++i) {
        br = new BufferedReader(
                new InputStreamReader(dfs.open(new Path(count_path + String.valueOf(i) + "/part-00000"))));
        while ((line = br.readLine()) != null) {
            bw.write(line + "\n");
        }
        br.close();
    }
    bw.close();
}
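
The loop above terminates once the newest candidate output file is empty, which it detects via getFileStatus().getLen(). A minimal sketch of that termination check in isolation (the helper name and part-file argument are illustrative, not part of the original source) might look like this:

static boolean hasOutput(FileSystem fs, Path partFile) throws IOException {
    // getFileStatus throws FileNotFoundException when the path is missing, so
    // callers unsure whether the job produced output should catch it or test
    // fs.exists(partFile) first.
    return fs.getFileStatus(partFile).getLen() > 0;
}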

From source file:edu.stolaf.cs.wmrserver.JobServiceHandler.java

License:Apache License

public static FileStatus[] listInputFiles(FileSystem fs, Path path) throws IOException {
    if (!fs.isDirectory(path))
        return new FileStatus[] { fs.getFileStatus(path) };
    else {
        // Get all files in directory that are not directories or hidden files

        final FileSystem fsFinal = fs;
        PathFilter filter = new PathFilter() {
            public boolean accept(Path p) {
                try {
                    return !(fsFinal.isDirectory(p) || p.getName().startsWith(".")
                            || p.getName().startsWith("_"));
                } catch (IOException ex) {
                    throw new RuntimeException("Error filtering files.", ex);
                }
            }
        };

        return fs.listStatus(path, filter);
    }
}
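
A hedged usage sketch of the helper above (the default Configuration and the input path are assumptions for illustration):

FileSystem fs = FileSystem.get(new Configuration());
// The input path is a placeholder. For a directory this returns only visible,
// non-directory entries; for a single file it returns that file's status.
for (FileStatus status : JobServiceHandler.listInputFiles(fs, new Path("/user/example/input"))) {
    System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
}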

From source file:edu.uci.ics.asterix.aoya.AsterixApplicationMaster.java

License:Apache License

/**
 * Sets up the parameters for the Asterix config.
 *
 * @throws IOException
 */
private void distributeAsterixConfig() throws IOException {
    FileSystem fs = FileSystem.get(conf);
    String pathSuffix = instanceConfPath + File.separator + ASTERIX_CONF_NAME;
    Path dst = new Path(dfsBasePath, pathSuffix);
    URI paramLocation = dst.toUri();
    FileStatus paramFileStatus = fs.getFileStatus(dst);
    Long paramLen = paramFileStatus.getLen();
    Long paramTimestamp = paramFileStatus.getModificationTime();
    LocalResource asterixParamLoc = Records.newRecord(LocalResource.class);
    asterixParamLoc.setType(LocalResourceType.FILE);
    asterixParamLoc.setVisibility(LocalResourceVisibility.PRIVATE);
    asterixParamLoc.setResource(ConverterUtils.getYarnUrlFromURI(paramLocation));
    asterixParamLoc.setTimestamp(paramTimestamp);
    asterixParamLoc.setSize(paramLen);
    localResources.put(ASTERIX_CONF_NAME, asterixParamLoc);

}

From source file:edu.uci.ics.asterix.aoya.AsterixApplicationMaster.java

License:Apache License

/**
 * Here I am just pointing the Containers to the existing HDFS resources given by the Client,
 * on the filesystem of the nodes.
 * 
 * @throws IOException
 */
private void localizeDFSResources() throws IOException {
    //if performing an 'offline' task, skip a lot of resource distribution
    if (obliterate || backup || restore) {
        if (appMasterJar == null || ("").equals(appMasterJar)) {
            // This can happen in a JUnit testing environment; we don't need to set it there.
            if (!conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
                throw new IllegalStateException("AM jar not provided in environment.");
            } else {
                return;
            }
        }
        FileSystem fs = FileSystem.get(conf);
        FileStatus appMasterJarStatus = fs.getFileStatus(appMasterJar);
        LocalResource obliteratorJar = Records.newRecord(LocalResource.class);
        obliteratorJar.setType(LocalResourceType.FILE);
        obliteratorJar.setVisibility(LocalResourceVisibility.PRIVATE);
        obliteratorJar.setResource(ConverterUtils.getYarnUrlFromPath(appMasterJar));
        obliteratorJar.setTimestamp(appMasterJarStatus.getModificationTime());
        obliteratorJar.setSize(appMasterJarStatus.getLen());
        localResources.put("asterix-yarn.jar", obliteratorJar);
        LOG.info(localResources.values());
        return;
    }
    //otherwise, distribute everything to start up asterix

    LocalResource asterixZip = Records.newRecord(LocalResource.class);

    //this un-tar's the asterix distribution
    asterixZip.setType(LocalResourceType.ARCHIVE);

    asterixZip.setVisibility(LocalResourceVisibility.PRIVATE);
    try {
        asterixZip.setResource(ConverterUtils.getYarnUrlFromURI(new URI(asterixZipPath)));

    } catch (URISyntaxException e) {
        LOG.error("Error locating Asterix zip" + " in env, path=" + asterixZipPath);
        throw new IOException(e);
    }

    asterixZip.setTimestamp(asterixZipTimestamp);
    asterixZip.setSize(asterixZipLen);
    localResources.put(ASTERIX_ZIP_NAME, asterixZip);

    //now let's do the same for the cluster description XML
    LocalResource asterixConf = Records.newRecord(LocalResource.class);
    asterixConf.setType(LocalResourceType.FILE);

    asterixConf.setVisibility(LocalResourceVisibility.PRIVATE);
    try {
        asterixConf.setResource(ConverterUtils.getYarnUrlFromURI(new URI(asterixConfPath)));
    } catch (URISyntaxException e) {
        LOG.error("Error locating Asterix config" + " in env, path=" + asterixConfPath);
        throw new IOException(e);
    }
    //TODO: I could avoid localizing this everywhere by only calling this block on the metadata node. 
    asterixConf.setTimestamp(asterixConfTimestamp);
    asterixConf.setSize(asterixConfLen);
    localResources.put("cluster-config.xml", asterixConf);
    //now add the libraries if there are any
    try {
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(dfsBasePath, instanceConfPath + File.separator + "library" + Path.SEPARATOR);
        if (fs.exists(p)) {
            FileStatus[] dataverses = fs.listStatus(p);
            for (FileStatus d : dataverses) {
                if (!d.isDirectory())
                    throw new IOException("Library configuration directory structure is incorrect");
                FileStatus[] libraries = fs.listStatus(d.getPath());
                for (FileStatus l : libraries) {
                    if (l.isDirectory())
                        throw new IOException("Library configuration directory structure is incorrect");
                    LocalResource lr = Records.newRecord(LocalResource.class);
                    lr.setResource(ConverterUtils.getYarnUrlFromURI(l.getPath().toUri()));
                    lr.setSize(l.getLen());
                    lr.setTimestamp(l.getModificationTime());
                    lr.setType(LocalResourceType.ARCHIVE);
                    lr.setVisibility(LocalResourceVisibility.PRIVATE);
                    localResources.put("library" + Path.SEPARATOR + d.getPath().getName() + Path.SEPARATOR
                            + l.getPath().getName().split("\\.")[0], lr);
                    LOG.info("Found library: " + l.getPath().toString());
                    LOG.info(l.getPath().getName());
                }
            }
        }
    } catch (FileNotFoundException e) {
        LOG.info("No external libraries present");
        //do nothing, it just means there aren't libraries. that is possible and ok
        // it should be handled by the fs.exists(p) check though.
    }
    LOG.info(localResources.values());

}

From source file:edu.uci.ics.asterix.aoya.AsterixYARNClient.java

License:Apache License

/**
 * Upload the Asterix cluster description onto the DFS. This will persist the state of the instance.
 *
 * @return
 * @throws YarnException
 * @throws IOException
 */
private List<DFSResourceCoordinate> deployConfig() throws YarnException, IOException {

    FileSystem fs = FileSystem.get(conf);
    List<DFSResourceCoordinate> resources = new ArrayList<DFSResourceCoordinate>(2);

    String pathSuffix = CONF_DIR_REL + instanceFolder + CONFIG_DEFAULT_NAME;
    Path dstConf = new Path(fs.getHomeDirectory(), pathSuffix);
    FileStatus destStatus;
    try {
        destStatus = fs.getFileStatus(dstConf);
    } catch (IOException e) {
        throw new YarnException("Asterix instance by that name does not appear to exist in DFS");
    }
    LocalResource asterixConfLoc = Records.newRecord(LocalResource.class);
    asterixConfLoc.setType(LocalResourceType.FILE);
    asterixConfLoc.setVisibility(LocalResourceVisibility.PRIVATE);
    asterixConfLoc.setResource(ConverterUtils.getYarnUrlFromPath(dstConf));
    asterixConfLoc.setTimestamp(destStatus.getModificationTime());

    DFSResourceCoordinate conf = new DFSResourceCoordinate();
    conf.envs.put(dstConf.toUri().toString(), AConstants.CONFLOCATION);
    conf.envs.put(Long.toString(asterixConfLoc.getSize()), AConstants.CONFLEN);
    conf.envs.put(Long.toString(asterixConfLoc.getTimestamp()), AConstants.CONFTIMESTAMP);
    conf.name = CONFIG_DEFAULT_NAME;
    conf.res = asterixConfLoc;
    resources.add(conf);

    return resources;

}

From source file:edu.uci.ics.asterix.aoya.AsterixYARNClient.java

License:Apache License

/**
 * Install the current Asterix parameters to the DFS. This can be modified via alter.
 *
 * @throws YarnException
 * @throws IOException
 */
private void installConfig() throws YarnException, IOException {
    FileSystem fs = FileSystem.get(conf);
    String pathSuffix = CONF_DIR_REL + instanceFolder + CONFIG_DEFAULT_NAME;
    Path dstConf = new Path(fs.getHomeDirectory(), pathSuffix);
    try {
        fs.getFileStatus(dstConf);
        if (mode == Mode.INSTALL) {
            throw new IllegalStateException("Instance with this name already exists.");
        }
    } catch (FileNotFoundException e) {
        if (mode == Mode.START) {
            throw new IllegalStateException("Instance does not exist for this user", e);
        }
    }
    if (mode == Mode.INSTALL) {
        Path src = new Path(asterixConf);
        fs.copyFromLocalFile(false, true, src, dstConf);
    }

}

From source file:edu.uci.ics.asterix.aoya.AsterixYARNClient.java

License:Apache License

/**
 * Finds the minimal classes and JARs needed to start the AM only.
 * @return Resources the AM needs to start on the initial container.
 * @throws IllegalStateException
 * @throws IOException
 */
private List<DFSResourceCoordinate> installAmLibs() throws IllegalStateException, IOException {
    List<DFSResourceCoordinate> resources = new ArrayList<DFSResourceCoordinate>(2);
    FileSystem fs = FileSystem.get(conf);
    String fullLibPath = CONF_DIR_REL + instanceFolder + "am_jars" + Path.SEPARATOR;
    String[] cp = System.getProperty("java.class.path").split(System.getProperty("path.separator"));
    String asterixJarPattern = "^(asterix).*(jar)$"; //starts with asterix,ends with jar
    String commonsJarPattern = "^(commons).*(jar)$";
    String surefireJarPattern = "^(surefire).*(jar)$"; //for maven tests
    String jUnitTestPattern = "^(asterix-yarn" + File.separator + "target)$";

    LOG.info(File.separator);
    for (String j : cp) {
        String[] pathComponents = j.split(Pattern.quote(File.separator));
        LOG.info(j);
        LOG.info(pathComponents[pathComponents.length - 1]);
        if (pathComponents[pathComponents.length - 1].matches(asterixJarPattern)
                || pathComponents[pathComponents.length - 1].matches(commonsJarPattern)
                || pathComponents[pathComponents.length - 1].matches(surefireJarPattern)
                || pathComponents[pathComponents.length - 1].matches(jUnitTestPattern)) {
            LOG.info("Loading JAR/classpath: " + j);
            File f = new File(j);
            Path dst = new Path(fs.getHomeDirectory(), fullLibPath + f.getName());
            if (!fs.exists(dst) || refresh) {
                fs.copyFromLocalFile(false, true, new Path(f.getAbsolutePath()), dst);
            }
            FileStatus dstSt = fs.getFileStatus(dst);
            LocalResource amLib = Records.newRecord(LocalResource.class);
            amLib.setType(LocalResourceType.FILE);
            amLib.setVisibility(LocalResourceVisibility.PRIVATE);
            amLib.setResource(ConverterUtils.getYarnUrlFromPath(dst));
            amLib.setTimestamp(dstSt.getModificationTime());
            amLib.setSize(dstSt.getLen());
            DFSResourceCoordinate amLibCoord = new DFSResourceCoordinate();
            amLibCoord.res = amLib;
            amLibCoord.name = f.getName();
            if (f.getName().contains("asterix-yarn") || f.getName().contains("surefire")) {
                amLibCoord.envs.put(dst.toUri().toString(), AConstants.APPLICATIONMASTERJARLOCATION);
                amLibCoord.envs.put(Long.toString(dstSt.getLen()), AConstants.APPLICATIONMASTERJARLEN);
                amLibCoord.envs.put(Long.toString(dstSt.getModificationTime()),
                        AConstants.APPLICATIONMASTERJARTIMESTAMP);
            }
            resources.add(amLibCoord);
        }

    }
    if (resources.size() == 0) {
        throw new IOException("Required JARs are missing. Please check your directory structure");
    }
    return resources;
}

From source file:edu.uci.ics.asterix.aoya.AsterixYARNClient.java

License:Apache License

/**
 * Uploads binary resources to HDFS for use by the AM
 * @return
 * @throws IOException
 * @throws YarnException
 */
public List<DFSResourceCoordinate> distributeBinaries() throws IOException, YarnException {

    List<DFSResourceCoordinate> resources = new ArrayList<DFSResourceCoordinate>(2);
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    Path src, dst;
    FileStatus destStatus;
    String pathSuffix;

    // adding info so we can add the jar to the App master container path

    // Add the asterix tarfile to HDFS for easy distribution
    // Keep it all archived for now so add it as a file...

    pathSuffix = CONF_DIR_REL + instanceFolder + "asterix-server.zip";
    dst = new Path(fs.getHomeDirectory(), pathSuffix);
    if (refresh) {
        if (fs.exists(dst)) {
            fs.delete(dst, false);
        }
    }
    if (!fs.exists(dst)) {
        src = new Path(asterixZip);
        LOG.info("Copying Asterix distributable to DFS");
        fs.copyFromLocalFile(false, true, src, dst);
    }
    destStatus = fs.getFileStatus(dst);
    LocalResource asterixTarLoc = Records.newRecord(LocalResource.class);
    asterixTarLoc.setType(LocalResourceType.ARCHIVE);
    asterixTarLoc.setVisibility(LocalResourceVisibility.PRIVATE);
    asterixTarLoc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    asterixTarLoc.setTimestamp(destStatus.getModificationTime());

    // adding info so we can add the tarball to the App master container path
    DFSResourceCoordinate tar = new DFSResourceCoordinate();
    tar.envs.put(dst.toUri().toString(), AConstants.TARLOCATION);
    tar.envs.put(Long.toString(asterixTarLoc.getSize()), AConstants.TARLEN);
    tar.envs.put(Long.toString(asterixTarLoc.getTimestamp()), AConstants.TARTIMESTAMP);
    tar.res = asterixTarLoc;
    tar.name = "asterix-server.zip";
    resources.add(tar);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        Path log4jSrc = new Path(log4jPropFile);
        Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
        fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
        FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
        LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
        log4jRsrc.setType(LocalResourceType.FILE);
        log4jRsrc.setVisibility(LocalResourceVisibility.PRIVATE);
        log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
        log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
        log4jRsrc.setSize(log4jFileStatus.getLen());
        DFSResourceCoordinate l4j = new DFSResourceCoordinate();
        // Attach the log4j resource to its own coordinate (not the tarball's).
        l4j.res = log4jRsrc;
        l4j.name = "log4j.properties";
        resources.add(l4j);
    }

    resources.addAll(installAmLibs());
    return resources;
}

From source file:edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory.java

License:Apache License

/**
 * Instead of creating the splits using the input format, we do it manually.
 * This function returns fileSplits (one per HDFS file block) irrespective of the number of partitions,
 * and the produced splits only cover the intersection between the current files in HDFS and the files
 * stored internally in AsterixDB.
 * 1. NoOp means appended file
 * 2. AddOp means new file
 * 3. UpdateOp means the delta of a file
 *
 * @return
 * @throws IOException
 */
protected InputSplit[] getSplits(JobConf conf) throws IOException {
    // Create file system object
    FileSystem fs = FileSystem.get(conf);
    ArrayList<FileSplit> fileSplits = new ArrayList<FileSplit>();
    ArrayList<ExternalFile> orderedExternalFiles = new ArrayList<ExternalFile>();
    // Create files splits
    for (ExternalFile file : files) {
        Path filePath = new Path(file.getFileName());
        FileStatus fileStatus;
        try {
            fileStatus = fs.getFileStatus(filePath);
        } catch (FileNotFoundException e) {
            // file was deleted at some point, skip to next file
            continue;
        }
        if (file.getPendingOp() == ExternalFilePendingOp.PENDING_ADD_OP
                && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            // Get its information from HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, file.getSize());
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() < file.getSize()) {
                    fileSplits.add(new FileSplit(filePath, block.getOffset(),
                            (block.getLength() + block.getOffset()) < file.getSize() ? block.getLength()
                                    : (file.getSize() - block.getOffset()),
                            block.getHosts()));
                    orderedExternalFiles.add(file);
                }
            }
        } else if (file.getPendingOp() == ExternalFilePendingOp.PENDING_NO_OP
                && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            long oldSize = 0L;
            long newSize = file.getSize();
            for (int i = 0; i < files.size(); i++) {
                if (files.get(i).getFileName().equals(file.getFileName())
                        && files.get(i).getSize() != file.getSize()) {
                    newSize = files.get(i).getSize();
                    oldSize = file.getSize();
                    break;
                }
            }

            // Get its information from HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, newSize);
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() + block.getLength() > oldSize) {
                    if (block.getOffset() < newSize) {
                        // Block interacts with the delta -> create a split
                        long startCut = (block.getOffset() > oldSize) ? 0L : oldSize - block.getOffset();
                        long endCut = (block.getOffset() + block.getLength() < newSize) ? 0L
                                : block.getOffset() + block.getLength() - newSize;
                        long splitLength = block.getLength() - startCut - endCut;
                        fileSplits.add(new FileSplit(filePath, block.getOffset() + startCut, splitLength,
                                block.getHosts()));
                        orderedExternalFiles.add(file);
                    }
                }
            }
        }
    }
    fs.close();
    files = orderedExternalFiles;
    return fileSplits.toArray(new FileSplit[fileSplits.size()]);
}

From source file:edu.umd.cloud9.util.ReadSequenceFile.java

License:Apache License

public static void main(String[] args) throws IOException {
    if (args.length < 1) {
        System.out.println("args: [path] [max-num-of-records-per-file]");
        System.exit(-1);
    }

    String f = args[0];

    int max = Integer.MAX_VALUE;
    if (args.length >= 2) {
        max = Integer.parseInt(args[1]);
    }

    Configuration config = new JobConf();
    FileSystem fileSys = FileSystem.get(config);
    Path p = new Path(f);

    if (fileSys.getFileStatus(p).isDir())
        readSequenceFilesInDir(p, max);
    else
        readSequenceFile(p, max);
}