Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usages of the listStatus method of org.apache.hadoop.fs.FileSystem.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Source Link

Document

Filter files/directories in the given list of paths using default path filter.

Usage

From source file:com.cloudera.sqoop.util.AppendUtils.java

License:Apache License

/**
 * Move files from source to target using a specified starting partition.
 *
 * <p>Regular files whose name matches {@code part.*-ddddd.*} (five digits) are
 * renamed into {@code targetDir} with a new partition number, counting up from
 * {@code partitionStart}; regular files that do not match are left where they
 * are. Directories are moved under their own name, or with a {@code -<number>}
 * suffix when an entry of that name already exists in the target.
 *
 * <p>A rename that the file system reports as unsuccessful is logged as a
 * warning; it does not abort the remaining moves.
 *
 * @param fs the file system used for listing, existence checks and renames
 * @param sourceDir directory whose direct children are moved
 * @param targetDir directory that receives the moved entries
 * @param partitionStart partition number assigned to the first matching data file
 * @throws IOException if {@code sourceDir} cannot be listed or a file system call fails
 */
private void moveFiles(FileSystem fs, Path sourceDir, Path targetDir, int partitionStart) throws IOException {

    NumberFormat numpart = NumberFormat.getInstance();
    numpart.setMinimumIntegerDigits(PARTITION_DIGITS);
    numpart.setGroupingUsed(false);
    Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
    FileStatus[] tempFiles = fs.listStatus(sourceDir);

    if (null == tempFiles) {
        // If we've already checked that the dir exists, and now it can't be
        // listed, this is a genuine error (permissions, fs integrity, or other).
        throw new IOException("Could not list files from " + sourceDir);
    }

    // Move and rename files & directories from temporary to target-dir thus
    // appending file's next partition
    for (FileStatus fileStat : tempFiles) {
        if (!fileStat.isDir()) {
            // Move imported data files
            String filename = fileStat.getPath().getName();
            Matcher mat = patt.matcher(filename);
            if (mat.matches()) {
                String name = getFilename(filename);
                String fileToMove = name.concat(numpart.format(partitionStart++));
                String extension = getFileExtension(filename);
                if (extension != null) {
                    fileToMove = fileToMove.concat(extension);
                }
                LOG.debug("Filename: " + filename + " repartitioned to: " + fileToMove);
                Path destination = new Path(targetDir, fileToMove);
                // rename() signals failure through its return value, not an exception.
                if (!fs.rename(fileStat.getPath(), destination)) {
                    LOG.warn("Failed to move file " + fileStat.getPath() + " to " + destination);
                }
            }
        } else {
            // Move directories (_logs & any other)
            String dirName = fileStat.getPath().getName();
            Path path = new Path(targetDir, dirName);
            int dirNumber = 0;
            // Probe for a free name so an existing directory is never overwritten.
            while (fs.exists(path)) {
                path = new Path(targetDir, dirName.concat("-").concat(numpart.format(dirNumber++)));
            }
            LOG.debug("Directory: " + dirName + " renamed to: " + path.getName());
            if (!fs.rename(fileStat.getPath(), path)) {
                LOG.warn("Failed to move directory " + fileStat.getPath() + " to " + path);
            }
        }
    }
}

From source file:com.conversantmedia.mapreduce.tool.BaseTool.java

License:Apache License

/**
 * Resolves an input path to the file statuses it stands for.
 *
 * <ul>
 *   <li>an existing directory yields the statuses of its direct children;</li>
 *   <li>an existing file yields its own status;</li>
 *   <li>a path that does not exist is treated as a glob and yields its matches.</li>
 * </ul>
 *
 * @param input a file, directory or glob path
 * @return a mutable list of matching statuses; empty when nothing matches
 * @throws IOException if the file system cannot be queried
 */
protected List<FileStatus> getInputFiles(Path input) throws IOException {
    FileSystem fs = FileSystem.get(getConf());
    List<FileStatus> status = new ArrayList<>();
    if (fs.exists(input)) {
        FileStatus inputStatus = fs.getFileStatus(input);
        if (inputStatus.isDirectory()) {
            // Collect everything directly under this directory. Copy into the
            // ArrayList so that every branch returns the same, mutable kind of list.
            status.addAll(Arrays.asList(fs.listStatus(input)));
        } else {
            status.add(inputStatus);
        }
    } else {
        // Must be a glob path
        FileStatus[] statusAry = fs.globStatus(input);
        // globStatus yields null for a non-glob path that does not exist.
        if (statusAry != null) {
            status.addAll(Arrays.asList(statusAry));
        }
    }
    return status;
}

From source file:com.datamoin.tajo.tpcds.TpcDSTestUtil.java

License:Apache License

/**
 * Checks the configured TPC-DS data directory and then creates the database and
 * one table per DDL file found in the {@code tpcds/ddl} system resource.
 *
 * <p>The data directory (taken from {@code getDataDir()}) must contain one entry
 * per name in {@code tableNames}; it is checked through Hadoop when it starts
 * with {@code hdfs://} and through {@link File} otherwise. In every DDL file the
 * placeholders {@code ${DB}} and {@code ${DATA_LOCATION}} are substituted before
 * the statement is sent to the client.
 *
 * @param database name of the database to create (if absent) and to create the tables in
 * @param client client used to execute the statements
 * @throws Exception if the data directory is missing or incomplete, or a statement fails
 */
public static void createTables(String database, TajoClient client) throws Exception {
    final String dataDir = getDataDir();
    if (dataDir == null || dataDir.isEmpty()) {
        throw new IOException("No TPCDS_DATA_DIR property. Use -DTPCDS_DATA_DIR=<data dir>");
    }

    if (!dataDir.startsWith("hdfs://")) {
        // Local data: validate with plain java.io.File.
        File localRoot = new File(dataDir);
        if (!localRoot.exists()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] not exists.");
        }
        if (localRoot.isFile()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] is not a directory.");
        }

        for (String table : tableNames) {
            File localTableDir = new File(localRoot, table);
            if (!localTableDir.exists()) {
                throw new IOException(table + " data dir [" + localTableDir + "] not exists.");
            }
        }
    } else {
        // HDFS data: validate through the Hadoop file system of the path.
        Path remoteRoot = new Path(dataDir);
        FileSystem remoteFs = remoteRoot.getFileSystem(new Configuration());
        for (String table : tableNames) {
            Path remoteTableDir = new Path(remoteRoot, table);
            if (!remoteFs.exists(remoteTableDir)) {
                throw new IOException(table + " data dir [" + remoteTableDir + "] not exists.");
            }
        }
    }

    // NOTE(review): this option set is populated but not passed on anywhere in this method.
    KeyValueSet opt = new KeyValueSet();
    opt.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);

    LOG.info("Create database: " + database);
    client.executeQuery("create database if not exists " + database);

    Path resourceRoot = new Path(ClassLoader.getSystemResource("tpcds").toString());
    Path ddlDir = new Path(resourceRoot, "ddl");
    FileStatus[] ddlFiles = FileSystem.getLocal(new Configuration()).listStatus(ddlDir);

    // Table locations need a scheme; default to the local file system.
    final String dataLocationRoot = dataDir.contains("://") ? dataDir : "file://" + dataDir;

    for (FileStatus ddlFile : ddlFiles) {
        if (!ddlFile.isFile()) {
            continue;
        }
        // The table name is the DDL file name up to its first dot.
        String tableName = ddlFile.getPath().getName().split("\\.")[0];
        String ddl = FileUtil.readTextFile(new File(ddlFile.getPath().toUri()))
                .replace("${DB}", database)
                .replace("${DATA_LOCATION}", dataLocationRoot + "/" + tableName);

        LOG.info("Create table:" + tableName + "," + ddl);
        client.executeQuery(ddl);
    }
}

From source file:com.datasalt.pangool.utils.DCUtils.java

License:Apache License

/**
 * Given a file post-fix, locate a file in the DistributedCache. It iterates over all the local files and returns the
 * first one that meets this condition.
 *
 * <p>When the configured file system equals the local one, the folder named by {@code hadoop.tmp.dir} is scanned
 * with the Java {@link File} API; otherwise the folder named by {@code HDFS_TMP_FOLDER_CONF} (falling back to
 * {@code hadoop.tmp.dir}) is listed through Hadoop.
 *
 * @param conf
 *          The Hadoop Configuration.
 * @param filePostFix
 *          The file post-fix.
 * @return the first matching path, or {@code null} when no entry ends with the post-fix or the local temp folder
 *         cannot be listed
 * @throws IOException
 *           if the file system cannot be obtained or listed
 */
public static Path locateFileInDC(Configuration conf, String filePostFix) throws IOException {
    FileSystem fS = FileSystem.get(conf);
    Path locatedFile = null;

    if (fS.equals(FileSystem.getLocal(conf))) {
        // We use the File Java API in local because the Hadoop Path, FileSystem, etc is too slow for tests that
        // need to call this method a lot
        File tmpFolder = new File(conf.get("hadoop.tmp.dir"));
        // listFiles() returns null (not an empty array) when the folder is missing, is not a directory or cannot
        // be read; treat that as "nothing found" instead of failing with a NullPointerException.
        File[] localFiles = tmpFolder.listFiles();
        if (localFiles != null) {
            for (File file : localFiles) {
                if (file.getName().endsWith(filePostFix)) {
                    locatedFile = new Path(file.toString());
                    break;
                }
            }
        }
    } else {
        Path tmpHdfsFolder = new Path(conf.get(HDFS_TMP_FOLDER_CONF, conf.get("hadoop.tmp.dir")));
        for (FileStatus fSt : fS.listStatus(tmpHdfsFolder)) {
            Path path = fSt.getPath();
            if (path.toString().endsWith(filePostFix)) {
                locatedFile = path;
                break;
            }
        }
    }

    return locatedFile;
}

From source file:com.datasalt.utils.commons.BaseConfigurationFactory.java

License:Apache License

/**
 * Adds the project's HDFS {@code conf} folder and every entry of its {@code lib}
 * folder to the classpath through the {@code DistributedCache}.
 *
 * <p>Nothing is added when {@code fs.default.name} starts with {@code file:}.
 * Both folders live under {@code /<projectName>}; {@code projectName} is defined
 * outside this method.
 *
 * @param conf configuration that is read and that receives the classpath entries
 * @throws IOException if the file system cannot be obtained or the lib folder cannot be listed
 */
private void populate(Configuration conf) throws IOException {
    FileSystem dFs = FileSystem.get(conf);
    String defaultFsName = conf.get("fs.default.name");
    if (defaultFsName.startsWith("file:")) {
        return;
    }

    final String projectRoot = "/" + projectName;
    Path libPath = new Path(projectRoot + "/lib");
    Path confPath = new Path(projectRoot + "/conf");

    // Add config folder to classpath
    log.info("Adding HDFS Path " + confPath + " to classpath");
    DistributedCache.addFileToClassPath(confPath, conf);

    FileStatus[] libraries = dFs.listStatus(libPath);
    log.info("Adding all files in " + libPath + " to classpath");
    for (int i = 0; i < libraries.length; i++) {
        // Add each JAR to classpath - there is a trick to it: the entry is rebuilt
        // from libPath plus the bare file name instead of using the listed path as is.
        // NOTE(review): presumably to avoid the fully qualified form - confirm.
        String jarName = libraries[i].getPath().getName();
        DistributedCache.addFileToClassPath(new Path(libPath, jarName), conf);
    }
}

From source file:com.datatorrent.lib.io.IdempotentStorageManagerTest.java

License:Open Source License

/**
 * Saves one window for each of operators 1, 2 and 3, passes operators 2 and 3 to
 * {@code partitioned}, deletes up to window 1 of operator 1 and then checks that
 * operator 1's directory is empty and that no directory exists for operators 2 and 3.
 */
@Test
public void testDelete() throws IOException {
    Map<Integer, String> dataOf1 = Maps.newHashMap();
    dataOf1.put(1, "one");
    dataOf1.put(2, "two");
    dataOf1.put(3, "three");

    Map<Integer, String> dataOf2 = Maps.newHashMap();
    dataOf2.put(4, "four");
    dataOf2.put(5, "five");
    dataOf2.put(6, "six");

    // Entries 7-9 belong to the third data set (they were put into dataOf2 by a copy-paste slip).
    Map<Integer, String> dataOf3 = Maps.newHashMap();
    dataOf3.put(7, "seven");
    dataOf3.put(8, "eight");
    dataOf3.put(9, "nine");

    testMeta.storageManager.save(dataOf1, 1, 1);
    testMeta.storageManager.save(dataOf2, 2, 1);
    testMeta.storageManager.save(dataOf3, 3, 1);

    testMeta.storageManager.partitioned(Lists.<IdempotentStorageManager>newArrayList(testMeta.storageManager),
            Sets.newHashSet(2, 3));
    testMeta.storageManager.setup(testMeta.context);
    testMeta.storageManager.deleteUpTo(1, 1);

    Path appPath = new Path(testMeta.recoveryPath + '/' + testMeta.context.getValue(DAG.APPLICATION_ID));
    // newInstance() hands out a private FileSystem, so this test is responsible for closing it.
    FileSystem fs = FileSystem.newInstance(appPath.toUri(), new Configuration());
    try {
        Assert.assertEquals("no data for 1", 0, fs.listStatus(new Path(appPath, Integer.toString(1))).length);
        Assert.assertEquals("no data for 2", false, fs.exists(new Path(appPath, Integer.toString(2))));
        Assert.assertEquals("no data for 3", false, fs.exists(new Path(appPath, Integer.toString(3))));
    } finally {
        fs.close();
    }
}

From source file:com.datatorrent.lib.util.FSWindowDataManagerTest.java

License:Apache License

/**
 * Saves windows 1-9 for operator 1 and one window each for operators 2 and 3,
 * passes operators 2 and 3 to {@code partitioned}, deletes up to window 6 of
 * operator 1 and then checks that exactly windows 7, 8 and 9 remain for operator 1
 * and that no directory exists for operators 2 and 3.
 */
@Test
public void testDelete() throws IOException {
    testMeta.storageManager.setup(testMeta.context);
    Map<Integer, String> dataOf1 = Maps.newHashMap();
    dataOf1.put(1, "one");
    dataOf1.put(2, "two");
    dataOf1.put(3, "three");

    Map<Integer, String> dataOf2 = Maps.newHashMap();
    dataOf2.put(4, "four");
    dataOf2.put(5, "five");
    dataOf2.put(6, "six");

    // Entries 7-9 belong to the third data set (they were put into dataOf2 by a copy-paste slip).
    Map<Integer, String> dataOf3 = Maps.newHashMap();
    dataOf3.put(7, "seven");
    dataOf3.put(8, "eight");
    dataOf3.put(9, "nine");

    for (int i = 1; i <= 9; ++i) {
        testMeta.storageManager.save(dataOf1, 1, i);
    }

    testMeta.storageManager.save(dataOf2, 2, 1);
    testMeta.storageManager.save(dataOf3, 3, 1);

    testMeta.storageManager.partitioned(Lists.<WindowDataManager>newArrayList(testMeta.storageManager),
            Sets.newHashSet(2, 3));
    testMeta.storageManager.setup(testMeta.context);
    testMeta.storageManager.deleteUpTo(1, 6);

    Path appPath = new Path(testMeta.applicationPath + '/' + testMeta.storageManager.getRecoveryPath());
    // newInstance() hands out a private FileSystem, so this test is responsible for closing it.
    FileSystem fs = FileSystem.newInstance(appPath.toUri(), new Configuration());
    try {
        FileStatus[] fileStatuses = fs.listStatus(new Path(appPath, Integer.toString(1)));
        Assert.assertEquals("number of windows for 1", 3, fileStatuses.length);
        TreeSet<String> windows = Sets.newTreeSet();
        for (FileStatus fileStatus : fileStatuses) {
            windows.add(fileStatus.getPath().getName());
        }
        Assert.assertEquals("window list for 1", Sets.newTreeSet(Arrays.asList("7", "8", "9")), windows);
        Assert.assertEquals("no data for 2", false, fs.exists(new Path(appPath, Integer.toString(2))));
        Assert.assertEquals("no data for 3", false, fs.exists(new Path(appPath, Integer.toString(3))));
    } finally {
        fs.close();
    }
    testMeta.storageManager.teardown();
}

From source file:com.datatorrent.lib.util.WindowDataManagerTest.java

License:Apache License

/**
 * Saves windows 1-9 for operator 1 and one window each for operators 2 and 3,
 * passes operators 2 and 3 to {@code partitioned}, deletes up to window 6 of
 * operator 1 and then checks that exactly windows 7, 8 and 9 remain for operator 1
 * and that no directory exists for operators 2 and 3.
 */
@Test
public void testDelete() throws IOException {
    Map<Integer, String> dataOf1 = Maps.newHashMap();
    dataOf1.put(1, "one");
    dataOf1.put(2, "two");
    dataOf1.put(3, "three");

    Map<Integer, String> dataOf2 = Maps.newHashMap();
    dataOf2.put(4, "four");
    dataOf2.put(5, "five");
    dataOf2.put(6, "six");

    // Entries 7-9 belong to the third data set (they were put into dataOf2 by a copy-paste slip).
    Map<Integer, String> dataOf3 = Maps.newHashMap();
    dataOf3.put(7, "seven");
    dataOf3.put(8, "eight");
    dataOf3.put(9, "nine");

    for (int i = 1; i <= 9; ++i) {
        testMeta.storageManager.save(dataOf1, 1, i);
    }

    testMeta.storageManager.save(dataOf2, 2, 1);
    testMeta.storageManager.save(dataOf3, 3, 1);

    testMeta.storageManager.partitioned(Lists.<WindowDataManager>newArrayList(testMeta.storageManager),
            Sets.newHashSet(2, 3));
    testMeta.storageManager.setup(testMeta.context);
    testMeta.storageManager.deleteUpTo(1, 6);

    Path appPath = new Path(testMeta.applicationPath + '/' + testMeta.storageManager.getRecoveryPath());
    // newInstance() hands out a private FileSystem, so this test is responsible for closing it.
    FileSystem fs = FileSystem.newInstance(appPath.toUri(), new Configuration());
    try {
        FileStatus[] fileStatuses = fs.listStatus(new Path(appPath, Integer.toString(1)));
        Assert.assertEquals("number of windows for 1", 3, fileStatuses.length);
        TreeSet<String> windows = Sets.newTreeSet();
        for (FileStatus fileStatus : fileStatuses) {
            windows.add(fileStatus.getPath().getName());
        }
        Assert.assertEquals("window list for 1", Sets.newLinkedHashSet(Arrays.asList("7", "8", "9")), windows);
        Assert.assertEquals("no data for 2", false, fs.exists(new Path(appPath, Integer.toString(2))));
        Assert.assertEquals("no data for 3", false, fs.exists(new Path(appPath, Integer.toString(3))));
    } finally {
        fs.close();
    }
}

From source file:com.datatorrent.stram.StramClient.java

License:Apache License

/**
 * Seeds the new application directory with the state of a previous application.
 *
 * <p>The snapshot stored under {@code origAppDir} is restored, switched to this
 * client's application, and saved under the new application path together with a
 * copy of the previous recovery log. Afterwards every direct sub directory of
 * {@code origAppDir} that does not yet exist under the new path is copied over.
 *
 * <p>The log streams and the file system handle opened here are released even
 * when one of the steps fails.
 *
 * @param origAppDir application directory of the previous application
 * @throws IOException if reading the previous state or writing the new state fails
 * @throws IllegalArgumentException if no snapshot can be restored from {@code origAppDir}
 */
public void copyInitialState(Path origAppDir) throws IOException {
    // locate previous snapshot
    String newAppDir = this.dag.assertAppPath();

    FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf);
    // read snapshot against new dependencies
    Object snapshot = recoveryHandler.restore();
    if (snapshot == null) {
        throw new IllegalArgumentException("No previous application state found in " + origAppDir);
    }

    InputStream logIs = recoveryHandler.getLog();
    FileSystem fs = null;
    try {
        // modify snapshot state to switch app id
        ((StreamingContainerManager.CheckpointState) snapshot).setApplicationId(this.dag, conf);
        Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS);

        fs = FileSystem.newInstance(origAppDir.toUri(), conf);
        // remove the path that was created by the storage agent during deserialization and replacement
        fs.delete(checkpointPath, true);

        // write snapshot to new location
        recoveryHandler = new FSRecoveryHandler(newAppDir, conf);
        recoveryHandler.save(snapshot);
        OutputStream logOs = recoveryHandler.rotateLog();
        try {
            IOUtils.copy(logIs, logOs);
            logOs.flush();
        } finally {
            logOs.close();
        }
        // Close explicitly on the success path so that a failing close is still reported.
        logIs.close();

        // copy sub directories that are not present in target
        FileStatus[] lFiles = fs.listStatus(origAppDir);
        for (FileStatus f : lFiles) {
            if (f.isDirectory()) {
                // NOTE(review): this relies on the listed path starting with origAppDir.toString();
                // confirm that holds when origAppDir is not fully qualified.
                String targetPath = f.getPath().toString().replace(origAppDir.toString(), newAppDir);
                if (!fs.exists(new Path(targetPath))) {
                    LOG.debug("Copying {} to {}", f.getPath(), targetPath);
                    FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf);
                } else {
                    LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath);
                }
            }
        }
    } finally {
        // Harmless after the explicit close above; releases the stream on the failure paths.
        IOUtils.closeQuietly(logIs);
        if (fs != null) {
            try {
                // newInstance() returned a private handle, so closing it here affects no other user.
                fs.close();
            } catch (IOException e) {
                LOG.warn("Failed to close file system for {}", origAppDir, e);
            }
        }
    }

}

From source file:com.datatorrent.stram.util.FSUtil.java

License:Apache License

/**
 * Recursively copies a file or directory tree between file systems and opens up
 * the permissions of everything it creates. Adapted from Hadoop's FileUtil.
 *
 * <p>Every destination entry is given permission {@code 0777}. Ownership is not
 * transferred: only a super user may change a file's owner.
 *
 * @param srcFS file system to read from
 * @param srcStatus status of the file or directory to copy
 * @param dstFS file system to write to
 * @param dst destination path for this entry
 * @param deleteSource whether to delete the source once it has been copied
 * @param overwrite whether an existing destination file may be overwritten
 * @param conf configuration passed on to the byte copy
 * @return {@code false} when the destination directory could not be created; otherwise
 *         the result of deleting the source when {@code deleteSource} is set, else {@code true}
 * @throws IOException if reading, writing or changing permissions fails
 */
public static boolean copy(FileSystem srcFS, FileStatus srcStatus, FileSystem dstFS, Path dst,
        boolean deleteSource, boolean overwrite, Configuration conf) throws IOException {
    final Path src = srcStatus.getPath();

    if (!srcStatus.isDirectory()) {
        InputStream in = null;
        OutputStream out = null;
        try {
            in = srcFS.open(src);
            out = dstFS.create(dst, overwrite);
            // The final 'true' makes copyBytes close both streams when it is done.
            org.apache.hadoop.io.IOUtils.copyBytes(in, out, conf, true);
        } catch (IOException e) {
            // Covers failures before copyBytes took over the streams (e.g. create() failing).
            org.apache.hadoop.io.IOUtils.closeStream(out);
            org.apache.hadoop.io.IOUtils.closeStream(in);
            throw e;
        }
    } else {
        if (!mkdirs(dstFS, dst)) {
            return false;
        }

        // Results of the nested copies are deliberately not inspected, as before.
        for (FileStatus child : srcFS.listStatus(src)) {
            Path childDst = new Path(dst, child.getPath().getName());
            copy(srcFS, child, dstFS, childDst, deleteSource, overwrite, conf);
        }
    }

    // TODO: change group and limit write to group
    // Files and directories alike are made world-accessible.
    dstFS.setPermission(dst, new FsPermission((short) 0777));

    return !deleteSource || srcFS.delete(src, true);

}