Example usage for org.apache.hadoop.fs FileSystem isDirectory

Introduction

This page presents usage examples for the isDirectory method of org.apache.hadoop.fs.FileSystem.

Prototype

@Deprecated
public boolean isDirectory(Path f) throws IOException 

Document

True iff the named path is a directory.
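
Note that this overload is marked @Deprecated in recent Hadoop releases in favor of reusing the FileStatus returned by getFileStatus(). The snippet below is a minimal, hypothetical sketch contrasting the two calls; the configuration and path are illustrative placeholders, not taken from any example on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf); // default (local or configured) filesystem
        Path path = new Path("/tmp/example-dir"); // hypothetical path, for illustration only

        // Deprecated convenience call, still widespread in existing code:
        boolean viaIsDirectory = fs.isDirectory(path);

        // Recommended replacement: reuse the FileStatus. Guard with exists(),
        // since getFileStatus() throws FileNotFoundException for missing paths.
        boolean viaFileStatus = fs.exists(path) && fs.getFileStatus(path).isDirectory();

        System.out.println(path + " is a directory: " + viaIsDirectory + " / " + viaFileStatus);
    }
}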

Usage

From source file: org.apache.tajo.engine.query.TestTablePartitions.java

License: Apache License

@Test
public final void testColumnPartitionedTableByThreeColumnsWithCompression() throws Exception {
    String tableName = CatalogUtil
            .normalizeIdentifier("testColumnPartitionedTableByThreeColumnsWithCompression");
    ResultSet res = executeString("create table " + tableName + " (col4 text) USING csv "
            + "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
            + "partition by column(col1 int4, col2 int4, col3 float8)");
    res.close();

    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

    res = executeString("insert overwrite into " + tableName
            + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem");
    res.close();
    TableDesc desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
    if (!testingCluster.isHCatalogStoreRunning()) {
        assertEquals(5, desc.getStats().getNumRows().intValue());
    }

    FileSystem fs = FileSystem.get(conf);
    assertTrue(fs.exists(new Path(desc.getPath())));
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);

    Path path = new Path(desc.getPath());
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));

    for (FileStatus partition1 : fs.listStatus(path)) {
        assertTrue(fs.isDirectory(partition1.getPath()));
        for (FileStatus partition2 : fs.listStatus(partition1.getPath())) {
            assertTrue(fs.isDirectory(partition2.getPath()));
            for (FileStatus partition3 : fs.listStatus(partition2.getPath())) {
                assertTrue(fs.isDirectory(partition3.getPath()));
                for (FileStatus file : fs.listStatus(partition3.getPath())) {
                    CompressionCodec codec = factory.getCodec(file.getPath());
                    assertTrue(codec instanceof DeflateCodec);
                }
            }
        }
    }

    res = executeString("select * from " + tableName + " where col2 = 2");

    Map<Double, int[]> resultRows1 = Maps.newHashMap();
    resultRows1.put(45.0d, new int[] { 3, 2 });
    resultRows1.put(38.0d, new int[] { 2, 2 });

    int i = 0;
    while (res.next()) {
        assertEquals(resultRows1.get(res.getDouble(4))[0], res.getInt(2));
        assertEquals(resultRows1.get(res.getDouble(4))[1], res.getInt(3));
        i++;
    }
    res.close();
    assertEquals(2, i);

    Map<Double, int[]> resultRows2 = Maps.newHashMap();
    resultRows2.put(49.0d, new int[] { 3, 3 });
    resultRows2.put(45.0d, new int[] { 3, 2 });
    resultRows2.put(38.0d, new int[] { 2, 2 });

    res = executeString("select * from " + tableName + " where (col1 = 2 or col1 = 3) and col2 >= 2");
    i = 0;
    while (res.next()) {
        assertEquals(resultRows2.get(res.getDouble(4))[0], res.getInt(2));
        assertEquals(resultRows2.get(res.getDouble(4))[1], res.getInt(3));
        i++;
    }

    res.close();
    assertEquals(3, i);
}

From source file: org.apache.tajo.engine.query.TestTablePartitions.java

License: Apache License

@Test
public final void testColumnPartitionedTableNoMatchedPartition() throws Exception {
    String tableName = CatalogUtil.normalizeIdentifier("testColumnPartitionedTableNoMatchedPartition");
    ResultSet res = executeString("create table " + tableName + " (col4 text) USING csv "
            + "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
            + "partition by column(col1 int4, col2 int4, col3 float8)");
    res.close();

    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

    res = executeString("insert overwrite into " + tableName
            + " select l_returnflag , l_orderkey, l_partkey, l_quantity from lineitem");
    res.close();
    TableDesc desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
    if (!testingCluster.isHCatalogStoreRunning()) {
        assertEquals(5, desc.getStats().getNumRows().intValue());
    }

    FileSystem fs = FileSystem.get(conf);
    assertTrue(fs.exists(new Path(desc.getPath())));
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);

    Path path = new Path(desc.getPath());
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));

    for (FileStatus partition1 : fs.listStatus(path)) {
        assertTrue(fs.isDirectory(partition1.getPath()));
        for (FileStatus partition2 : fs.listStatus(partition1.getPath())) {
            assertTrue(fs.isDirectory(partition2.getPath()));
            for (FileStatus partition3 : fs.listStatus(partition2.getPath())) {
                assertTrue(fs.isDirectory(partition3.getPath()));
                for (FileStatus file : fs.listStatus(partition3.getPath())) {
                    CompressionCodec codec = factory.getCodec(file.getPath());
                    assertTrue(codec instanceof DeflateCodec);
                }
            }
        }
    }

    res = executeString("select * from " + tableName + " where col2 = 9");
    assertFalse(res.next());
    res.close();
}

From source file: org.apache.tajo.ha.HdfsServiceTracker.java

License: Apache License

/**
 * Reads a text file stored in HDFS and returns all service addresses read from it.
 *
 * @param conf
 * @return all service addresses
 * @throws ServiceTrackerException
 */
private static List<String> getAddressElements(TajoConf conf) throws ServiceTrackerException {

    try {
        FileSystem fs = getFileSystem(conf);
        Path activeMasterBaseDir = new Path(TajoConf.getSystemHADir(conf),
                TajoConstants.SYSTEM_HA_ACTIVE_DIR_NAME);

        if (!fs.exists(activeMasterBaseDir)) {
            throw new ServiceTrackerException("No such active master base path: " + activeMasterBaseDir);
        }
        if (!fs.isDirectory(activeMasterBaseDir)) {
            throw new ServiceTrackerException("Active master base path must be a directory.");
        }

        FileStatus[] files = fs.listStatus(activeMasterBaseDir);

        if (files.length < 1) {
            throw new ServiceTrackerException("No active master entry");
        } else if (files.length > 1) {
            throw new ServiceTrackerException("Two or more than active master entries.");
        }

        // We can ensure that there is only one file due to the above assertion.
        Path activeMasterEntry = files[0].getPath();

        if (!fs.isFile(activeMasterEntry)) {
            throw new ServiceTrackerException("Active master entry must be a file, but it is a directory.");
        }

        List<String> addressElements = TUtil.newList();

        addressElements.add(activeMasterEntry.getName().replaceAll("_", ":")); // Add UMBILICAL_RPC_ADDRESS to elements

        FSDataInputStream stream = fs.open(activeMasterEntry);
        String data = stream.readUTF();
        stream.close();

        addressElements.addAll(TUtil.newList(data.split("_"))); // Add the remaining entries to elements

        // ensure the number of entries
        Preconditions.checkState(addressElements.size() == 5, "Fewer service addresses than necessary.");

        return addressElements;

    } catch (Throwable t) {
        throw new ServiceTrackerException(t);
    }
}

From source file: org.apache.tajo.storage.FileStorageManager.java

License: Apache License

/**
 *
 * @param fs
 * @param path The table path
 * @param result The final result files to be used
 * @param startFileIndex
 * @param numResultFiles
 * @param currentFileIndex
 * @param partitioned A flag to indicate if this table is partitioned
 * @param currentDepth Current visiting depth of partition directories
 * @param maxDepth The partition depth of this table
 * @throws IOException
 */
private void getNonZeroLengthDataFiles(FileSystem fs, Path path, List<FileStatus> result, int startFileIndex,
        int numResultFiles, AtomicInteger currentFileIndex, boolean partitioned, int currentDepth, int maxDepth)
        throws IOException {
    // Intermediate directory
    if (fs.isDirectory(path)) {

        FileStatus[] files = fs.listStatus(path, StorageManager.hiddenFileFilter);

        if (files != null && files.length > 0) {

            for (FileStatus eachFile : files) {

                // check whether enough files have already been found
                if (result.size() >= numResultFiles) {
                    return;
                }
                if (eachFile.isDirectory()) {

                    getNonZeroLengthDataFiles(fs, eachFile.getPath(), result, startFileIndex, numResultFiles,
                            currentFileIndex, partitioned, currentDepth + 1, // increment a visiting depth
                            maxDepth);

                    // For a partitioned table, ignore files located in intermediate directories;
                    // a file is in a leaf directory iff currentDepth == maxDepth.
                } else if (eachFile.isFile() && eachFile.getLen() > 0
                        && (!partitioned || currentDepth == maxDepth)) {
                    if (currentFileIndex.get() >= startFileIndex) {
                        result.add(eachFile);
                    }
                    currentFileIndex.incrementAndGet();
                }
            }
        }

        // The given path is a file rather than a directory
    } else {
        FileStatus fileStatus = fs.getFileStatus(path);
        if (fileStatus != null && fileStatus.getLen() > 0) {
            if (currentFileIndex.get() >= startFileIndex) {
                result.add(fileStatus);
            }
            currentFileIndex.incrementAndGet();
            if (result.size() >= numResultFiles) {
                return;
            }
        }
    }
}

From source file: org.apache.tajo.storage.FileTablespace.java

License: Apache License

private boolean isLeafDirectory(FileSystem fs, Path path) throws IOException {
    boolean retValue = false;

    FileStatus[] files = fs.listStatus(path);
    for (FileStatus file : files) {
        if (fs.isDirectory(file.getPath())) {
            retValue = true;
            break;
        }
    }

    return retValue;
}

From source file: org.apache.tajo.storage.StorageUtil.java

License: Apache License

/**
 * Written file names take one of two forms: "part-[0-9]*-[0-9]*" or "part-[0-9]*-[0-9]*-[0-9]*".
 *
 * This method finds the maximum sequence number among existing data files matching the above patterns.
 * If no matching file is found, it returns -1.
 *
 * @param fs
 * @param path
 * @param recursive
 * @return The maximum sequence number
 * @throws java.io.IOException
 */
public static int getMaxFileSequence(FileSystem fs, Path path, boolean recursive) throws IOException {
    if (!fs.isDirectory(path)) {
        return -1;
    }

    FileStatus[] files = fs.listStatus(path);

    if (files == null || files.length == 0) {
        return -1;
    }

    int maxValue = -1;

    for (FileStatus eachFile : files) {
        // For a partitioned table, return the largest value across all partition directories.
        int value;
        if (eachFile.isDirectory() && recursive) {
            value = getMaxFileSequence(fs, eachFile.getPath(), recursive);
            if (value > maxValue) {
                maxValue = value;
            }
        } else {
            if (eachFile.getPath().getName().matches(fileNamePatternV08)
                    || eachFile.getPath().getName().matches(fileNamePatternV09)) {
                value = getSequence(eachFile.getPath().getName());
                if (value > maxValue) {
                    maxValue = value;
                }
            }
        }
    }

    return maxValue;
}

From source file: org.apache.tajo.util.history.HistoryWriter.java

License: Apache License

public static Path getQueryTaskHistoryPath(FileSystem fs, Path parent, String processName, String taskStartTime)
        throws IOException {
    // <tajo.task-history.path>/<yyyyMMdd>/tasks/<WORKER_HOST>_<WORKER_PORT>/<WORKER_HOST>_<WORKER_PORT>_<HH>_<seq>.hist

    // find the largest existing sequence number for this hour
    Path fileParent = new Path(parent, taskStartTime.substring(0, 8) + "/tasks/" + processName);

    String hour = taskStartTime.substring(8, 10);
    int maxSeq = -1;

    if (!fs.exists(fileParent)) {
        maxSeq++;
        return new Path(fileParent, processName + "_" + hour + "_" + maxSeq + HISTORY_FILE_POSTFIX);
    }

    if (!fs.isDirectory(fileParent)) {
        throw new IOException("Task history path is not directory: " + fileParent);
    }
    FileStatus[] files = fs.listStatus(fileParent);
    if (files != null) {
        for (FileStatus eachFile : files) {
            String[] nameTokens = eachFile.getPath().getName().split("_");
            if (nameTokens.length != 4) {
                continue;
            }

            if (nameTokens[2].equals(hour)) {
                int prefixIndex = nameTokens[3].indexOf(".");
                if (prefixIndex > 0) {
                    try {
                        int fileSeq = Integer.parseInt(nameTokens[3].substring(0, prefixIndex));
                        if (fileSeq > maxSeq) {
                            maxSeq = fileSeq;
                        }
                    } catch (NumberFormatException e) {
                        // skip entries whose sequence token is not numeric
                    }
                }
            }
        }
    }

    maxSeq++;
    return new Path(fileParent, processName + "_" + hour + "_" + maxSeq + HISTORY_FILE_POSTFIX);
}

From source file: org.apache.tez.client.TezClientUtils.java

License: Apache License

private static FileStatus[] getLRFileStatus(String fileName, Configuration conf) throws IOException {
    URI uri;
    try {
        uri = new URI(fileName);
    } catch (URISyntaxException e) {
        String message = "Invalid URI defined in configuration for" + " location of TEZ jars. providedURI="
                + fileName;
        LOG.error(message);
        throw new TezUncheckedException(message, e);
    }

    Path p = new Path(uri);
    FileSystem fs = p.getFileSystem(conf);
    p = fs.resolvePath(p);

    if (fs.isDirectory(p)) {
        return fs.listStatus(p);
    } else {
        FileStatus fStatus = fs.getFileStatus(p);
        return new FileStatus[] { fStatus };
    }
}

From source file: org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer.java

License: Apache License

public static File copyDirectoryIfNonExistent(final FileSystem fileSystem, final String directory) {
    try {
        final String hadoopGremlinLibsRemote = "hadoop-gremlin-" + Gremlin.version() + "-libs";
        final Path path = new Path(directory);
        if (Boolean.valueOf(System.getProperty("is.testing", "false"))
                || (fileSystem.exists(path) && fileSystem.isDirectory(path))) {
            final File tempDirectory = new File(
                    System.getProperty("java.io.tmpdir") + File.separator + hadoopGremlinLibsRemote);
            assert tempDirectory.exists() || tempDirectory.mkdirs();
            final String tempPath = tempDirectory.getAbsolutePath() + File.separator + path.getName();
            final RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(path, false);
            while (files.hasNext()) {
                final LocatedFileStatus f = files.next();
                fileSystem.copyToLocalFile(false, f.getPath(),
                        new Path(tempPath + System.getProperty("file.separator") + f.getPath().getName()),
                        true);
            }
            return new File(tempPath);
        } else
            return new File(directory);
    } catch (final IOException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}

From source file: org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputerTest.java

License: Apache License

@Test
public void shouldCopyDirectoriesCorrectly() throws Exception {
    final String hdfsName = this.getClass().getSimpleName() + "-hdfs";
    final String localName = this.getClass().getSimpleName() + "-local";
    final FileSystem fs = FileSystem.get(new Configuration());
    if (!new File(System.getProperty("java.io.tmpdir") + "/" + localName).exists())
        assertTrue(new File(System.getProperty("java.io.tmpdir") + "/" + localName).mkdir());
    File tempFile1 = new File(System.getProperty("java.io.tmpdir") + "/" + localName + "/test1.txt");
    File tempFile2 = new File(System.getProperty("java.io.tmpdir") + "/" + localName + "/test2.txt");
    assertTrue(tempFile1.createNewFile());
    assertTrue(tempFile2.createNewFile());
    assertTrue(tempFile1.exists());
    assertTrue(tempFile2.exists());
    if (fs.exists(new Path("target/" + hdfsName)))
        assertTrue(fs.delete(new Path("target/" + hdfsName), true));
    fs.copyFromLocalFile(true, new Path(tempFile1.getAbsolutePath()),
            new Path("target/" + hdfsName + "/test1.dat"));
    fs.copyFromLocalFile(true, new Path(tempFile2.getAbsolutePath()),
            new Path("target/" + hdfsName + "/test2.dat"));
    assertTrue(fs.exists(new Path("target/" + hdfsName + "/test1.dat")));
    assertTrue(fs.exists(new Path("target/" + hdfsName + "/test2.dat")));
    assertTrue(fs.exists(new Path("target/" + hdfsName)));
    assertTrue(fs.isDirectory(new Path("target/" + hdfsName)));
    assertFalse(tempFile1.exists());
    assertFalse(tempFile2.exists());
    assertTrue(new File(System.getProperty("java.io.tmpdir") + "/" + localName).exists());
    assertTrue(new File(System.getProperty("java.io.tmpdir") + "/" + localName).delete());
    assertTrue(fs.exists(new Path("target/" + hdfsName + "/test1.dat")));
    assertTrue(fs.exists(new Path("target/" + hdfsName + "/test2.dat")));
    assertTrue(fs.exists(new Path("target/" + hdfsName)));
    assertTrue(fs.isDirectory(new Path("target/" + hdfsName)));
    /////
    final String hadoopGremlinLibsRemote = "hadoop-gremlin-" + Gremlin.version() + "-libs";
    final File localDirectory = new File(System.getProperty("java.io.tmpdir") + "/" + hadoopGremlinLibsRemote);
    final File localLibDirectory = new File(localDirectory.getAbsolutePath() + "/" + hdfsName);
    if (localLibDirectory.exists()) {
        Stream.of(localLibDirectory.listFiles()).forEach(File::delete);
        assertTrue(localLibDirectory.delete());
    }
    assertFalse(localLibDirectory.exists());
    assertEquals(localLibDirectory,
            AbstractHadoopGraphComputer.copyDirectoryIfNonExistent(fs, "target/" + hdfsName));
    assertTrue(localLibDirectory.exists());
    assertTrue(localLibDirectory.isDirectory());
    assertEquals(2,
            Stream.of(localLibDirectory.listFiles()).filter(file -> file.getName().endsWith(".dat")).count());
}