Usage examples for the org.apache.hadoop.fs.FileSystem#isDirectory(Path) method
@Deprecated public boolean isDirectory(Path f) throws IOException
From source file:org.apache.tajo.engine.query.TestTablePartitions.java
License:Apache License
/**
 * Creates a Deflate-compressed CSV table partitioned by three columns, fills it from
 * {@code lineitem}, then verifies the partition directory layout, the codec of every data
 * file and the rows returned by two filtered selects.
 */
@Test
public final void testColumnPartitionedTableByThreeColumnsWithCompression() throws Exception {
  String tableName = CatalogUtil
      .normalizeIdentifier("testColumnPartitionedTableByThreeColumnsWithCompression");

  ResultSet res = executeString("create table " + tableName + " (col4 text) USING csv "
      + "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
      + "partition by column(col1 int4, col2 int4, col3 float8)");
  res.close();
  assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

  res = executeString("insert overwrite into " + tableName
      + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem");
  res.close();

  TableDesc desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  if (!testingCluster.isHCatalogStoreRunning()) {
    assertEquals(5, desc.getStats().getNumRows().intValue());
  }

  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(new Path(desc.getPath())));
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);

  Path path = new Path(desc.getPath());

  // Every expected partition directory, relative to the table path.
  String[] expectedPartitionDirs = {
      "/col1=1",
      "/col1=1/col2=1",
      "/col1=1/col2=1/col3=17.0",
      "/col1=2",
      "/col1=2/col2=2",
      "/col1=2/col2=2/col3=38.0",
      "/col1=3",
      "/col1=3/col2=2",
      "/col1=3/col2=3",
      "/col1=3/col2=2/col3=45.0",
      "/col1=3/col2=3/col3=49.0"
  };
  for (String partitionDir : expectedPartitionDirs) {
    assertTrue(fs.isDirectory(new Path(path.toUri() + partitionDir)));
  }

  // Walk the three partition levels; everything below the third level must be Deflate data.
  for (FileStatus level1 : fs.listStatus(path)) {
    assertTrue(fs.isDirectory(level1.getPath()));
    for (FileStatus level2 : fs.listStatus(level1.getPath())) {
      assertTrue(fs.isDirectory(level2.getPath()));
      for (FileStatus level3 : fs.listStatus(level2.getPath())) {
        assertTrue(fs.isDirectory(level3.getPath()));
        for (FileStatus dataFile : fs.listStatus(level3.getPath())) {
          CompressionCodec codec = factory.getCodec(dataFile.getPath());
          assertTrue(codec instanceof DeflateCodec);
        }
      }
    }
  }

  // Expected rows keyed by the 4th result column; each value holds the expected 2nd and 3rd columns.
  Map<Double, int[]> resultRows1 = Maps.newHashMap();
  resultRows1.put(45.0d, new int[] { 3, 2 });
  resultRows1.put(38.0d, new int[] { 2, 2 });

  res = executeString("select * from " + tableName + " where col2 = 2");
  int rowCount = 0;
  while (res.next()) {
    int[] expected = resultRows1.get(res.getDouble(4));
    assertEquals(expected[0], res.getInt(2));
    assertEquals(expected[1], res.getInt(3));
    rowCount++;
  }
  res.close();
  assertEquals(2, rowCount);

  Map<Double, int[]> resultRows2 = Maps.newHashMap();
  resultRows2.put(49.0d, new int[] { 3, 3 });
  resultRows2.put(45.0d, new int[] { 3, 2 });
  resultRows2.put(38.0d, new int[] { 2, 2 });

  res = executeString("select * from " + tableName + " where (col1 = 2 or col1 = 3) and col2 >= 2");
  rowCount = 0;
  while (res.next()) {
    int[] expected = resultRows2.get(res.getDouble(4));
    assertEquals(expected[0], res.getInt(2));
    assertEquals(expected[1], res.getInt(3));
    rowCount++;
  }
  res.close();
  assertEquals(3, rowCount);
}
From source file:org.apache.tajo.engine.query.TestTablePartitions.java
License:Apache License
/**
 * Creates a Deflate-compressed CSV table partitioned by three columns, fills it from
 * {@code lineitem}, verifies the partition directory layout and file codecs, and finally
 * checks that a predicate matching no partition returns no rows.
 */
@Test
public final void testColumnPartitionedTableNoMatchedPartition() throws Exception {
  String tableName = CatalogUtil.normalizeIdentifier("testColumnPartitionedTableNoMatchedPartition");

  ResultSet res = executeString("create table " + tableName + " (col4 text) USING csv "
      + "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
      + "partition by column(col1 int4, col2 int4, col3 float8)");
  res.close();
  assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

  res = executeString("insert overwrite into " + tableName
      + " select l_returnflag , l_orderkey, l_partkey, l_quantity from lineitem");
  res.close();

  TableDesc desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  if (!testingCluster.isHCatalogStoreRunning()) {
    assertEquals(5, desc.getStats().getNumRows().intValue());
  }

  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(new Path(desc.getPath())));
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);

  Path path = new Path(desc.getPath());

  // Every expected partition directory, relative to the table path.
  String[] expectedPartitionDirs = {
      "/col1=1",
      "/col1=1/col2=1",
      "/col1=1/col2=1/col3=17.0",
      "/col1=2",
      "/col1=2/col2=2",
      "/col1=2/col2=2/col3=38.0",
      "/col1=3",
      "/col1=3/col2=2",
      "/col1=3/col2=3",
      "/col1=3/col2=2/col3=45.0",
      "/col1=3/col2=3/col3=49.0"
  };
  for (String partitionDir : expectedPartitionDirs) {
    assertTrue(fs.isDirectory(new Path(path.toUri() + partitionDir)));
  }

  // Walk the three partition levels; everything below the third level must be Deflate data.
  for (FileStatus level1 : fs.listStatus(path)) {
    assertTrue(fs.isDirectory(level1.getPath()));
    for (FileStatus level2 : fs.listStatus(level1.getPath())) {
      assertTrue(fs.isDirectory(level2.getPath()));
      for (FileStatus level3 : fs.listStatus(level2.getPath())) {
        assertTrue(fs.isDirectory(level3.getPath()));
        for (FileStatus dataFile : fs.listStatus(level3.getPath())) {
          CompressionCodec codec = factory.getCodec(dataFile.getPath());
          assertTrue(codec instanceof DeflateCodec);
        }
      }
    }
  }

  // No partition has col2 = 9, so the result must be empty.
  res = executeString("select * from " + tableName + " where col2 = 9");
  assertFalse(res.next());
  res.close();
}
From source file:org.apache.tajo.ha.HdfsServiceTracker.java
License:Apache License
/** * Reads a text file stored in HDFS file, and then return all service addresses read from a HDFS file. * * * @param conf// w w w . j av a 2s. c o m * @return all service addresses * @throws ServiceTrackerException */ private static List<String> getAddressElements(TajoConf conf) throws ServiceTrackerException { try { FileSystem fs = getFileSystem(conf); Path activeMasterBaseDir = new Path(TajoConf.getSystemHADir(conf), TajoConstants.SYSTEM_HA_ACTIVE_DIR_NAME); if (!fs.exists(activeMasterBaseDir)) { throw new ServiceTrackerException("No such active master base path: " + activeMasterBaseDir); } if (!fs.isDirectory(activeMasterBaseDir)) { throw new ServiceTrackerException("Active master base path must be a directory."); } FileStatus[] files = fs.listStatus(activeMasterBaseDir); if (files.length < 1) { throw new ServiceTrackerException("No active master entry"); } else if (files.length > 1) { throw new ServiceTrackerException("Two or more than active master entries."); } // We can ensure that there is only one file due to the above assertion. Path activeMasterEntry = files[0].getPath(); if (!fs.isFile(activeMasterEntry)) { throw new ServiceTrackerException("Active master entry must be a file, but it is a directory."); } List<String> addressElements = TUtil.newList(); addressElements.add(activeMasterEntry.getName().replaceAll("_", ":")); // Add UMBILICAL_RPC_ADDRESS to elements FSDataInputStream stream = fs.open(activeMasterEntry); String data = stream.readUTF(); stream.close(); addressElements.addAll(TUtil.newList(data.split("_"))); // Add remains entries to elements // ensure the number of entries Preconditions.checkState(addressElements.size() == 5, "Fewer service addresses than necessary."); return addressElements; } catch (Throwable t) { throw new ServiceTrackerException(t); } }
From source file:org.apache.tajo.storage.FileStorageManager.java
License:Apache License
/** * * @param fs/*from ww w .j a v a 2 s.c o m*/ * @param path The table path * @param result The final result files to be used * @param startFileIndex * @param numResultFiles * @param currentFileIndex * @param partitioned A flag to indicate if this table is partitioned * @param currentDepth Current visiting depth of partition directories * @param maxDepth The partition depth of this table * @throws IOException */ private void getNonZeroLengthDataFiles(FileSystem fs, Path path, List<FileStatus> result, int startFileIndex, int numResultFiles, AtomicInteger currentFileIndex, boolean partitioned, int currentDepth, int maxDepth) throws IOException { // Intermediate directory if (fs.isDirectory(path)) { FileStatus[] files = fs.listStatus(path, StorageManager.hiddenFileFilter); if (files != null && files.length > 0) { for (FileStatus eachFile : files) { // checking if the enough number of files are found if (result.size() >= numResultFiles) { return; } if (eachFile.isDirectory()) { getNonZeroLengthDataFiles(fs, eachFile.getPath(), result, startFileIndex, numResultFiles, currentFileIndex, partitioned, currentDepth + 1, // increment a visiting depth maxDepth); // if partitioned table, we should ignore files located in the intermediate directory. // we can ensure that this file is in leaf directory if currentDepth == maxDepth. } else if (eachFile.isFile() && eachFile.getLen() > 0 && (!partitioned || currentDepth == maxDepth)) { if (currentFileIndex.get() >= startFileIndex) { result.add(eachFile); } currentFileIndex.incrementAndGet(); } } } // Files located in leaf directory } else { FileStatus fileStatus = fs.getFileStatus(path); if (fileStatus != null && fileStatus.getLen() > 0) { if (currentFileIndex.get() >= startFileIndex) { result.add(fileStatus); } currentFileIndex.incrementAndGet(); if (result.size() >= numResultFiles) { return; } } } }
From source file:org.apache.tajo.storage.FileTablespace.java
License:Apache License
private boolean isLeafDirectory(FileSystem fs, Path path) throws IOException { boolean retValue = false; FileStatus[] files = fs.listStatus(path); for (FileStatus file : files) { if (fs.isDirectory(file.getPath())) { retValue = true;//from ww w.ja va 2s .c o m break; } } return retValue; }
From source file:org.apache.tajo.storage.StorageUtil.java
License:Apache License
/** * Written files can be one of two forms: "part-[0-9]*-[0-9]*" or "part-[0-9]*-[0-9]*-[0-9]*". * * This method finds the maximum sequence number from existing data files through the above patterns. * If it cannot find any matched file or the maximum number, it will return -1. * * @param fs//w ww . ja v a 2 s.c o m * @param path * @param recursive * @return The maximum sequence number * @throws java.io.IOException */ public static int getMaxFileSequence(FileSystem fs, Path path, boolean recursive) throws IOException { if (!fs.isDirectory(path)) { return -1; } FileStatus[] files = fs.listStatus(path); if (files == null || files.length == 0) { return -1; } int maxValue = -1; for (FileStatus eachFile : files) { // In the case of partition table, return largest value within all partition dirs. int value; if (eachFile.isDirectory() && recursive) { value = getMaxFileSequence(fs, eachFile.getPath(), recursive); if (value > maxValue) { maxValue = value; } } else { if (eachFile.getPath().getName().matches(fileNamePatternV08) || eachFile.getPath().getName().matches(fileNamePatternV09)) { value = getSequence(eachFile.getPath().getName()); if (value > maxValue) { maxValue = value; } } } } return maxValue; }
From source file:org.apache.tajo.util.history.HistoryWriter.java
License:Apache License
public static Path getQueryTaskHistoryPath(FileSystem fs, Path parent, String processName, String taskStartTime) throws IOException { // <tajo.task-history.path>/<yyyyMMdd>/tasks/<WORKER_HOST>_<WORKER_PORT>/<WORKER_HOST>_<WORKER_PORT>_<HH>_<seq>.hist // finding largest sequence path Path fileParent = new Path(parent, taskStartTime.substring(0, 8) + "/tasks/" + processName); String hour = taskStartTime.substring(8, 10); int maxSeq = -1; if (!fs.exists(fileParent)) { maxSeq++;/*from w w w .j ava2 s . co m*/ return new Path(fileParent, processName + "_" + hour + "_" + maxSeq + HISTORY_FILE_POSTFIX); } if (!fs.isDirectory(fileParent)) { throw new IOException("Task history path is not directory: " + fileParent); } FileStatus[] files = fs.listStatus(fileParent); if (files != null) { for (FileStatus eachFile : files) { String[] nameTokens = eachFile.getPath().getName().split("_"); if (nameTokens.length != 4) { continue; } if (nameTokens[2].equals(hour)) { int prefixIndex = nameTokens[3].indexOf("."); if (prefixIndex > 0) { try { int fileSeq = Integer.parseInt(nameTokens[3].substring(0, prefixIndex)); if (fileSeq > maxSeq) { maxSeq = fileSeq; } } catch (NumberFormatException e) { } } } } } maxSeq++; return new Path(fileParent, processName + "_" + hour + "_" + maxSeq + HISTORY_FILE_POSTFIX); }
From source file:org.apache.tez.client.TezClientUtils.java
License:Apache License
private static FileStatus[] getLRFileStatus(String fileName, Configuration conf) throws IOException { URI uri;// w w w . jav a2 s. c o m try { uri = new URI(fileName); } catch (URISyntaxException e) { String message = "Invalid URI defined in configuration for" + " location of TEZ jars. providedURI=" + fileName; LOG.error(message); throw new TezUncheckedException(message, e); } Path p = new Path(uri); FileSystem fs = p.getFileSystem(conf); p = fs.resolvePath(p); if (fs.isDirectory(p)) { return fs.listStatus(p); } else { FileStatus fStatus = fs.getFileStatus(p); return new FileStatus[] { fStatus }; } }
From source file:org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer.java
License:Apache License
/**
 * Copies the files directly inside {@code directory} from {@code fileSystem} into a local
 * directory below {@code java.io.tmpdir} and returns that local directory.
 *
 * <p>The copy takes place when the system property {@code is.testing} is {@code true}, or
 * when {@code directory} exists on {@code fileSystem} and is a directory. Otherwise nothing
 * is copied and {@code new File(directory)} is returned.
 *
 * @param fileSystem the file system holding {@code directory}
 * @param directory  the directory whose immediate files are copied (not recursive)
 * @return the local copy of the directory, or {@code new File(directory)} if nothing was copied
 * @throws IllegalStateException if the local target directory cannot be created or a file
 *         system operation fails
 */
public static File copyDirectoryIfNonExistent(final FileSystem fileSystem, final String directory) {
  try {
    final String hadoopGremlinLibsRemote = "hadoop-gremlin-" + Gremlin.version() + "-libs";
    final Path path = new Path(directory);
    final boolean testing = Boolean.parseBoolean(System.getProperty("is.testing", "false"));
    if (!testing && !(fileSystem.exists(path) && fileSystem.isDirectory(path))) {
      return new File(directory);
    }

    final File tempDirectory = new File(
        System.getProperty("java.io.tmpdir") + File.separator + hadoopGremlinLibsRemote);
    // Create the directory explicitly: doing this inside an `assert` would skip the creation
    // whenever assertions are disabled, which is the JVM default. The trailing isDirectory()
    // check tolerates another thread or process creating the directory concurrently.
    if (!tempDirectory.exists() && !tempDirectory.mkdirs() && !tempDirectory.isDirectory()) {
      throw new IllegalStateException(
          "Unable to create local directory: " + tempDirectory.getAbsolutePath());
    }

    final String tempPath = tempDirectory.getAbsolutePath() + File.separator + path.getName();
    final RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(path, false);
    while (files.hasNext()) {
      final LocatedFileStatus f = files.next();
      fileSystem.copyToLocalFile(false, f.getPath(),
          new Path(tempPath + File.separator + f.getPath().getName()), true);
    }
    return new File(tempPath);
  } catch (final IOException e) {
    throw new IllegalStateException(e.getMessage(), e);
  }
}
From source file:org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputerTest.java
License:Apache License
/**
 * Moves two local files into a file-system directory under {@code target/}, then checks that
 * {@link AbstractHadoopGraphComputer#copyDirectoryIfNonExistent} copies that directory back
 * into the local hadoop-gremlin libs directory with both {@code .dat} files present.
 */
@Test
public void shouldCopyDirectoriesCorrectly() throws Exception {
  final String hdfsName = this.getClass().getSimpleName() + "-hdfs";
  final String localName = this.getClass().getSimpleName() + "-local";
  final FileSystem fs = FileSystem.get(new Configuration());

  final String tmpDir = System.getProperty("java.io.tmpdir");
  final File localSource = new File(tmpDir + "/" + localName);
  if (!localSource.exists()) {
    assertTrue(localSource.mkdir());
  }

  // Create the two local source files.
  File tempFile1 = new File(tmpDir + "/" + localName + "/test1.txt");
  File tempFile2 = new File(tmpDir + "/" + localName + "/test2.txt");
  assertTrue(tempFile1.createNewFile());
  assertTrue(tempFile2.createNewFile());
  assertTrue(tempFile1.exists());
  assertTrue(tempFile2.exists());

  // Start from a clean target directory, then move (delSrc = true) both files into it.
  final Path remoteDir = new Path("target/" + hdfsName);
  final Path remoteFile1 = new Path("target/" + hdfsName + "/test1.dat");
  final Path remoteFile2 = new Path("target/" + hdfsName + "/test2.dat");
  if (fs.exists(remoteDir)) {
    assertTrue(fs.delete(remoteDir, true));
  }
  fs.copyFromLocalFile(true, new Path(tempFile1.getAbsolutePath()), remoteFile1);
  fs.copyFromLocalFile(true, new Path(tempFile2.getAbsolutePath()), remoteFile2);

  assertTrue(fs.exists(remoteFile1));
  assertTrue(fs.exists(remoteFile2));
  assertTrue(fs.exists(remoteDir));
  assertTrue(fs.isDirectory(remoteDir));
  assertFalse(tempFile1.exists());
  assertFalse(tempFile2.exists());

  // Removing the now-empty local source directory must not affect the copied files.
  assertTrue(localSource.exists());
  assertTrue(localSource.delete());
  assertTrue(fs.exists(remoteFile1));
  assertTrue(fs.exists(remoteFile2));
  assertTrue(fs.exists(remoteDir));
  assertTrue(fs.isDirectory(remoteDir));

  // Clear any previous local copy, then copy the directory back and verify its contents.
  final String hadoopGremlinLibsRemote = "hadoop-gremlin-" + Gremlin.version() + "-libs";
  final File localDirectory = new File(tmpDir + "/" + hadoopGremlinLibsRemote);
  final File localLibDirectory = new File(localDirectory.getAbsolutePath() + "/" + hdfsName);
  if (localLibDirectory.exists()) {
    Stream.of(localLibDirectory.listFiles()).forEach(File::delete);
    assertTrue(localLibDirectory.delete());
  }
  assertFalse(localLibDirectory.exists());

  assertEquals(localLibDirectory,
      AbstractHadoopGraphComputer.copyDirectoryIfNonExistent(fs, "target/" + hdfsName));
  assertTrue(localLibDirectory.exists());
  assertTrue(localLibDirectory.isDirectory());
  assertEquals(2, Stream.of(localLibDirectory.listFiles())
      .filter(file -> file.getName().endsWith(".dat"))
      .count());
}