Usage examples for org.apache.hadoop.fs.FileSystem#listStatus
public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
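Most of the examples below use the single-path overload, listStatus(Path f). As a quick orientation, here is a minimal, self-contained sketch of a typical call; the "/tmp" path and default Configuration are illustrative assumptions, not taken from any of the source files below.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        // An illustrative directory; any path the configured FileSystem can reach works.
        Path dir = new Path("/tmp");
        FileSystem fs = dir.getFileSystem(new Configuration());
        try {
            // listStatus returns one FileStatus per entry directly under 'dir'.
            for (FileStatus status : fs.listStatus(dir)) {
                System.out.println(status.getPath() + " len=" + status.getLen()
                        + (status.isDirectory() ? " (dir)" : ""));
            }
        } catch (FileNotFoundException e) {
            // Thrown when the path does not exist.
            System.err.println("No such path: " + dir);
        }
    }
}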
From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java
License:Open Source License
private void testListStatus() throws Exception {
    FileSystem fs = FileSystem.get(getHadoopConf());
    Path path = new Path(getHadoopTestDir(), "foo.txt");
    OutputStream os = fs.create(path);
    os.write(1);
    os.close();
    FileStatus status1 = fs.getFileStatus(path);
    fs.close();

    Configuration conf = new Configuration();
    conf.set("fs.http.impl", HoopFileSystem.class.getName());
    fs = FileSystem.get(getJettyURL().toURI(), conf);
    FileStatus status2 = fs.getFileStatus(new Path(path.toUri().getPath()));

    Assert.assertEquals(status2.getPermission(), status1.getPermission());
    Assert.assertEquals(status2.getPath().toUri().getPath(), status1.getPath().toUri().getPath());
    Assert.assertEquals(status2.getReplication(), status1.getReplication());
    Assert.assertEquals(status2.getBlockSize(), status1.getBlockSize());
    Assert.assertEquals(status2.getAccessTime(), status1.getAccessTime());
    Assert.assertEquals(status2.getModificationTime(), status1.getModificationTime());
    Assert.assertEquals(status2.getOwner(), status1.getOwner());
    Assert.assertEquals(status2.getGroup(), status1.getGroup());
    Assert.assertEquals(status2.getLen(), status1.getLen());

    FileStatus[] stati = fs.listStatus(path.getParent());
    Assert.assertEquals(stati.length, 1);
    Assert.assertEquals(stati[0].getPath().getName(), path.getName());
    // Close the FileSystem only after the last call against it; the original
    // excerpt closed it before listStatus, which can fail on a closed instance.
    fs.close();
}
From source file:com.cloudera.impala.analysis.LoadDataStmt.java
License:Apache License
private void analyzePaths(Analyzer analyzer, HdfsTable hdfsTable) throws AnalysisException {
    // The user must have permission to access the source location. Since the files will
    // be moved from this location, the user needs to have ALL permission.
    sourceDataPath_.analyze(analyzer, Privilege.ALL);
    try {
        Path source = sourceDataPath_.getPath();
        FileSystem fs = source.getFileSystem(FileSystemUtil.getConfiguration());
        // sourceDataPath_.analyze() ensured that the path is on an HDFS filesystem.
        Preconditions.checkState(fs instanceof DistributedFileSystem);
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        if (!dfs.exists(source)) {
            throw new AnalysisException(
                    String.format("INPATH location '%s' does not exist.", sourceDataPath_));
        }
        if (dfs.isDirectory(source)) {
            if (FileSystemUtil.getTotalNumVisibleFiles(source) == 0) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' contains no visible files.", sourceDataPath_));
            }
            if (FileSystemUtil.containsSubdirectory(source)) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' cannot contain subdirectories.", sourceDataPath_));
            }
        } else {
            // INPATH points to a file.
            if (FileSystemUtil.isHiddenFile(source.getName())) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' points to a hidden file.", source));
            }
        }
        String noWriteAccessErrorMsg = String.format("Unable to LOAD DATA into "
                + "target table (%s) because Impala does not have WRITE access to HDFS "
                + "location: ", hdfsTable.getFullName());
        HdfsPartition partition;
        String location;
        if (partitionSpec_ != null) {
            partition = hdfsTable.getPartition(partitionSpec_.getPartitionSpecKeyValues());
            location = partition.getLocation();
            if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
                throw new AnalysisException(noWriteAccessErrorMsg + partition.getLocation());
            }
        } else {
            // "default" partition
            partition = hdfsTable.getPartitions().get(0);
            location = hdfsTable.getLocation();
            if (!hdfsTable.hasWriteAccess()) {
                throw new AnalysisException(noWriteAccessErrorMsg + hdfsTable.getLocation());
            }
        }
        Preconditions.checkNotNull(partition);
        // Until Frontend.loadTableData() can handle cross-filesystem moves and filesystems
        // that aren't HDFS, require that source and dest are on the same HDFS.
        if (!FileSystemUtil.isPathOnFileSystem(new Path(location), fs)) {
            throw new AnalysisException(String.format(
                    "Unable to LOAD DATA into target table (%s) because source path (%s) and "
                            + "destination %s (%s) are on different file-systems.",
                    hdfsTable.getFullName(), source,
                    partitionSpec_ == null ? "table" : "partition", partition.getLocation()));
        }
        // Verify the files being loaded are supported.
        for (FileStatus fStatus : fs.listStatus(source)) {
            if (fs.isDirectory(fStatus.getPath())) continue;
            StringBuilder errorMsg = new StringBuilder();
            HdfsFileFormat fileFormat = partition.getInputFormatDescriptor().getFileFormat();
            if (!fileFormat.isFileCompressionTypeSupported(fStatus.getPath().toString(), errorMsg)) {
                throw new AnalysisException(errorMsg.toString());
            }
        }
    } catch (FileNotFoundException e) {
        throw new AnalysisException("File not found: " + e.getMessage(), e);
    } catch (IOException e) {
        throw new AnalysisException("Error accessing file system: " + e.getMessage(), e);
    }
}
From source file:com.cloudera.impala.catalog.HdfsTable.java
License:Apache License
/**
 * Creates a new HdfsPartition object to be added to the internal partition list.
 * Populates it with file format information and file locations. Partitions may be
 * empty, or may not even exist on the file system (a partition's location may have
 * been changed to a new path that is about to be created by an INSERT). For unchanged
 * files (indicated by unchanged mtime), reuses the FileDescriptor from the
 * oldFileDescMap. The one exception is if the partition is marked as cached,
 * in which case the block metadata cannot be reused. Otherwise, creates a new
 * FileDescriptor for each modified or new file and adds it to newFileDescMap.
 * Both old and newFileDescMap are maps of parent directory (partition location)
 * to list of files (FileDescriptors) under that directory.
 * Returns the new partition if successful, or null if none was added.
 * Separated from addPartition to reduce the number of operations done
 * while holding the lock on the hdfs table.
 *
 * @throws CatalogException
 *           if the supplied storage descriptor contains metadata that Impala can't
 *           understand.
 */
private HdfsPartition createPartition(StorageDescriptor storageDescriptor,
        org.apache.hadoop.hive.metastore.api.Partition msPartition,
        Map<String, List<FileDescriptor>> oldFileDescMap,
        Map<FsKey, Map<String, List<FileDescriptor>>> perFsFileDescMap) throws CatalogException {
    HdfsStorageDescriptor fileFormatDescriptor =
            HdfsStorageDescriptor.fromStorageDescriptor(this.name_, storageDescriptor);
    Path partDirPath = new Path(storageDescriptor.getLocation());
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();
    // If the partition is marked as cached, the block location metadata must be
    // reloaded, even if the file times have not changed.
    boolean isMarkedCached = isMarkedCached_;
    List<LiteralExpr> keyValues = Lists.newArrayList();
    if (msPartition != null) {
        isMarkedCached = HdfsCachingUtil.getCacheDirIdFromParams(msPartition.getParameters()) != null;
        // Load key values.
        for (String partitionKey : msPartition.getValues()) {
            Type type = getColumns().get(keyValues.size()).getType();
            // Deal with Hive's special NULL partition key.
            if (partitionKey.equals(nullPartitionKeyValue_)) {
                keyValues.add(NullLiteral.create(type));
            } else {
                try {
                    keyValues.add(LiteralExpr.create(partitionKey, type));
                } catch (Exception ex) {
                    LOG.warn("Failed to create literal expression of type: " + type, ex);
                    throw new CatalogException("Invalid partition key value of type: " + type, ex);
                }
            }
        }
        try {
            Expr.analyze(keyValues, null);
        } catch (AnalysisException e) {
            // Should never happen.
            throw new IllegalStateException(e);
        }
    }
    try {
        // Each partition could reside on a different filesystem.
        FileSystem fs = partDirPath.getFileSystem(CONF);
        multipleFileSystems_ = multipleFileSystems_
                || !FileSystemUtil.isPathOnFileSystem(new Path(getLocation()), fs);
        if (fs.exists(partDirPath)) {
            // FileSystem does not have an API that takes in a timestamp and returns a
            // list of files that have been added/changed since. Therefore, we call
            // fs.listStatus() to list all the files.
            for (FileStatus fileStatus : fs.listStatus(partDirPath)) {
                String fileName = fileStatus.getPath().getName();
                if (fileStatus.isDirectory() || FileSystemUtil.isHiddenFile(fileName)
                        || HdfsCompression.fromFileName(fileName) == HdfsCompression.LZO_INDEX) {
                    // Ignore directories, hidden files starting with . or _, and LZO index
                    // files. If a directory is erroneously created as a subdirectory of a
                    // partition dir, we should ignore it and move on; Hive will not recurse
                    // into directories. LZO index files are skipped because they are read
                    // by the LZO scanner directly.
                    continue;
                }
                String partitionDir = fileStatus.getPath().getParent().toString();
                FileDescriptor fd = null;
                // Search for a FileDescriptor with the same partition dir and file name.
                // If one is found, it will be chosen as a candidate to reuse.
                if (oldFileDescMap != null && oldFileDescMap.get(partitionDir) != null) {
                    for (FileDescriptor oldFileDesc : oldFileDescMap.get(partitionDir)) {
                        if (oldFileDesc.getFileName().equals(fileName)) {
                            fd = oldFileDesc;
                            break;
                        }
                    }
                }
                // Check if this FileDescriptor has been modified since last loading its
                // block location information. If it has not changed, the previously
                // loaded value can be reused.
                if (fd == null || isMarkedCached || fd.getFileLength() != fileStatus.getLen()
                        || fd.getModificationTime() != fileStatus.getModificationTime()) {
                    // Create a new file descriptor; the block metadata will be populated
                    // by loadBlockMd.
                    fd = new FileDescriptor(fileName, fileStatus.getLen(),
                            fileStatus.getModificationTime());
                    addPerFsFileDesc(perFsFileDescMap, fs, partitionDir, fd);
                }
                List<FileDescriptor> fds = fileDescMap_.get(partitionDir);
                if (fds == null) {
                    fds = Lists.newArrayList();
                    fileDescMap_.put(partitionDir, fds);
                }
                fds.add(fd);
                // Add to the list of FileDescriptors for this partition.
                fileDescriptors.add(fd);
            }
            numHdfsFiles_ += fileDescriptors.size();
        }
        HdfsPartition partition = new HdfsPartition(this, msPartition, keyValues,
                fileFormatDescriptor, fileDescriptors, getAvailableAccessLevel(fs, partDirPath));
        partition.checkWellFormed();
        return partition;
    } catch (Exception e) {
        throw new CatalogException("Failed to create partition: ", e);
    }
}
From source file:com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java
License:Apache License
/**
 * Lists file status by calling fileSystem.listStatus.
 */
private static void listStatus(String dirPath) {
    Path path = new Path(dirPath);
    boolean exceptionThrown = false;
    try {
        FileSystem fs = path.getFileSystem(LoadMetadataUtil.getConf());
        // Check existence before listing: listStatus throws FileNotFoundException
        // for a missing path, which made the original post-listing exists() check
        // unreachable.
        if (fs.exists(path)) {
            for (FileStatus status : fs.listStatus(path)) {
                BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
                for (BlockLocation loc : locations) {
                    loc.getNames();
                    loc.getHosts();
                }
            }
        }
    } catch (IOException e) {
        exceptionThrown = true;
        LOG.error("Failed to list Status", e);
    }
    assertFalse(exceptionThrown);
}
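The benchmark above issues a separate getFileBlockLocations call per file. For comparison, FileSystem also provides listLocatedStatus, which returns each entry's block locations together with its FileStatus in a single listing. Below is a minimal sketch, with the directory path and Configuration left to the caller as assumptions; it is not part of the original test.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListLocatedStatusExample {
    public static void listWithLocations(Path dir, Configuration conf) throws IOException {
        FileSystem fs = dir.getFileSystem(conf);
        // Each LocatedFileStatus already carries its block locations, so no
        // per-file getFileBlockLocations round trip is needed.
        RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(dir);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            for (BlockLocation loc : status.getBlockLocations()) {
                System.out.println(status.getPath() + " hosts="
                        + String.join(",", loc.getHosts()));
            }
        }
    }
}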
From source file:com.cloudera.impala.common.FileSystemUtil.java
License:Apache License
/**
 * Performs a non-recursive delete of all visible (non-hidden) files in a given
 * directory. Returns the number of files deleted as part of this operation.
 */
public static int deleteAllVisibleFiles(Path directory) throws IOException {
    FileSystem fs = directory.getFileSystem(CONF);
    Preconditions.checkState(fs.getFileStatus(directory).isDirectory());
    int numFilesDeleted = 0;
    for (FileStatus fStatus : fs.listStatus(directory)) {
        // Only delete files that are not hidden.
        if (fStatus.isFile() && !isHiddenFile(fStatus.getPath().getName())) {
            LOG.debug("Removing: " + fStatus.getPath());
            fs.delete(fStatus.getPath(), false);
            ++numFilesDeleted;
        }
    }
    return numFilesDeleted;
}
From source file:com.cloudera.impala.common.FileSystemUtil.java
License:Apache License
/**
 * Returns the total number of visible (non-hidden) files in a directory.
 */
public static int getTotalNumVisibleFiles(Path directory) throws IOException {
    FileSystem fs = directory.getFileSystem(CONF);
    Preconditions.checkState(fs.getFileStatus(directory).isDirectory());
    int numFiles = 0;
    for (FileStatus fStatus : fs.listStatus(directory)) {
        // Only count files that are not hidden.
        if (fStatus.isFile() && !isHiddenFile(fStatus.getPath().getName())) {
            ++numFiles;
        }
    }
    return numFiles;
}
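The hidden-file test repeated throughout these FileSystemUtil examples can also be pushed into the listing itself via the listStatus(Path, PathFilter) overload, which filters each entry's path before the FileStatus array is returned. Here is a minimal sketch; the startsWith check is a simplified stand-in for FileSystemUtil.isHiddenFile, whose implementation is not part of these excerpts.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class VisibleFileCounter {
    // Simplified stand-in for FileSystemUtil.isHiddenFile: files whose names
    // start with '.' or '_' are treated as hidden.
    private static boolean isHidden(String name) {
        return name.startsWith(".") || name.startsWith("_");
    }

    public static int countVisibleFiles(Path directory, Configuration conf) throws IOException {
        FileSystem fs = directory.getFileSystem(conf);
        // The filter is applied to each entry's Path as the listing is built.
        FileStatus[] visible = fs.listStatus(directory, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                return !isHidden(p.getName());
            }
        });
        int numFiles = 0;
        for (FileStatus status : visible) {
            if (status.isFile()) {
                ++numFiles;
            }
        }
        return numFiles;
    }
}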
From source file:com.cloudera.impala.common.FileSystemUtil.java
License:Apache License
/**
 * Moves all visible (non-hidden) files from a source directory to a destination
 * directory. Any sub-directories within the source directory are skipped.
 * Returns the number of files moved as part of this operation.
 */
public static int moveAllVisibleFiles(Path sourceDir, Path destDir) throws IOException {
    FileSystem fs = destDir.getFileSystem(CONF);
    Preconditions.checkState(fs.isDirectory(destDir));
    Preconditions.checkState(fs.isDirectory(sourceDir));
    // Use the same UUID to resolve all file name conflicts. This helps mitigate problems
    // that might happen if there is a conflict moving a set of files that have
    // dependent file names. For example, foo.lzo and foo.lzo_index.
    UUID uuid = UUID.randomUUID();
    // Enumerate all the files in the source.
    int numFilesMoved = 0;
    for (FileStatus fStatus : fs.listStatus(sourceDir)) {
        if (fStatus.isDirectory()) {
            LOG.debug("Skipping copy of directory: " + fStatus.getPath());
            continue;
        } else if (isHiddenFile(fStatus.getPath().getName())) {
            continue;
        }
        Path destFile = new Path(destDir, fStatus.getPath().getName());
        if (fs.exists(destFile)) {
            destFile = new Path(destDir, appendToBaseFileName(destFile.getName(), uuid.toString()));
        }
        FileSystemUtil.moveFile(fStatus.getPath(), destFile, false);
        ++numFilesMoved;
    }
    return numFilesMoved;
}
From source file:com.cloudera.impala.common.FileSystemUtil.java
License:Apache License
/**
 * Returns true if the given Path contains any subdirectories, otherwise false.
 */
public static boolean containsSubdirectory(Path directory) throws FileNotFoundException, IOException {
    FileSystem fs = directory.getFileSystem(CONF);
    // Enumerate all the entries in the directory.
    for (FileStatus fStatus : fs.listStatus(directory)) {
        if (fStatus.isDirectory()) {
            return true;
        }
    }
    return false;
}
From source file:com.cloudera.impala.util.LoadMetadataUtil.java
License:Apache License
/**
 * Loads and returns a list of file descriptors for the files in 'dirPath', using the
 * filesystem's listStatus API to load each FileStatus. No file descriptor is loaded
 * for directories, hidden files starting with . or _, or LZO index files. If a file
 * can be found in the old file descriptor map, has not been modified, and the
 * partition is not marked as cached ('isMarkedCached'), the cached descriptor is
 * reused. Otherwise a new FileDescriptor is created from the file name, file length
 * and modification time.
 *
 * Must be threadsafe. Access to 'oldFileDescMap', 'perFsFileBlocks', 'hostIndex' and
 * 'fileDescMap' must be protected.
 */
public static List<FileDescriptor> loadFileDescriptors(FileSystem fs, Path dirPath,
        Map<String, List<FileDescriptor>> oldFileDescMap, HdfsFileFormat fileFormat,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, boolean isMarkedCached, String tblName,
        ListMap<TNetworkAddress> hostIndex, Map<String, List<FileDescriptor>> fileDescMap)
        throws FileNotFoundException, IOException {
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();
    for (FileStatus fileStatus : fs.listStatus(dirPath)) {
        FileDescriptor fd = getFileDescriptor(fs, fileStatus, fileFormat, oldFileDescMap,
                isMarkedCached, perFsFileBlocks, tblName, hostIndex);
        if (fd == null) continue;
        // Add the partition dir to fileDescMap if it does not exist.
        String partitionDir = fileStatus.getPath().getParent().toString();
        synchronized (fileDescMap) {
            if (!fileDescMap.containsKey(partitionDir)) {
                fileDescMap.put(partitionDir, new ArrayList<FileDescriptor>());
            }
            fileDescMap.get(partitionDir).add(fd);
        }
        // Add to the list of FileDescriptors for this partition.
        fileDescriptors.add(fd);
    }
    return fileDescriptors;
}
From source file:com.cloudera.llama.server.TestMiniLlama.java
License:Apache License
private void testMiniLlama(Configuration conf, boolean writeHdfsConf) throws Exception {
    File confFile = null;
    MiniLlama server = new MiniLlama(conf);
    final NotificationEndPoint callbackServer = new NotificationEndPoint();
    try {
        callbackServer.setConf(createCallbackConfiguration());
        callbackServer.start();
        Assert.assertNotNull(server.getConf().get(LlamaAM.CORE_QUEUES_KEY));
        if (writeHdfsConf) {
            File confDir = new File("target", UUID.randomUUID().toString());
            confDir.mkdirs();
            confFile = new File(confDir, "minidfs-site.xml").getAbsoluteFile();
            server.setWriteHadoopConfig(confFile.getAbsolutePath());
        }
        server.start();
        if (writeHdfsConf) {
            Assert.assertTrue(confFile.exists());
        }
        Assert.assertNotSame(0, server.getAddressPort());
        TTransport transport = new TSocket(server.getAddressHost(), server.getAddressPort());
        transport.open();
        TProtocol protocol = new TBinaryProtocol(transport);
        LlamaAMService.Client client = new LlamaAMService.Client(protocol);

        TLlamaAMRegisterRequest trReq = new TLlamaAMRegisterRequest();
        trReq.setVersion(TLlamaServiceVersion.V1);
        trReq.setClient_id(TypeUtils.toTUniqueId(UUID.randomUUID()));
        TNetworkAddress tAddress = new TNetworkAddress();
        tAddress.setHostname(callbackServer.getAddressHost());
        tAddress.setPort(callbackServer.getAddressPort());
        trReq.setNotification_callback_service(tAddress);

        // register
        TLlamaAMRegisterResponse trRes = client.Register(trReq);
        Assert.assertEquals(TStatusCode.OK, trRes.getStatus().getStatus_code());

        // getNodes
        TLlamaAMGetNodesRequest tgnReq = new TLlamaAMGetNodesRequest();
        tgnReq.setVersion(TLlamaServiceVersion.V1);
        tgnReq.setAm_handle(trRes.getAm_handle());
        TLlamaAMGetNodesResponse tgnRes = client.GetNodes(tgnReq);
        Assert.assertEquals(TStatusCode.OK, tgnRes.getStatus().getStatus_code());
        Assert.assertEquals(new HashSet<String>(server.getDataNodes()),
                new HashSet<String>(tgnRes.getNodes()));

        reserveExpandRelease(trRes, server, client, callbackServer, 1, 74);
        reserveExpandRelease(trRes, server, client, callbackServer, 1, 0);
        reserveExpandRelease(trRes, server, client, callbackServer, 2, 74);
        reserveExpandRelease(trRes, server, client, callbackServer, 1, 0);

        // test MiniHDFS
        FileSystem fs = FileSystem.get(server.getConf());
        Assert.assertTrue(fs.getUri().getScheme().equals("hdfs"));
        fs.listStatus(new Path("/"));
        OutputStream os = fs.create(new Path("/test.txt"));
        os.write(0);
        os.close();

        // unregister
        TLlamaAMUnregisterRequest turReq = new TLlamaAMUnregisterRequest();
        turReq.setVersion(TLlamaServiceVersion.V1);
        turReq.setAm_handle(trRes.getAm_handle());
        TLlamaAMUnregisterResponse turRes = client.Unregister(turReq);
        Assert.assertEquals(TStatusCode.OK, turRes.getStatus().getStatus_code());
    } finally {
        server.stop();
        callbackServer.stop();
    }
}