List of usage examples for org.apache.hadoop.fs.FileSystem#isDirectory(Path)
@Deprecated public boolean isDirectory(Path f) throws IOException
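isDirectory(Path) returns true when the path names a directory; the default implementation returns false, rather than throwing, when the path does not exist. The method is deprecated, with the Javadoc pointing to getFileStatus() as the replacement. Below is a minimal self-contained sketch of both call styles; the class name and the sample path are placeholders, not taken from any of the examples that follow.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class IsDirectoryExample {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            Path path = new Path(args.length > 0 ? args[0] : "/tmp/example"); // placeholder path
            FileSystem fs = path.getFileSystem(conf);

            // Deprecated convenience method: returns false when the path does not exist.
            boolean viaDeprecatedCall = fs.isDirectory(path);

            // Replacement suggested by the deprecation note: inspect the FileStatus directly.
            boolean viaFileStatus = fs.exists(path) && fs.getFileStatus(path).isDirectory();

            System.out.println(path + " is a directory: " + viaDeprecatedCall + " / " + viaFileStatus);
        }
    }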
From source file: org.apache.gobblin.hive.orc.HiveOrcSerDeManager.java
License: Apache License

/**
 * Get the schema as a TypeInfo object
 * @param path path that contains the ORC files
 * @param fs {@link FileSystem}
 * @return {@link TypeInfo} with the schema information
 * @throws IOException
 */
public TypeInfo getSchemaFromLatestFile(Path path, FileSystem fs) throws IOException {
  if (fs.isDirectory(path)) {
    List<FileStatus> files = Arrays.asList(fs.listStatus(path, new PathFilter() {
      @Override
      public boolean accept(Path path) {
        try {
          return ignoredFilePrefixes.stream().noneMatch(e -> path.getName().startsWith(e))
              && fileExtensions.stream().anyMatch(e -> path.getName().endsWith(e))
              && isORC(path, fs);
        } catch (IOException e) {
          log.error("Error checking file for schema retrieval", e);
          return false;
        }
      }
    }));
    if (files.size() > 0) {
      Collections.sort(files, FileListUtils.LATEST_MOD_TIME_ORDER);
    } else {
      throw new FileNotFoundException("No files in Dataset:" + path + " found for schema retrieval");
    }
    return getSchemaFromLatestFile(files.get(0).getPath(), fs);
  } else {
    return TypeInfoUtils
        .getTypeInfoFromObjectInspector(OrcFile.createReader(fs, path).getObjectInspector());
  }
}
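A short hypothetical caller for the method above, assuming a HiveOrcSerDeManager instance named serDeManager and a sample dataset directory; neither the variable name nor the path is part of the Gobblin source shown here.

    // Hypothetical usage sketch (variable names and path are assumptions, not Gobblin code).
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path datasetDir = new Path("/data/orc/my_dataset");
    TypeInfo schema = serDeManager.getSchemaFromLatestFile(datasetDir, fs);
    log.info("Resolved ORC schema: {}", schema.getTypeName());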
From source file: org.apache.hama.examples.util.WritableUtil.java
License: Apache License

/**
 * This method is used to read a vector from the specified path in SpMVTest. For
 * test purposes only.
 *
 * @param pathString
 *          input path for vector
 * @param result
 *          instance of vector writable which should be filled.
 * @param conf
 *          configuration
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public static void readFromFile(String pathString, Writable result, Configuration conf) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  SequenceFile.Reader reader = null;
  Path path = new Path(pathString);
  List<String> filePaths = new ArrayList<String>();
  // TODO this deprecation should be fixed.
  if (fs.isDirectory(path)) {
    FileStatus[] stats = fs.listStatus(path);
    for (FileStatus stat : stats) {
      filePaths.add(stat.getPath().toUri().getPath());
    }
  } else if (fs.isFile(path)) {
    filePaths.add(path.toString());
  }
  try {
    for (String filePath : filePaths) {
      reader = new SequenceFile.Reader(fs, new Path(filePath), conf);
      IntWritable key = new IntWritable();
      reader.next(key, result);
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  } finally {
    if (reader != null)
      reader.close();
  }
}
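The TODO above refers to the deprecated fs.isDirectory/fs.isFile checks and the deprecated SequenceFile.Reader(FileSystem, Path, Configuration) constructor. Below is a sketch of the same logic written against the non-deprecated Hadoop 2+ API, assuming the same imports as the example above; the helper name readVectorFromFile is hypothetical and not part of the Hama source.

    // Sketch of the non-deprecated equivalents (Hadoop 2+ API); not part of the Hama source.
    public static void readVectorFromFile(String pathString, Writable result, Configuration conf)
        throws IOException {
      Path path = new Path(pathString);
      FileSystem fs = path.getFileSystem(conf);
      FileStatus status = fs.getFileStatus(path); // replaces fs.isDirectory(path) / fs.isFile(path)
      List<Path> files = new ArrayList<Path>();
      if (status.isDirectory()) {
        for (FileStatus stat : fs.listStatus(path)) {
          files.add(stat.getPath());
        }
      } else {
        files.add(path);
      }
      for (Path file : files) {
        // The Option-based constructor replaces the deprecated Reader(fs, path, conf).
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file))) {
          IntWritable key = new IntWritable();
          reader.next(key, result);
        }
      }
    }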
From source file: org.apache.hoya.avro.RoleHistoryWriter.java
License: Apache License

/**
 * Find all history entries in a dir. The dir is created if it is
 * not already defined.
 *
 * The scan uses the match pattern {@link HoyaKeys#HISTORY_FILENAME_MATCH_PATTERN}
 * while dropping empty files and directories which match the pattern.
 * The list is then sorted with a comparator that sorts on filename,
 * relying on the filename of newer created files being later than the old ones.
 *
 * @param fs filesystem
 * @param dir dir to scan
 * @param includeEmptyFiles should empty files be included in the result?
 * @return a possibly empty list
 * @throws IOException IO problems
 * @throws FileNotFoundException if the target dir is actually a file
 */
public List<Path> findAllHistoryEntries(FileSystem fs, Path dir, boolean includeEmptyFiles)
    throws IOException {
  assert fs != null;
  assert dir != null;
  if (!fs.exists(dir)) {
    fs.mkdirs(dir);
  } else if (!fs.isDirectory(dir)) {
    throw new FileNotFoundException("Not a directory " + dir.toString());
  }

  PathFilter filter = new GlobFilter(HoyaKeys.HISTORY_FILENAME_GLOB_PATTERN);
  FileStatus[] stats = fs.listStatus(dir, filter);
  List<Path> paths = new ArrayList<Path>(stats.length);
  for (FileStatus stat : stats) {
    log.debug("Possible entry: {}", stat.toString());
    if (stat.isFile() && (includeEmptyFiles || stat.getLen() > 0)) {
      paths.add(stat.getPath());
    }
  }
  sortHistoryPaths(paths);
  return paths;
}
From source file: org.apache.hoya.core.build.InstanceBuilder.java
License: Apache License

public void takeSnapshotOfConfDir(Path appconfdir)
    throws IOException, BadConfigException, BadClusterStateException {
  FileSystem srcFS = FileSystem.get(appconfdir.toUri(), conf);
  if (!srcFS.isDirectory(appconfdir)) {
    throw new BadConfigException("Source Configuration directory is not valid: %s",
        appconfdir.toString());
  }
  // bulk copy
  FsPermission clusterPerms = coreFS.getInstanceDirectoryPermissions();
  // first the original from wherever to the DFS
  HoyaUtils.copyDirectory(conf, appconfdir, instancePaths.snapshotConfPath, clusterPerms);
}
From source file: org.apache.hoya.tools.HoyaUtils.java
License: Apache License

/**
 * Copy a directory to a new FS - both paths must be qualified. If
 * a directory needs to be created, supplied permissions can override
 * the default values. Existing directories are not touched.
 * @param conf conf file
 * @param srcDirPath src dir
 * @param destDirPath dest dir
 * @param permission permission for the dest directory; null means "default"
 * @return # of files copied
 */
public static int copyDirectory(Configuration conf, Path srcDirPath, Path destDirPath,
    FsPermission permission) throws IOException, BadClusterStateException {
  FileSystem srcFS = FileSystem.get(srcDirPath.toUri(), conf);
  FileSystem destFS = FileSystem.get(destDirPath.toUri(), conf);
  // list all paths in the src.
  if (!srcFS.exists(srcDirPath)) {
    throw new FileNotFoundException("Source dir not found " + srcDirPath);
  }
  if (!srcFS.isDirectory(srcDirPath)) {
    throw new FileNotFoundException("Source dir not a directory " + srcDirPath);
  }
  FileStatus[] entries = srcFS.listStatus(srcDirPath);
  int srcFileCount = entries.length;
  if (srcFileCount == 0) {
    return 0;
  }
  if (permission == null) {
    permission = FsPermission.getDirDefault();
  }
  if (!destFS.exists(destDirPath)) {
    new HoyaFileSystem(destFS, conf).createWithPermissions(destDirPath, permission);
  }
  Path[] sourcePaths = new Path[srcFileCount];
  for (int i = 0; i < srcFileCount; i++) {
    FileStatus e = entries[i];
    Path srcFile = e.getPath();
    if (srcFS.isDirectory(srcFile)) {
      throw new IOException("Configuration dir " + srcDirPath + " contains a directory " + srcFile);
    }
    log.debug("copying src conf file {}", srcFile);
    sourcePaths[i] = srcFile;
  }
  log.debug("Copying {} files from {} to dest {}", srcFileCount, srcDirPath, destDirPath);
  FileUtil.copy(srcFS, sourcePaths, destFS, destDirPath, false, true, conf);
  return srcFileCount;
}
From source file: org.apache.impala.analysis.LoadDataStmt.java
License: Apache License

/**
 * Check to see if Impala has the necessary permissions to access the source and dest
 * paths for this LOAD statement (which maps onto a sequence of file move operations,
 * with the requisite permission requirements), and check to see if all files to be
 * moved are in a format that Impala understands. Errors are raised as AnalysisExceptions.
 */
private void analyzePaths(Analyzer analyzer, HdfsTable hdfsTable) throws AnalysisException {
  // The user must have permission to access the source location. Since the files will
  // be moved from this location, the user needs to have all permission.
  sourceDataPath_.analyze(analyzer, Privilege.ALL);

  // Catch all exceptions thrown by accessing files, and rethrow as AnalysisExceptions.
  try {
    Path source = sourceDataPath_.getPath();
    FileSystem fs = source.getFileSystem(FileSystemUtil.getConfiguration());
    if (!(fs instanceof DistributedFileSystem) && !(fs instanceof S3AFileSystem)) {
      throw new AnalysisException(String.format(
          "INPATH location '%s' must point to an HDFS or S3A filesystem.", sourceDataPath_));
    }
    if (!fs.exists(source)) {
      throw new AnalysisException(String.format(
          "INPATH location '%s' does not exist.", sourceDataPath_));
    }

    // If the source file is a directory, we must be able to read from and write to
    // it. If the source file is a file, we must be able to read from it, and write to
    // its parent directory (in order to delete the file as part of the move operation).
    FsPermissionChecker checker = FsPermissionChecker.getInstance();
    if (fs.isDirectory(source)) {
      if (FileSystemUtil.getTotalNumVisibleFiles(source) == 0) {
        throw new AnalysisException(String.format(
            "INPATH location '%s' contains no visible files.", sourceDataPath_));
      }
      if (FileSystemUtil.containsVisibleSubdirectory(source)) {
        throw new AnalysisException(String.format(
            "INPATH location '%s' cannot contain non-hidden subdirectories.", sourceDataPath_));
      }
      if (!checker.getPermissions(fs, source).checkPermissions(FsAction.READ_WRITE)) {
        throw new AnalysisException(String.format(
            "Unable to LOAD DATA from %s because Impala does not have READ and WRITE "
                + "permissions on this directory", source));
      }
    } else {
      // INPATH names a file.
      if (FileSystemUtil.isHiddenFile(source.getName())) {
        throw new AnalysisException(String.format(
            "INPATH location '%s' points to a hidden file.", source));
      }
      if (!checker.getPermissions(fs, source.getParent()).checkPermissions(FsAction.WRITE)) {
        throw new AnalysisException(String.format(
            "Unable to LOAD DATA from %s because Impala does not have WRITE permissions on its "
                + "parent directory %s", source, source.getParent()));
      }
      if (!checker.getPermissions(fs, source).checkPermissions(FsAction.READ)) {
        throw new AnalysisException(String.format(
            "Unable to LOAD DATA from %s because Impala does not have READ permissions on this file",
            source));
      }
    }

    String noWriteAccessErrorMsg = String.format(
        "Unable to LOAD DATA into target table (%s) because Impala does not have WRITE access "
            + "to HDFS location: ", hdfsTable.getFullName());
    HdfsPartition partition;
    String location;
    if (partitionSpec_ != null) {
      partition = hdfsTable.getPartition(partitionSpec_.getPartitionSpecKeyValues());
      location = partition.getLocation();
      if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
        throw new AnalysisException(noWriteAccessErrorMsg + location);
      }
    } else {
      // "default" partition
      partition = hdfsTable.getPartitionMap().get(ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID);
      location = hdfsTable.getLocation();
      if (!hdfsTable.hasWriteAccess()) {
        throw new AnalysisException(noWriteAccessErrorMsg + hdfsTable.getLocation());
      }
    }
    Preconditions.checkNotNull(partition);

    // Verify the files being loaded are supported.
    for (FileStatus fStatus : fs.listStatus(source)) {
      if (fs.isDirectory(fStatus.getPath())) continue;
      StringBuilder errorMsg = new StringBuilder();
      HdfsFileFormat fileFormat = partition.getInputFormatDescriptor().getFileFormat();
      if (!fileFormat.isFileCompressionTypeSupported(fStatus.getPath().toString(), errorMsg)) {
        throw new AnalysisException(errorMsg.toString());
      }
    }
  } catch (FileNotFoundException e) {
    throw new AnalysisException("File not found: " + e.getMessage(), e);
  } catch (IOException e) {
    throw new AnalysisException("Error accessing filesystem: " + e.getMessage(), e);
  }
}
From source file: org.apache.impala.common.FileSystemUtil.java
License: Apache License

/**
 * Relocates all visible (non-hidden) files from a source directory to a destination
 * directory. Files are moved (renamed) to the new location unless the source and
 * destination directories are in different encryption zones, in which case the files
 * are copied so that they are decrypted and/or encrypted. Naming conflicts are
 * resolved by appending a UUID to the base file name. Any sub-directories within the
 * source directory are skipped. Returns the number of files relocated as part of this
 * operation.
 */
public static int relocateAllVisibleFiles(Path sourceDir, Path destDir) throws IOException {
  FileSystem destFs = destDir.getFileSystem(CONF);
  FileSystem sourceFs = sourceDir.getFileSystem(CONF);
  Preconditions.checkState(destFs.isDirectory(destDir));
  Preconditions.checkState(sourceFs.isDirectory(sourceDir));

  // Use the same UUID to resolve all file name conflicts. This helps mitigate problems
  // that might happen if there is a conflict moving a set of files that have
  // dependent file names. For example, foo.lzo and foo.lzo_index.
  UUID uuid = UUID.randomUUID();

  // Enumerate all the files in the source
  int numFilesMoved = 0;
  for (FileStatus fStatus : sourceFs.listStatus(sourceDir)) {
    if (fStatus.isDirectory()) {
      if (LOG.isTraceEnabled()) {
        LOG.trace("Skipping copy of directory: " + fStatus.getPath());
      }
      continue;
    } else if (isHiddenFile(fStatus.getPath().getName())) {
      continue;
    }

    Path destFile = new Path(destDir, fStatus.getPath().getName());
    if (destFs.exists(destFile)) {
      destFile = new Path(destDir, appendToBaseFileName(destFile.getName(), uuid.toString()));
    }
    FileSystemUtil.relocateFile(fStatus.getPath(), destFile, false);
    ++numFilesMoved;
  }
  return numFilesMoved;
}
From source file: org.apache.impala.common.FileSystemUtil.java
License: Apache License

/**
 * Relocates the given file to a new location (either another directory or a
 * file in the same or different filesystem). The file is generally moved (renamed) to
 * the new location. However, the file is copied if the source and destination are in
 * different encryption zones so that the file can be decrypted and/or encrypted, or if
 * the source and destination are in different filesystems. If renameIfAlreadyExists is
 * true, no error will be thrown if a file with the same name already exists in the
 * destination location. Instead, a UUID will be appended to the base file name,
 * preserving the existing file extension. If renameIfAlreadyExists is false, an
 * IOException will be thrown if there is a file name conflict.
 */
public static void relocateFile(Path sourceFile, Path dest, boolean renameIfAlreadyExists)
    throws IOException {
  FileSystem destFs = dest.getFileSystem(CONF);
  FileSystem sourceFs = sourceFile.getFileSystem(CONF);
  Path destFile = destFs.isDirectory(dest) ? new Path(dest, sourceFile.getName()) : dest;
  // If a file with the same name does not already exist in the destination location
  // then use the same file name. Otherwise, generate a unique file name.
  if (renameIfAlreadyExists && destFs.exists(destFile)) {
    Path destDir = destFs.isDirectory(dest) ? dest : dest.getParent();
    destFile = new Path(destDir, appendToBaseFileName(destFile.getName(), UUID.randomUUID().toString()));
  }
  boolean sameFileSystem = isPathOnFileSystem(sourceFile, destFs);
  boolean destIsDfs = isDistributedFileSystem(destFs);

  // If the source and the destination are on different file systems, or in different
  // encryption zones, files can't be moved from one location to the other and must be
  // copied instead.
  boolean sameEncryptionZone = arePathsInSameHdfsEncryptionZone(destFs, sourceFile, destFile);
  // We can do a rename if the src and dst are in the same encryption zone in the same
  // distributed filesystem.
  boolean doRename = destIsDfs && sameFileSystem && sameEncryptionZone;
  // Alternatively, we can do a rename if the src and dst are on the same
  // non-distributed filesystem.
  if (!doRename) doRename = !destIsDfs && sameFileSystem;
  if (doRename) {
    if (LOG.isTraceEnabled()) {
      LOG.trace(String.format("Moving '%s' to '%s'", sourceFile.toString(), destFile.toString()));
    }
    // Move (rename) the file.
    destFs.rename(sourceFile, destFile);
    return;
  }
  if (destIsDfs && sameFileSystem) {
    Preconditions.checkState(!doRename);
    // We must copy rather than move if the source and dest are in different
    // encryption zones. A move would return an error from the NN because a move is a
    // metadata-only operation and the files would not be encrypted/decrypted properly
    // on the DNs.
    if (LOG.isTraceEnabled()) {
      LOG.trace(String.format(
          "Copying source '%s' to '%s' because HDFS encryption zones are different.",
          sourceFile, destFile));
    }
  } else {
    Preconditions.checkState(!sameFileSystem);
    if (LOG.isTraceEnabled()) {
      LOG.trace(String.format("Copying '%s' to '%s' between filesystems.", sourceFile, destFile));
    }
  }
  FileUtil.copy(sourceFs, sourceFile, destFs, destFile, true, true, CONF);
}
From source file: org.apache.impala.service.Frontend.java
License: Apache License

/**
 * Loads a table or partition with one or more data files. If the "overwrite" flag
 * in the request is true, all existing data in the table/partition will be replaced.
 * If the "overwrite" flag is false, the files will be added alongside any existing
 * data files.
 */
public TLoadDataResp loadTableData(TLoadDataReq request) throws ImpalaException, IOException {
  TableName tableName = TableName.fromThrift(request.getTable_name());

  // Get the destination for the load. If the load is targeting a partition,
  // this is the partition location. Otherwise this is the table location.
  String destPathString = null;
  if (request.isSetPartition_spec()) {
    destPathString = impaladCatalog_
        .getHdfsPartition(tableName.getDb(), tableName.getTbl(), request.getPartition_spec())
        .getLocation();
  } else {
    destPathString = impaladCatalog_.getTable(tableName.getDb(), tableName.getTbl())
        .getMetaStoreTable().getSd().getLocation();
  }

  Path destPath = new Path(destPathString);
  Path sourcePath = new Path(request.source_path);
  FileSystem destFs = destPath.getFileSystem(FileSystemUtil.getConfiguration());
  FileSystem sourceFs = sourcePath.getFileSystem(FileSystemUtil.getConfiguration());

  // Create a temporary directory within the final destination directory to stage the
  // file move.
  Path tmpDestPath = FileSystemUtil.makeTmpSubdirectory(destPath);

  int filesLoaded = 0;
  if (sourceFs.isDirectory(sourcePath)) {
    filesLoaded = FileSystemUtil.relocateAllVisibleFiles(sourcePath, tmpDestPath);
  } else {
    FileSystemUtil.relocateFile(sourcePath, tmpDestPath, true);
    filesLoaded = 1;
  }

  // If this is an OVERWRITE, delete all files in the destination.
  if (request.isOverwrite()) {
    FileSystemUtil.deleteAllVisibleFiles(destPath);
  }

  // Move the files from the temporary location to the final destination.
  FileSystemUtil.relocateAllVisibleFiles(tmpDestPath, destPath);
  // Cleanup the tmp directory.
  destFs.delete(tmpDestPath, true);

  TLoadDataResp response = new TLoadDataResp();
  TColumnValue col = new TColumnValue();
  String loadMsg = String.format("Loaded %d file(s). Total files in destination location: %d",
      filesLoaded, FileSystemUtil.getTotalNumVisibleFiles(destPath));
  col.setString_val(loadMsg);
  response.setLoad_summary(new TResultRow(Lists.newArrayList(col)));
  return response;
}
From source file: org.apache.kylin.engine.mr.steps.MergeStatisticsWithOldStep.java
License: Apache License

@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
  final CubeManager mgr = CubeManager.getInstance(context.getConfig());
  final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
  final CubeSegment optimizeSegment = cube
      .getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

  CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
  Preconditions.checkNotNull(oldSegment,
      "cannot find the original segment to be optimized by " + optimizeSegment);

  KylinConfig kylinConf = cube.getConfig();
  Configuration conf = HadoopUtil.getCurrentConfiguration();
  ResourceStore rs = ResourceStore.getStore(kylinConf);
  int averageSamplingPercentage = 0;

  try {
    //1. Add statistics from optimized segment
    Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
    FileSystem hdfs = FileSystem.get(conf);
    if (!hdfs.exists(statisticsDirPath)) {
      throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exists");
    }
    if (!hdfs.isDirectory(statisticsDirPath)) {
      throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
    }

    Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
        BatchConstants.CFG_OUTPUT_STATISTICS);
    if (statisticsFiles == null) {
      throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
    }

    for (Path item : statisticsFiles) {
      CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
          optimizeSegment.getConfig(), item);
      averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
      addFromCubeStatsReader(optimizeSegmentStatsReader);
    }

    //2. Add statistics from old segment
    CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
    averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
    addFromCubeStatsReader(oldSegmentStatsReader);

    logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());

    //3. Store merged statistics for recommend cuboids
    averageSamplingPercentage = averageSamplingPercentage / 2;
    Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();

    Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
    for (Long cuboid : cuboidsRecommend) {
      HLLCounter hll = cuboidHLLMap.get(cuboid);
      if (hll == null) {
        logger.warn("Cannot get the row count stats for cuboid " + cuboid);
      } else {
        resultCuboidHLLMap.put(cuboid, hll);
      }
    }

    String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
    CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
        averageSamplingPercentage);

    try (FSDataInputStream mergedStats = hdfs
        .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
      // put the statistics to metadata store
      String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
      rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
    }

    //By default, the cube optimization will use in-memory cubing
    CubingJob cubingJob = (CubingJob) getManager()
        .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
    StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

    return new ExecuteResult();
  } catch (IOException e) {
    logger.error("fail to merge cuboid statistics", e);
    return ExecuteResult.createError(e);
  }
}