Example usage for org.apache.hadoop.fs FileSystem isDirectory

List of usage examples for org.apache.hadoop.fs FileSystem isDirectory

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem#isDirectory.

Prototype

@Deprecated
public boolean isDirectory(Path f) throws IOException 

Document

True iff the named path is a directory.
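
Before the per-project examples below, here is a minimal, self-contained sketch of the call together with the getFileStatus-based check that the deprecation points toward. The class name, the default-filesystem configuration, and the /tmp fallback path are illustrative assumptions, not taken from any of the projects quoted on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(args.length > 0 ? args[0] : "/tmp");

        // Deprecated convenience call: returns false when the path does not exist.
        System.out.println(path + " is a directory: " + fs.isDirectory(path));

        // Non-deprecated equivalent: getFileStatus() throws FileNotFoundException
        // for a missing path, so guard with exists() when that matters.
        if (fs.exists(path)) {
            FileStatus status = fs.getFileStatus(path);
            System.out.println(path + " is a directory: " + status.isDirectory());
        }
    }
}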

Usage

From source file:org.apache.gobblin.hive.orc.HiveOrcSerDeManager.java

License:Apache License

/**
 * Get the schema as a TypeInfo object
 * @param path path that contains the ORC files
 * @param fs {@link FileSystem}
 * @return {@link TypeInfo} with the schema information
 * @throws IOException
 */
public TypeInfo getSchemaFromLatestFile(Path path, FileSystem fs) throws IOException {
    if (fs.isDirectory(path)) {
        List<FileStatus> files = Arrays.asList(fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                try {
                    return ignoredFilePrefixes.stream().noneMatch(e -> path.getName().startsWith(e))
                            && fileExtensions.stream().anyMatch(e -> path.getName().endsWith(e))
                            && isORC(path, fs);
                } catch (IOException e) {
                    log.error("Error checking file for schema retrieval", e);
                    return false;
                }
            }
        }));

        if (files.size() > 0) {
            Collections.sort((files), FileListUtils.LATEST_MOD_TIME_ORDER);
        } else {
            throw new FileNotFoundException("No files in Dataset:" + path + " found for schema retrieval");
        }
        return getSchemaFromLatestFile(files.get(0).getPath(), fs);
    } else {
        return TypeInfoUtils
                .getTypeInfoFromObjectInspector(OrcFile.createReader(fs, path).getObjectInspector());
    }
}
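
FileListUtils.LATEST_MOD_TIME_ORDER is a Gobblin comparator. As a rough sketch, the same "pick the newest matching file in a directory" idea can be expressed with only public Hadoop and JDK APIs; the directory path and the .orc suffix filter below are illustrative assumptions.

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class NewestOrcFileExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path dir = new Path("/data/orc-dataset"); // illustrative path

        if (!fs.isDirectory(dir)) {
            throw new IOException(dir + " is not a directory");
        }
        // PathFilter has a single accept(Path) method, so a lambda works here.
        FileStatus[] candidates = fs.listStatus(dir, p -> p.getName().endsWith(".orc"));
        if (candidates.length == 0) {
            throw new FileNotFoundException("No ORC files found under " + dir);
        }
        FileStatus newest = Arrays.stream(candidates)
                .max(Comparator.comparingLong(FileStatus::getModificationTime))
                .get();
        System.out.println("Would read the schema from " + newest.getPath());
    }
}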

From source file:org.apache.hama.examples.util.WritableUtil.java

License:Apache License

/**
 * This method is used to read a vector from the specified path in SpMVTest. For
 * test purposes only.
 * 
 * @param pathString
 *          input path for vector
 * @param result
 *          instance of vector writable which should be filled.
 * @param conf
 *          configuration
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public static void readFromFile(String pathString, Writable result, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = null;
    Path path = new Path(pathString);
    List<String> filePaths = new ArrayList<String>();
    // TODO this deprecation should be fixed.
    if (fs.isDirectory(path)) {
        FileStatus[] stats = fs.listStatus(path);
        for (FileStatus stat : stats) {
            filePaths.add(stat.getPath().toUri().getPath());
        }
    } else if (fs.isFile(path)) {
        filePaths.add(path.toString());
    }
    try {
        for (String filePath : filePaths) {
            reader = new SequenceFile.Reader(fs, new Path(filePath), conf);
            IntWritable key = new IntWritable();
            reader.next(key, result);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        if (reader != null)
            reader.close();
    }
}
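
A hypothetical invocation of the helper above; the path and the Text value type are illustrative assumptions and must match whatever was actually written into the sequence file.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hama.examples.util.WritableUtil;

public class ReadFromFileExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Text value = new Text();
        // The helper reads one record from every sequence file it finds under the
        // path (file or directory), so "value" ends up holding the record from the
        // last file processed.
        WritableUtil.readFromFile("/tmp/spmv/vector", value, conf);
        System.out.println("Last value read: " + value);
    }
}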

From source file:org.apache.hoya.avro.RoleHistoryWriter.java

License:Apache License

/**
 * Find all history entries in a dir. The dir is created if it is
 * not already defined.
 * 
 * The scan uses the match pattern {@link HoyaKeys#HISTORY_FILENAME_MATCH_PATTERN}
 * while dropping empty files and directories which match the pattern.
 * The list is then sorted with a comparator that sorts on filename,
 * relying on the filename of newer created files being later than the old ones.
 * 
 * 
 *
 * @param fs filesystem
 * @param dir dir to scan
 * @param includeEmptyFiles should empty files be included in the result?
 * @return a possibly empty list
 * @throws IOException IO problems
 * @throws FileNotFoundException if the target dir is actually a file
 */
public List<Path> findAllHistoryEntries(FileSystem fs, Path dir, boolean includeEmptyFiles) throws IOException {
    assert fs != null;
    assert dir != null;
    if (!fs.exists(dir)) {
        fs.mkdirs(dir);
    } else if (!fs.isDirectory(dir)) {
        throw new FileNotFoundException("Not a directory " + dir.toString());
    }

    PathFilter filter = new GlobFilter(HoyaKeys.HISTORY_FILENAME_GLOB_PATTERN);
    FileStatus[] stats = fs.listStatus(dir, filter);
    List<Path> paths = new ArrayList<Path>(stats.length);
    for (FileStatus stat : stats) {
        log.debug("Possible entry: {}", stat.toString());
        if (stat.isFile() && (includeEmptyFiles || stat.getLen() > 0)) {
            paths.add(stat.getPath());
        }
    }
    sortHistoryPaths(paths);
    return paths;
}
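
HoyaKeys.HISTORY_FILENAME_GLOB_PATTERN is Hoya-specific. Here is a minimal sketch of the same "create if missing, otherwise require a directory, then glob-filter the listing" pattern using only public Hadoop APIs; the directory path and the *.json glob are illustrative assumptions.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.GlobFilter;
import org.apache.hadoop.fs.Path;

public class HistoryScanExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path dir = new Path("/app/history"); // illustrative path

        if (!fs.exists(dir)) {
            fs.mkdirs(dir);
        } else if (!fs.isDirectory(dir)) {
            throw new FileNotFoundException("Not a directory: " + dir);
        }
        // Keep only non-empty plain files whose names match the glob.
        for (FileStatus stat : fs.listStatus(dir, new GlobFilter("*.json"))) {
            if (stat.isFile() && stat.getLen() > 0) {
                System.out.println("history entry: " + stat.getPath());
            }
        }
    }
}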

From source file:org.apache.hoya.core.build.InstanceBuilder.java

License:Apache License

public void takeSnapshotOfConfDir(Path appconfdir)
        throws IOException, BadConfigException, BadClusterStateException {
    FileSystem srcFS = FileSystem.get(appconfdir.toUri(), conf);
    if (!srcFS.isDirectory(appconfdir)) {
        throw new BadConfigException("Source Configuration directory is not valid: %s", appconfdir.toString());
    }
    // bulk copy
    FsPermission clusterPerms = coreFS.getInstanceDirectoryPermissions();
    // first the original from wherever to the DFS
    HoyaUtils.copyDirectory(conf, appconfdir, instancePaths.snapshotConfPath, clusterPerms);
}

From source file:org.apache.hoya.tools.HoyaUtils.java

License:Apache License

/**
 * Copy a directory to a new FS; both paths must be qualified. If
 * a directory needs to be created, supplied permissions can override
 * the default values. Existing directories are not touched.
 * @param conf configuration
 * @param srcDirPath src dir
 * @param destDirPath dest dir
 * @param permission permission for the dest directory; null means "default"
 * @return # of files copied
 */
public static int copyDirectory(Configuration conf, Path srcDirPath, Path destDirPath, FsPermission permission)
        throws IOException, BadClusterStateException {
    FileSystem srcFS = FileSystem.get(srcDirPath.toUri(), conf);
    FileSystem destFS = FileSystem.get(destDirPath.toUri(), conf);
    //list all paths in the src.
    if (!srcFS.exists(srcDirPath)) {
        throw new FileNotFoundException("Source dir not found " + srcDirPath);
    }
    if (!srcFS.isDirectory(srcDirPath)) {
        throw new FileNotFoundException("Source dir not a directory " + srcDirPath);
    }
    FileStatus[] entries = srcFS.listStatus(srcDirPath);
    int srcFileCount = entries.length;
    if (srcFileCount == 0) {
        return 0;
    }
    if (permission == null) {
        permission = FsPermission.getDirDefault();
    }
    if (!destFS.exists(destDirPath)) {
        new HoyaFileSystem(destFS, conf).createWithPermissions(destDirPath, permission);
    }
    Path[] sourcePaths = new Path[srcFileCount];
    for (int i = 0; i < srcFileCount; i++) {
        FileStatus e = entries[i];
        Path srcFile = e.getPath();
        if (srcFS.isDirectory(srcFile)) {
            throw new IOException("Configuration dir " + srcDirPath + " contains a directory " + srcFile);
        }
        log.debug("copying src conf file {}", srcFile);
        sourcePaths[i] = srcFile;
    }
    log.debug("Copying {} files from {} to dest {}", srcFileCount, srcDirPath, destDirPath);
    FileUtil.copy(srcFS, sourcePaths, destFS, destDirPath, false, true, conf);
    return srcFileCount;
}
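
A hypothetical call to the helper above; the source and destination URIs are illustrative assumptions, and passing null for the permission falls back to FsPermission.getDirDefault() as the method body shows.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hoya.tools.HoyaUtils;

public class CopyDirectoryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path srcConfDir = new Path("file:///etc/myapp/conf");                // illustrative
        Path destConfDir = new Path("hdfs://namenode:8020/apps/myapp/conf"); // illustrative
        // Copies every plain file in the source directory; the method throws if the
        // source contains a sub-directory.
        int copied = HoyaUtils.copyDirectory(conf, srcConfDir, destConfDir, null);
        System.out.println("Copied " + copied + " configuration files");
    }
}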

From source file:org.apache.impala.analysis.LoadDataStmt.java

License:Apache License

/**
 * Check to see if Impala has the necessary permissions to access the source and dest
 * paths for this LOAD statement (which maps onto a sequence of file move operations,
 * with the requisite permission requirements), and check to see if all files to be
 * moved are in a format that Impala understands. Errors are raised as AnalysisExceptions.
 */
private void analyzePaths(Analyzer analyzer, HdfsTable hdfsTable) throws AnalysisException {
    // The user must have permission to access the source location. Since the files will
    // be moved from this location, the user needs to have all permission.
    sourceDataPath_.analyze(analyzer, Privilege.ALL);

    // Catch all exceptions thrown by accessing files, and rethrow as AnalysisExceptions.
    try {
        Path source = sourceDataPath_.getPath();
        FileSystem fs = source.getFileSystem(FileSystemUtil.getConfiguration());
        if (!(fs instanceof DistributedFileSystem) && !(fs instanceof S3AFileSystem)) {
            throw new AnalysisException(String.format(
                    "INPATH location '%s' " + "must point to an HDFS or S3A filesystem.", sourceDataPath_));
        }
        if (!fs.exists(source)) {
            throw new AnalysisException(String.format("INPATH location '%s' does not exist.", sourceDataPath_));
        }

        // If the source file is a directory, we must be able to read from and write to
        // it. If the source file is a file, we must be able to read from it, and write to
        // its parent directory (in order to delete the file as part of the move operation).
        FsPermissionChecker checker = FsPermissionChecker.getInstance();

        if (fs.isDirectory(source)) {
            if (FileSystemUtil.getTotalNumVisibleFiles(source) == 0) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' contains no visible files.", sourceDataPath_));
            }
            if (FileSystemUtil.containsVisibleSubdirectory(source)) {
                throw new AnalysisException(String.format(
                        "INPATH location '%s' cannot contain non-hidden subdirectories.", sourceDataPath_));
            }
            if (!checker.getPermissions(fs, source).checkPermissions(FsAction.READ_WRITE)) {
                throw new AnalysisException(String.format(
                        "Unable to LOAD DATA from %s "
                                + "because Impala does not have READ and WRITE permissions on this directory",
                        source));
            }
        } else {
            // INPATH names a file.
            if (FileSystemUtil.isHiddenFile(source.getName())) {
                throw new AnalysisException(
                        String.format("INPATH location '%s' points to a hidden file.", source));
            }

            if (!checker.getPermissions(fs, source.getParent()).checkPermissions(FsAction.WRITE)) {
                throw new AnalysisException(String.format("Unable to LOAD DATA from %s "
                        + "because Impala does not have WRITE permissions on its parent " + "directory %s",
                        source, source.getParent()));
            }

            if (!checker.getPermissions(fs, source).checkPermissions(FsAction.READ)) {
                throw new AnalysisException(String.format("Unable to LOAD DATA from %s "
                        + "because Impala does not have READ permissions on this file", source));
            }
        }

        String noWriteAccessErrorMsg = String.format(
                "Unable to LOAD DATA into "
                        + "target table (%s) because Impala does not have WRITE access to HDFS " + "location: ",
                hdfsTable.getFullName());

        HdfsPartition partition;
        String location;
        if (partitionSpec_ != null) {
            partition = hdfsTable.getPartition(partitionSpec_.getPartitionSpecKeyValues());
            location = partition.getLocation();
            if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
                throw new AnalysisException(noWriteAccessErrorMsg + location);
            }
        } else {
            // "default" partition
            partition = hdfsTable.getPartitionMap().get(ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID);
            location = hdfsTable.getLocation();
            if (!hdfsTable.hasWriteAccess()) {
                throw new AnalysisException(noWriteAccessErrorMsg + hdfsTable.getLocation());
            }
        }
        Preconditions.checkNotNull(partition);

        // Verify the files being loaded are supported.
        for (FileStatus fStatus : fs.listStatus(source)) {
            if (fs.isDirectory(fStatus.getPath()))
                continue;
            StringBuilder errorMsg = new StringBuilder();
            HdfsFileFormat fileFormat = partition.getInputFormatDescriptor().getFileFormat();
            if (!fileFormat.isFileCompressionTypeSupported(fStatus.getPath().toString(), errorMsg)) {
                throw new AnalysisException(errorMsg.toString());
            }
        }
    } catch (FileNotFoundException e) {
        throw new AnalysisException("File not found: " + e.getMessage(), e);
    } catch (IOException e) {
        throw new AnalysisException("Error accessing filesystem: " + e.getMessage(), e);
    }
}
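
FsPermissionChecker is Impala-internal. Here is a rough sketch of the directory-versus-file distinction described in the comments above, using only public Hadoop APIs; the path is an illustrative assumption, and checking only the owner bits is a deliberate simplification (a real check also considers group and other bits, ACLs, and the caller's identity).

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;

public class LoadPathCheckExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path source = new Path("/staging/load"); // illustrative path

        FileStatus status = fs.getFileStatus(source);
        // A source directory must be readable and writable (files are moved out of
        // it); a single source file only needs to be readable here.
        FsAction needed = status.isDirectory() ? FsAction.READ_WRITE : FsAction.READ;
        boolean ownerBitsOk = status.getPermission().getUserAction().implies(needed);
        System.out.println(source + " owner bits sufficient: " + ownerBitsOk);
    }
}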

From source file:org.apache.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Relocates all visible (non-hidden) files from a source directory to a destination
 * directory. Files are moved (renamed) to the new location unless the source and
 * destination directories are in different encryption zones, in which case the files
 * are copied so that they are decrypted and/or encrypted. Naming conflicts are
 * resolved by appending a UUID to the base file name. Any sub-directories within the
 * source directory are skipped. Returns the number of files relocated as part of this
 * operation.
 */
public static int relocateAllVisibleFiles(Path sourceDir, Path destDir) throws IOException {
    FileSystem destFs = destDir.getFileSystem(CONF);
    FileSystem sourceFs = sourceDir.getFileSystem(CONF);
    Preconditions.checkState(destFs.isDirectory(destDir));
    Preconditions.checkState(sourceFs.isDirectory(sourceDir));

    // Use the same UUID to resolve all file name conflicts. This helps mitigate problems
    // that might happen if there is a conflict moving a set of files that have
    // dependent file names. For example, foo.lzo and foo.lzo_index.
    UUID uuid = UUID.randomUUID();

    // Enumerate all the files in the source
    int numFilesMoved = 0;
    for (FileStatus fStatus : sourceFs.listStatus(sourceDir)) {
        if (fStatus.isDirectory()) {
            if (LOG.isTraceEnabled()) {
                LOG.trace("Skipping copy of directory: " + fStatus.getPath());
            }
            continue;
        } else if (isHiddenFile(fStatus.getPath().getName())) {
            continue;
        }

        Path destFile = new Path(destDir, fStatus.getPath().getName());
        if (destFs.exists(destFile)) {
            destFile = new Path(destDir, appendToBaseFileName(destFile.getName(), uuid.toString()));
        }
        FileSystemUtil.relocateFile(fStatus.getPath(), destFile, false);
        ++numFilesMoved;
    }
    return numFilesMoved;
}
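
A hypothetical call; both directories must already exist (the Preconditions checks above use isDirectory for exactly that), and the paths are illustrative assumptions.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.impala.common.FileSystemUtil;

public class RelocateAllExample {
    public static void main(String[] args) throws IOException {
        Path staging = new Path("hdfs://namenode:8020/staging/t1");     // illustrative
        Path warehouse = new Path("hdfs://namenode:8020/warehouse/t1"); // illustrative
        // Moves (or copies across encryption zones) every visible file, skipping
        // sub-directories and hidden files, and reports how many were relocated.
        int moved = FileSystemUtil.relocateAllVisibleFiles(staging, warehouse);
        System.out.println("Relocated " + moved + " files");
    }
}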

From source file:org.apache.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Relocates the given file to a new location (either another directory or a
 * file in the same or different filesystem). The file is generally moved (renamed) to
 * the new location. However, the file is copied if the source and destination are in
 * different encryption zones so that the file can be decrypted and/or encrypted, or if
 * the source and destination are in different filesystems. If renameIfAlreadyExists is
 * true, no error will be thrown if a file with the same name already exists in the
 * destination location. Instead, a UUID will be appended to the base file name,
 * preserving the existing file extension. If renameIfAlreadyExists is false, an
 * IOException will be thrown if there is a file name conflict.
 */
public static void relocateFile(Path sourceFile, Path dest, boolean renameIfAlreadyExists) throws IOException {
    FileSystem destFs = dest.getFileSystem(CONF);
    FileSystem sourceFs = sourceFile.getFileSystem(CONF);

    Path destFile = destFs.isDirectory(dest) ? new Path(dest, sourceFile.getName()) : dest;
    // If a file with the same name does not already exist in the destination location
    // then use the same file name. Otherwise, generate a unique file name.
    if (renameIfAlreadyExists && destFs.exists(destFile)) {
        Path destDir = destFs.isDirectory(dest) ? dest : dest.getParent();
        destFile = new Path(destDir, appendToBaseFileName(destFile.getName(), UUID.randomUUID().toString()));
    }
    boolean sameFileSystem = isPathOnFileSystem(sourceFile, destFs);
    boolean destIsDfs = isDistributedFileSystem(destFs);

    // If the source and the destination are on different file systems, or in different
    // encryption zones, files can't be moved from one location to the other and must be
    // copied instead.
    boolean sameEncryptionZone = arePathsInSameHdfsEncryptionZone(destFs, sourceFile, destFile);
    // We can do a rename if the src and dst are in the same encryption zone in the same
    // distributed filesystem.
    boolean doRename = destIsDfs && sameFileSystem && sameEncryptionZone;
    // Alternatively, we can do a rename if the src and dst are on the same
    // non-distributed filesystem.
    if (!doRename)
        doRename = !destIsDfs && sameFileSystem;
    if (doRename) {
        if (LOG.isTraceEnabled()) {
            LOG.trace(String.format("Moving '%s' to '%s'", sourceFile.toString(), destFile.toString()));
        }
        // Move (rename) the file.
        destFs.rename(sourceFile, destFile);
        return;
    }
    if (destIsDfs && sameFileSystem) {
        Preconditions.checkState(!doRename);
        // We must copy rather than move if the source and dest are in different
        // encryption zones. A move would return an error from the NN because a move is a
        // metadata-only operation and the files would not be encrypted/decrypted properly
        // on the DNs.
        if (LOG.isTraceEnabled()) {
            LOG.trace(String.format("Copying source '%s' to '%s' because HDFS encryption zones are different.",
                    sourceFile, destFile));
        }
    } else {
        Preconditions.checkState(!sameFileSystem);
        if (LOG.isTraceEnabled()) {
            LOG.trace(String.format("Copying '%s' to '%s' between filesystems.", sourceFile, destFile));
        }
    }
    FileUtil.copy(sourceFs, sourceFile, destFs, destFile, true, true, CONF);
}
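
A hypothetical call; the paths are illustrative assumptions. Passing an existing directory as the destination drops the file into that directory, and renameIfAlreadyExists=true appends a UUID to the base name on a conflict instead of failing.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.impala.common.FileSystemUtil;

public class RelocateFileExample {
    public static void main(String[] args) throws IOException {
        Path sourceFile = new Path("hdfs://namenode:8020/staging/part-00000"); // illustrative
        Path destDir = new Path("hdfs://namenode:8020/warehouse/t1");          // illustrative
        // Renames when source and destination share a filesystem and encryption
        // zone, copies otherwise; a name clash gets a UUID-suffixed file name.
        FileSystemUtil.relocateFile(sourceFile, destDir, true);
    }
}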

From source file:org.apache.impala.service.Frontend.java

License:Apache License

/**
 * Loads a table or partition with one or more data files. If the "overwrite" flag
 * in the request is true, all existing data in the table/partition will be replaced.
 * If the "overwrite" flag is false, the files will be added alongside any existing
 * data files.
 */
public TLoadDataResp loadTableData(TLoadDataReq request) throws ImpalaException, IOException {
    TableName tableName = TableName.fromThrift(request.getTable_name());

    // Get the destination for the load. If the load is targeting a partition,
    // this is the partition location. Otherwise this is the table location.
    String destPathString = null;
    if (request.isSetPartition_spec()) {
        destPathString = impaladCatalog_
                .getHdfsPartition(tableName.getDb(), tableName.getTbl(), request.getPartition_spec())
                .getLocation();
    } else {
        destPathString = impaladCatalog_.getTable(tableName.getDb(), tableName.getTbl()).getMetaStoreTable()
                .getSd().getLocation();
    }

    Path destPath = new Path(destPathString);
    Path sourcePath = new Path(request.source_path);
    FileSystem destFs = destPath.getFileSystem(FileSystemUtil.getConfiguration());
    FileSystem sourceFs = sourcePath.getFileSystem(FileSystemUtil.getConfiguration());

    // Create a temporary directory within the final destination directory to stage the
    // file move.
    Path tmpDestPath = FileSystemUtil.makeTmpSubdirectory(destPath);

    int filesLoaded = 0;
    if (sourceFs.isDirectory(sourcePath)) {
        filesLoaded = FileSystemUtil.relocateAllVisibleFiles(sourcePath, tmpDestPath);
    } else {
        FileSystemUtil.relocateFile(sourcePath, tmpDestPath, true);
        filesLoaded = 1;
    }

    // If this is an OVERWRITE, delete all files in the destination.
    if (request.isOverwrite()) {
        FileSystemUtil.deleteAllVisibleFiles(destPath);
    }

    // Move the files from the temporary location to the final destination.
    FileSystemUtil.relocateAllVisibleFiles(tmpDestPath, destPath);
    // Cleanup the tmp directory.
    destFs.delete(tmpDestPath, true);
    TLoadDataResp response = new TLoadDataResp();
    TColumnValue col = new TColumnValue();
    String loadMsg = String.format("Loaded %d file(s). Total files in destination location: %d", filesLoaded,
            FileSystemUtil.getTotalNumVisibleFiles(destPath));
    col.setString_val(loadMsg);
    response.setLoad_summary(new TResultRow(Lists.newArrayList(col)));
    return response;
}
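
Outside of Impala, the same "stage into a temporary subdirectory, then promote" idea can be sketched with plain Hadoop calls (FileSystemUtil.makeTmpSubdirectory and the relocate helpers above are Impala-internal); the destination path and the temporary directory name are illustrative assumptions.

import java.io.IOException;
import java.util.UUID;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class StageThenPromoteExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path dest = new Path("/warehouse/t1"); // illustrative path
        Path tmp = new Path(dest, "_tmp_load_" + UUID.randomUUID());
        fs.mkdirs(tmp);

        // ... move or copy the incoming files into tmp here ...

        // Promote: rename each staged file into the destination directory,
        // then remove the now-empty staging directory.
        for (FileStatus staged : fs.listStatus(tmp)) {
            fs.rename(staged.getPath(), new Path(dest, staged.getPath().getName()));
        }
        fs.delete(tmp, true);
    }
}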

From source file:org.apache.kylin.engine.mr.steps.MergeStatisticsWithOldStep.java

License:Apache License

@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizeSegment = cube
            .getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment,
            "cannot find the original segment to be optimized by " + optimizeSegment);

    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    int averageSamplingPercentage = 0;

    try {
        //1. Add statistics from optimized segment
        Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        FileSystem hdfs = FileSystem.get(conf);
        if (!hdfs.exists(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exists");
        }

        if (!hdfs.isDirectory(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
        }

        Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
        }

        for (Path item : statisticsFiles) {
            CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
                    optimizeSegment.getConfig(), item);
            averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
            addFromCubeStatsReader(optimizeSegmentStatsReader);
        }

        //2. Add statistics from old segment
        CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
        averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
        addFromCubeStatsReader(oldSegmentStatsReader);

        logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());
        //3. Store merged statistics for recommend cuboids
        averageSamplingPercentage = averageSamplingPercentage / 2;
        Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();

        Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
        for (Long cuboid : cuboidsRecommend) {
            HLLCounter hll = cuboidHLLMap.get(cuboid);
            if (hll == null) {
                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
            } else {
                resultCuboidHLLMap.put(cuboid, hll);
            }
        }

        String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
                averageSamplingPercentage);

        try (FSDataInputStream mergedStats = hdfs
                .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
            // put the statistics to metadata store
            String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
        }

        //By default, the cube optimization will use in-memory cubing
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return ExecuteResult.createError(e);
    }

}