List of usage examples for org.apache.hadoop.fs.FileSystem listStatus

public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
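Before the project examples, a minimal sketch of the basic call pattern, assuming a reachable default FileSystem; the class name and directory path are hypothetical, not taken from any example below:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // listStatus returns the immediate children of the path (non-recursive)
        FileStatus[] entries = fs.listStatus(new Path("/tmp/data")); // hypothetical path
        for (FileStatus entry : entries) {
            System.out.println(entry.getPath() + (entry.isDirectory() ? " [dir]" : " [file]"));
        }
    }
}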
From source file: com.knewton.mapreduce.io.SSTableInputFormat.java
License: Apache License

/**
 * Expands all directories passed as input and keeps only valid data tables.
 *
 * @return A list of all the data tables found under the input directories.
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Configuration conf = job.getConfiguration();
    List<FileStatus> files = super.listStatus(job);
    DataTablePathFilter dataTableFilter = getDataTableFilter(conf);
    files = cleanUpBackupDir(files);
    for (int i = 0; i < files.size(); i++) {
        FileStatus file = files.get(i);
        Path p = file.getPath();
        // Expand if directory
        if (file.isDirectory() && p != null) {
            LOG.info("Expanding {}", p);
            FileSystem fs = p.getFileSystem(conf);
            FileStatus[] children = fs.listStatus(p);
            List<FileStatus> listChildren = Lists.newArrayList(children);
            listChildren = cleanUpBackupDir(listChildren);
            files.addAll(i + 1, listChildren);
        }
        if (!dataTableFilter.accept(file.getPath())) {
            LOG.info("Removing {}", file.getPath());
            files.remove(i);
            i--;
        }
    }
    return files;
}
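The example above expands one directory level at a time by splicing children back into the list it is iterating. Where a full recursive walk is acceptable, FileSystem.listFiles(path, true) does the traversal itself. The sketch below is an alternative, not the original author's approach; it reuses the names fs, p, dataTableFilter, and files from the snippet above, and note that listFiles yields file entries only, never directories:

// Recursive alternative using FileSystem.listFiles; reuses fs, p,
// dataTableFilter, and files from the example above.
// Requires org.apache.hadoop.fs.RemoteIterator and LocatedFileStatus.
RemoteIterator<LocatedFileStatus> it = fs.listFiles(p, true);
while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    // keep only paths the data-table filter accepts
    if (dataTableFilter.accept(status.getPath())) {
        files.add(status);
    }
}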
From source file: com.kxen.han.projection.giraph.BspCase.java
License: Apache License

/**
 * Get the single part file status and make sure there is only one part.
 *
 * @param conf Configuration to get the file system from
 * @param partDirPath Directory where the single part file should exist
 * @return Single part file status
 * @throws IOException
 */
public static FileStatus getSinglePartFileStatus(Configuration conf, Path partDirPath) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    FileStatus singlePartFileStatus = null;
    int partFiles = 0;
    for (FileStatus fileStatus : fs.listStatus(partDirPath)) {
        if (fileStatus.getPath().getName().equals("part-m-00000")) {
            singlePartFileStatus = fileStatus;
        }
        if (fileStatus.getPath().getName().startsWith("part-m-")) {
            ++partFiles;
        }
    }
    Preconditions.checkState(partFiles == 1,
            "getSinglePartFile: Part file count should be 1, but is " + partFiles);
    return singlePartFileStatus;
}
From source file: com.kylinolap.dict.lookup.HiveTable.java
License: Apache License

private FileStatus findOnlyFile(String hdfsDir, FileSystem fs) throws FileNotFoundException, IOException {
    FileStatus[] files = fs.listStatus(new Path(hdfsDir));
    ArrayList<FileStatus> nonZeroFiles = Lists.newArrayList();
    for (FileStatus f : files) {
        if (f.getLen() > 0)
            nonZeroFiles.add(f);
    }
    if (nonZeroFiles.size() != 1)
        throw new IllegalStateException(
                "Expect 1 and only 1 non-zero file under " + hdfsDir + ", but find " + nonZeroFiles.size());
    return nonZeroFiles.get(0);
}
From source file: com.kylinolap.job.hadoop.AbstractHadoopJob.java
License: Apache License

public void addInputDirs(String input, Job job) throws IOException {
    for (String inp : StringSplitter.split(input, ",")) {
        inp = inp.trim();
        if (inp.endsWith("/*")) {
            inp = inp.substring(0, inp.length() - 2);
            FileSystem fs = FileSystem.get(job.getConfiguration());
            Path path = new Path(inp);
            FileStatus[] fileStatuses = fs.listStatus(path);
            boolean hasDir = false;
            for (FileStatus stat : fileStatuses) {
                if (stat.isDirectory()) {
                    hasDir = true;
                    addInputDirs(stat.getPath().toString(), job);
                }
            }
            if (fileStatuses.length > 0 && !hasDir) {
                addInputDirs(path.toString(), job);
            }
        } else {
            System.out.println("Add input " + inp);
            FileInputFormat.addInputPath(job, new Path(inp));
        }
    }
}
From source file: com.kylinolap.job.hadoop.cube.StorageCleanupJob.java
License: Apache License

private void cleanUnusedHdfsFiles(Configuration conf) throws IOException {
    JobEngineConfig engineConfig = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
    CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    FileSystem fs = FileSystem.get(conf);
    List<String> allHdfsPathsNeedToBeDeleted = new ArrayList<String>();
    // GlobFilter filter = new
    // GlobFilter(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()
    // + "/kylin-.*");
    FileStatus[] fStatus = fs.listStatus(new Path(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()));
    for (FileStatus status : fStatus) {
        String path = status.getPath().getName();
        if (path.startsWith(JobInstance.JOB_WORKING_DIR_PREFIX)) {
            String kylinJobPath = engineConfig.getHdfsWorkingDirectory() + "/" + path;
            allHdfsPathsNeedToBeDeleted.add(kylinJobPath);
        }
    }

    List<JobInstance> allJobs = JobDAO.getInstance(KylinConfig.getInstanceFromEnv()).listAllJobs();
    for (JobInstance jobInstance : allJobs) {
        // only remove FINISHED and DISCARDED job intermediate files
        if (isJobInUse(jobInstance)) {
            String path = JobInstance.getJobWorkingDir(jobInstance, engineConfig);
            allHdfsPathsNeedToBeDeleted.remove(path);
            log.info("Remove " + path + " from deletion list, as the path belongs to job "
                    + jobInstance.getUuid() + " with status " + jobInstance.getStatus());
        }
    }

    // remove every segment working dir from deletion list
    for (CubeInstance cube : cubeMgr.listAllCubes()) {
        for (CubeSegment seg : cube.getSegments()) {
            String jobUuid = seg.getLastBuildJobID();
            if (jobUuid != null && !jobUuid.equals("")) {
                String path = JobInstance.getJobWorkingDir(jobUuid, engineConfig.getHdfsWorkingDirectory());
                allHdfsPathsNeedToBeDeleted.remove(path);
                log.info("Remove " + path + " from deletion list, as the path belongs to segment " + seg
                        + " of cube " + cube.getName());
            }
        }
    }

    if (delete) {
        // remove files
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            log.info("Deleting hdfs path " + hdfsPath);
            Path p = new Path(hdfsPath);
            if (fs.exists(p)) {
                fs.delete(p, true);
                log.info("Deleted hdfs path " + hdfsPath);
            } else {
                log.info("Hdfs path " + hdfsPath + " does not exist");
            }
        }
    } else {
        System.out.println("--------------- HDFS Path To Be Deleted ---------------");
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            System.out.println(hdfsPath);
        }
        System.out.println("-------------------------------------------------------");
    }
}
From source file: com.kylinolap.job.tools.DeployCoprocessorCLI.java
License: Apache License

public static Path getNewestCoprocessorJar(KylinConfig config, FileSystem fileSystem) throws IOException {
    Path coprocessorDir = getCoprocessorHDFSDir(fileSystem, config);
    FileStatus newestJar = null;
    for (FileStatus fileStatus : fileSystem.listStatus(coprocessorDir)) {
        if (fileStatus.getPath().toString().endsWith(".jar")) {
            if (newestJar == null) {
                newestJar = fileStatus;
            } else if (newestJar.getModificationTime() < fileStatus.getModificationTime()) {
                newestJar = fileStatus;
            }
        }
    }
    if (newestJar == null)
        return null;

    Path path = newestJar.getPath().makeQualified(fileSystem.getUri(), null);
    logger.info("The newest coprocessor is " + path.toString());
    return path;
}
From source file: com.kylinolap.job.tools.DeployCoprocessorCLI.java
License: Apache License

public static Path uploadCoprocessorJar(String localCoprocessorJar, FileSystem fileSystem,
        Set<String> oldJarPaths) throws IOException {
    Path uploadPath = null;
    File localCoprocessorFile = new File(localCoprocessorJar);

    // check existing jars
    if (oldJarPaths == null) {
        oldJarPaths = new HashSet<String>();
    }
    Path coprocessorDir = getCoprocessorHDFSDir(fileSystem, KylinConfig.getInstanceFromEnv());
    for (FileStatus fileStatus : fileSystem.listStatus(coprocessorDir)) {
        // compare against the local file's length, not the path string's length
        if (fileStatus.getLen() == localCoprocessorFile.length()
                && fileStatus.getModificationTime() == localCoprocessorFile.lastModified()) {
            uploadPath = fileStatus.getPath();
            break;
        }
        String filename = fileStatus.getPath().toString();
        if (filename.endsWith(".jar")) {
            oldJarPaths.add(filename);
        }
    }

    // upload if not existing
    if (uploadPath == null) {
        // figure out a unique new jar file name
        Set<String> oldJarNames = new HashSet<String>();
        for (String path : oldJarPaths) {
            oldJarNames.add(new Path(path).getName());
        }
        String baseName = getBaseFileName(localCoprocessorJar);
        String newName = null;
        int i = 0;
        while (newName == null) {
            newName = baseName + "-" + (i++) + ".jar";
            if (oldJarNames.contains(newName))
                newName = null;
        }

        // upload
        uploadPath = new Path(coprocessorDir, newName);
        FileInputStream in = null;
        FSDataOutputStream out = null;
        try {
            in = new FileInputStream(localCoprocessorFile);
            out = fileSystem.create(uploadPath);
            IOUtils.copy(in, out);
        } finally {
            IOUtils.closeQuietly(in);
            IOUtils.closeQuietly(out);
        }

        fileSystem.setTimes(uploadPath, localCoprocessorFile.lastModified(), System.currentTimeMillis());
    }

    uploadPath = uploadPath.makeQualified(fileSystem.getUri(), null);
    return uploadPath;
}
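The stream handling in this example predates try-with-resources; under Java 7+ the copy step can be written so both streams are closed even when IOUtils.copy throws. A sketch, reusing localCoprocessorFile, fileSystem, and uploadPath from the method above:

// try-with-resources closes both streams automatically, even on failure
try (FileInputStream in = new FileInputStream(localCoprocessorFile);
        FSDataOutputStream out = fileSystem.create(uploadPath)) {
    IOUtils.copy(in, out);
}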
From source file: com.liferay.hadoop.store.HDFSStore.java
License: Open Source License

@Override
public String[] getFileNames(long companyId, long repositoryId, String dirName) throws SystemException {
    Path fullPath = HadoopManager.getFullDirPath(companyId, repositoryId, dirName);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();
        FileStatus[] listStatus = fileSystem.listStatus(fullPath);
        if ((listStatus == null) || (listStatus.length < 1)) {
            return new String[0];
        }

        List<String> fileNameList = new ArrayList<String>(listStatus.length);
        for (FileStatus fileStatus : listStatus) {
            String fileStatusPathString = fileStatus.getPath().toString();
            int pos = fileStatusPathString.indexOf(dirName);
            if (pos != -1) {
                fileStatusPathString = fileStatusPathString.substring(pos);
            }
            fileNameList.add(fileStatusPathString);
        }

        return fileNameList.toArray(new String[fileNameList.size()]);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file: com.liferay.hadoop.store.HDFSStore.java
License: Open Source License

protected void deleteEmptyAncestors(long companyId, long repositoryId, Path path) throws SystemException {
    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();
        FileStatus[] listStatus = fileSystem.listStatus(path);
        if ((listStatus == null) || (listStatus.length > 0)) {
            return;
        }

        Path parentPath = path.getParent();
        if (fileSystem.delete(path, true) && fileSystem.exists(parentPath)) {
            deleteEmptyAncestors(companyId, repositoryId, parentPath);
        }
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file: com.lightboxtechnologies.spectrum.ExtractData.java
License: Apache License

protected static void chmodR(FileSystem fs, Path p) throws IOException {
    final FsPermission perm = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
    final FileStatus[] list = fs.listStatus(p);
    for (FileStatus f : list) {
        // isDirectory() replaces the deprecated isDir(); same behavior
        if (f.isDirectory()) {
            chmodR(fs, f.getPath());
        }
        fs.setPermission(f.getPath(), perm);
    }
    fs.setPermission(p, perm);
}
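A short usage sketch for the helper above; the output directory is hypothetical:

// Recursively set rwxrwxrwx (777) on a finished job's output tree.
FileSystem fs = FileSystem.get(new Configuration());
chmodR(fs, new Path("/user/hadoop/extract-output")); // hypothetical path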