List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
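As a quick orientation before the project examples below (which mostly call the single-Path overload), here is a minimal, self-contained sketch of the Path[] variant shown in the signature above. The directories /tmp/input1 and /tmp/input2 are placeholders, and the filesystem is whatever the default Configuration resolves to:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // The Path[] overload lists each given path in turn and concatenates the results.
        // Placeholder directories; any of them missing raises FileNotFoundException.
        Path[] inputs = new Path[] { new Path("/tmp/input1"), new Path("/tmp/input2") };
        FileStatus[] statuses = fs.listStatus(inputs);
        for (FileStatus status : statuses) {
            System.out.println(status.getPath() + (status.isDirectory() ? " (dir)" : ""));
        }
    }
}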
From source file:com.cloudera.sqoop.util.AppendUtils.java
License:Apache License
/**
 * Move files from source to target using a specified starting partition.
 */
private void moveFiles(FileSystem fs, Path sourceDir, Path targetDir, int partitionStart)
        throws IOException {
    NumberFormat numpart = NumberFormat.getInstance();
    numpart.setMinimumIntegerDigits(PARTITION_DIGITS);
    numpart.setGroupingUsed(false);
    Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
    FileStatus[] tempFiles = fs.listStatus(sourceDir);

    if (null == tempFiles) {
        // If we've already checked that the dir exists, and now it can't be
        // listed, this is a genuine error (permissions, fs integrity, or other).
        throw new IOException("Could not list files from " + sourceDir);
    }

    // Move and rename files & directories from temporary to target-dir thus
    // appending file's next partition
    for (FileStatus fileStat : tempFiles) {
        if (!fileStat.isDir()) {
            // Move imported data files
            String filename = fileStat.getPath().getName();
            Matcher mat = patt.matcher(filename);
            if (mat.matches()) {
                String name = getFilename(filename);
                String fileToMove = name.concat(numpart.format(partitionStart++));
                String extension = getFileExtension(filename);
                if (extension != null) {
                    fileToMove = fileToMove.concat(extension);
                }
                LOG.debug("Filename: " + filename + " repartitioned to: " + fileToMove);
                fs.rename(fileStat.getPath(), new Path(targetDir, fileToMove));
            }
        } else {
            // Move directories (_logs & any other)
            String dirName = fileStat.getPath().getName();
            Path path = new Path(targetDir, dirName);
            int dirNumber = 0;
            while (fs.exists(path)) {
                path = new Path(targetDir, dirName.concat("-").concat(numpart.format(dirNumber++)));
            }
            LOG.debug("Directory: " + dirName + " renamed to: " + path.getName());
            fs.rename(fileStat.getPath(), path);
        }
    }
}
From source file:com.conversantmedia.mapreduce.tool.BaseTool.java
License:Apache License
protected List<FileStatus> getInputFiles(Path input) throws IOException {
    FileSystem fs = FileSystem.get(getConf());
    List<FileStatus> status = new ArrayList<>();
    if (fs.exists(input)) {
        FileStatus inputStatus = fs.getFileStatus(input);
        if (inputStatus.isDirectory()) {
            // Include all files under this directory
            status = Arrays.asList(fs.listStatus(input));
        } else {
            status.add(inputStatus);
        }
    } else {
        // Must be a glob path
        FileStatus[] statusAry = fs.globStatus(input);
        status.addAll(Arrays.asList(statusAry));
    }
    return status;
}
From source file:com.datamoin.tajo.tpcds.TpcDSTestUtil.java
License:Apache License
public static void createTables(String database, TajoClient client) throws Exception {
    String dataDir = getDataDir();
    if (dataDir == null || dataDir.isEmpty()) {
        throw new IOException("No TPCDS_DATA_DIR property. Use -DTPCDS_DATA_DIR=<data dir>");
    }
    if (dataDir.startsWith("hdfs://")) {
        Path path = new Path(dataDir);
        FileSystem fs = path.getFileSystem(new Configuration());
        for (String eachTable : tableNames) {
            Path tableDataDir = new Path(path, eachTable);
            if (!fs.exists(tableDataDir)) {
                throw new IOException(eachTable + " data dir [" + tableDataDir + "] not exists.");
            }
        }
    } else {
        File dataDirFile = new File(dataDir);
        if (!dataDirFile.exists()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] not exists.");
        }
        if (dataDirFile.isFile()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] is not a directory.");
        }
        for (String eachTable : tableNames) {
            File tableDataDir = new File(dataDirFile, eachTable);
            if (!tableDataDir.exists()) {
                throw new IOException(eachTable + " data dir [" + tableDataDir + "] not exists.");
            }
        }
    }

    KeyValueSet opt = new KeyValueSet();
    opt.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);

    LOG.info("Create database: " + database);
    client.executeQuery("create database if not exists " + database);

    Path tpcdsResourceURL = new Path(ClassLoader.getSystemResource("tpcds").toString());
    Path ddlPath = new Path(tpcdsResourceURL, "ddl");
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    FileStatus[] files = localFs.listStatus(ddlPath);

    String dataDirWithPrefix = dataDir;
    if (dataDir.indexOf("://") < 0) {
        dataDirWithPrefix = "file://" + dataDir;
    }

    for (FileStatus eachFile : files) {
        if (eachFile.isFile()) {
            String tableName = eachFile.getPath().getName().split("\\.")[0];
            String query = FileUtil.readTextFile(new File(eachFile.getPath().toUri()));
            query = query.replace("${DB}", database);
            query = query.replace("${DATA_LOCATION}", dataDirWithPrefix + "/" + tableName);
            LOG.info("Create table:" + tableName + "," + query);
            client.executeQuery(query);
        }
    }
}
From source file:com.datasalt.pangool.utils.DCUtils.java
License:Apache License
/**
 * Given a file post-fix, locate a file in the DistributedCache. It iterates over all the
 * local files and returns the first one that meets this condition.
 *
 * @param conf
 *          The Hadoop Configuration.
 * @param filePostFix
 *          The file post-fix.
 * @throws IOException
 */
public static Path locateFileInDC(Configuration conf, String filePostFix) throws IOException {
    FileSystem fS = FileSystem.get(conf);
    Path locatedFile = null;
    if (fS.equals(FileSystem.getLocal(conf))) {
        // We use the File Java API in local because the Hadoop Path, FileSystem, etc. is too
        // slow for tests that need to call this method a lot
        File tmpFolder = new File(conf.get("hadoop.tmp.dir"));
        for (File file : tmpFolder.listFiles()) {
            if (file.getName().endsWith(filePostFix)) {
                locatedFile = new Path(file.toString());
                break;
            }
        }
    } else {
        Path tmpHdfsFolder = new Path(conf.get(HDFS_TMP_FOLDER_CONF, conf.get("hadoop.tmp.dir")));
        for (FileStatus fSt : fS.listStatus(tmpHdfsFolder)) {
            Path path = fSt.getPath();
            if (path.toString().endsWith(filePostFix)) {
                locatedFile = path;
                break;
            }
        }
    }
    return locatedFile;
}
From source file:com.datasalt.utils.commons.BaseConfigurationFactory.java
License:Apache License
private void populate(Configuration conf) throws IOException {
    FileSystem dFs = FileSystem.get(conf);
    if (conf.get("fs.default.name").startsWith("file:")) {
        return;
    }
    //String projectName = projectConf.getProjectName();
    Path libPath = new Path("/" + projectName + "/lib");
    Path confPath = new Path("/" + projectName + "/conf");
    /*
     * Add config folder to classpath
     */
    log.info("Adding HDFS Path " + confPath + " to classpath");
    DistributedCache.addFileToClassPath(confPath, conf);
    FileStatus[] libraries = dFs.listStatus(libPath);
    log.info("Adding all files in " + libPath + " to classpath");
    for (FileStatus library : libraries) {
        /*
         * Add each JAR to classpath - it has its quirks! =|
         */
        DistributedCache.addFileToClassPath(new Path(libPath, library.getPath().getName()), conf);
    }
}
From source file:com.datatorrent.lib.io.IdempotentStorageManagerTest.java
License:Open Source License
@Test
public void testDelete() throws IOException {
    Map<Integer, String> dataOf1 = Maps.newHashMap();
    dataOf1.put(1, "one");
    dataOf1.put(2, "two");
    dataOf1.put(3, "three");

    Map<Integer, String> dataOf2 = Maps.newHashMap();
    dataOf2.put(4, "four");
    dataOf2.put(5, "five");
    dataOf2.put(6, "six");

    Map<Integer, String> dataOf3 = Maps.newHashMap();
    dataOf3.put(7, "seven");
    dataOf3.put(8, "eight");
    dataOf3.put(9, "nine");

    testMeta.storageManager.save(dataOf1, 1, 1);
    testMeta.storageManager.save(dataOf2, 2, 1);
    testMeta.storageManager.save(dataOf3, 3, 1);
    testMeta.storageManager.partitioned(
        Lists.<IdempotentStorageManager>newArrayList(testMeta.storageManager),
        Sets.newHashSet(2, 3));
    testMeta.storageManager.setup(testMeta.context);
    testMeta.storageManager.deleteUpTo(1, 1);

    Path appPath = new Path(testMeta.recoveryPath + '/' + testMeta.context.getValue(DAG.APPLICATION_ID));
    FileSystem fs = FileSystem.newInstance(appPath.toUri(), new Configuration());
    Assert.assertEquals("no data for 1", 0, fs.listStatus(new Path(appPath, Integer.toString(1))).length);
    Assert.assertEquals("no data for 2", false, fs.exists(new Path(appPath, Integer.toString(2))));
    Assert.assertEquals("no data for 3", false, fs.exists(new Path(appPath, Integer.toString(3))));
}
From source file:com.datatorrent.lib.util.FSWindowDataManagerTest.java
License:Apache License
@Test
public void testDelete() throws IOException {
    testMeta.storageManager.setup(testMeta.context);

    Map<Integer, String> dataOf1 = Maps.newHashMap();
    dataOf1.put(1, "one");
    dataOf1.put(2, "two");
    dataOf1.put(3, "three");

    Map<Integer, String> dataOf2 = Maps.newHashMap();
    dataOf2.put(4, "four");
    dataOf2.put(5, "five");
    dataOf2.put(6, "six");

    Map<Integer, String> dataOf3 = Maps.newHashMap();
    dataOf3.put(7, "seven");
    dataOf3.put(8, "eight");
    dataOf3.put(9, "nine");

    for (int i = 1; i <= 9; ++i) {
        testMeta.storageManager.save(dataOf1, 1, i);
    }
    testMeta.storageManager.save(dataOf2, 2, 1);
    testMeta.storageManager.save(dataOf3, 3, 1);

    testMeta.storageManager.partitioned(
        Lists.<WindowDataManager>newArrayList(testMeta.storageManager),
        Sets.newHashSet(2, 3));
    testMeta.storageManager.setup(testMeta.context);
    testMeta.storageManager.deleteUpTo(1, 6);

    Path appPath = new Path(testMeta.applicationPath + '/' + testMeta.storageManager.getRecoveryPath());
    FileSystem fs = FileSystem.newInstance(appPath.toUri(), new Configuration());
    FileStatus[] fileStatuses = fs.listStatus(new Path(appPath, Integer.toString(1)));
    Assert.assertEquals("number of windows for 1", 3, fileStatuses.length);

    TreeSet<String> windows = Sets.newTreeSet();
    for (FileStatus fileStatus : fileStatuses) {
        windows.add(fileStatus.getPath().getName());
    }
    Assert.assertEquals("window list for 1", Sets.newTreeSet(Arrays.asList("7", "8", "9")), windows);
    Assert.assertEquals("no data for 2", false, fs.exists(new Path(appPath, Integer.toString(2))));
    Assert.assertEquals("no data for 3", false, fs.exists(new Path(appPath, Integer.toString(3))));

    testMeta.storageManager.teardown();
}
From source file:com.datatorrent.lib.util.WindowDataManagerTest.java
License:Apache License
@Test
public void testDelete() throws IOException {
    Map<Integer, String> dataOf1 = Maps.newHashMap();
    dataOf1.put(1, "one");
    dataOf1.put(2, "two");
    dataOf1.put(3, "three");

    Map<Integer, String> dataOf2 = Maps.newHashMap();
    dataOf2.put(4, "four");
    dataOf2.put(5, "five");
    dataOf2.put(6, "six");

    Map<Integer, String> dataOf3 = Maps.newHashMap();
    dataOf3.put(7, "seven");
    dataOf3.put(8, "eight");
    dataOf3.put(9, "nine");

    for (int i = 1; i <= 9; ++i) {
        testMeta.storageManager.save(dataOf1, 1, i);
    }
    testMeta.storageManager.save(dataOf2, 2, 1);
    testMeta.storageManager.save(dataOf3, 3, 1);

    testMeta.storageManager.partitioned(
        Lists.<WindowDataManager>newArrayList(testMeta.storageManager),
        Sets.newHashSet(2, 3));
    testMeta.storageManager.setup(testMeta.context);
    testMeta.storageManager.deleteUpTo(1, 6);

    Path appPath = new Path(testMeta.applicationPath + '/' + testMeta.storageManager.getRecoveryPath());
    FileSystem fs = FileSystem.newInstance(appPath.toUri(), new Configuration());
    FileStatus[] fileStatuses = fs.listStatus(new Path(appPath, Integer.toString(1)));
    Assert.assertEquals("number of windows for 1", 3, fileStatuses.length);

    TreeSet<String> windows = Sets.newTreeSet();
    for (FileStatus fileStatus : fileStatuses) {
        windows.add(fileStatus.getPath().getName());
    }
    Assert.assertEquals("window list for 1", Sets.newLinkedHashSet(Arrays.asList("7", "8", "9")), windows);
    Assert.assertEquals("no data for 2", false, fs.exists(new Path(appPath, Integer.toString(2))));
    Assert.assertEquals("no data for 3", false, fs.exists(new Path(appPath, Integer.toString(3))));
}
From source file:com.datatorrent.stram.StramClient.java
License:Apache License
public void copyInitialState(Path origAppDir) throws IOException {
    // locate previous snapshot
    String newAppDir = this.dag.assertAppPath();

    FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf);
    // read snapshot against new dependencies
    Object snapshot = recoveryHandler.restore();
    if (snapshot == null) {
        throw new IllegalArgumentException("No previous application state found in " + origAppDir);
    }
    InputStream logIs = recoveryHandler.getLog();

    // modify snapshot state to switch app id
    ((StreamingContainerManager.CheckpointState) snapshot).setApplicationId(this.dag, conf);
    Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS);

    FileSystem fs = FileSystem.newInstance(origAppDir.toUri(), conf);
    // remove the path that was created by the storage agent during deserialization and replacement
    fs.delete(checkpointPath, true);

    // write snapshot to new location
    recoveryHandler = new FSRecoveryHandler(newAppDir, conf);
    recoveryHandler.save(snapshot);
    OutputStream logOs = recoveryHandler.rotateLog();
    IOUtils.copy(logIs, logOs);
    logOs.flush();
    logOs.close();
    logIs.close();

    // copy sub directories that are not present in target
    FileStatus[] lFiles = fs.listStatus(origAppDir);
    for (FileStatus f : lFiles) {
        if (f.isDirectory()) {
            String targetPath = f.getPath().toString().replace(origAppDir.toString(), newAppDir);
            if (!fs.exists(new Path(targetPath))) {
                LOG.debug("Copying {} to {}", f.getPath(), targetPath);
                FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf);
                //FSUtil.copy(fs, f, fs, new Path(targetPath), false, false, conf);
            } else {
                LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath);
                //FSUtil.setPermission(fs, new Path(targetPath), new FsPermission((short)0777));
            }
        }
    }
}
From source file:com.datatorrent.stram.util.FSUtil.java
License:Apache License
/**
 * Copied from FileUtil to transfer ownership
 *
 * @param srcFS
 * @param srcStatus
 * @param dstFS
 * @param dst
 * @param deleteSource
 * @param overwrite
 * @param conf
 * @return
 * @throws IOException
 */
public static boolean copy(FileSystem srcFS, FileStatus srcStatus, FileSystem dstFS, Path dst,
        boolean deleteSource, boolean overwrite, Configuration conf) throws IOException {
    Path src = srcStatus.getPath();
    //dst = checkDest(src.getName(), dstFS, dst, overwrite);
    if (srcStatus.isDirectory()) {
        //checkDependencies(srcFS, src, dstFS, dst);
        if (!mkdirs(dstFS, dst)) {
            return false;
        }
        FileStatus[] contents = srcFS.listStatus(src);
        for (int i = 0; i < contents.length; i++) {
            copy(srcFS, contents[i], dstFS, new Path(dst, contents[i].getPath().getName()),
                deleteSource, overwrite, conf);
        }
    } else {
        InputStream in = null;
        OutputStream out = null;
        try {
            in = srcFS.open(src);
            out = dstFS.create(dst, overwrite);
            org.apache.hadoop.io.IOUtils.copyBytes(in, out, conf, true);
        } catch (IOException e) {
            org.apache.hadoop.io.IOUtils.closeStream(out);
            org.apache.hadoop.io.IOUtils.closeStream(in);
            throw e;
        }
    }
    // TODO: change group and limit write to group
    if (srcStatus.isDirectory()) {
        dstFS.setPermission(dst, new FsPermission((short) 0777));
    } else {
        dstFS.setPermission(dst, new FsPermission((short) 0777) /*"ugo+w"*/);
    }
    //dstFS.setOwner(dst, null, srcStatus.getGroup());
    /*
    try {
        // transfer owner
        // DOES NOT WORK only super user can change file owner
        dstFS.setOwner(dst, srcStatus.getOwner(), srcStatus.getGroup());
    } catch (IOException e) {
        LOG.warn("Failed to change owner on {} to {}", dst, srcStatus.getOwner(), e);
        throw e;
    }
    */
    if (deleteSource) {
        return srcFS.delete(src, true);
    } else {
        return true;
    }
}