List of usage examples for org.apache.hadoop.fs.FileSystem#getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
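Before the project examples below, here is a minimal sketch of the call itself: getFileStatus resolves a single Path to a FileStatus carrying length, modification time, owner, group, permissions, and file/directory type, and throws FileNotFoundException (an IOException subclass) if the path does not exist. The class name and path used here are hypothetical, and a default Configuration is assumed to be on the classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // default filesystem as configured in core-site.xml
        FileSystem fs = FileSystem.get(conf);
        // hypothetical path for illustration only
        Path p = new Path("/tmp/example.txt");
        // throws FileNotFoundException if p does not exist
        FileStatus stat = fs.getFileStatus(p);
        System.out.println(p + " len=" + stat.getLen()
                + " mtime=" + stat.getModificationTime()
                + " owner=" + stat.getOwner()
                + " dir=" + stat.isDirectory());
    }
}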
From source file:com.toy.Client.java
License:Apache License
private static void registerLocalResource(Map<String, LocalResource> localResources, ApplicationId appId,
        FileSystem fs, Path src) throws IOException {
    String pathSuffix = Constants.TOY_PREFIX + appId.toString() + "/" + src.getName();
    Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
    LOG.info("Copy {} from local filesystem to {} and add to local environment", src.getName(), dst.toUri());
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put(src.getName(), amJarRsrc);
}
From source file:com.toy.Client.java
License:Apache License
private void uploadDepAndRegister(Map<String, LocalResource> localResources, ApplicationId appId,
        FileSystem fs, String depname) throws IOException {
    File dep = new File(depname);
    if (!dep.exists())
        throw new IOException(dep.getAbsolutePath() + " does not exist");
    Path dst = new Path(fs.getHomeDirectory(), Constants.TOY_PREFIX + appId.toString() + "/" + dep.getName());
    LOG.info("Copy {} from local filesystem to {} and add to local environment", dep.getName(), dst.toUri());
    FileInputStream input = new FileInputStream(dep);
    final FSDataOutputStream outputStream = fs.create(dst, true);
    ByteStreams.copy(input, outputStream);
    input.close();
    outputStream.close();
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    FileStatus destStatus = fs.getFileStatus(dst);
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put(dep.getName(), amJarRsrc);
}
From source file:com.tripadvisor.hadoop.BackupHdfs.java
License:Apache License
public static void main(String[] args) throws IOException {
    Path baseDir = null;
    String localPath = null;
    String preservePath = null;
    String sIgnoreTablesFilename = null;
    String sNoPreserveFilename = null;
    String sDateString = null;
    long size = 0;

    // UNIX dates for right now
    long now = new java.util.Date().getTime() / 1000;
    long maxDate = now;

    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("--hdfs-path")) {
            baseDir = new Path(args[++i]);
            continue;
        }
        if (args[i].equals("--local-path")) {
            localPath = args[++i];
            continue;
        }
        if (args[i].equals("--preserve-path")) {
            preservePath = args[++i];
            continue;
        }
        if (args[i].equals("--no-preserve")) {
            sNoPreserveFilename = args[++i];
            continue;
        }
        if (args[i].equals("--ignore-tables")) {
            sIgnoreTablesFilename = args[++i];
            continue;
        }
        if (args[i].equals("--sleep")) {
            try {
                m_nSleepSeconds = Integer.parseInt(args[++i]);
            } catch (Exception e) {
                System.err.println("ERROR: " + e.toString() + "\n");
                usage();
            }
            continue;
        }
        if (args[i].equals("--dry-run")) {
            m_bDryRun = true;
            continue;
        }
        if (args[i].equals("--date")) {
            sDateString = args[++i];
            continue;
        }
        if (args[i].equals("--max-date")) {
            maxDate = Long.parseLong(args[++i]);
            continue;
        }
        if (args[i].equals("--max-bytes")) {
            size = Long.parseLong(args[++i]);
            continue;
        }

        System.err.println("ERROR: unknown arg " + args[i]);
        usage();
    }

    if (baseDir == null || localPath == null || preservePath == null || sDateString == null) {
        usage();
    }

    long minDate;

    if ("yesterday".equals(sDateString)) {
        // figure out yesterday's dates
        Calendar cal = Calendar.getInstance();
        cal.roll(Calendar.DAY_OF_YEAR, -1);

        // yesterday midnight
        cal.set(Calendar.HOUR_OF_DAY, 0);
        cal.set(Calendar.MINUTE, 0);
        cal.set(Calendar.SECOND, 0);
        cal.set(Calendar.MILLISECOND, 0);
        minDate = cal.getTimeInMillis() / 1000;

        // yesterday end of day
        cal.set(Calendar.HOUR_OF_DAY, 23);
        cal.set(Calendar.MINUTE, 59);
        cal.set(Calendar.SECOND, 59);
        cal.set(Calendar.MILLISECOND, 999);
        maxDate = cal.getTimeInMillis() / 1000;
    } else if ("last-week".equals(sDateString)) {
        minDate = maxDate - (7 * 24 * 60 * 60);
    } else if ("last-day".equals(sDateString)) {
        minDate = maxDate - (24 * 60 * 60);
    } else {
        // UNIX date since epoch of last backup
        minDate = Long.parseLong(sDateString);
    }

    long tmpDate = 0;
    BackupHdfs bak = new BackupHdfs();

    // initialize the list of tables to ignore
    if (sIgnoreTablesFilename != null) {
        bak.initializeTablesToIgnore(sIgnoreTablesFilename);
    }

    // initialize list of files to not preserve
    if (sNoPreserveFilename != null) {
        bak.initializeNoPreserve(sNoPreserveFilename);
    }

    ArrayList<Path> pathList = new ArrayList<Path>(2000);
    HashMap<Path, Long> hmTimestamps = new HashMap<Path, Long>();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // If the HDFS path is a dir continue
    if (fs.getFileStatus(baseDir).isDir()) {
        Calendar cal = Calendar.getInstance();

        System.err.println("");
        cal.setTimeInMillis(minDate * 1000);
        System.err.println("min date = " + cal.getTime().toString());

        cal.setTimeInMillis(maxDate * 1000);
        System.err.println("max date = " + cal.getTime().toString());

        System.err.println("");
        System.err.println("Searching filesystem: " + baseDir.toUri().getPath());

        bak.checkDir(fs, minDate, maxDate, baseDir, pathList, hmTimestamps);

        System.err.println("");
        System.err.println("Skipped " + m_nIgnoredTables + " files due to ignored tables");

        System.err.println("");
        System.err.println("Number of files to backup = " + pathList.size());
        System.err.println("Total bytes to backup = " + prettyPrintBytes(m_nTotalBytes));

        System.err.println("");
        System.err.println("sorting list of files...");
        Collections.sort(pathList, new DateComparator(hmTimestamps));
        System.err.println("done");

        System.err.println("");
        System.err.println("starting backup...");
        tmpDate = bak.backupFiles(localPath, preservePath, fs, pathList, size);

        bak.closeFiles();

        System.err.println("");
        System.err.println("backup completed...");
    }

    if (tmpDate == 0) {
        // If not size limit reached print out date for right now
        System.out.println(maxDate);
    } else {
        // Print out date for last file backed up
        System.err.println("Size limit reached.");
        System.out.println(tmpDate);
    }

    System.exit(0);
}
From source file:com.tripadvisor.hadoop.BackupHdfs.java
License:Apache License
/**
 * Method to move files from HDFS to the local filesystem.
 *
 * localPath: path on the machine's local filesystem
 * fs: FileSystem object from HDFS
 * pathList: list of paths for files that might need to be backed up
 * size: max size in bytes to be backed up
 *
 * Returns the date of the last file backed up if the size limit was
 * reached; else, zero.
 **/
public long backupFiles(String localPath, String preservePath, FileSystem fs, ArrayList<Path> pathList,
        long size) {
    Path fsPath;
    long tmpSize = 0;
    long tmpDate = 0;

    // Start iterating over all paths
    for (Path hdfsPath : pathList) {
        try {
            long nFileSize = fs.getContentSummary(hdfsPath).getLength();
            tmpSize = tmpSize + nFileSize;

            if ((tmpSize <= size) || (size == 0)) {
                FileStatus stat = fs.getFileStatus(hdfsPath);

                System.err.println("File " + hdfsPath.toUri().getPath() + " " + nFileSize + " bytes, "
                        + "perms: " + stat.getOwner() + "/" + stat.getGroup() + ", "
                        + stat.getPermission().toString());

                tmpDate = stat.getModificationTime() / 1000;

                String sFsPath = localPath + hdfsPath.toUri().getPath();
                fsPath = new Path(sFsPath);

                File f = new File(sFsPath);

                // COMMENTED OUT: until a few backup cycles run
                // and the mtime gets in fact set on all copied
                // files.
                //
                // ignore it if the file exists and has the same mtime
                // if (f.exists() && f.isFile() && f.lastModified() == stat.getModificationTime()) {
                //     System.out.println("no need to backup " + f.toString() + ", mtime matches hdfs");
                //     continue;
                // }

                if (false == m_bDryRun) {
                    // check if we need to back up the local file
                    // (not directory), if it already exists.
                    if (f.exists() && f.isFile()) {
                        // ignore files with substrings in the
                        // no-preserve file
                        if (true == doPreserveFile(sFsPath)) {
                            // move it to the backup path
                            String sNewPath = preservePath + hdfsPath.toUri().getPath();
                            File newFile = new File(sNewPath);

                            // create directory structure for new file?
                            if (false == newFile.getParentFile().exists()) {
                                if (false == newFile.getParentFile().mkdirs()) {
                                    System.err.println("Failed to mkdirs " + newFile.getParentFile().toString());
                                    System.exit(1);
                                }
                            }

                            // rename existing file to new location
                            if (false == f.renameTo(newFile)) {
                                System.err.println("Failed to renameTo " + f.toString() + " to " + newFile.toString());
                                System.exit(1);
                            }

                            System.out.println("preserved " + f.toString() + " into " + newFile.toString());
                        } else {
                            System.out.println("skipped preservation of " + f.toString());
                        }
                    }

                    // copy from hdfs to local filesystem
                    fs.copyToLocalFile(hdfsPath, fsPath);

                    // set the mtime to match hdfs file
                    f.setLastModified(stat.getModificationTime());

                    // compare checksums on both files
                    compareChecksums(fs, hdfsPath, sFsPath);
                }

                // don't print the progress after every file -- go
                // by at least 1% increments
                long nPercentDone = (long) (100 * tmpSize / m_nTotalBytes);
                if (nPercentDone > m_nLastPercentBytesDone) {
                    System.out.println("progress: copied " + prettyPrintBytes(tmpSize) + ", " + nPercentDone
                            + "% done" + ", tstamp=" + tmpDate);
                    m_nLastPercentBytesDone = nPercentDone;
                }

                if (m_nSleepSeconds > 0) {
                    try {
                        Thread.sleep(1000 * m_nSleepSeconds);
                    } catch (Exception e2) {
                        // ignore
                    }
                }
            } else {
                return tmpDate;
            }
        } catch (IOException e) {
            System.err.println("FATAL ERROR: Something wrong with the file");
            System.err.println(e);
            System.out.println(tmpDate);
            System.exit(1);
            return 0;
        }
    }

    return 0;
}
From source file:com.tripadvisor.hadoop.BackupHdfs.java
License:Apache License
/**
 * Method to go through the HDFS filesystem in a depth-first search to
 * find all files.
 *
 * fs: FileSystem object from HDFS
 * minDate: oldest date for files to be backed up
 * maxDate: newest date for files to be backed up
 * p: path in HDFS to look for files
 * pathList: will be filled with all files in p
 * hmTimestamps: hashmap of timestamps for later sorting
 **/
public void checkDir(FileSystem fs, long minDate, long maxDate, Path p, ArrayList<Path> pathList,
        HashMap<Path, Long> hmTimestamps) {
    long tmpDate;
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName())
                    || "jobtracker".equals(p.getName()) || sPath.startsWith("/mapred")
                    || "ops".equals(p.getName()) || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            // dump the mkdir and chmod commands for this
            // directory -- skip root directory only
            {
                FileStatus stat = fs.getFileStatus(p);

                if (!sPath.equals("/")) {
                    m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
                }

                m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

                Short sh = new Short(stat.getPermission().toShort());
                m_wrChmods.println("hadoop fs -chmod " + Long.toOctalString(sh.longValue()) + " " + sPath);
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // another database to regular hive tables to
            // partitioned hive tables. We use table names to
            // both exclude some from the backup, and for the rest
            // to dump out the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                m_nIgnoredTables++;

                if (m_nIgnoredTables < 5) {
                    System.out.println("Skipping ignore-table file: " + sPath);
                } else if (m_nIgnoredTables == 5) {
                    System.out.println("(...not showing other skipped tables...)");
                }
                return;
            }

            FileStatus stat = fs.getFileStatus(p);

            tmpDate = stat.getModificationTime() / 1000;

            // store the chmods/chowns for all files
            m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

            m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath);

            // check dates. is the file too old?
            if (tmpDate < minDate) {
                return;
            }

            // is the file too recent?
            if (tmpDate > maxDate) {
                // System.out.println("file too recent: " + sPath);
                return;
            }

            // file timestamp is ok
            pathList.add(p);
            hmTimestamps.put(p, new Long(tmpDate));

            // store info about total bytes needed to backup
            m_nTotalBytes += fs.getContentSummary(p).getLength();
        }
    } catch (IOException e) {
        System.err.println("ERROR: could not open " + p + ": " + e);
        // System.exit(1) ;
    }
}
From source file:com.tripadvisor.hadoop.BackupHdfs.java
License:Apache License
/** Compare the checksums of the HDFS file and the locally copied file.
 *
 * @author tpalka@tripadvisor.com
 * @date   Fri Jan 27 06:06:00 2012
 */
boolean compareChecksums(FileSystem fs, Path p, String sFsPath) {
    try {
        // get hdfs file info
        FileStatus stat = fs.getFileStatus(p);

        // get HDFS checksum
        FileChecksum ck = fs.getFileChecksum(p);
        String sCk, sCkShort;
        if (ck == null) {
            sCk = sCkShort = "<null>";
        } else {
            sCk = ck.toString();
            sCkShort = sCk.replaceAll("^.*:", "");
        }

        // System.out.println(p.toUri().getPath() + " len=" + stat.getLen()
        //         + " " + stat.getOwner() + "/" + stat.getGroup()
        //         + " checksum=" + sCk);

        // find the local file
        File fLocal = new File(sFsPath);
        if (!fLocal.exists()) {
            System.out.println("CHECKSUM-ERROR: file does not exist: " + sFsPath);
            return false;
        }
        if (!fLocal.isFile()) {
            System.out.println("CHECKSUM-ERROR: path is not a file: " + sFsPath);
            return false;
        }

        if (stat.getLen() != fLocal.length()) {
            System.out.println("CHECKSUM-ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                    + " fslen=" + fLocal.length());
            return false;
        }

        // get local fs checksum
        FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
        if (ckLocal == null) {
            System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
            return false;
        }

        // compare checksums as a string, after stripping the
        // algorithm name from the beginning
        String sCkLocal = ckLocal.toString();
        String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

        if (false == sCkShort.equals(sCkLocalShort)) {
            System.out.println("CHECKSUM-ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk
                    + "\nlocal= " + sCkLocal);
            return false;
        }

        return true;
    } catch (IOException e) {
        System.out.println("CHECKSUM-ERROR: " + sFsPath + " exception " + e.toString());
    }
    return false;
}
From source file:com.tripadvisor.hadoop.VerifyHdfsBackup.java
License:Apache License
public static void main(String[] args) throws IOException {
    Path baseDir = null;
    String sLocalPathRoot = null;
    String sIgnoreTablesFilename = null;
    String sMaxDateString = null;
    String sFromFilename = null;

    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("--hdfs-path")) {
            baseDir = new Path(args[++i]);
            continue;
        }
        if (args[i].equals("--local-path")) {
            sLocalPathRoot = args[++i];
            continue;
        }
        if (args[i].equals("--ignore-tables")) {
            sIgnoreTablesFilename = args[++i];
            continue;
        }
        if (args[i].equals("--max-date")) {
            sMaxDateString = args[++i];
            continue;
        }
        if (args[i].equals("--from-file")) {
            sFromFilename = args[++i];
            continue;
        }

        System.err.println("ERROR: unknown arg " + args[i]);
        usage();
    }

    if (baseDir == null || sLocalPathRoot == null) {
        usage();
    }

    // UNIX date for right now
    long maxDate = new java.util.Date().getTime() / 1000;

    if (sMaxDateString != null) {
        // UNIX date since epoch of last backup
        maxDate = Long.parseLong(sMaxDateString);
    }

    VerifyHdfsBackup bak = new VerifyHdfsBackup();

    // initialize the list of tables to ignore
    if (sIgnoreTablesFilename != null) {
        bak.initializeTablesToIgnore(sIgnoreTablesFilename);
    }

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    if (sFromFilename != null) {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(sFromFilename));
            String sFile;
            while ((sFile = in.readLine()) != null) {
                bak.checkDir(fs, new Path(sFile), sLocalPathRoot, maxDate);
            }
        } catch (Exception e) {
            System.out.println("ERROR: Failed to read from-file " + sFromFilename + ": " + e);
        } finally {
            try {
                in.close();
            } catch (Exception e2) {
            }
        }
    } else {
        // If the HDFS path is a dir continue
        if (fs.getFileStatus(baseDir).isDir()) {
            System.out.println("Searching filesystem: " + baseDir.toUri().getPath());
            bak.checkDir(fs, baseDir, sLocalPathRoot, maxDate);
        }
    }

    System.exit(0);
}
From source file:com.tripadvisor.hadoop.VerifyHdfsBackup.java
License:Apache License
/**
 * Method to go through the HDFS filesystem in a depth-first search to
 * find all files.
 *
 * fs: FileSystem object from HDFS
 * maxDate: newest date for files to be backed up
 * p: path in HDFS to look for files
 **/
public void checkDir(FileSystem fs, Path p, String sLocalPathRoot, long maxDate) {
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName())
                    || "jobtracker".equals(p.getName()) || sPath.startsWith("/mapred")
                    || "ops".equals(p.getName()) || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, fStat[i].getPath(), sLocalPathRoot, maxDate);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // tripmonster to regular hive tables to partitioned
            // hive tables. We use table names to both exclude
            // some from the backup, and for the rest to dump out
            // the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                return;
            }

            // check the file
            FileStatus stat = fs.getFileStatus(p);

            // ignore files that are too new
            if ((stat.getModificationTime() / 1000) > maxDate) {
                System.out.println("IGNORING: " + sPath + " too new");
                return;
            }

            // warn about files that have a mis-matching block
            // size. The checksum check will fail for them
            // anyways, so just catch it here.
            if (stat.getBlockSize() != N_BLOCK_SIZE) {
                System.out.println("ERROR: non-default block size (" + (stat.getBlockSize() / (1024 * 1024))
                        + "M) would fail checksum: " + sPath);
                return;
            }

            // get HDFS checksum
            FileChecksum ck = fs.getFileChecksum(p);
            String sCk, sCkShort;
            if (ck == null) {
                sCk = sCkShort = "<null>";
            } else {
                sCk = ck.toString();
                sCkShort = sCk.replaceAll("^.*:", "");
            }

            System.out.println(sPath + " len=" + stat.getLen() + " " + stat.getOwner() + "/"
                    + stat.getGroup() + " checksum=" + sCk);

            // find the local file
            String sFsPath = sLocalPathRoot + p.toUri().getPath();
            File fLocal = new File(sFsPath);
            if (!fLocal.exists()) {
                Calendar cal = Calendar.getInstance();
                cal.setTimeInMillis(stat.getModificationTime());
                System.out.println("ERROR: file does not exist: " + sFsPath + " hdfs-last-mtime="
                        + cal.getTime().toString());
                return;
            }
            if (!fLocal.isFile()) {
                System.out.println("ERROR: path is not a file: " + sFsPath);
                return;
            }

            if (stat.getLen() != fLocal.length()) {
                System.out.println("ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                        + " fslen=" + fLocal.length());
                return;
            }

            // get local fs checksum
            FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
            if (ckLocal == null) {
                System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
                return;
            }

            // compare checksums as a string, to strip the
            // algorithm name from the beginning
            String sCkLocal = ckLocal.toString();
            String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

            if (false == sCkShort.equals(sCkLocalShort)) {
                System.out.println("ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= "
                        + sCkLocal);
                return;
            }
        }
    } catch (IOException e) {
        System.out.println("ERROR: could not open " + p + ": " + e);
        // System.exit(1) ;
    }
}
From source file:com.twitter.elephanttwin.util.HdfsFsWalker.java
License:Apache License
/**
 * Walk recursively (depth-first) through the file system beneath path, calling the
 * passed in function on each path.
 *
 * @param path the path at which to start walking
 * @param evalFunc a functional representing the desired action to take on each path
 * @throws IOException
 */
public void walk(Path path, Functional.F2<Boolean, FileStatus, FileSystem> evalFunc) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(path.toString()), conf);
    if (fs.exists(path) && (pathFilter == null || pathFilter.accept(path))) {
        walkInternal(fs.getFileStatus(path), fs, evalFunc, 0);
    } else {
        LOG.info("Refusing to walk. fs.exists? " + fs.exists(path) + "pathFilter accepts? "
                + pathFilter.accept(path));
    }
}
From source file:com.twitter.elephanttwin.util.HdfsUtils.java
License:Apache License
public static Iterable<Path> getSubdirectories(final boolean recursive, final String baseDirectory,
        final FileSystem hdfs) throws IOException {
    FileStatus[] fileStat;
    Path basePath = new Path(baseDirectory);
    if (!hdfs.exists(basePath)) {
        throw new IOException(
                hdfs.getWorkingDirectory() + baseDirectory + " does not exist, cannot getSubdirectories");
    }

    FileStatus status = hdfs.getFileStatus(basePath);
    if (!status.isDir()) {
        LOG.warning("tried to find subdirectories of " + status.getPath() + ", but it is a file");
        return Lists.newArrayList(status.getPath());
    }

    // get the stat on all files in the source directory
    fileStat = hdfs.listStatus(basePath);

    if (fileStat == null) {
        throw new IOException(
                "FileSystem.listStatus(" + basePath + ") returned null, cannot getSubdirectories");
    }

    // get paths to the files in the source directory
    return Arrays.asList(FileUtil.stat2Paths(fileStat));
}