Java tutorial
/* * Licensed to the University of California, Berkeley under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional information regarding * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. You may obtain a * copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package tachyon.hadoop; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.util.Progressable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import tachyon.Constants; import tachyon.PrefixList; import tachyon.TachyonURI; import tachyon.client.TachyonFS; import tachyon.client.TachyonFile; import tachyon.client.UfsUtils; import tachyon.client.WriteType; import tachyon.conf.TachyonConf; import tachyon.thrift.DependencyInfo; import tachyon.thrift.FileBlockInfo; import tachyon.thrift.FileInfo; import tachyon.thrift.NetAddress; import tachyon.util.CommonUtils; /** * Base class for Apache Hadoop based Tachyon {@link FileSystem}. This class really just delegates * to {@link tachyon.client.TachyonFS} for most operations. * * All implementing classes must define {@link #isZookeeperMode()} which states if fault tolerant is * used and {@link #getScheme()} for Hadoop's {@link java.util.ServiceLoader} support. */ abstract class AbstractTFS extends FileSystem { public static final String FIRST_COM_PATH = "tachyon_dep/"; public static final String RECOMPUTE_PATH = "tachyon_recompute/"; private static final Logger LOG = LoggerFactory.getLogger(Constants.LOGGER_TYPE); private String mUnderFSAddress; private URI mUri = null; private Path mWorkingDir = new Path(TachyonURI.SEPARATOR); private Statistics mStatistics = null; private TachyonFS mTFS = null; private String mTachyonHeader = null; private final TachyonConf mTachyonConf = new TachyonConf(); @Override public FSDataOutputStream append(Path cPath, int bufferSize, Progressable progress) throws IOException { LOG.info("append(" + cPath + ", " + bufferSize + ", " + progress + ")"); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath)); fromHdfsToTachyon(path); long fileId = mTFS.getFileId(path); TachyonFile file = mTFS.getFile(fileId); if (file.length() > 0) { LOG.warn("This maybe an error."); } WriteType type = getWriteType(); return new FSDataOutputStream(file.getOutStream(type), mStatistics); } @Override public void close() throws IOException { try { super.close(); } finally { if (mTFS != null) { mTFS.close(); } } } /** * Attempts to create a file. Overwrite will not succeed if the path exists and is a folder. * * @param cPath path to create * @param permission permissions of the created file/folder * @param overwrite overwrite if file exists * @param bufferSize the size in bytes of the buffer to be used * @param replication under filesystem replication factor * @param blockSize block size in bytes * @param progress queryable progress * @return an FSDataOutputStream created at the indicated path of a file * @throws IOException if overwrite is not specified and the path already exists or if the path is * a folder */ @Override public FSDataOutputStream create(Path cPath, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { LOG.info("create(" + cPath + ", " + permission + ", " + overwrite + ", " + bufferSize + ", " + replication + ", " + blockSize + ", " + progress + ")"); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } boolean asyncEnabled = mTachyonConf.getBoolean(Constants.ASYNC_ENABLED); if (!asyncEnabled) { TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath)); if (mTFS.exist(path)) { if (overwrite && !mTFS.getFileStatus(-1, path).isFolder) { if (!mTFS.delete(path, false)) { throw new IOException("Failed to delete existing data " + cPath); } } else { throw new IOException(cPath.toString() + " already exists. Directories cannot be " + "overwritten with create."); } } long fileId = mTFS.createFile(path, blockSize); TachyonFile file = mTFS.getFile(fileId); file.setUFSConf(getConf()); WriteType type = getWriteType(); return new FSDataOutputStream(file.getOutStream(type), mStatistics); } if (cPath.toString().contains(FIRST_COM_PATH) && !cPath.toString().contains("SUCCESS")) { TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath)); mTFS.createFile(path, blockSize); String depPath = path.getPath(); depPath = depPath.substring(depPath.indexOf(FIRST_COM_PATH) + FIRST_COM_PATH.length()); depPath = depPath.substring(0, depPath.indexOf(TachyonURI.SEPARATOR)); int depId = Integer.parseInt(depPath); LOG.info("create(" + cPath + ") : " + depPath + " " + depId); depPath = path.getPath(); depPath = depPath.substring(depPath.indexOf("part-") + 5); int index = Integer.parseInt(depPath); DependencyInfo info = mTFS.getClientDependencyInfo(depId); long fileId = info.getChildren().get(index).intValue(); LOG.info("create(" + cPath + ") : " + depPath + " " + index + " " + info + " " + fileId); TachyonFile file = mTFS.getFile(fileId); file.setUFSConf(getConf()); return new FSDataOutputStream(file.getOutStream(WriteType.ASYNC_THROUGH), mStatistics); } if (cPath.toString().contains(RECOMPUTE_PATH) && !cPath.toString().contains("SUCCESS")) { TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath)); mTFS.createFile(path, blockSize); String depPath = path.getPath(); depPath = depPath.substring(depPath.indexOf(RECOMPUTE_PATH) + RECOMPUTE_PATH.length()); depPath = depPath.substring(0, depPath.indexOf(TachyonURI.SEPARATOR)); int depId = Integer.parseInt(depPath); LOG.info("create(" + cPath + ") : " + depPath + " " + depId); depPath = path.getPath(); depPath = depPath.substring(depPath.indexOf("part-") + 5); int index = Integer.parseInt(depPath); DependencyInfo info = mTFS.getClientDependencyInfo(depId); long fileId = info.getChildren().get(index).intValue(); LOG.info("create(" + cPath + ") : " + depPath + " " + index + " " + info + " " + fileId); TachyonFile file = mTFS.getFile(fileId); file.setUFSConf(getConf()); return new FSDataOutputStream(file.getOutStream(WriteType.ASYNC_THROUGH), mStatistics); } TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath)); long fileId; WriteType type = getWriteType(); if (mTFS.exist(path)) { fileId = mTFS.getFileId(path); type = WriteType.MUST_CACHE; } else { fileId = mTFS.createFile(path, blockSize); } TachyonFile file = mTFS.getFile(fileId); file.setUFSConf(getConf()); return new FSDataOutputStream(file.getOutStream(type), mStatistics); } /** * Opens an FSDataOutputStream at the indicated Path with write-progress reporting. Same as * create(), except fails if parent directory doesn't already exist. * * TODO(hy): We need to refactor this method after having a new internal API support (TACHYON-46). * * @param cPath the file name to open * @param overwrite if a file with this name already exists, then if true, the file will be * overwritten, and if false an error will be thrown. * @param bufferSize the size of the buffer to be used. * @param replication required block replication for the file. * @param blockSize the size in bytes of the buffer to be used. * @param progress queryable progress * @throws IOException if 1) overwrite is not specified and the path already exists, 2) if the * path is a folder, or 3) the parent directory does not exist * @see #setPermission(Path, FsPermission) * @deprecated API only for 0.20-append */ @Override @Deprecated public FSDataOutputStream createNonRecursive(Path cPath, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath.getParent())); fromHdfsToTachyon(path); if (!mTFS.exist(path)) { throw new FileNotFoundException("Parent directory does not exist!"); } return this.create(cPath, permission, overwrite, bufferSize, replication, blockSize, progress); } @Override @Deprecated public boolean delete(Path path) throws IOException { return delete(path, true); } /** * Attempts to delete the file or directory with the specified path. * * @param cPath path to delete * @param recursive if true, will attempt to delete all children of the path * @return true if one or more files/directories were deleted; false otherwise * @throws IOException if the path failed to be deleted due to some constraint (ie. non empty * directory with recursive flag disabled) */ @Override public boolean delete(Path cPath, boolean recursive) throws IOException { LOG.info("delete(" + cPath + ", " + recursive + ")"); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath)); fromHdfsToTachyon(path); if (!mTFS.exist(path)) { return false; } boolean rtn = mTFS.delete(path, recursive); if (mTFS.exist(path)) { throw new IOException("Failed to delete path " + path.toString()); } return rtn; } private void fromHdfsToTachyon(TachyonURI path) throws IOException { if (!mTFS.exist(path)) { Path hdfsPath = Utils.getHDFSPath(path, mUnderFSAddress); Configuration conf = new Configuration(getConf()); if (conf.get("fs.defaultFS") == null) { conf.set("fs.defaultFS", mUnderFSAddress); } FileSystem fs = hdfsPath.getFileSystem(conf); if (fs.exists(hdfsPath)) { TachyonURI ufsUri = new TachyonURI(mUnderFSAddress); TachyonURI ufsAddrPath = new TachyonURI(ufsUri.getScheme(), ufsUri.getAuthority(), path.getPath()); // Set the path as the TFS root path. UfsUtils.loadUnderFs(mTFS, path, ufsAddrPath, new PrefixList(null), mTachyonConf); } } } @Override public long getDefaultBlockSize() { return mTachyonConf.getBytes(Constants.USER_DEFAULT_BLOCK_SIZE_BYTE); } @Override public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException { if (file == null) { return null; } if (mStatistics != null) { mStatistics.incrementReadOps(1); } TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(file.getPath())); fromHdfsToTachyon(path); long fileId = mTFS.getFileId(path); if (fileId == -1) { throw new FileNotFoundException("File does not exist: " + file.getPath()); } List<BlockLocation> blockLocations = new ArrayList<BlockLocation>(); List<FileBlockInfo> blocks = mTFS.getFileBlocks(fileId); for (int k = 0; k < blocks.size(); k++) { FileBlockInfo info = blocks.get(k); long offset = info.getOffset(); long end = offset + info.blockInfo.getLength(); // Check if there is any overlapping between [start, start+len] and [offset, end] if (end >= start && offset <= start + len) { ArrayList<String> names = new ArrayList<String>(); ArrayList<String> hosts = new ArrayList<String>(); List<NetAddress> addrs = Lists.newArrayList(); // add the existing in-memory block locations first for (tachyon.thrift.BlockLocation location : info.getBlockInfo().getLocations()) { addrs.add(location.getWorkerAddress()); } // then add under file system location addrs.addAll(info.getUfsLocations()); for (NetAddress addr : addrs) { // Name format is "hostname:data transfer port" String name = addr.host + ":" + addr.dataPort; LOG.debug("getFileBlockLocations : adding name : '" + name + ""); names.add(name); hosts.add(addr.host); } blockLocations.add(new BlockLocation(CommonUtils.toStringArray(names), CommonUtils.toStringArray(hosts), offset, info.blockInfo.getLength())); } } BlockLocation[] ret = new BlockLocation[blockLocations.size()]; blockLocations.toArray(ret); return ret; } /** * {@inheritDoc} * * If the file does not exist in Tachyon, query it from HDFS. */ @Override public FileStatus getFileStatus(Path path) throws IOException { TachyonURI tPath = new TachyonURI(Utils.getPathWithoutScheme(path)); Path hdfsPath = Utils.getHDFSPath(tPath, mUnderFSAddress); LOG.info("getFileStatus(" + path + "): HDFS Path: " + hdfsPath + " TPath: " + mTachyonHeader + tPath); if (mStatistics != null) { mStatistics.incrementReadOps(1); } if (useHdfs()) { fromHdfsToTachyon(tPath); } TachyonFile file; try { file = mTFS.getFile(tPath); } catch (IOException ioe) { LOG.info("File does not exist: " + path); throw new FileNotFoundException("File does not exist: " + path); } FileStatus ret = new FileStatus(file.length(), file.isDirectory(), file.getDiskReplication(), file.getBlockSizeByte(), file.getCreationTimeMs(), file.getCreationTimeMs(), null, null, null, new Path(mTachyonHeader + tPath)); return ret; } /** * Gets the URI schema that maps to the FileSystem. This was introduced in Hadoop 2.x as a means * to make loading new FileSystems simpler. This doesn't exist in Hadoop 1.x, so cannot put * @Override. * * @return schema hadoop should map to. * * @see org.apache.hadoop.fs.FileSystem#createFileSystem(java.net.URI, * org.apache.hadoop.conf.Configuration) */ public abstract String getScheme(); /** * Returns an object implementing the Tachyon-specific client API. * * @return null if initialize() hasn't been called. */ public TachyonFS getTachyonFS() { return mTFS; } @Override public URI getUri() { return mUri; } @Override public Path getWorkingDirectory() { LOG.info("getWorkingDirectory: " + mWorkingDir); return mWorkingDir; } /** * {@inheritDoc} * * Sets up a lazy connection to Tachyon through mTFS. */ @Override public void initialize(URI uri, Configuration conf) throws IOException { super.initialize(uri, conf); LOG.info("initialize(" + uri + ", " + conf + "). Connecting to Tachyon: " + uri.toString()); Utils.addS3Credentials(conf); setConf(conf); mTachyonHeader = getScheme() + "://" + uri.getHost() + ":" + uri.getPort(); // Set the statistics member. Use mStatistics instead of the parent class's variable. mStatistics = statistics; // Load TachyonConf if any and merge to the one in TachyonFS TachyonConf siteConf = ConfUtils.loadFromHadoopConfiguration(conf); if (siteConf != null) { mTachyonConf.merge(siteConf); } mTachyonConf.set(Constants.MASTER_HOSTNAME, uri.getHost()); mTachyonConf.set(Constants.MASTER_PORT, Integer.toString(uri.getPort())); mTachyonConf.set(Constants.USE_ZOOKEEPER, Boolean.toString(isZookeeperMode())); mTFS = TachyonFS.get(mTachyonConf); mUri = URI.create(mTachyonHeader); mUnderFSAddress = mTFS.getUfsAddress(); LOG.info(mTachyonHeader + " " + mUri + " " + mUnderFSAddress); } /** * Determines if zookeeper should be used for the FileSystem. This method should only be used for * {@link #initialize(java.net.URI, org.apache.hadoop.conf.Configuration)}. * * @return true if zookeeper should be used */ protected abstract boolean isZookeeperMode(); @Override public FileStatus[] listStatus(Path path) throws IOException { TachyonURI tPath = new TachyonURI(Utils.getPathWithoutScheme(path)); Path hdfsPath = Utils.getHDFSPath(tPath, mUnderFSAddress); LOG.info("listStatus(" + path + "): HDFS Path: " + hdfsPath); if (mStatistics != null) { mStatistics.incrementReadOps(1); } fromHdfsToTachyon(tPath); if (!mTFS.exist(tPath)) { throw new FileNotFoundException("File does not exist: " + path); } List<FileInfo> files = mTFS.listStatus(tPath); FileStatus[] ret = new FileStatus[files.size()]; for (int k = 0; k < files.size(); k++) { FileInfo info = files.get(k); // TODO(hy): Replicate 3 with the number of disk replications. ret[k] = new FileStatus(info.getLength(), info.isFolder, 3, info.getBlockSizeBytes(), info.getCreationTimeMs(), info.getCreationTimeMs(), null, null, null, new Path(mTachyonHeader + info.getPath())); } return ret; } /** * Attempts to create a folder with the specified path. Parent directories will be created. Mkdirs * will fail if the path already exists or a parent is a file. * * @param cPath path to create * @param permission permissions to grant the created folder * @return true if the indicated folder is created successfully, false otherwise * @throws IOException if the folder cannot be created (e.g., it already exists) */ @Override public boolean mkdirs(Path cPath, FsPermission permission) throws IOException { LOG.info("mkdirs(" + cPath + ", " + permission + ")"); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath)); return mTFS.mkdir(path); } /** * Attempts to open the specified file for reading. * * @param cPath the file name to open * @param bufferSize the size in bytes of the buffer to be used * @return an FSDataInputStream at the indicated path of a file * @throws IOException if the file cannot be opened (e.g., the path is a folder) */ @Override public FSDataInputStream open(Path cPath, int bufferSize) throws IOException { LOG.info("open(" + cPath + ", " + bufferSize + ")"); if (mStatistics != null) { mStatistics.incrementReadOps(1); } TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath)); fromHdfsToTachyon(path); long fileId = mTFS.getFileId(path); return new FSDataInputStream(new HdfsFileInputStream(mTFS, fileId, Utils.getHDFSPath(path, mUnderFSAddress), getConf(), bufferSize, mStatistics, mTachyonConf)); } @Override public boolean rename(Path src, Path dst) throws IOException { LOG.info("rename(" + src + ", " + dst + ")"); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } TachyonURI srcPath = new TachyonURI(Utils.getPathWithoutScheme(src)); TachyonURI dstPath = new TachyonURI(Utils.getPathWithoutScheme(dst)); FileInfo info; try { info = mTFS.getFileStatus(-1, dstPath); } catch (IOException ioe) { info = null; } // If the destination is an existing folder, try to move the src into the folder if (info != null && info.isFolder) { dstPath = dstPath.join(srcPath.getName()); } fromHdfsToTachyon(srcPath); try { return mTFS.rename(srcPath, dstPath); } catch (IOException ioe) { LOG.error("Failed to rename {} to {}", src, dst, ioe); return false; } } @Override public void setWorkingDirectory(Path path) { LOG.info("setWorkingDirectory(" + path + ")"); if (path.isAbsolute()) { mWorkingDir = path; } else { mWorkingDir = new Path(mWorkingDir, path); } } /** * When underfs has a schema, then we can use the hdfs underfs code base. * <p> * When this check is not done, {@link #fromHdfsToTachyon(TachyonURI)} is called, which loads the * default filesystem (hadoop's). When there is no schema, then it may default to Tachyon which * causes a recursive loop. * * @see <a href="https://tachyon.atlassian.net/browse/TACHYON-54">TACHYON-54</a> */ @Deprecated private boolean useHdfs() { return mUnderFSAddress != null && URI.create(mUnderFSAddress).getScheme() != null; } private WriteType getWriteType() { return mTachyonConf.getEnum(Constants.USER_DEFAULT_WRITE_TYPE, WriteType.class); } }