Java tutorial
/* * Copyright (c) 2009 - 2010. School of Information Technology and Electrical * Engineering, The University of Queensland. This software is being developed * for the "Phenomics Ontoogy Driven Data Management Project (PODD)" project. * PODD is a National e-Research Architecture Taskforce (NeAT) project * co-funded by ANDS and ARCS. * * PODD is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * PODD is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with PODD. If not, see <http://www.gnu.org/licenses/>. */ package podd.resources.util; import static podd.util.stream.StreamUtility.createTempFile; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.net.URLEncoder; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.NoSuchElementException; import java.util.Set; import net.schmizz.sshj.SSHClient; import net.schmizz.sshj.sftp.RemoteFile; import net.schmizz.sshj.sftp.SFTPClient; import net.schmizz.sshj.sftp.StatefulSFTPClient; import net.schmizz.sshj.xfer.scp.SCPDownloadClient; import net.schmizz.sshj.xfer.scp.SCPUploadClient; import org.apache.commons.fileupload.FileItem; import org.apache.commons.pool.BaseKeyedObjectPool; import org.apache.commons.pool.KeyedObjectPool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import podd.dataaccess.DatastoreRegistryDAO; import podd.exception.DataAccessException; import podd.exception.EntityException; import podd.exception.MimeTypeHandlingException; import podd.exception.RawDataHandlingException; import podd.exception.VersioningException; import podd.model.data.DataItem; import podd.model.data.versioning.Version; import podd.model.data.versioning.impl.DataItemFactory; import podd.model.datastore.Datastore; import podd.model.entity.PoddObject; import fedora.server.types.gen.DatastreamControlGroup; /** * * * The URL format used to encode how the files are to be retrieved are as follows: * * http://userName@host:port/defaultDirectory/path/file#reference * * If the URL has a fragment (#reference) with the value reference, then it is treated as a file reference and is not directly maintained by PODD. * * * @author Faith Davies * @author Philip Wu * @version $Id$ */ public class RemoteFileHelper { private static final Logger LOGGER = LoggerFactory.getLogger(RemoteFileHelper.class); private static final String URL_ENCODING = "UTF-8"; private static final String URL_FRAGMENT_REFERENCE = "reference"; private static String tempDir = System.getProperty("java.io.tmpdir"); private DatastoreRegistryDAO datastoreRegistryDao; private DataItemFactory dataItemFactory; /** * Pool for SSH Clients for reusing connections. * The pool is meant to prevent connection blocking that occurs when too many connections are made over a short period of time. * See a quote from one of the data store managers below: * * All our public IP addresses run intrusion prevention. * If the rate of connections is too high, the source IP is * temporarily blocked. * Since this is implemented at the connection level, it * does not/can not exclude successful logins. */ private KeyedObjectPool sshClientPool; private Map<DataItem, File> fileItemMap; static { // Ensure the temporary directory has a trailling slash if (tempDir != null && !tempDir.endsWith("/")) { tempDir += "/"; } } public RemoteFileHelper() { } public void setDatastoreRegistryDao(DatastoreRegistryDAO datastoreRegistryDao) { this.datastoreRegistryDao = datastoreRegistryDao; } public void setDataItemFactory(DataItemFactory dataItemFactory) { this.dataItemFactory = dataItemFactory; } public void setSshClientPool(BaseKeyedObjectPool sshClientPool) { this.sshClientPool = sshClientPool; } public PoddObject attachLocalFiles(PoddObject poddObject, Map<Integer, FileItem> files, Map<Integer, String> desc, String userName) throws IOException, DataAccessException, RawDataHandlingException, EntityException, MimeTypeHandlingException { fileItemMap = new HashMap<DataItem, File>(); for (Integer idx : files.keySet()) { final FileItem fileItem = files.get(idx); final String description = desc.get(idx); final File tempFile = createTempFile(fileItem.getInputStream()); DataItem dataItem = attachOneLocalFile(poddObject, userName, fileItem, description, tempFile); fileItemMap.put(dataItem, tempFile); } return poddObject; } public PoddObject attachRedirectFiles(Datastore datastore, PoddObject poddObject, Map<Integer, FileItem> files, Map<Integer, String> desc, String userName) throws IOException, DataAccessException, RawDataHandlingException, EntityException, MimeTypeHandlingException { fileItemMap = new HashMap<DataItem, File>(); for (Integer idx : files.keySet()) { final FileItem fileItem = files.get(idx); final String description = desc.get(idx); final File tempFile = createTempFile(fileItem.getInputStream()); DataItem dataItem = redirectOneFile(datastore, poddObject, userName, fileItem, description, tempFile); fileItemMap.put(dataItem, tempFile); } return poddObject; } /** * Attach referenced files to the podd object. Only applies to access methods using SFTP. * @param datastore * @param poddObject * @param path The path relative to the default directory of the datastore to where the file exists * @param filenames * @param desc * @param userName * @return * @throws IOException * @throws DataAccessException * @throws RawDataHandlingException * @throws EntityException * @throws MimeTypeHandlingException */ public PoddObject attachRedirectFilenames(Datastore datastore, PoddObject poddObject, String path, Map<Integer, String> filenames, Map<Integer, String> desc, String userName) throws IOException, DataAccessException, RawDataHandlingException, EntityException, MimeTypeHandlingException { LOGGER.info("attachRedirectFilenames"); SSHClient ssh = null; SFTPClient sftpClient = null; try { ssh = borrowSSHClient(datastore); //session = ssh.startSession(); sftpClient = ssh.newSFTPClient(); StatefulSFTPClient statefulSFTPClient = new StatefulSFTPClient(sftpClient.getSFTPEngine()); // Change to the directory that contains the files StringBuilder changeDirectory = new StringBuilder(); changeDirectory.append(datastore.getDefaultDirectory()); if (!datastore.getDefaultDirectory().endsWith("/")) { changeDirectory.append("/"); } if (path != null) { if (path.startsWith("/") && path.length() > 0) changeDirectory.append(path.substring(1)); else changeDirectory.append(path); } String changeDirectoryString = changeDirectory.toString(); LOGGER.info("changeDirectory=" + changeDirectoryString); statefulSFTPClient.cd(changeDirectoryString); // Link each file to the podd object for (Integer idx : filenames.keySet()) { final String filename = filenames.get(idx); final String description = desc.get(idx); if (datastore.getAccessMethod().equals("SFTP")) { LOGGER.info("Opening connection to filename: " + filename); // Open a connection to the file RemoteFile remoteFile = statefulSFTPClient.open(filename); referenceOneFile(datastore, poddObject, userName, description, remoteFile.getInputStream(), path, filename, remoteFile.length()); remoteFile.close(); } } statefulSFTPClient.close(); } finally { if (sftpClient != null) sftpClient.close(); /* if (null != ssh && ssh.isConnected()) { ssh.disconnect(); } */ this.returnSSHClient(datastore, ssh); } return poddObject; } public HashMap<String, String> uploadFiles(Datastore datastore, PoddObject poddObject) throws IOException { LOGGER.info("uploadFiles"); HashMap<String, String> transferErrorMap = new HashMap<String, String>(); SSHClient ssh = null; try { ssh = borrowSSHClient(datastore); final SFTPClient sftpClient = ssh.newSFTPClient(); // check if the default directory exists and if not create it final String defaultDir = datastore.getDefaultDirectory(); if (!defaultDir.equals("") && defaultDir.endsWith("/")) { LOGGER.info("truncating trailing / for " + defaultDir); // when we check if the directory exists we want to remove the trailing '/' sftpClient.mkdirs(defaultDir.substring(0, defaultDir.length() - 1)); } // transfer data items for (DataItem dataItem : fileItemMap.keySet()) { LOGGER.info("fileItem=" + fileItemMap.get(dataItem)); if (fileItemMap.get(dataItem) != null) { final String source = fileItemMap.get(dataItem).getAbsolutePath().toString(); final String filename = getDatastoreFilename(dataItem.getAtLatestVersion().getValue(), poddObject.getPid()); LOGGER.info("datastoreFilename=" + filename); if (datastore.getAccessMethod().equals("SFTP")) { // SFTP sftpClient.put(source, filename); } else { // SSH final SCPUploadClient client = ssh.newSCPFileTransfer().newSCPUploadClient(); if (client.copy(source, filename) != 0) { // save any error so that we can send them back to the user for (String msg : client.getWarnings()) { transferErrorMap.put(dataItem.getItemName(), msg); } } } } } } finally { /* if (null != ssh && ssh.isConnected()) { ssh.disconnect(); }*/ this.returnSSHClient(datastore, ssh); } return transferErrorMap; } /** * Most efficient access to downloading a file via an inputstream for access methods using SFTP. * Access methods using SCP, will not see any performance gains since the file must be downloaded in * its entirety before being passed on to the user. * @param url * @param pid * @return * @throws IOException * @throws DataAccessException */ public InputStream downloadFilestream(URL url, String pid) throws IOException, DataAccessException { LOGGER.info("downloadFilestream"); final Datastore datastore = checkValidDataStore(url); final String pathToFile = getPathToFile(url, pid); //getDatastoreFilename(url, pid); LOGGER.info("pathToFile=" + pathToFile); if (datastore.getAccessMethod().equals("SFTP")) { SSHClient ssh = null; try { ssh = borrowSSHClient(datastore); // SFTP final SFTPClient sftpClient = ssh.newSFTPClient(); RemoteFile remoteFile = sftpClient.open(pathToFile); // Leave the SSHClient connection open, until the InputStream has been closed SelfCleaningRemoteFileInputStreamWrapper wrapperStream = new SelfCleaningRemoteFileInputStreamWrapper( datastore, ssh, sftpClient, remoteFile.getInputStream()); return wrapperStream; } catch (Exception ex) { /* if (null != ssh && ssh.isConnected()) { ssh.disconnect(); }*/ ex.printStackTrace(); this.returnSSHClient(datastore, ssh); throw new DataAccessException(ex); } } else { // Since SCPDownloadClient provides not InputStream, we have to download the whole file first File tmpFile = downloadFile(url, pid); FileInputStream fis = new FileInputStream(tmpFile); return fis; } } public File downloadFile(URL url, String pid) throws IOException, DataAccessException { LOGGER.info("downloadFile"); final Datastore datastore = checkValidDataStore(url); final String pathToFile = getPathToFile(url, pid); //getDatastoreFilename(url, pid); LOGGER.info("pathToFile=" + pathToFile); SSHClient ssh = null; try { ssh = borrowSSHClient(datastore); String tempFilePath = tempDir + pathToFile; LOGGER.info("tempFilePath=" + tempFilePath); File tempFile = new File(tempFilePath); if (!tempFile.exists()) { // Create directories to file int slashIndex = pathToFile.lastIndexOf('/'); LOGGER.info("slashIndex=" + slashIndex); if (slashIndex > 0) { // Directories exist in the path String dirs = pathToFile.substring(0, slashIndex); String tempDirs = tempDir + dirs; LOGGER.info("dirs=" + tempDirs); File dirsFile = new File(tempDirs); boolean dirsMade = dirsFile.mkdirs(); LOGGER.info("dirsMade=" + dirsMade); } // Once the directories have been made, then create the file tempFile.createNewFile(); } if (datastore.getAccessMethod().equals("SFTP")) { // SFTP final SFTPClient sftpClient = ssh.newSFTPClient(); sftpClient.get(pathToFile, tempFile.getAbsolutePath()); //RemoteFile remoteFile = sftpClient.open(pathToFile); return tempFile; } else { // Since SCPDownloadClient provides not InputStream, we have to download the whole file first final SCPDownloadClient client = ssh.newSCPFileTransfer().newSCPDownloadClient(); if (client.copy(pathToFile, tempFile.getAbsolutePath()) != 0) { // save any error so that we can send them back to the user for (String msg : client.getWarnings()) { LOGGER.error("Error downloading file <" + pathToFile + ">: " + msg); } tempFile = null; } return tempFile; } } finally { /* if (null != ssh && ssh.isConnected()) { ssh.disconnect(); }*/ this.returnSSHClient(datastore, ssh); } } public Datastore checkValidDataStore(Datastore ds) throws DataAccessException { final String dataStoreAddress = ds.getLoginId() + "@" + ds.getIp() + ":" + ds.getPort(); Datastore datastore = datastoreRegistryDao.loadByIP(ds.getIp()); if (null == datastore || !ds.equals(datastore)) { throw new DataAccessException("Not a registered data store: " + dataStoreAddress); } return datastore; } public void rollbackAttachedFiles(PoddObject poddObject) { if (null != fileItemMap) { try { Set<DataItem> emptyItems = new HashSet<DataItem>(); for (DataItem data : fileItemMap.keySet()) { poddObject.removeDataItem(data.getItemName()); final DataItem dataItem = poddObject.getDataItems().get(data.getItemName()); try { if (null == dataItem) { continue; } final Version<URL> latestVersion = dataItem.getAtLatestVersion(); if (null == latestVersion) { emptyItems.add(dataItem); } else { dataItem.remove(latestVersion.getTimestamp()); } fileItemMap.get(data).delete(); LOGGER.info("Action: attached file removed: " + data.getItemName()); } catch (VersioningException e) { LOGGER.warn("Error removing attached file from data item: " + dataItem.getItemName() + ".", e); } } for (DataItem item : emptyItems) { poddObject.removeDataItem(item.getItemName()); } } finally { fileItemMap.clear(); } } } /** * Borrow or create a new SSH Client * @param datastore * @return * @throws IOException */ private SSHClient borrowSSHClient(Datastore datastore) throws IOException { LOGGER.info("borrowing ssh client"); try { SSHClient sshClient = (SSHClient) sshClientPool.borrowObject(datastore); return sshClient; } catch (NoSuchElementException e) { LOGGER.error("Found exception", e); e.printStackTrace(); throw new IOException(e); } catch (IllegalStateException e) { LOGGER.error("Found exception", e); e.printStackTrace(); throw new IOException(e); } catch (Exception e) { LOGGER.error("Found exception", e); e.printStackTrace(); throw new IOException(e); } /* final SSHClient ssh = new SSHClient(); ssh.loadKnownHosts(); // Accept all hosts ssh.addHostKeyVerifier( new HostKeyVerifier() { public boolean verify(String arg0, int arg1, PublicKey arg2) { return true; // don't bother verifying } }); ssh.connect(datastore.getIp(), datastore.getPort()); ssh.authPublickey(datastore.getLoginId()); //ssh.authPublickey(datastore.getLoginId(), privateKey.getAbsolutePath()); return ssh;*/ } /** * Return the borrowed SSH Client * @param datastore * @param sshClient */ private void returnSSHClient(Datastore datastore, SSHClient sshClient) { LOGGER.info("returning ssh client"); if (sshClient != null && datastore != null) { try { sshClientPool.returnObject(datastore, sshClient); } catch (Exception e) { LOGGER.error("Found exception", e); e.printStackTrace(); } } } private Datastore checkValidDataStore(URL url) throws DataAccessException { return checkValidDataStore(new Datastore(url.getHost(), url.getPort(), url.getUserInfo(), "")); } /** * Translate the url encoded reference to how the filename is actually labelled on the datastore * @param url * @param pid * @return */ private String getDatastoreFilename(URL url, String pid) { String filename = url.getFile(); LOGGER.info("url=" + url); LOGGER.info("url.getFile=" + filename); filename = filename.replace("/" + pid + "/", "/" + pid + "."); // windows doesn't allow ':' in the filename filename = filename.replaceAll(":", "."); return "." + filename; } /** * For reference urls, use the same path as given in the url with any translation * @param url * @return */ private String getDatastoreReferenceFilename(URL url) { String path = url.getPath(); LOGGER.info("path=" + path); StringBuilder sb = new StringBuilder(); if (path != null) { if (path.startsWith("/") && path.length() > 0) sb.append(path.substring(1)); else sb.append(path); } return sb.toString(); } /** * Retrieve the path to the file on the datastore, given the encoded url and PID. * Check condition for whether the url is for a referenced file * @param url * @param pid * @return */ private String getPathToFile(URL url, String pid) { try { URI uri = url.toURI(); String fragment = uri.getFragment(); LOGGER.info("fragment=" + fragment); // If not a reference then translate to data store filename? if (fragment != null && fragment.equals(URL_FRAGMENT_REFERENCE)) { // Reference to existing file return getDatastoreReferenceFilename(url); } else { // Reference to an uploaded file return getDatastoreFilename(url, pid); } } catch (URISyntaxException e) { LOGGER.error("Found exception", e); e.printStackTrace(); } return null; } /** * Method for creating the redirect URL as stored in fedora * Always ends with / character * @param datastore * @param poddObject * @param filename * @return */ private StringBuilder getDefaultDirectoryUrl(Datastore datastore, PoddObject poddObject, String path, String filename) { StringBuilder sb = new StringBuilder(); sb.append("http://").append(datastore.getLoginId()).append("@").append(datastore.getIp()).append(":") .append(datastore.getPort()).append("/").append(datastore.getDefaultDirectory()); if (!datastore.getDefaultDirectory().endsWith("/")) sb.append("/"); if (path != null) { if (path.startsWith("/") && path.length() > 0) sb.append(path.substring(1)); else sb.append(path); } if (!sb.toString().endsWith("/")) sb.append("/"); return sb; } private DataItem redirectOneFile(Datastore datastore, PoddObject poddObject, String userName, FileItem fileItem, String desc, File tempFile) throws IOException, RawDataHandlingException, EntityException, DataAccessException, MimeTypeHandlingException { final String filename = getShortName(fileItem.getName()); /* final String urlString = "http://" + datastore.getLoginId() + "@" + datastore.getIp() + ":" + datastore.getPort() + "/" + datastore.getDefaultDirectory() + poddObject.getPid() + "/" + filename + "." + System.currentTimeMillis();*/ StringBuilder urlString = getDefaultDirectoryUrl(datastore, poddObject, null, filename); // To avoid overwriting files with similar names we append timestamp to the end urlString.append(poddObject.getPid()).append(filename).append(".").append(System.currentTimeMillis()); DataItem dataItem = dataItemFactory.getNewDataItem(filename, new URL(urlString.toString()), DatastreamControlGroup._R, desc, userName, tempFile); poddObject.addDataItem(filename, dataItem); LOGGER.info( "Action: " + userName + " added redirect file: " + filename + " to object: " + poddObject.getPid()); return dataItem; } /** * Add a new DataItem to the poddObject for a file that already exists on a remote datastore * @param datastore * @param poddObject * @param userName * @param desc * @param fileStream The fileStream here is used to detect the mimeType based on its bytes. Optional * @param path The path relative to the default directory to where the file exists on the datastore * @param filename * @param fileSize * @return * @throws EntityException * @throws MalformedURLException * @throws RawDataHandlingException */ private DataItem referenceOneFile(Datastore datastore, PoddObject poddObject, String userName, String desc, InputStream fileStream, String path, String filename, long fileSize) throws EntityException, MalformedURLException, RawDataHandlingException { StringBuilder urlString = getDefaultDirectoryUrl(datastore, poddObject, path, filename); urlString.append(filename); urlString.append("#"); urlString.append(URL_FRAGMENT_REFERENCE); LOGGER.info("reference URL: " + urlString.toString()); long startTime = System.currentTimeMillis(); DataItem dataItem = dataItemFactory.getNewDataItem(filename, new URL(urlString.toString()), DatastreamControlGroup._R, desc, userName, fileStream, fileSize); long duration = (System.currentTimeMillis() - startTime) / 1000L; LOGGER.info("Time taken to determine mime type: " + duration + " seconds"); poddObject.addDataItem(filename, dataItem); LOGGER.info( "Action: " + userName + " added redirect file: " + filename + " to object: " + poddObject.getPid()); return dataItem; } private DataItem attachOneLocalFile(PoddObject poddObject, String userName, FileItem fileItem, String desc, File tempFile) throws IOException, RawDataHandlingException, EntityException, DataAccessException, MimeTypeHandlingException { final String filename = getShortName(fileItem.getName()); DataItem dataItem = dataItemFactory.getNewDataItem(filename, tempFile, DatastreamControlGroup._M, desc, userName); poddObject.addDataItem(filename, dataItem); LOGGER.info("Action: attached local file: " + filename + ", user: " + userName); return dataItem; } public static String getShortName(String fullName) { int index = fullName.lastIndexOf("\\"); // if it is not windows based path, try unix if (index == -1) { index = fullName.lastIndexOf("/"); } String filename = fullName.substring(index + 1); ; try { filename = URLEncoder.encode(filename, URL_ENCODING); } catch (UnsupportedEncodingException e) { LOGGER.error("Error encoding filename [" + filename + "] for data item."); } return filename; } /** * Inner class * This wrapper class allows us to close the SSHClient after the input stream has closed, which is handled by the tomcat container * @author Philip Wu * */ class SelfCleaningRemoteFileInputStreamWrapper extends InputStream { private SSHClient sshClient; private InputStream is; private Datastore datastore; private SFTPClient sftpClient; public SelfCleaningRemoteFileInputStreamWrapper(Datastore datastore, SSHClient sshClient, SFTPClient sftpClient, InputStream is) { this.datastore = datastore; this.sshClient = sshClient; this.sftpClient = sftpClient; this.is = is; } public void setSshClient(SSHClient sshClient) { this.sshClient = sshClient; } public int available() throws IOException { // TODO Auto-generated method stub return is.available(); } public synchronized void mark(int readlimit) { // TODO Auto-generated method stub is.mark(readlimit); } public boolean markSupported() { // TODO Auto-generated method stub return is.markSupported(); } public int read() throws IOException { // TODO Auto-generated method stub return 0; } public int read(byte[] b, int off, int len) throws IOException { // TODO Auto-generated method stub return is.read(b, off, len); } public int read(byte[] b) throws IOException { // TODO Auto-generated method stub return is.read(b); } public synchronized void reset() throws IOException { // TODO Auto-generated method stub is.reset(); } public long skip(long n) throws IOException { // TODO Auto-generated method stub return is.skip(n); } public void close() throws IOException { LOGGER.info("self cleaning remote file input stream close"); /* if (sshClient != null && sshClient.isConnected()) this.sshClient.disconnect(); */ is.close(); sftpClient.close(); returnSSHClient(datastore, sshClient); } } }