/*
 * Copyright 2013-2015 EMC Corporation. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 * http://www.apache.org/licenses/LICENSE-2.0.txt
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.emc.ecs.sync.source;

import com.emc.ecs.sync.CommonOptions;
import com.emc.ecs.sync.filter.SyncFilter;
import com.emc.ecs.sync.model.SyncEstimate;
import com.emc.ecs.sync.model.SyncMetadata;
import com.emc.ecs.sync.model.object.FileSyncObject;
import com.emc.ecs.sync.target.SyncTarget;
import com.emc.ecs.sync.util.ConfigurationException;
import com.emc.ecs.sync.util.EnhancedThreadPoolExecutor;
import com.emc.ecs.sync.util.FilesystemUtil;
import com.emc.ecs.sync.util.ReadOnlyIterator;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.activation.MimetypesFileTypeMap;
import java.io.File;
import java.io.FilenameFilter;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.*;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.regex.Pattern;

/**
 * The filesystem source reads data from a file or directory.
*/ public class FilesystemSource extends SyncSource<FileSyncObject> { private static final Logger log = LoggerFactory.getLogger(FilesystemSource.class); protected static final String FILE_PREFIX = "file://"; public static final String ABSOLUTE_PATH_OPT = "use-absolute-path"; public static final String ABSOLUTE_PATH_DESC = "Uses the absolute path to the file when storing it instead of the relative path from the source dir."; public static final String DELETE_OLDER_OPT = "delete-older-than"; public static final String DELETE_OLDER_DESC = "when --delete is used, add this option to only delete files that have been modified more than <delete-age> milliseconds ago"; public static final String DELETE_OLDER_ARG_NAME = "delete-age"; public static final String DELETE_CHECK_OPT = "delete-check-script"; public static final String DELETE_CHECK_DESC = "when --delete is used, add this option to execute an external script to check whether a file should be deleted. If the process exits with return code zero, the file is safe to delete."; public static final String DELETE_CHECK_ARG_NAME = "path-to-check-script"; public static final String FOLLOW_LINKS_OPT = "follow-links"; public static final String FOLLOW_LINKS_DESC = "instead of preserving symbolic links, follow them and sync the actual files"; public static final String EXCLUDE_FILENAMES_OPT = "exclude-filenames"; public static final String EXCLUDE_FILENAMES_DESC = "(deprecated - use exclude-paths) A list of regular expressions to search against the file name. If the name matches, the file will be skipped. Since this is a regular expression, take care to escape special characters. For example, to exclude all filenames that begin with a period, the pattern would be \\..*"; public static final String EXCLUDE_FILENAMES_ARG_NAME = "pattern,pattern,..."; public static final String EXCLUDE_PATHS_OPT = "exclude-paths"; public static final String EXCLUDE_PATHS_DESC = "A list of regular expressions to search against the full file path. 
If the path matches, the file will be skipped. Since this is a regular expression, take care to escape special characters. For example, to exclude all files and directories that begin with a period, the pattern would be .*/\\..*"; public static final String EXCLUDE_PATHS_ARG_NAME = "pattern,pattern,..."; protected File rootFile; protected boolean useAbsolutePath = false; private long deleteOlderThan = 0; private File deleteCheckScript; private Date modifiedSince; private boolean followLinks = false; private FilenameFilter filter; private List<String> excludedFilenames; private List<Pattern> excludedFilenamePatterns; private List<String> excludedPaths; private List<Pattern> excludedPathPatterns; protected MimetypesFileTypeMap mimeMap; public FilesystemSource() { mimeMap = new MimetypesFileTypeMap(); filter = new SourceFilter(); } @Override public boolean canHandleSource(String sourceUri) { return sourceUri.startsWith(FILE_PREFIX); } @Override public SyncEstimate createEstimate() { SyncEstimate estimate = new SyncEstimate(); final EnhancedThreadPoolExecutor dirExecutor = new EnhancedThreadPoolExecutor(8, new LinkedBlockingDeque<Runnable>(), "dirEstimator"); final EnhancedThreadPoolExecutor fileExecutor = new EnhancedThreadPoolExecutor(8, new LinkedBlockingDeque<Runnable>(100), "fileEstimator"); dirExecutor.submit(new EstimateTask(rootFile, estimate, dirExecutor, fileExecutor)); new Thread(new Runnable() { @Override public void run() { while (true) { try { if (dirExecutor.getActiveCount() + fileExecutor.getActiveCount() == 0) { dirExecutor.shutdown(); fileExecutor.shutdown(); break; } Thread.sleep(1000); } catch (Throwable t) { log.warn("unexpected exception in estimation monitor", t); } } } }).start(); return estimate; } @Override public boolean veto(FileSyncObject syncObject) { return veto(syncObject.getRawSourceIdentifier()) || super.veto(syncObject); } protected boolean veto(File file) { if (SyncMetadata.METADATA_DIR.equals(file.getName()) || 
SyncMetadata.DIR_META_FILE.equals(file.getName())) return true; if (modifiedSince != null && file.lastModified() < modifiedSince.getTime()) return true; return false; } @Override public Iterator<FileSyncObject> iterator() { return Collections .singletonList(new FileSyncObject(this, mimeMap, rootFile, getRelativePath(rootFile), followLinks)) .iterator(); } @Override public Iterator<FileSyncObject> childIterator(FileSyncObject syncObject) { if (syncObject.isDirectory()) return new DirectoryIterator(syncObject.getRawSourceIdentifier(), syncObject.getRelativePath()); else return null; } @Override public Options getCustomOptions() { Options opts = new Options(); opts.addOption(Option.builder().longOpt(ABSOLUTE_PATH_OPT).desc(ABSOLUTE_PATH_DESC).build()); opts.addOption(Option.builder().longOpt(DELETE_OLDER_OPT).desc(DELETE_OLDER_DESC).hasArg() .argName(DELETE_OLDER_ARG_NAME).build()); opts.addOption(Option.builder().longOpt(DELETE_CHECK_OPT).desc(DELETE_CHECK_DESC).hasArg() .argName(DELETE_CHECK_ARG_NAME).build()); opts.addOption(Option.builder().longOpt(FOLLOW_LINKS_OPT).desc(FOLLOW_LINKS_DESC).build()); opts.addOption(Option.builder().longOpt(EXCLUDE_FILENAMES_OPT).desc(EXCLUDE_FILENAMES_DESC).hasArgs() .argName(EXCLUDE_FILENAMES_ARG_NAME).valueSeparator(',').build()); opts.addOption(Option.builder().longOpt(EXCLUDE_PATHS_OPT).desc(EXCLUDE_PATHS_DESC).hasArgs() .argName(EXCLUDE_PATHS_ARG_NAME).valueSeparator(',').build()); return opts; } @Override public void parseCustomOptions(CommandLine line) { if (!sourceUri.startsWith(FILE_PREFIX)) throw new ConfigurationException("source must start with " + FILE_PREFIX); try { rootFile = new File(new URI(sourceUri)); } catch (URISyntaxException e) { throw new ConfigurationException("Invalid URI", e); } useAbsolutePath = line.hasOption(ABSOLUTE_PATH_OPT); if (line.hasOption(DELETE_OLDER_OPT)) { deleteOlderThan = Long.parseLong(line.getOptionValue(DELETE_OLDER_OPT)); } if (line.hasOption(DELETE_CHECK_OPT)) { deleteCheckScript = 
new File(line.getOptionValue(DELETE_CHECK_OPT)); } followLinks = line.hasOption(FOLLOW_LINKS_OPT); if (line.hasOption(EXCLUDE_FILENAMES_OPT)) { log.warn(EXCLUDE_FILENAMES_OPT + " is deprected; please use " + EXCLUDE_PATHS_OPT); excludedFilenames = Arrays.asList(line.getOptionValues(EXCLUDE_FILENAMES_OPT)); } if (line.hasOption(EXCLUDE_PATHS_OPT)) { excludedPaths = Arrays.asList(line.getOptionValues(EXCLUDE_PATHS_OPT)); } } @Override public void configure(SyncSource source, Iterator<SyncFilter> filters, SyncTarget target) { if (!rootFile.exists()) throw new ConfigurationException("The source " + rootFile + " does not exist"); if (deleteCheckScript != null && !deleteCheckScript.exists()) throw new ConfigurationException("Delete check script " + deleteCheckScript + " does not exist"); if (excludedFilenames != null) { excludedFilenamePatterns = new ArrayList<>(); for (String pattern : excludedFilenames) { excludedFilenamePatterns.add(Pattern.compile(pattern)); } } if (excludedPaths != null) { excludedPathPatterns = new ArrayList<>(); for (String pattern : excludedPaths) { excludedPathPatterns.add(Pattern.compile(pattern)); } } } @Override public String getName() { return "Filesystem Source"; } @Override public String getDocumentation() { return "The filesystem source reads data from a file or directory. " + "It is triggered by setting the source to a valid File URL:\n" + "file://<path>, e.g. file:///home/user/myfiles\n" + "If the URL refers to a file, only that file will be " + "transferred. If a directory is specified, the contents of " + "the directory will be transferred. Unless the --non-recursive" + "flag is set, the subdirectories will also be recursively " + "transferred. 
By default, any metadata side-car files " + "will be assigned to their " + "target objects; use --" + CommonOptions.IGNORE_METADATA_OPTION + " to ignore the metadata directory."; } @Override public void delete(FileSyncObject syncObject) { syncObject.delete(deleteOlderThan, deleteCheckScript); } protected String getRelativePath(File file) { String relativePath = file.getAbsolutePath(); if (!useAbsolutePath && rootFile != null && relativePath.startsWith(rootFile.getAbsolutePath())) { relativePath = relativePath.substring(rootFile.getAbsolutePath().length()); } if (File.separatorChar == '\\') { relativePath = relativePath.replace('\\', '/'); } if (relativePath.startsWith("/")) { relativePath = relativePath.substring(1); } return relativePath; } public class DirectoryIterator extends ReadOnlyIterator<FileSyncObject> { private Iterator<File> childFiles; private String relativePath; public DirectoryIterator(File directory, String relativePath) { File[] files = directory.listFiles(filter); if (files == null) files = new File[] {}; childFiles = Arrays.asList(files).iterator(); this.relativePath = relativePath; } @Override protected FileSyncObject getNextObject() { while (childFiles.hasNext()) { File child =; if (SyncMetadata.METADATA_DIR.equals(child.getName())) continue; // don't recurse into the metadata dir String childPath = relativePath + "/" + child.getName(); childPath = childPath.replaceFirst("^/", ""); return new FileSyncObject(FilesystemSource.this, mimeMap, child, childPath, followLinks); } return null; } } public File getRootFile() { return rootFile; } public void setRootFile(File rootFile) { this.rootFile = rootFile; } /** * @return the useAbsolutePath */ public boolean isUseAbsolutePath() { return useAbsolutePath; } /** * @param useAbsolutePath the useAbsolutePath to set */ public void setUseAbsolutePath(boolean useAbsolutePath) { this.useAbsolutePath = useAbsolutePath; } /** * @return the mimeMap */ public MimetypesFileTypeMap getMimeMap() { return mimeMap; 
} /** * @param mimeMap the mimeMap to set */ public void setMimeMap(MimetypesFileTypeMap mimeMap) { this.mimeMap = mimeMap; } public Date getModifiedSince() { return modifiedSince; } public void setModifiedSince(Date modifiedSince) { this.modifiedSince = modifiedSince; } public boolean isFollowLinks() { return followLinks; } public void setFollowLinks(boolean followLinks) { this.followLinks = followLinks; } public List<String> getExcludedFilenames() { return excludedFilenames; } /** * @deprecated (2.0.1) use {@link #setExcludedPaths(List)} instead */ public void setExcludedFilenames(List<String> excludedFilenames) { this.excludedFilenames = excludedFilenames; } public List<String> getExcludedPaths() { return excludedPaths; } public void setExcludedPaths(List<String> excludedPaths) { this.excludedPaths = excludedPaths; } public FilenameFilter getFilter() { return filter; } class SourceFilter implements FilenameFilter { @Override public boolean accept(File dir, String name) { File target = new File(dir, name); if (excludedPathPatterns != null) { for (Pattern p : excludedPathPatterns) { if (p.matcher(target.getPath()).matches() || p.matcher(name).matches()) { if (log.isDebugEnabled()) log.debug("Skipping file {}: matches pattern: {}", target, p); return false; } } } if (excludedFilenamePatterns != null) { for (Pattern p : excludedFilenamePatterns) { if (p.matcher(name).matches()) { if (log.isDebugEnabled()) log.debug("Skipping file {}: matches filename pattern: {}", target, p); return false; } } } return true; } } class EstimateTask implements Runnable { private File file; private SyncEstimate estimate; private EnhancedThreadPoolExecutor dirExecutor; private EnhancedThreadPoolExecutor fileExecutor; public EstimateTask(File file, SyncEstimate estimate, EnhancedThreadPoolExecutor dirExecutor, EnhancedThreadPoolExecutor fileExecutor) { this.file = file; this.estimate = estimate; this.dirExecutor = dirExecutor; this.fileExecutor = fileExecutor; } @Override public void 
run() { try { if (veto(file)) return; estimate.incTotalObjectCount(1); if (followLinks || !FilesystemUtil.isSymLink(file)) { // don't recurse or tally symlinks unless we should if (file.isDirectory()) { File[] children = file.listFiles(filter); if (children != null) { for (File child : children) { if (child.isDirectory()) dirExecutor.blockingSubmit( new EstimateTask(child, estimate, dirExecutor, fileExecutor)); else fileExecutor.blockingSubmit( new EstimateTask(child, estimate, dirExecutor, fileExecutor)); } } } else { estimate.incTotalByteCount(file.length()); } } } catch (Throwable t) { log.warn("unexpected exception", t); } } } }