gobblin.data.management.trash.Trash.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.data.management.trash.Trash.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.data.management.trash;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import gobblin.util.PathUtils;

/**
 * Flexible implementation of Trash similar to Hadoop trash. Allows for injecting cleanup policies for snapshots.
 */
public class Trash implements GobblinTrash {

    // Class-wide SLF4J logger.
    private static final Logger LOG = LoggerFactory.getLogger(Trash.class);
    // Owner-only access (all actions for the user, none for group and others). Requested when creating the trash
    // directory itself and each snapshot directory.
    private static final FsPermission PERM = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    // All actions for user, group and others. Requested for the parent of the trash directory when the trash
    // directory does not exist yet and has to be created.
    private static final FsPermission ALL_PERM = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);

    /**
     * Property key for the location of the trash directory in the file system. The location can include a token
     * $USER that will be automatically replaced by the name of the active user. When the key is absent,
     * {@link #DEFAULT_TRASH_DIRECTORY} under the file system's home directory is used instead.
     */
    public static final String TRASH_LOCATION_KEY = "gobblin.trash.location";
    /**
     * Property key naming the {@link SnapshotCleanupPolicy} class to instantiate. The class is loaded by name and
     * constructed through a constructor taking a single {@link java.util.Properties} argument. When the key is
     * absent, {@link TimeBasedSnapshotCleanupPolicy} is used.
     */
    public static final String SNAPSHOT_CLEANUP_POLICY_CLASS_KEY = "gobblin.trash.snapshot.cleanup.policy.class";
    /** Prefix shared by the names of all snapshot directories inside the trash directory. */
    public static final String TRASH_SNAPSHOT_PREFIX = "_TRASH_SNAPSHOT_";
    /** Name of the marker file placed directly inside a directory to identify it as a trash directory. */
    public static final String TRASH_IDENTIFIER_FILE = "_THIS_IS_TRASH_DIRECTORY";
    /** Name of the trash directory, relative to the file system's home directory, used when none is configured. */
    public static final String DEFAULT_TRASH_DIRECTORY = "_GOBBLIN_TRASH";
    /**
     * Formatter for snapshot directory names: the literal {@link #TRASH_SNAPSHOT_PREFIX} followed by a
     * {@code yyyyMMddHHmmss} timestamp, printed and parsed in UTC.
     */
    public static final DateTimeFormatter TRASH_SNAPSHOT_NAME_FORMATTER = DateTimeFormat
            .forPattern(String.format("'%s'yyyyMMddHHmmss", TRASH_SNAPSHOT_PREFIX)).withZone(DateTimeZone.UTC);
    /**
     * Accepts paths whose final name component starts with {@link #TRASH_SNAPSHOT_PREFIX}; the trash identifier
     * file is never accepted.
     */
    public static final PathFilter TRASH_SNAPSHOT_PATH_FILTER = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            if (name.equals(TRASH_IDENTIFIER_FILE)) {
                return false;
            }
            return name.startsWith(TRASH_SNAPSHOT_PREFIX);
        }
    };
    /**
     * Accepts paths whose final name component does not start with {@link #TRASH_SNAPSHOT_PREFIX}; the trash
     * identifier file is never accepted.
     */
    public static final PathFilter TRASH_NOT_SNAPSHOT_PATH_FILTER = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            if (name.equals(TRASH_IDENTIFIER_FILE)) {
                return false;
            }
            return !name.startsWith(TRASH_SNAPSHOT_PREFIX);
        }
    };

    /**
     * Get trash location.
     *
     * <p>
     *   Returns the path that was computed once, at construction time, by {@link #createTrashLocation}.
     * </p>
     *
     * @return {@link org.apache.hadoop.fs.Path} for trash directory.
     * @throws IOException declared in the signature; this implementation only returns a stored field.
     */
    public Path getTrashLocation() throws IOException {
        return this.trashLocation;
    }

    /**
     * Create location of Trash directory. Parsed from props at key {@link #TRASH_LOCATION_KEY}, defaulting to
     * {@link #DEFAULT_TRASH_DIRECTORY} under the home directory reported by the file system.
     *
     * <p>
     *   The resulting path is qualified against the file system and passed to
     *   {@link #ensureTrashLocationExists} before it is returned.
     * </p>
     *
     * @param fs {@link org.apache.hadoop.fs.FileSystem} where trash should be found.
     * @param props {@link java.util.Properties} containing trash configuration.
     * @param user If the trash location contains the token $USER, every occurrence of the token will be replaced,
     *             verbatim, by the value of user.
     * @return qualified {@link org.apache.hadoop.fs.Path} for trash directory.
     * @throws java.io.IOException if the trash directory cannot be validated or created.
     * @throws IllegalArgumentException if the resolved trash location is not an absolute path.
     */
    protected Path createTrashLocation(FileSystem fs, Properties props, String user) throws IOException {
        Path trashLocation;
        if (props.containsKey(TRASH_LOCATION_KEY)) {
            // Literal substitution on purpose: String#replaceAll interprets '$' and '\' inside the replacement
            // string (group references / escapes), so a user name containing either character would throw or be
            // silently altered.
            trashLocation = new Path(props.getProperty(TRASH_LOCATION_KEY).replace("$USER", user));
        } else {
            trashLocation = new Path(fs.getHomeDirectory(), DEFAULT_TRASH_DIRECTORY);
            LOG.info("Using default trash location at " + trashLocation);
        }
        if (!trashLocation.isAbsolute()) {
            throw new IllegalArgumentException(
                    "Trash location must be absolute. Found " + trashLocation.toString());
        }
        Path qualifiedTrashLocation = fs.makeQualified(trashLocation);
        ensureTrashLocationExists(fs, qualifiedTrashLocation);
        return qualifiedTrashLocation;
    }

    /**
     * Makes sure a usable trash directory exists at the given location.
     *
     * <ul>
     *   <li>If nothing exists there, creates the parent directory, the trash directory and the
     *       {@link #TRASH_IDENTIFIER_FILE} marker inside it.</li>
     *   <li>If a directory exists and already contains the marker, nothing is done.</li>
     *   <li>If a directory exists without the marker, the marker is added only when the directory is empty.</li>
     * </ul>
     *
     * @param fs {@link org.apache.hadoop.fs.FileSystem} holding the trash directory.
     * @param trashLocation location of the trash directory.
     * @throws IOException if the location exists but is not a directory, if it is a non-empty directory without the
     *         marker file, or if the directory or marker file could not be created.
     */
    protected void ensureTrashLocationExists(FileSystem fs, Path trashLocation) throws IOException {
        Path identifierFile = new Path(trashLocation, TRASH_IDENTIFIER_FILE);

        if (!fs.exists(trashLocation)) {
            // Fresh setup: parent first, then the trash directory, then the marker file.
            boolean created = fs.mkdirs(trashLocation.getParent(), ALL_PERM)
                    && fs.mkdirs(trashLocation, PERM)
                    && fs.createNewFile(identifierFile);
            if (!created) {
                // Failed to create directory or create trash identifier file.
                throw new IOException("Failed to create trash directory at " + trashLocation.toString());
            }
            return;
        }

        if (!fs.isDirectory(trashLocation)) {
            throw new IOException(String.format("Trash location %s is not a directory.", trashLocation));
        }

        if (fs.exists(identifierFile)) {
            return;
        }

        // If trash identifier file is not present, directory might have been created by user.
        // Add trash identifier file only if directory is empty.
        if (fs.listStatus(trashLocation).length > 0) {
            throw new IOException(String.format(
                    "Trash directory %s exists, but it does not look like a trash directory. "
                            + "File: %s missing and directory is not empty.",
                    trashLocation, TRASH_IDENTIFIER_FILE));
        }
        if (!fs.createNewFile(identifierFile)) {
            throw new IOException(String.format("Failed to create file %s in existing trash directory %s.",
                    TRASH_IDENTIFIER_FILE, trashLocation));
        }
    }

    // File system on which the trash directory lives and on which all moves and deletes are performed.
    protected final FileSystem fs;
    // Qualified path of the trash directory; computed once in the constructor via createTrashLocation.
    private final Path trashLocation;
    // Policy consulted by purgeTrashSnapshots to decide which snapshots get deleted.
    private final SnapshotCleanupPolicy snapshotCleanupPolicy;

    /**
     * Creates a trash on the given file system using an empty {@link java.util.Properties}, i.e. with no trash
     * location or cleanup policy configured explicitly.
     *
     * @param fs {@link org.apache.hadoop.fs.FileSystem} where trash should be found.
     * @throws IOException propagated from the delegated constructor.
     * @deprecated Use {@link gobblin.data.management.trash.TrashFactory}.
     */
    @Deprecated
    public Trash(FileSystem fs) throws IOException {
        this(fs, new Properties());
    }

    /**
     * Creates a trash on the given file system for the current user, as reported by
     * {@link UserGroupInformation#getCurrentUser()}.
     *
     * @param fs {@link org.apache.hadoop.fs.FileSystem} where trash should be found.
     * @param props {@link java.util.Properties} containing trash configuration.
     * @throws IOException propagated from resolving the current user or from the delegated constructor.
     * @deprecated Use {@link gobblin.data.management.trash.TrashFactory}.
     */
    @Deprecated
    public Trash(FileSystem fs, Properties props) throws IOException {
        this(fs, props, UserGroupInformation.getCurrentUser().getUserName());
    }

    protected Trash(FileSystem fs, Properties props, String user) throws IOException {
        this.fs = fs;
        this.trashLocation = createTrashLocation(fs, props, user);
        try {
            Class<?> snapshotCleanupPolicyClass = Class.forName(props.getProperty(SNAPSHOT_CLEANUP_POLICY_CLASS_KEY,
                    TimeBasedSnapshotCleanupPolicy.class.getCanonicalName()));
            this.snapshotCleanupPolicy = (SnapshotCleanupPolicy) snapshotCleanupPolicyClass
                    .getConstructor(Properties.class).newInstance(props);
        } catch (Exception exception) {
            throw new IllegalArgumentException("Could not create snapshot cleanup policy with class "
                    + props.getProperty(SNAPSHOT_CLEANUP_POLICY_CLASS_KEY,
                            TimeBasedSnapshotCleanupPolicy.class.getCanonicalName()),
                    exception);
        }
    }

    /**
     * Move a path to trash. The absolute path of the input path will be replicated under the trash directory.
     *
     * <p>
     *   A relative input path is resolved against the file system's working directory. If the target inside the
     *   trash already exists, the current time in milliseconds is appended to the target name so the earlier
     *   entry is not overwritten.
     * </p>
     *
     * @param path {@link org.apache.hadoop.fs.FileSystem} path to move to trash.
     * @return true if move to trash was done successfully, i.e. the underlying rename reported success.
     * @throws IOException if a file system operation fails.
     */
    @Override
    public boolean moveToTrash(Path path) throws IOException {
        Path fullyResolvedPath = path.isAbsolute() ? path : new Path(this.fs.getWorkingDirectory(), path);
        Path targetPathInTrash = PathUtils.mergePaths(this.trashLocation, fullyResolvedPath);
        Path targetParent = targetPathInTrash.getParent();

        if (!this.fs.exists(targetParent)) {
            // Do not swallow a failed mkdirs silently: the rename below would then just return false with no
            // indication of the cause. The rename is still attempted so the return contract is unchanged.
            if (!this.fs.mkdirs(targetParent)) {
                LOG.warn("Failed to create directory " + targetParent + " in trash. Moving " + fullyResolvedPath
                        + " to trash is likely to fail.");
            }
        } else if (this.fs.exists(targetPathInTrash)) {
            // Name collision with something already in the trash: disambiguate with a timestamp suffix.
            targetPathInTrash = targetPathInTrash.suffix("_" + System.currentTimeMillis());
        }

        return this.fs.rename(fullyResolvedPath, targetPathInTrash);
    }

    /**
     * Moves all current contents of trash directory into a snapshot directory with current timestamp.
     *
     * <p>
     *   Only entries accepted by {@link #TRASH_NOT_SNAPSHOT_PATH_FILTER} are moved. When there are none, no
     *   snapshot is created. A failure to move an individual path is logged and counted; it does not stop the
     *   remaining paths from being moved and does not cause an exception.
     * </p>
     *
     * @throws IOException if the trash directory cannot be listed, or if the snapshot directory already exists or
     *         cannot be created.
     */
    public void createTrashSnapshot() throws IOException {
        FileStatus[] pathsInTrash = this.fs.listStatus(this.trashLocation, TRASH_NOT_SNAPSHOT_PATH_FILTER);
        if (pathsInTrash.length == 0) {
            LOG.info("Nothing in trash. Will not create snapshot.");
            return;
        }

        String snapshotName = new DateTime().toString(TRASH_SNAPSHOT_NAME_FORMATTER);
        Path snapshotDir = new Path(this.trashLocation, snapshotName);
        if (this.fs.exists(snapshotDir)) {
            throw new IOException("New snapshot directory " + snapshotDir.toString() + " already exists.");
        }
        if (!this.fs.mkdirs(snapshotDir, PERM)) {
            throw new IOException("Failed to create new snapshot directory at " + snapshotDir.toString());
        }

        LOG.info(String.format("Moving %d paths in Trash directory to newly created snapshot at %s.",
                pathsInTrash.length, snapshotDir.toString()));

        int failedMoves = 0;
        for (FileStatus entry : pathsInTrash) {
            Path source = entry.getPath();
            Path destination = new Path(snapshotDir, PathUtils.relativizePath(source, this.trashLocation));
            try {
                // rename may signal failure either by returning false or by throwing.
                if (!this.fs.rename(source, destination)) {
                    LOG.error("Failed to move path " + source.toString() + " to snapshot.");
                    failedMoves++;
                }
            } catch (IOException exception) {
                LOG.error("Failed to move path " + source.toString() + " to snapshot.", exception);
                failedMoves++;
            }
        }

        if (failedMoves > 0) {
            LOG.error(String.format("Failed to move %d paths to the snapshot at %s.", failedMoves,
                    snapshotDir.toString()));
        }
    }

    /**
     * For each existing trash snapshot, uses a {@link gobblin.data.management.trash.SnapshotCleanupPolicy} to determine
     * whether the snapshot should be deleted. If so, delete it permanently.
     *
     * <p>
     *   Snapshots are ordered by the timestamp parsed from their directory name and passed to
     *   {@link gobblin.data.management.trash.SnapshotCleanupPolicy#shouldDeleteSnapshot} from oldest to newest;
     *   each one is deleted recursively if the method returns true. A failed deletion is logged and the remaining
     *   snapshots are still processed.
     * </p>
     *
     * @throws IOException if the trash directory cannot be listed, or if thrown by the cleanup policy.
     */
    public void purgeTrashSnapshots() throws IOException {
        FileStatus[] listedSnapshots = this.fs.listStatus(this.trashLocation, TRASH_SNAPSHOT_PATH_FILTER);
        List<FileStatus> snapshotsInTrash = Arrays.asList(listedSnapshots);

        Comparator<FileStatus> oldestFirst = new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus left, FileStatus right) {
                DateTime leftTime = TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(left.getPath().getName());
                DateTime rightTime = TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(right.getPath().getName());
                return leftTime.compareTo(rightTime);
            }
        };
        Collections.sort(snapshotsInTrash, oldestFirst);

        int snapshotsDeleted = 0;
        for (FileStatus snapshot : snapshotsInTrash) {
            if (!this.snapshotCleanupPolicy.shouldDeleteSnapshot(snapshot, this)) {
                continue;
            }
            Path snapshotPath = snapshot.getPath();
            try {
                if (this.fs.delete(snapshotPath, true)) {
                    snapshotsDeleted++;
                } else {
                    LOG.error("Failed to delete snapshot " + snapshotPath);
                }
            } catch (IOException exception) {
                LOG.error("Failed to delete snapshot " + snapshotPath, exception);
            }
        }

        LOG.info(String.format("Deleted %d out of %d existing snapshots.", snapshotsDeleted,
                snapshotsInTrash.size()));
    }

}