org.apache.falcon.snapshots.retention.HdfsSnapshotEvictor.java Source code

Introduction

Here is the source code for org.apache.falcon.snapshots.retention.HdfsSnapshotEvictor.java. The class is a Hadoop Tool, launched as an Oozie action, that evicts old HDFS snapshots on both the source and the target cluster of a Falcon snapshot-mirroring job. For each side it reads a retention policy, an age limit, and a minimum number of snapshots to retain from the command line; under the "delete" policy it removes every snapshot older than the age limit while always keeping the newest numSnapshots entries.
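
The heart of the class is the eviction rule in evictSnapshots: snapshots are sorted by modification time, and an entry is deleted only if it is both outside the newest numSnapshots entries and older than the age cutoff. Here is a minimal standalone sketch of that rule; the class and method names are illustrative only, not part of the Falcon API:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class RetentionRuleSketch {

    // Returns the modification times the rule would evict: only the oldest
    // (length - retainCount) entries are candidates, and each candidate must
    // also be older than the cutoff.
    static List<Long> evictable(long[] modTimes, long cutoff, int retainCount) {
        long[] sorted = modTimes.clone();
        Arrays.sort(sorted); // ascending, oldest first
        List<Long> out = new ArrayList<>();
        for (int i = 0; i < sorted.length - retainCount; i++) {
            if (sorted[i] < cutoff) {
                out.add(sorted[i]);
            }
        }
        return out;
    }

    public static void main(String[] args) {
        long[] times = {100L, 200L, 300L, 400L, 500L};
        // A cutoff of 350 marks 100, 200 and 300 as too old, but retainCount = 3
        // protects the newest three (300, 400, 500), so only 100 and 200 are evicted.
        System.out.println(evictable(times, 350L, 3)); // prints [100, 200]
    }
}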

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.snapshots.retention;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.lang3.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.extensions.mirroring.hdfsSnapshot.HdfsSnapshotMirrorProperties;
import org.apache.falcon.retention.EvictionHelper;
import org.apache.falcon.snapshots.util.HdfsSnapshotUtil;
import org.apache.falcon.workflow.util.OozieActionConfigurationHelper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.servlet.jsp.el.ELException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;

/**
 * HDFS snapshot evictor.
 */
public class HdfsSnapshotEvictor extends Configured implements Tool {
    private static final Logger LOG = LoggerFactory.getLogger(HdfsSnapshotEvictor.class);

    public static void main(String[] args) throws Exception {
        Configuration conf = OozieActionConfigurationHelper.createActionConf();
        int ret = ToolRunner.run(conf, new HdfsSnapshotEvictor(), args);
        if (ret != 0) {
            throw new Exception("Unable to perform eviction action; args: " + Arrays.toString(args));
        }
    }

    /**
     * Parses the mirroring job options, then applies the configured
     * retention policy on the source and target snapshot directories in turn.
     */
    @Override
    public int run(String[] args) throws Exception {
        CommandLine cmd = getCommand(args);
        DistributedFileSystem sourceFs = HdfsSnapshotUtil.getSourceFileSystem(cmd);
        DistributedFileSystem targetFs = HdfsSnapshotUtil.getTargetFileSystem(cmd);

        String sourceDir = cmd.getOptionValue(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_DIR.getName());
        String targetDir = cmd.getOptionValue(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_DIR.getName());

        // evict on source
        String retPolicy = cmd
                .getOptionValue(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_RETENTION_POLICY.getName());
        String ageLimit = cmd
                .getOptionValue(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_RETENTION_AGE_LIMIT.getName());
        int numSnapshots = Integer.parseInt(
                cmd.getOptionValue(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_RETENTION_NUMBER.getName()));
        if (retPolicy.equalsIgnoreCase("delete")) {
            evictSnapshots(sourceFs, sourceDir, ageLimit, numSnapshots);
        } else {
            LOG.warn("Unsupported source retention policy {}", retPolicy);
            throw new FalconException("Unsupported source retention policy " + retPolicy);
        }

        // evict on target
        retPolicy = cmd.getOptionValue(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_RETENTION_POLICY.getName());
        ageLimit = cmd.getOptionValue(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_RETENTION_AGE_LIMIT.getName());
        numSnapshots = Integer.parseInt(
                cmd.getOptionValue(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_RETENTION_NUMBER.getName()));
        if (retPolicy.equalsIgnoreCase("delete")) {
            evictSnapshots(targetFs, targetDir, ageLimit, numSnapshots);
        } else {
            LOG.warn("Unsupported target retention policy {}", retPolicy);
            throw new FalconException("Unsupported target retention policy " + retPolicy);
        }

        LOG.info("Completed HDFS Snapshot Eviction.");
        return 0;
    }

    /**
     * Deletes snapshots under dirName that are older than ageLimit while
     * always retaining the numSnapshots most recent ones. The ageLimit is a
     * retention EL expression evaluated by EvictionHelper.
     */
    protected static void evictSnapshots(DistributedFileSystem fs, String dirName, String ageLimit,
            int numSnapshots) throws FalconException {
        try {
            LOG.info("Started evicting snapshots on dir {}{} using policy {}, agelimit {}, numSnapshot {}",
                    fs.getUri(), dirName, ageLimit, numSnapshots);

            long evictionTime = System.currentTimeMillis() - EvictionHelper.evalExpressionToMilliSeconds(ageLimit);

            dirName = StringUtils.removeEnd(dirName, Path.SEPARATOR);
            String snapshotDir = dirName + Path.SEPARATOR + HdfsSnapshotUtil.SNAPSHOT_DIR_PREFIX + Path.SEPARATOR;
            FileStatus[] snapshots = fs.listStatus(new Path(snapshotDir));
            if (snapshots.length <= numSnapshots) {
                // no eviction needed
                return;
            }

            // Sort by last modified time, ascending order.
            Arrays.sort(snapshots, new Comparator<FileStatus>() {
                @Override
                public int compare(FileStatus f1, FileStatus f2) {
                    return Long.compare(f1.getModificationTime(), f2.getModificationTime());
                }
            });

            for (int i = 0; i < (snapshots.length - numSnapshots); i++) {
                // delete if older than ageLimit while retaining numSnapshots
                if (snapshots[i].getModificationTime() < evictionTime) {
                    fs.deleteSnapshot(new Path(dirName), snapshots[i].getPath().getName());
                }
            }

        } catch (ELException ele) {
            LOG.warn("Unable to parse retention age limit {} {}", ageLimit, ele.getMessage());
            throw new FalconException("Unable to parse retention age limit " + ageLimit, ele);
        } catch (IOException ioe) {
            LOG.warn("Unable to evict snapshots from dir {} {}", dirName, ioe);
            throw new FalconException("Unable to evict snapshots from dir " + dirName, ioe);
        }
    }

    /**
     * Defines the required and optional CLI options for the eviction action
     * and parses the supplied arguments.
     */
    private CommandLine getCommand(String[] args) throws org.apache.commons.cli.ParseException {
        Options options = new Options();

        Option opt = new Option(HdfsSnapshotMirrorProperties.SOURCE_NN.getName(), true, "Source Cluster");
        opt.setRequired(true);
        options.addOption(opt);
        opt = new Option(HdfsSnapshotMirrorProperties.SOURCE_EXEC_URL.getName(), true,
                "Replication instance job Exec Url");
        opt.setRequired(true);
        options.addOption(opt);
        opt = new Option(HdfsSnapshotMirrorProperties.SOURCE_NN_KERBEROS_PRINCIPAL.getName(), true,
                "Replication instance job NN Kerberos Principal");
        opt.setRequired(false);
        options.addOption(opt);
        opt = new Option(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_DIR.getName(), true,
                "Source snapshot-able dir to replicate");
        opt.setRequired(true);
        options.addOption(opt);

        opt = new Option(HdfsSnapshotMirrorProperties.TARGET_NN.getName(), true, "Target Cluster");
        opt.setRequired(true);
        options.addOption(opt);
        opt = new Option(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_DIR.getName(), true,
                "Target snapshot-able dir to replicate");
        opt.setRequired(true);
        options.addOption(opt);
        opt = new Option(HdfsSnapshotMirrorProperties.TARGET_EXEC_URL.getName(), true,
                "Replication instance target Exec Url");
        opt.setRequired(true);
        options.addOption(opt);
        opt = new Option(HdfsSnapshotMirrorProperties.TARGET_NN_KERBEROS_PRINCIPAL.getName(), true,
                "Replication instance target NN Kerberos Principal");
        opt.setRequired(false);
        options.addOption(opt);

        opt = new Option(HdfsSnapshotMirrorProperties.SNAPSHOT_JOB_NAME.getName(), true,
                "Replication instance job name");
        opt.setRequired(true);
        options.addOption(opt);

        opt = new Option(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_RETENTION_POLICY.getName(), true,
                "Source retention policy");
        opt.setRequired(false);
        options.addOption(opt);
        opt = new Option(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_RETENTION_AGE_LIMIT.getName(), true,
                "Source delete snapshots older than agelimit");
        opt.setRequired(true);
        options.addOption(opt);
        opt = new Option(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_RETENTION_NUMBER.getName(), true,
                "Source number of snapshots to retain");
        opt.setRequired(true);
        options.addOption(opt);

        opt = new Option(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_RETENTION_POLICY.getName(), true,
                "Target retention policy");
        opt.setRequired(false);
        options.addOption(opt);
        opt = new Option(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_RETENTION_AGE_LIMIT.getName(), true,
                "Target delete snapshots older than agelimit");
        opt.setRequired(true);
        options.addOption(opt);
        opt = new Option(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_RETENTION_NUMBER.getName(), true,
                "Target number of snapshots to retain");
        opt.setRequired(true);
        options.addOption(opt);

        return new GnuParser().parse(options, args);
    }
}
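
For reference, here is a sketch of driving the evictor directly through ToolRunner, for example from a test, instead of through the Oozie action that main() sets up. The literal option names and the days(...)-style age-limit expressions are assumptions about what HdfsSnapshotMirrorProperties.getName() returns and what EvictionHelper.evalExpressionToMilliSeconds() accepts; verify both against those classes before relying on this.

import org.apache.falcon.snapshots.retention.HdfsSnapshotEvictor;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class EvictorDriverSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical option names; the real ones come from
        // HdfsSnapshotMirrorProperties.*.getName().
        String[] evictorArgs = {
            "-sourceNN", "hdfs://source-nn:8020",
            "-sourceExecUrl", "source-rm:8050",
            "-sourceSnapshotDir", "/data/mirrored",
            "-targetNN", "hdfs://target-nn:8020",
            "-targetExecUrl", "target-rm:8050",
            "-targetSnapshotDir", "/backup/mirrored",
            "-snapshotJobName", "nightly-mirror",
            "-sourceSnapshotRetentionPolicy", "delete",
            "-sourceSnapshotRetentionAgeLimit", "days(7)",  // assumed EL syntax
            "-sourceSnapshotRetentionNumber", "3",
            "-targetSnapshotRetentionPolicy", "delete",
            "-targetSnapshotRetentionAgeLimit", "days(30)", // assumed EL syntax
            "-targetSnapshotRetentionNumber", "10",
        };
        int ret = ToolRunner.run(new Configuration(), new HdfsSnapshotEvictor(), evictorArgs);
        System.exit(ret);
    }
}

Note that both snapshot directories must already be snapshottable (hdfs dfsadmin -allowSnapshot <dir>); deleteSnapshot() fails on a directory where snapshots were never enabled.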