org.apache.tephra.hbase.txprune.InvalidListPruningDebugTool.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tephra.hbase.txprune.InvalidListPruningDebugTool.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.tephra.hbase.txprune;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Iterables;
import com.google.common.collect.MinMaxPriorityQueue;
import com.google.common.collect.Sets;
import com.google.gson.Gson;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.tephra.TxConstants;
import org.apache.tephra.txprune.RegionPruneInfo;
import org.apache.tephra.txprune.hbase.InvalidListPruningDebug;
import org.apache.tephra.txprune.hbase.RegionsAtTime;
import org.apache.tephra.util.TimeMathParser;
import org.apache.tephra.util.TxUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.PrintWriter;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;

/**
 * Invalid List Pruning Debug Tool.
 *
 * <p>Command line / programmatic helper that reads the prune state table through {@link DataJanitorState}
 * and reports which regions have recorded prune upper bounds, which regions were recorded at a given time,
 * and which live regions appear to be holding back pruning.
 *
 * <p>Call {@link #initialize(Configuration)} before any query method and {@link #destroy()} when done.
 */
public class InvalidListPruningDebugTool implements InvalidListPruningDebug {
    private static final Logger LOG = LoggerFactory.getLogger(InvalidListPruningDebugTool.class);
    private static final Gson GSON = new Gson();
    private static final String NOW = "now";
    @VisibleForTesting
    static final String DATE_FORMAT = "d-MMM-yyyy HH:mm:ss z";

    /**
     * Orders prune infos by ascending prune upper bound; ties are broken by region name so that two distinct
     * regions with the same upper bound are never collapsed into one element of a sorted set.
     */
    private static final Comparator<RegionPruneInfo> PRUNE_UPPER_BOUND_ORDER = new Comparator<RegionPruneInfo>() {
        @Override
        public int compare(RegionPruneInfo o1, RegionPruneInfo o2) {
            int result = Long.compare(o1.getPruneUpperBound(), o2.getPruneUpperBound());
            if (result == 0) {
                return o1.getRegionNameAsString().compareTo(o2.getRegionNameAsString());
            }
            return result;
        }
    };

    private DataJanitorState dataJanitorState;
    private Connection connection;
    private TableName tableName;

    /**
     * Initialize the Invalid List Debug Tool.
     *
     * <p>Opens an HBase connection and resolves the prune state table name from the configuration
     * (falling back to the default prune state table name).
     *
     * @param conf {@link Configuration}
     * @throws IOException when not able to create an HBase connection
     */
    @Override
    @SuppressWarnings("WeakerAccess")
    public void initialize(final Configuration conf) throws IOException {
        LOG.debug("InvalidListPruningDebugMain : initialize method called");
        connection = ConnectionFactory.createConnection(conf);
        tableName = TableName.valueOf(conf.get(TxConstants.TransactionPruning.PRUNE_STATE_TABLE,
                TxConstants.TransactionPruning.DEFAULT_PRUNE_STATE_TABLE));
        // The supplier hands out a table handle from the shared connection every time it is asked.
        dataJanitorState = new DataJanitorState(new DataJanitorState.TableSupplier() {
            @Override
            public Table get() throws IOException {
                return connection.getTable(tableName);
            }
        });
    }

    /**
     * Closes the HBase connection opened by {@link #initialize(Configuration)}, if any.
     *
     * @throws IOException if closing the connection fails
     */
    @Override
    @SuppressWarnings("WeakerAccess")
    public void destroy() throws IOException {
        if (connection != null) {
            connection.close();
        }
    }

    /**
     * Returns a set of regions that are live but are not empty nor have a prune upper bound recorded. These regions
     * will stop the progress of pruning.
     * <p>
     * Note that this can return false positives in the following case -
     * At time 't' empty regions were recorded, and time 't+1' prune iteration was invoked.
     * Since  a new set of regions was recorded at time 't+1', all regions recorded as empty before time 't + 1' will
     * now be reported as blocking the pruning, even though they are empty. This is because we cannot tell if those
     * regions got any new data between time 't' and 't + 1'.
     *
     * @param numRegions maximum number of regions to return; a negative value returns all of them
     * @param time time in milliseconds or relative time, regions recorded before the given time are returned
     * @return {@link Set} of regions that needs to be compacted and flushed
     * @throws IOException if the prune state cannot be read
     */
    @Override
    @SuppressWarnings("WeakerAccess")
    public Set<String> getRegionsToBeCompacted(Integer numRegions, String time) throws IOException {
        // Fetch the live regions at the given time
        RegionsAtTime timeRegion = getRegionsOnOrBeforeTime(time);
        if (timeRegion.getRegions().isEmpty()) {
            return Collections.emptySet();
        }

        long timestamp = timeRegion.getTime();

        // Retain only the regions that are still present in the latest recorded region set
        SortedSet<String> liveRegions = getRegionsOnOrBeforeTime(NOW).getRegions();
        SortedSet<String> regions = Sets.newTreeSet(Sets.intersection(liveRegions, timeRegion.getRegions()));

        // NOTE(review): the null second argument presumably means "do not restrict to a region subset" - confirm
        // against DataJanitorState.
        SortedSet<byte[]> emptyRegions = dataJanitorState.getEmptyRegionsAfterTime(timestamp, null);
        SortedSet<String> emptyRegionNames = new TreeSet<>();
        for (String regionString : Iterables.transform(emptyRegions, TimeRegions.BYTE_ARR_TO_STRING_FN)) {
            emptyRegionNames.add(regionString);
        }

        // Copy into a mutable set: Sets.difference only returns a view
        Set<String> nonEmptyRegions = Sets.newHashSet(Sets.difference(regions, emptyRegionNames));

        // Get all pruned regions for the current time and remove them from the nonEmptyRegions,
        // resulting in a set of regions that are not empty and have not been registered prune upper bound
        for (RegionPruneInfo prunedRegion : dataJanitorState.getPruneInfoForRegions(null)) {
            nonEmptyRegions.remove(prunedRegion.getRegionNameAsString());
        }

        if ((numRegions < 0) || (numRegions >= nonEmptyRegions.size())) {
            return nonEmptyRegions;
        }

        // Take an arbitrary subset of the requested size (hash set iteration order)
        Set<String> subsetRegions = new HashSet<>(numRegions);
        for (String regionName : nonEmptyRegions) {
            if (subsetRegions.size() == numRegions) {
                break;
            }
            subsetRegions.add(regionName);
        }
        return subsetRegions;
    }

    /**
     * Return the regions that have the lowest prune upper bounds, ordered by ascending prune upper bound
     * (ties broken by region name).
     * If -1 is passed in, all the regions and their prune upper bound will be returned. Note that only the regions
     * that are known to be live will be returned.
     *
     * @param numRegions maximum number of regions to return; a negative value returns all of them
     * @param time time in milliseconds or relative time, regions recorded before the given time are returned
     * @return sorted set of {@link RegionPruneInfoPretty}; empty when there is nothing to report
     * @throws IOException if the prune state cannot be read
     */
    @Override
    @SuppressWarnings("WeakerAccess")
    public SortedSet<RegionPruneInfoPretty> getIdleRegions(Integer numRegions, String time) throws IOException {
        List<RegionPruneInfo> allPruneInfos = dataJanitorState.getPruneInfoForRegions(null);
        if (allPruneInfos.isEmpty()) {
            return new TreeSet<>();
        }

        // Fetch the latest live regions
        RegionsAtTime latestRegions = getRegionsOnOrBeforeTime(NOW);

        // Fetch the regions at the given time
        RegionsAtTime timeRegions = getRegionsOnOrBeforeTime(time);
        Set<String> liveRegions = Sets.intersection(latestRegions.getRegions(), timeRegions.getRegions());

        // Use the subset of prune infos whose region is still live
        List<RegionPruneInfo> livePruneInfos = new ArrayList<>();
        for (RegionPruneInfo regionPruneInfo : allPruneInfos) {
            if (liveRegions.contains(regionPruneInfo.getRegionNameAsString())) {
                livePruneInfos.add(regionPruneInfo);
            }
        }

        int limit = numRegions < 0 ? livePruneInfos.size() : numRegions;
        SortedSet<RegionPruneInfoPretty> regions = new TreeSet<>(PRUNE_UPPER_BOUND_ORDER);
        if (limit == 0 || livePruneInfos.isEmpty()) {
            // MinMaxPriorityQueue rejects a maximum size of zero, so short-circuit instead of failing
            return regions;
        }

        // A bounded queue evicts its greatest element on overflow, leaving the 'limit' lowest upper bounds
        MinMaxPriorityQueue<RegionPruneInfoPretty> lowestPrunes =
                MinMaxPriorityQueue.orderedBy(PRUNE_UPPER_BOUND_ORDER).maximumSize(limit).create();
        for (RegionPruneInfo pruneInfo : livePruneInfos) {
            lowestPrunes.add(new RegionPruneInfoPretty(pruneInfo));
        }

        regions.addAll(lowestPrunes);
        return regions;
    }

    /**
     * Return the prune upper bound value of a given region. If no prune upper bound has been written for this region yet,
     * it will return a null.
     *
     * @param regionId region id, in the binary-escaped string form understood by {@link Bytes#toBytesBinary(String)}
     * @return {@link RegionPruneInfo} of the region, or {@code null} if none is recorded
     * @throws IOException if there are any errors while trying to fetch the {@link RegionPruneInfo}
     */
    @Override
    @SuppressWarnings("WeakerAccess")
    @Nullable
    public RegionPruneInfoPretty getRegionPruneInfo(String regionId) throws IOException {
        RegionPruneInfo pruneInfo = dataJanitorState.getPruneInfoForRegion(Bytes.toBytesBinary(regionId));
        return pruneInfo == null ? null : new RegionPruneInfoPretty(pruneInfo);
    }

    /**
     * Given a time, provide the regions recorded at or before that time.
     *
     * <p>When nothing was recorded at or before the given time, the result carries the parsed input time and
     * an empty region set.
     *
     * @param timeString time in milliseconds, or a relative time such as {@code now-1d}
     * @return transactional regions that are present at or before the given time
     * @throws IOException if there are any errors while trying to fetch the {@link TimeRegions}
     */
    @Override
    @SuppressWarnings("WeakerAccess")
    public RegionsAtTime getRegionsOnOrBeforeTime(String timeString) throws IOException {
        long time = TimeMathParser.parseTime(timeString, TimeUnit.MILLISECONDS);
        // SimpleDateFormat is not thread-safe, so a fresh instance is created per call
        SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
        TimeRegions timeRegions = dataJanitorState.getRegionsOnOrBeforeTime(time);
        if (timeRegions == null) {
            return new RegionsAtTime(time, new TreeSet<String>(), dateFormat);
        }
        SortedSet<String> regionNames = new TreeSet<>();
        for (String regionString : Iterables.transform(timeRegions.getRegions(),
                TimeRegions.BYTE_ARR_TO_STRING_FN)) {
            regionNames.add(regionString);
        }
        return new RegionsAtTime(timeRegions.getTime(), regionNames, dateFormat);
    }

    /**
     * Writes the command line help to the given writer.
     *
     * @param pw destination for the usage text
     */
    private void printUsage(PrintWriter pw) {
        pw.println();
        // Use the real class name so the printed command line can actually be launched
        pw.println("Usage : " + InvalidListPruningDebugTool.class.getName() + " <command> <parameters>");
        pw.println();
        pw.println("Available commands");
        pw.println("------------------");
        pw.println("to-compact-regions limit [time]");
        pw.println("Desc: Prints out the regions that are active, but not empty, "
                + "and have not registered a prune upper bound.");
        pw.println();
        pw.println("idle-regions limit [time]");
        pw.println("Desc: Prints out the regions that have the lowest prune upper bounds.");
        pw.println();
        pw.println("prune-info region-name-as-string");
        pw.println("Desc: Prints the prune upper bound and the time it was recorded for the given region.");
        pw.println();
        pw.println("time-region [time]");
        pw.println(
                "Desc: Prints out the transactional regions present in HBase recorded at or before the given time.");
        pw.println();
        pw.println("Parameters");
        pw.println("----------");
        pw.println(" * limit - used to limit the number of regions returned, -1 to apply no limit");
        pw.println(" * time  - if time is not provided, the current time is used. ");
        pw.println("             When provided, the data recorded on or before the given time is returned.");
        pw.println("             Time can be provided in milliseconds, or can be provided as a relative time.");
        pw.println("             Examples for relative time -");
        pw.println("             now = current time,");
        pw.println("             now-1d = current time - 1 day,");
        pw.println("             now-1d+4h = 20 hours before now,");
        pw.println("             now+5s = current time + 5 seconds");
        pw.println();
    }

    /**
     * Parses the {@code limit} command line argument.
     *
     * @param limit raw argument text
     * @param out writer that receives an error line when the text is not an integer
     * @return the parsed limit, or {@code null} if the text is not a valid integer
     */
    @Nullable
    private static Integer parseLimit(String limit, PrintWriter out) {
        try {
            return Integer.valueOf(limit);
        } catch (NumberFormatException e) {
            // Reported to the user here; the caller falls through to the usage text
            out.println(String.format("Invalid limit '%s': expected an integer, -1 to apply no limit.", limit));
            return null;
        }
    }

    /**
     * Runs a single command and prints its result as JSON to {@code out}.
     *
     * <p>If the command is unknown, or its arguments are missing, too many or malformed, the usage text is
     * printed instead.
     *
     * @param args command name followed by its parameters
     * @param out writer that receives the command output or the usage text
     * @return {@code true} if a command was executed, {@code false} if usage was printed
     * @throws IOException if reading the prune state fails
     */
    @VisibleForTesting
    boolean execute(String[] args, PrintWriter out) throws IOException {
        if (args.length < 1) {
            printUsage(out);
            return false;
        }

        String command = args[0];
        switch (command) {
        case "time-region":
            if (args.length <= 2) {
                String time = args.length == 2 ? args[1] : NOW;
                RegionsAtTime timeRegion = getRegionsOnOrBeforeTime(time);
                out.println(GSON.toJson(timeRegion));
                return true;
            }
            break;
        case "idle-regions":
            // 'limit' is mandatory, 'time' is optional
            if (args.length == 2 || args.length == 3) {
                Integer numRegions = parseLimit(args[1], out);
                if (numRegions != null) {
                    String time = args.length == 3 ? args[2] : NOW;
                    SortedSet<RegionPruneInfoPretty> regionPruneInfos = getIdleRegions(numRegions, time);
                    out.println(GSON.toJson(regionPruneInfos));
                    return true;
                }
            }
            break;
        case "prune-info":
            if (args.length == 2) {
                String regionName = args[1];
                RegionPruneInfo regionPruneInfo = getRegionPruneInfo(regionName);
                if (regionPruneInfo != null) {
                    out.println(GSON.toJson(regionPruneInfo));
                } else {
                    out.println(String.format("No prune info found for the region %s.", regionName));
                }
                return true;
            }
            break;
        case "to-compact-regions":
            // 'limit' is mandatory, 'time' is optional
            if (args.length == 2 || args.length == 3) {
                Integer numRegions = parseLimit(args[1], out);
                if (numRegions != null) {
                    String time = args.length == 3 ? args[2] : NOW;
                    Set<String> toBeCompactedRegions = getRegionsToBeCompacted(numRegions, time);
                    out.println(GSON.toJson(toBeCompactedRegions));
                    return true;
                }
            }
            break;
        default:
            break;
        }

        printUsage(out);
        return false;
    }

    /**
     * Command line entry point. Exits with status 1 when the arguments do not form a valid command.
     *
     * @param args command name followed by its parameters
     */
    public static void main(String[] args) {
        Configuration hConf = HBaseConfiguration.create();
        InvalidListPruningDebugTool pruningDebug = new InvalidListPruningDebugTool();
        boolean success = true;
        try (PrintWriter out = new PrintWriter(System.out)) {
            try {
                pruningDebug.initialize(hConf);
                success = pruningDebug.execute(args, out);
            } finally {
                // Release the HBase connection even when a command fails
                pruningDebug.destroy();
            }
        } catch (IOException ex) {
            LOG.error("Received an exception while trying to execute the debug tool. ", ex);
        }
        // Exit only after the writer has been closed: System.exit() does not run try-with-resources cleanup,
        // so exiting earlier would drop the buffered usage text.
        if (!success) {
            System.exit(1);
        }
    }

    /**
     * Wrapper class around {@link RegionPruneInfo} to print human readable dates for timestamps.
     */
    @SuppressWarnings({ "WeakerAccess", "unused" })
    public static class RegionPruneInfoPretty extends RegionPruneInfo {
        private final String pruneUpperBoundAsString;
        private final String pruneRecordTimeAsString;

        /**
         * Creates a pretty-printable copy of the given prune info.
         *
         * @param regionPruneInfo prune info to wrap
         */
        public RegionPruneInfoPretty(RegionPruneInfo regionPruneInfo) {
            this(regionPruneInfo.getRegionName(), regionPruneInfo.getRegionNameAsString(),
                    regionPruneInfo.getPruneUpperBound(), regionPruneInfo.getPruneRecordTime());
        }

        /**
         * @param regionName region name as bytes
         * @param regionNameAsString region name as a string
         * @param pruneUpperBound prune upper bound; converted to a timestamp with
         *                        {@link TxUtils#getTimestamp(long)} for display
         * @param pruneRecordTime time the prune upper bound was recorded; formatted directly as a timestamp
         */
        public RegionPruneInfoPretty(byte[] regionName, String regionNameAsString, long pruneUpperBound,
                long pruneRecordTime) {
            super(regionName, regionNameAsString, pruneUpperBound, pruneRecordTime);
            // The formatter is only needed here; keeping it local avoids retaining one per instance
            SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
            pruneUpperBoundAsString = dateFormat.format(TxUtils.getTimestamp(pruneUpperBound));
            pruneRecordTimeAsString = dateFormat.format(pruneRecordTime);
        }

        public String getPruneUpperBoundAsString() {
            return pruneUpperBoundAsString;
        }

        public String getPruneRecordTimeAsString() {
            return pruneRecordTimeAsString;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            if (!super.equals(o)) {
                return false;
            }
            RegionPruneInfoPretty that = (RegionPruneInfoPretty) o;
            return Objects.equals(pruneUpperBoundAsString, that.pruneUpperBoundAsString)
                    && Objects.equals(pruneRecordTimeAsString, that.pruneRecordTimeAsString);
        }

        @Override
        public int hashCode() {
            return Objects.hash(super.hashCode(), pruneUpperBoundAsString, pruneRecordTimeAsString);
        }

        @Override
        public String toString() {
            return "RegionPruneInfoPretty{" + "pruneUpperBoundAsString='" + pruneUpperBoundAsString + '\''
                    + ", pruneRecordTimeAsString='" + pruneRecordTimeAsString + '\'' + "} " + super.toString();
        }
    }

}