org.apache.hadoop.hbase.regionserver.compactions.CompactionPolicy.java Source code

Introduction

Here is the source code for org.apache.hadoop.hbase.regionserver.compactions.CompactionPolicy.java, the Apache HBase class that selects store files for compaction.

Source

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.regionserver.compactions;

import com.google.common.base.Predicate;
import com.google.common.collect.Collections2;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.regionserver.StoreConfiguration;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.Random;

/**
 * The default (and only, as of now) algorithm for selecting files for compaction.
 * Combines the compaction configuration and the provisional file selection that
 * it's given to produce the list of suitable candidates for compaction.
 */
@InterfaceAudience.Private
public class CompactionPolicy {

    private static final Log LOG = LogFactory.getLog(CompactionPolicy.class);

    CompactionConfiguration comConf;
    StoreConfiguration storeConfig;

    public CompactionPolicy(Configuration configuration, StoreConfiguration storeConfig) {
        updateConfiguration(configuration, storeConfig);
    }

    /**
     * @param candidateFiles candidate files, ordered from oldest to newest
     * @param isUserCompaction whether this compaction was requested by the user
     * @param forceMajor whether a major compaction has been forced
     * @return subset copy of the candidate list that meets compaction criteria
     * @throws java.io.IOException
     */
    public CompactSelection selectCompaction(List<StoreFile> candidateFiles, boolean isUserCompaction,
            boolean forceMajor) throws IOException {
        // Preliminary candidate selection, subject to the filters below
        CompactSelection candidateSelection = new CompactSelection(candidateFiles);
        long cfTtl = this.storeConfig.getStoreFileTtl();
        if (!forceMajor) {
            // If there are expired files, only select them so that compaction deletes them
            if (comConf.shouldDeleteExpired() && (cfTtl != Long.MAX_VALUE)) {
                CompactSelection expiredSelection = selectExpiredStoreFiles(candidateSelection,
                        EnvironmentEdgeManager.currentTimeMillis() - cfTtl);
                if (expiredSelection != null) {
                    return expiredSelection;
                }
            }
            candidateSelection = skipLargeFiles(candidateSelection);
        }

        // Run a major compaction if this is a user-requested major compaction,
        // or if a major compaction was forced or is due and we do not have too
        // many files to compact. Also run one if there are any references among
        // the candidates; references must always be compacted away.
        boolean majorCompaction = ((forceMajor && isUserCompaction)
                || ((forceMajor || isMajorCompaction(candidateSelection.getFilesToCompact()))
                        && (candidateSelection.getFilesToCompact().size() < comConf.getMaxFilesToCompact()))
                || StoreUtils.hasReferences(candidateSelection.getFilesToCompact()));

        if (!majorCompaction) {
            // we're doing a minor compaction, let's see what files are applicable
            candidateSelection = filterBulk(candidateSelection);
            candidateSelection = applyCompactionPolicy(candidateSelection);
            candidateSelection = checkMinFilesCriteria(candidateSelection);
        }
        candidateSelection = removeExcessFiles(candidateSelection, isUserCompaction, majorCompaction);
        return candidateSelection;
    }

    /**
     * Updates the compaction configuration. Used for tests.
     * TODO: replace when HBASE-3909 is completed in some form.
     */
    public void updateConfiguration(Configuration configuration, StoreConfiguration storeConfig) {
        this.comConf = new CompactionConfiguration(configuration, storeConfig);
        this.storeConfig = storeConfig;
    }

    /**
     * Select the expired store files to compact.
     *
     * @param candidates the initial set of store files
     * @param maxExpiredTimeStamp
     *          a store file is marked as expired if its max timestamp is
     *          less than this maxExpiredTimeStamp
     * @return a CompactSelection containing the expired store files as
     *         filesToCompact, or null if there are none
     */
    private CompactSelection selectExpiredStoreFiles(CompactSelection candidates, long maxExpiredTimeStamp) {
        List<StoreFile> filesToCompact = candidates.getFilesToCompact();
        if (filesToCompact == null || filesToCompact.isEmpty()) {
            return null;
        }
        ArrayList<StoreFile> expiredStoreFiles = null;
        boolean hasExpiredStoreFiles = false;
        CompactSelection expiredSFSelection = null;

        for (StoreFile storeFile : filesToCompact) {
            if (storeFile.getReader().getMaxTimestamp() < maxExpiredTimeStamp) {
                LOG.info("Deleting the expired store file by compaction: " + storeFile.getPath()
                        + " whose maxTimeStamp is " + storeFile.getReader().getMaxTimestamp()
                        + " while the max expired timestamp is " + maxExpiredTimeStamp);
                if (!hasExpiredStoreFiles) {
                    expiredStoreFiles = new ArrayList<StoreFile>();
                    hasExpiredStoreFiles = true;
                }
                expiredStoreFiles.add(storeFile);
            }
        }

        if (hasExpiredStoreFiles) {
            expiredSFSelection = new CompactSelection(expiredStoreFiles);
        }
        return expiredSFSelection;
    }

    /**
     * Exclude all files above maxCompactSize, but keep any references:
     * references MUST always be compacted.
     *
     * @param candidates pre-filtered candidates
     * @return filtered subset
     */
    private CompactSelection skipLargeFiles(CompactSelection candidates) {
        int pos = 0;
        while (pos < candidates.getFilesToCompact().size()
                && candidates.getFilesToCompact().get(pos).getReader().length() > comConf.getMaxCompactSize()
                && !candidates.getFilesToCompact().get(pos).isReference()) {
            ++pos;
        }
        if (pos > 0) {
            LOG.debug("Some files are too large. Excluding " + pos + " files from compaction candidates");
            candidates.clearSubList(0, pos);
        }
        return candidates;
    }

    /**
     * Exclude all bulk-load files if so configured.
     *
     * @param candidates pre-filtered candidates
     * @return filtered subset
     */
    private CompactSelection filterBulk(CompactSelection candidates) {
        candidates.getFilesToCompact()
                .removeAll(Collections2.filter(candidates.getFilesToCompact(), new Predicate<StoreFile>() {
                    @Override
                    public boolean apply(StoreFile input) {
                        return input.excludeFromMinorCompaction();
                    }
                }));
        return candidates;
    }

    /**
     * Take up to maxFilesToCompact from the start of the list.
     *
     * @param candidates pre-filtered candidates
     * @param isUserCompaction whether this compaction was requested by the user
     * @param isMajorCompaction whether a major compaction was selected
     * @return filtered subset
     */
    private CompactSelection removeExcessFiles(CompactSelection candidates, boolean isUserCompaction,
            boolean isMajorCompaction) {
        int excess = candidates.getFilesToCompact().size() - comConf.getMaxFilesToCompact();
        if (excess > 0) {
            if (isMajorCompaction && isUserCompaction) {
                LOG.debug("Warning, compacting more than " + comConf.getMaxFilesToCompact()
                        + " files because of a user-requested major compaction");
            } else {
                LOG.debug("Too many admissible files. Excluding " + excess + " files from compaction candidates");
                candidates.clearSubList(comConf.getMaxFilesToCompact(), candidates.getFilesToCompact().size());
            }
        }
        return candidates;
    }

    /**
     * Drop the whole selection if there are not enough files to compact.
     *
     * @param candidates pre-filtered candidates
     * @return filtered subset, emptied if below the minimum file count
     */
    private CompactSelection checkMinFilesCriteria(CompactSelection candidates) {
        int minFiles = comConf.getMinFilesToCompact();
        if (candidates.getFilesToCompact().size() < minFiles) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Not compacting files because we only have " + candidates.getFilesToCompact().size()
                        + " files ready for compaction.  Need " + minFiles + " to initiate.");
            }
            candidates.emptyFileList();
        }
        return candidates;
    }

    /**
     * Default minor compaction selection algorithm: choose a CompactSelection
     * from the candidates.
     * <p/>
     * First exclude bulk-load files if indicated in configuration.
     * Start at the oldest file and stop when you find the first file that
     * meets the compaction criteria:
     * (1) a recently-flushed, small file (i.e. <= minCompactSize), OR
     * (2) within the compactRatio of sum(newer_files).
     * Given normal skew, any newer files will also meet these criteria.
     * <p/>
     * Additional note: if fileSizes.size() >> maxFilesToCompact, we will
     * recurse on compact().  Consider the oldest files first to avoid a
     * situation where we always compact [end-threshold,end).  Then, the
     * last file becomes an aggregate of the previous compactions.
     *
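     * Worked example (illustrative values, assuming minCompactSize is below
     * all sizes involved and ratio = 1.0), for fileSizes = [100, 50, 23, 12, 12],
     * ordered oldest to newest:
     *   100 is skipped, because 100 > 1.0 * (50 + 23 + 12 + 12) = 97
     *   50  is skipped, because  50 > 1.0 * (23 + 12 + 12) = 47
     *   23  is selected, because 23 <= 1.0 * (12 + 12) = 24
     * leaving [23, 12, 12] as the selection.
     *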
     * normal skew:
     *
     *         older ----> newer (increasing seqID)
     *     _
     *    | |   _
     *    | |  | |   _
     *  --|-|- |-|- |-|---_-------_-------  minCompactSize
     *    | |  | |  | |  | |  _  | |
     *    | |  | |  | |  | | | | | |
     *    | |  | |  | |  | | | | | |
     *
     * @param candidates pre-filtered candidates
     * @return filtered subset
     */
    CompactSelection applyCompactionPolicy(CompactSelection candidates) throws IOException {
        if (candidates.getFilesToCompact().isEmpty()) {
            return candidates;
        }

        // we're doing a minor compaction, let's see what files are applicable
        int start = 0;
        double ratio = comConf.getCompactionRatio();
        if (isOffPeakHour() && candidates.trySetOffpeak()) {
            ratio = comConf.getCompactionRatioOffPeak();
            LOG.info("Running an off-peak compaction, selection ratio = " + ratio
                    + ", numOutstandingOffPeakCompactions is now "
                    + CompactSelection.getNumOutStandingOffPeakCompactions());
        }

        // get store file sizes for incremental compacting selection.
        int countOfFiles = candidates.getFilesToCompact().size();
        long[] fileSizes = new long[countOfFiles];
        long[] sumSize = new long[countOfFiles];
        for (int i = countOfFiles - 1; i >= 0; --i) {
            StoreFile file = candidates.getFilesToCompact().get(i);
            fileSizes[i] = file.getReader().length();
            // calculate the sum of fileSizes[i,i+maxFilesToCompact-1) for algo
            int tooFar = i + comConf.getMaxFilesToCompact() - 1;
            sumSize[i] = fileSizes[i] + ((i + 1 < countOfFiles) ? sumSize[i + 1] : 0)
                    - ((tooFar < countOfFiles) ? fileSizes[tooFar] : 0);
        }

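        // Skip the oldest files while each is larger than both minCompactSize
        // and ratio times the total size of the (up to maxFilesToCompact - 1)
        // files newer than it; sumSize[start + 1] holds that windowed total.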
        while (countOfFiles - start >= comConf.getMinFilesToCompact()
                && fileSizes[start] > Math.max(comConf.getMinCompactSize(), (long) (sumSize[start + 1] * ratio))) {
            ++start;
        }
        if (start < countOfFiles) {
            LOG.info("Default compaction algorithm has selected " + (countOfFiles - start) + " files from "
                    + countOfFiles + " candidates");
        }

        candidates = candidates.getSubList(start, countOfFiles);

        return candidates;
    }

    /**
     * @param filesToCompact Files to compact. Can be null.
     * @return True if we should run a major compaction.
     */
    public boolean isMajorCompaction(final List<StoreFile> filesToCompact) throws IOException {
        boolean result = false;
        long mcTime = getNextMajorCompactTime(filesToCompact);
        if (filesToCompact == null || filesToCompact.isEmpty() || mcTime == 0) {
            return result;
        }
        // TODO: Use better method for determining stamp of last major (HBASE-2990)
        long lowTimestamp = StoreUtils.getLowestTimestamp(filesToCompact);
        long now = EnvironmentEdgeManager.currentTimeMillis();
        if (lowTimestamp > 0L && lowTimestamp < (now - mcTime)) {
            // Major compaction time has elapsed.
            long cfTtl = this.storeConfig.getStoreFileTtl();
            if (filesToCompact.size() == 1) {
                // Single file
                StoreFile sf = filesToCompact.get(0);
                Long minTimestamp = sf.getMinimumTimestamp();
                long oldest = (minTimestamp == null) ? Long.MIN_VALUE : now - minTimestamp.longValue();
                if (sf.isMajorCompaction() && (cfTtl == HConstants.FOREVER || oldest < cfTtl)) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Skipping major compaction of " + this
                                + " because there is only one (major) compacted file and its oldestTime "
                                + oldest + "ms is < ttl=" + cfTtl);
                    }
                } else if (cfTtl != HConstants.FOREVER && oldest > cfTtl) {
                    LOG.debug("Major compaction triggered on store " + this
                            + ", because keyvalues outdated; time since last major compaction "
                            + (now - lowTimestamp) + "ms");
                    result = true;
                }
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Major compaction triggered on store " + this + "; time since last major compaction "
                            + (now - lowTimestamp) + "ms");
                }
                result = true;
            }
        }
        return result;
    }

    public long getNextMajorCompactTime(final List<StoreFile> filesToCompact) {
        // default = 24hrs
        long ret = comConf.getMajorCompactionPeriod();
        if (ret > 0) {
            // default = 20% = +/- 4.8 hrs
            double jitterPct = comConf.getMajorCompactionJitter();
            if (jitterPct > 0) {
                long jitter = Math.round(ret * jitterPct);
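                // the shift applied below lies in [-jitter, +jitter], so e.g. a
                // 24hr period with 20% jitter yields roughly 19.2 to 28.8 hrs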
                // deterministic jitter avoids a major compaction storm on restart
                Integer seed = StoreUtils.getDeterministicRandomSeed(filesToCompact);
                if (seed != null) {
                    double rnd = (new Random(seed)).nextDouble();
                    ret += jitter - Math.round(2L * jitter * rnd);
                } else {
                    ret = 0; // no storefiles == no major compaction
                }
            }
        }
        return ret;
    }

    /**
     * @param compactionSize Total size of some compaction
     * @return whether the compaction is large enough that it should be
     *         throttled (i.e. treated as a large compaction)
     */
    public boolean throttleCompaction(long compactionSize) {
        return compactionSize > comConf.getThrottlePoint();
    }

    /**
     * @param numCandidates Number of candidate store files
     * @return whether a compactionSelection is possible
     */
    public boolean needsCompaction(int numCandidates) {
        return numCandidates >= comConf.getMinFilesToCompact();
    }

    /**
     * @return whether this is off-peak hour
     */
    private boolean isOffPeakHour() {
        // Read the hour fresh on each call; a Calendar cached in a static field
        // would keep reporting the hour at which it was constructed.
        int currentHour = Calendar.getInstance().get(Calendar.HOUR_OF_DAY);
        int startHour = comConf.getOffPeakStartHour();
        int endHour = comConf.getOffPeakEndHour();
        // If offpeak time checking is disabled just return false.
        if (startHour == endHour) {
            return false;
        }
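        // The off-peak window may wrap past midnight: e.g. startHour = 22 and
        // endHour = 6 covers 22:00-24:00 plus 00:00-06:00.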
        if (startHour < endHour) {
            return (currentHour >= startHour && currentHour < endHour);
        }
        return (currentHour >= startHour || currentHour < endHour);
    }
}
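
Example

The skip loop in applyCompactionPolicy can be tried out in isolation. Below is a minimal, self-contained sketch (the class name RatioSelectionDemo and the concrete parameter values are illustrative inventions, not part of HBase) that reproduces the same arithmetic on a plain array of file sizes and prints which files the ratio-based policy would select:

import java.util.Arrays;

public class RatioSelectionDemo {

    /**
     * Mirrors the selection math of CompactionPolicy.applyCompactionPolicy:
     * skip the oldest files while each is larger than both minCompactSize and
     * ratio times the windowed sum of the files newer than it.
     */
    static int selectStart(long[] fileSizes, int minFiles, int maxFiles,
            long minCompactSize, double ratio) {
        int count = fileSizes.length;
        // sumSize[i] = sum of fileSizes[i .. i+maxFiles-2], clipped to the array;
        // the extra trailing slot keeps sumSize[start + 1] in bounds
        long[] sumSize = new long[count + 1];
        for (int i = count - 1; i >= 0; --i) {
            int tooFar = i + maxFiles - 1;
            sumSize[i] = fileSizes[i] + sumSize[i + 1]
                    - ((tooFar < count) ? fileSizes[tooFar] : 0);
        }
        int start = 0;
        while (count - start >= minFiles
                && fileSizes[start] > Math.max(minCompactSize,
                        (long) (sumSize[start + 1] * ratio))) {
            ++start;
        }
        return start;
    }

    public static void main(String[] args) {
        long[] fileSizes = { 100, 50, 23, 12, 12 }; // ordered oldest to newest
        int start = selectStart(fileSizes, 3, 10, 10, 1.0);
        // With these values the two oldest files are skipped and [23, 12, 12]
        // is selected, matching the worked example in the javadoc above.
        System.out.println("Selected: "
                + Arrays.toString(Arrays.copyOfRange(fileSizes, start, fileSizes.length)));
    }
}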