org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler.java

Source

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.handler;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.DeadServer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.zookeeper.KeeperException;

import com.google.common.annotations.VisibleForTesting;

import java.util.concurrent.atomic.AtomicInteger;

/**
 * Shutdown handler for the server hosting <code>hbase:meta</code>
 */
@InterfaceAudience.Private
public class MetaServerShutdownHandler extends ServerShutdownHandler {
    private static final Log LOG = LogFactory.getLog(MetaServerShutdownHandler.class);
    private AtomicInteger eventExceptionCount = new AtomicInteger(0);
    @VisibleForTesting
    static final int SHOW_STRACKTRACE_FREQUENCY = 100;

    public MetaServerShutdownHandler(final Server server, final MasterServices services,
            final DeadServer deadServers, final ServerName serverName) {
        super(server, services, deadServers, serverName, EventType.M_META_SERVER_SHUTDOWN, true);
    }

    @Override
    public void process() throws IOException {
        boolean gotException = true;
        try {
            AssignmentManager am = this.services.getAssignmentManager();
            try {
                if (this.shouldSplitHlog) {
                    LOG.info("Splitting hbase:meta logs for " + serverName);
                    if (this.distributedLogReplay) {
                        Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
                        regions.add(HRegionInfo.FIRST_META_REGIONINFO);
                        this.services.getMasterFileSystem().prepareLogReplay(serverName, regions);
                    } else {
                        this.services.getMasterFileSystem().splitMetaLog(serverName);
                    }
                    am.getRegionStates().logSplit(HRegionInfo.FIRST_META_REGIONINFO);
                }
            } catch (IOException ioe) {
                this.services.getExecutorService().submit(this);
                this.deadServers.add(serverName);
                throw new IOException("failed log splitting for " + serverName + ", will retry", ioe);
            }

            // Assign meta if we were carrying it.
            // Check again: region may be assigned to other where because of RIT
            // timeout
            if (am.isCarryingMeta(serverName)) {
                LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
                am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
                verifyAndAssignMetaWithRetries();
            } else if (!this.services.getCatalogTracker().isMetaLocationAvailable()) {
                // the meta location as per master is null. This could happen in case when meta assignment
                // in previous run failed, while meta znode has been updated to null. We should try to
                // assign the meta again.
                verifyAndAssignMetaWithRetries();
            } else {
                LOG.info("META has been assigned to otherwhere, skip assigning.");
            }

            try {
                if (this.shouldSplitHlog && this.distributedLogReplay) {
                    if (!am.waitOnRegionToClearRegionsInTransition(HRegionInfo.FIRST_META_REGIONINFO,
                            regionAssignmentWaitTimeout)) {
                        // Wait here is to avoid log replay hits current dead server and incur a RPC timeout
                        // when replay happens before region assignment completes.
                        LOG.warn("Region " + HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()
                                + " didn't complete assignment in time");
                    }
                    this.services.getMasterFileSystem().splitMetaLog(serverName);
                }
            } catch (Exception ex) {
                if (ex instanceof IOException) {
                    this.services.getExecutorService().submit(this);
                    this.deadServers.add(serverName);
                    throw new IOException("failed log splitting for " + serverName + ", will retry", ex);
                } else {
                    throw new IOException(ex);
                }
            }

            gotException = false;
        } finally {
            if (gotException) {
                // If we had an exception, this.deadServers.finish will be skipped in super.process()
                this.deadServers.finish(serverName);
            }
        }

        super.process();
        // Clear this counter on successful handling.
        this.eventExceptionCount.set(0);
    }

    @Override
    boolean isCarryingMeta() {
        return true;
    }

    /**
     * Before assign the hbase:meta region, ensure it haven't
     *  been assigned by other place
     * <p>
     * Under some scenarios, the hbase:meta region can be opened twice, so it seemed online
     * in two regionserver at the same time.
     * If the hbase:meta region has been assigned, so the operation can be canceled.
     * @throws InterruptedException
     * @throws IOException
     * @throws KeeperException
     */
    private void verifyAndAssignMeta() throws InterruptedException, IOException, KeeperException {
        long timeout = this.server.getConfiguration().getLong("hbase.catalog.verification.timeout", 1000);
        if (!this.server.getCatalogTracker().verifyMetaRegionLocation(timeout)) {
            this.services.getAssignmentManager().assignMeta();
        } else if (serverName.equals(server.getCatalogTracker().getMetaLocation())) {
            throw new IOException("hbase:meta is onlined on the dead server " + serverName);
        } else {
            LOG.info("Skip assigning hbase:meta, because it is online on the "
                    + server.getCatalogTracker().getMetaLocation());
        }
    }

    /**
     * Failed many times, shutdown processing
     * @throws IOException
     */
    private void verifyAndAssignMetaWithRetries() throws IOException {
        int iTimes = this.server.getConfiguration().getInt("hbase.catalog.verification.retries", 10);

        long waitTime = this.server.getConfiguration().getLong("hbase.catalog.verification.timeout", 1000);

        int iFlag = 0;
        while (true) {
            try {
                verifyAndAssignMeta();
                break;
            } catch (KeeperException e) {
                this.server.abort("In server shutdown processing, assigning meta", e);
                throw new IOException("Aborting", e);
            } catch (Exception e) {
                if (iFlag >= iTimes) {
                    this.server.abort("verifyAndAssignMeta failed after" + iTimes + " times retries, aborting", e);
                    throw new IOException("Aborting", e);
                }
                try {
                    Thread.sleep(waitTime);
                } catch (InterruptedException e1) {
                    LOG.warn("Interrupted when is the thread sleep", e1);
                    Thread.currentThread().interrupt();
                    throw (InterruptedIOException) new InterruptedIOException().initCause(e1);
                }
                iFlag++;
            }
        }
    }

    @Override
    protected void handleException(Throwable t) {
        int count = eventExceptionCount.getAndIncrement();
        if (count < 0)
            count = eventExceptionCount.getAndSet(0);
        if (count > SHOW_STRACKTRACE_FREQUENCY) { // Too frequent, let's slow reporting
            Threads.sleep(1000);
        }
        if (count % SHOW_STRACKTRACE_FREQUENCY == 0) {
            LOG.error("Caught " + eventType + ", count=" + this.eventExceptionCount, t);
        } else {
            LOG.error("Caught " + eventType + ", count=" + this.eventExceptionCount + "; " + t.getMessage()
                    + "; stack trace shows every " + SHOW_STRACKTRACE_FREQUENCY + "th time.");
        }
    }
}