Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.catalog; import com.google.common.base.Stopwatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.ipc.RpcClient.FailedServerException; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService; import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.hadoop.ipc.RemoteException; import java.io.EOFException; import java.io.IOException; import java.net.ConnectException; import java.net.NoRouteToHostException; import java.net.SocketException; import java.net.SocketTimeoutException; import java.net.UnknownHostException; /** * Tracks the availability of the catalog tables * <code>hbase:meta</code>. * * This class is "read-only" in that the locations of the catalog tables cannot * be explicitly set. Instead, ZooKeeper is used to learn of the availability * and location of <code>hbase:meta</code>. * * <p>Call {@link #start()} to start up operation. Call {@link #stop()}} to * interrupt waits and close up shop. */ @InterfaceAudience.Private public class CatalogTracker { // TODO JDC 11/30 We don't even have ROOT anymore, revisit // TODO: This class needs a rethink. The original intent was that it would be // the one-stop-shop for meta locations and that it would get this // info from reading and watching zk state. The class was to be used by // servers when they needed to know of meta movement but also by // client-side (inside in HTable) so rather than figure meta // locations on fault, the client would instead get notifications out of zk. // // But this original intent is frustrated by the fact that this class has to // read an hbase table, the -ROOT- table, to figure out the hbase:meta region // location which means we depend on an HConnection. HConnection will do // retrying but also, it has its own mechanism for finding root and meta // locations (and for 'verifying'; it tries the location and if it fails, does // new lookup, etc.). So, at least for now, HConnection (or HTable) can't // have a CT since CT needs a HConnection (Even then, do want HT to have a CT? // For HT keep up a session with ZK? Rather, shouldn't we do like asynchbase // where we'd open a connection to zk, read what we need then let the // connection go?). The 'fix' is make it so both root and meta addresses // are wholey up in zk -- not in zk (root) -- and in an hbase table (meta). // // But even then, this class does 'verification' of the location and it does // this by making a call over an HConnection (which will do its own root // and meta lookups). Isn't this verification 'useless' since when we // return, whatever is dependent on the result of this call then needs to // use HConnection; what we have verified may change in meantime (HConnection // uses the CT primitives, the root and meta trackers finding root locations). // // When meta is moved to zk, this class may make more sense. In the // meantime, it does not cohere. It should just watch meta and root and not // NOT do verification -- let that be out in HConnection since its going to // be done there ultimately anyways. // // This class has spread throughout the codebase. It needs to be reigned in. // This class should be used server-side only, even if we move meta location // up into zk. Currently its used over in the client package. Its used in // MetaReader and MetaEditor classes usually just to get the Configuration // its using (It does this indirectly by asking its HConnection for its // Configuration and even then this is just used to get an HConnection out on // the other end). I made https://issues.apache.org/jira/browse/HBASE-4495 for // doing CT fixup. St.Ack 09/30/2011. // // TODO: Timeouts have never been as advertised in here and its worse now // with retries; i.e. the HConnection retries and pause goes ahead whatever // the passed timeout is. Fix. private static final Log LOG = LogFactory.getLog(CatalogTracker.class); private final HConnection connection; private final ZooKeeperWatcher zookeeper; private final MetaRegionTracker metaRegionTracker; private boolean instantiatedzkw = false; private Abortable abortable; private boolean stopped = false; static final byte[] META_REGION_NAME = HRegionInfo.FIRST_META_REGIONINFO.getRegionName(); /** * Constructs a catalog tracker. Find current state of catalog tables. * Begin active tracking by executing {@link #start()} post construction. Does * not timeout. * * @param conf * the {@link Configuration} from which a {@link HConnection} will be * obtained; if problem, this connections * {@link HConnection#abort(String, Throwable)} will be called. * @throws IOException */ public CatalogTracker(final Configuration conf) throws IOException { this(null, conf, HConnectionManager.getConnection(conf), null); } /** * Constructs the catalog tracker. Find current state of catalog tables. * Begin active tracking by executing {@link #start()} post construction. * Does not timeout. * @param zk If zk is null, we'll create an instance (and shut it down * when {@link #stop()} is called) else we'll use what is passed. * @param conf * @param abortable If fatal exception we'll call abort on this. May be null. * If it is we'll use the Connection associated with the passed * {@link Configuration} as our Abortable. * @throws IOException */ public CatalogTracker(final ZooKeeperWatcher zk, final Configuration conf, HConnection connection, Abortable abortable) throws IOException { this.connection = connection; if (abortable == null) { // A connection is abortable. this.abortable = this.connection; } else { this.abortable = abortable; } Abortable throwableAborter = new Abortable() { @Override public void abort(String why, Throwable e) { throw new RuntimeException(why, e); } @Override public boolean isAborted() { return true; } }; if (zk == null) { // Create our own. Set flag so we tear it down on stop. this.zookeeper = new ZooKeeperWatcher(conf, "catalogtracker-on-" + connection.toString(), abortable); instantiatedzkw = true; } else { this.zookeeper = zk; } this.metaRegionTracker = new MetaRegionTracker(zookeeper, throwableAborter); } /** * Starts the catalog tracker. * Determines current availability of catalog tables and ensures all further * transitions of either region are tracked. * @throws IOException * @throws InterruptedException */ public void start() throws IOException, InterruptedException { LOG.debug("Starting catalog tracker " + this); try { this.metaRegionTracker.start(); } catch (RuntimeException e) { Throwable t = e.getCause(); this.abortable.abort(e.getMessage(), t); throw new IOException("Attempt to start meta tracker failed.", t); } } /** * Stop working. * Interrupts any ongoing waits. */ public void stop() { if (!this.stopped) { LOG.debug("Stopping catalog tracker " + this); this.stopped = true; this.metaRegionTracker.stop(); try { if (this.connection != null) { this.connection.close(); } } catch (IOException e) { // Although the {@link Closeable} interface throws an {@link // IOException}, in reality, the implementation would never do that. LOG.error("Attempt to close catalog tracker's connection failed.", e); } if (this.instantiatedzkw) { this.zookeeper.close(); } } } /** * Gets the current location for <code>hbase:meta</code> or null if location is * not currently available. * @return {@link ServerName} for server hosting <code>hbase:meta</code> or null * if none available * @throws InterruptedException */ public ServerName getMetaLocation() throws InterruptedException { return this.metaRegionTracker.getMetaRegionLocation(); } /** * Checks whether meta regionserver znode has some non null data. * @return true if data is not null, false otherwise. */ public boolean isMetaLocationAvailable() { return this.metaRegionTracker.isLocationAvailable(); } /** * Gets the current location for <code>hbase:meta</code> if available and waits * for up to the specified timeout if not immediately available. Returns null * if the timeout elapses before root is available. * @param timeout maximum time to wait for root availability, in milliseconds * @return {@link ServerName} for server hosting <code>hbase:meta</code> or null * if none available * @throws InterruptedException if interrupted while waiting * @throws NotAllMetaRegionsOnlineException if meta not available before * timeout */ public ServerName waitForMeta(final long timeout) throws InterruptedException, NotAllMetaRegionsOnlineException { ServerName sn = metaRegionTracker.waitMetaRegionLocation(timeout); if (sn == null) { throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms"); } return sn; } /** * Gets a connection to the server hosting meta, as reported by ZooKeeper, * waiting up to the specified timeout for availability. * @param timeout How long to wait on meta location * @see #waitForMeta for additional information * @return connection to server hosting meta * @throws InterruptedException * @throws NotAllMetaRegionsOnlineException if timed out waiting * @throws IOException * @deprecated Use #getMetaServerConnection(long) */ public AdminService.BlockingInterface waitForMetaServerConnection(long timeout) throws InterruptedException, NotAllMetaRegionsOnlineException, IOException { return getMetaServerConnection(timeout); } /** * Gets a connection to the server hosting meta, as reported by ZooKeeper, * waiting up to the specified timeout for availability. * <p>WARNING: Does not retry. Use an {@link HTable} instead. * @param timeout How long to wait on meta location * @see #waitForMeta for additional information * @return connection to server hosting meta * @throws InterruptedException * @throws NotAllMetaRegionsOnlineException if timed out waiting * @throws IOException */ AdminService.BlockingInterface getMetaServerConnection(long timeout) throws InterruptedException, NotAllMetaRegionsOnlineException, IOException { return getCachedConnection(waitForMeta(timeout)); } /** * Waits indefinitely for availability of <code>hbase:meta</code>. Used during * cluster startup. Does not verify meta, just that something has been * set up in zk. * @see #waitForMeta(long) * @throws InterruptedException if interrupted while waiting */ public void waitForMeta() throws InterruptedException { Stopwatch stopwatch = new Stopwatch().start(); while (!this.stopped) { try { if (waitForMeta(100) != null) break; long sleepTime = stopwatch.elapsedMillis(); // +1 in case sleepTime=0 if ((sleepTime + 1) % 10000 == 0) { LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms"); } } catch (NotAllMetaRegionsOnlineException e) { if (LOG.isTraceEnabled()) { LOG.trace("hbase:meta still not available, sleeping and retrying." + " Reason: " + e.getMessage()); } } } } /** * @param sn ServerName to get a connection against. * @return The AdminProtocol we got when we connected to <code>sn</code> * May have come from cache, may not be good, may have been setup by this * invocation, or may be null. * @throws IOException */ @SuppressWarnings("deprecation") private AdminService.BlockingInterface getCachedConnection(ServerName sn) throws IOException { if (sn == null) { return null; } AdminService.BlockingInterface service = null; try { service = connection.getAdmin(sn); } catch (RetriesExhaustedException e) { if (e.getCause() != null && e.getCause() instanceof ConnectException) { // Catch this; presume it means the cached connection has gone bad. } else { throw e; } } catch (SocketTimeoutException e) { LOG.debug("Timed out connecting to " + sn); } catch (NoRouteToHostException e) { LOG.debug("Connecting to " + sn, e); } catch (SocketException e) { LOG.debug("Exception connecting to " + sn); } catch (UnknownHostException e) { LOG.debug("Unknown host exception connecting to " + sn); } catch (FailedServerException e) { if (LOG.isDebugEnabled()) { LOG.debug("Server " + sn + " is in failed server list."); } } catch (IOException ioe) { Throwable cause = ioe.getCause(); if (ioe instanceof ConnectException) { // Catch. Connect refused. } else if (cause != null && cause instanceof EOFException) { // Catch. Other end disconnected us. } else if (cause != null && cause.getMessage() != null && cause.getMessage().toLowerCase().contains("connection reset")) { // Catch. Connection reset. } else { throw ioe; } } return service; } /** * Verify we can connect to <code>hostingServer</code> and that its carrying * <code>regionName</code>. * @param hostingServer Interface to the server hosting <code>regionName</code> * @param address The servername that goes with the <code>metaServer</code> * Interface. Used logging. * @param regionName The regionname we are interested in. * @return True if we were able to verify the region located at other side of * the Interface. * @throws IOException */ // TODO: We should be able to get the ServerName from the AdminProtocol // rather than have to pass it in. Its made awkward by the fact that the // HRI is likely a proxy against remote server so the getServerName needs // to be fixed to go to a local method or to a cache before we can do this. private boolean verifyRegionLocation(AdminService.BlockingInterface hostingServer, final ServerName address, final byte[] regionName) throws IOException { if (hostingServer == null) { LOG.info("Passed hostingServer is null"); return false; } Throwable t = null; try { // Try and get regioninfo from the hosting server. return ProtobufUtil.getRegionInfo(hostingServer, regionName) != null; } catch (ConnectException e) { t = e; } catch (RetriesExhaustedException e) { t = e; } catch (RemoteException e) { IOException ioe = e.unwrapRemoteException(); t = ioe; } catch (IOException e) { Throwable cause = e.getCause(); if (cause != null && cause instanceof EOFException) { t = cause; } else if (cause != null && cause.getMessage() != null && cause.getMessage().contains("Connection reset")) { t = cause; } else { t = e; } } LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) + " at address=" + address + ", exception=" + t); return false; } /** * Verify <code>hbase:meta</code> is deployed and accessible. * @param timeout How long to wait on zk for meta address (passed through to * the internal call to {@link #waitForMetaServerConnection(long)}. * @return True if the <code>hbase:meta</code> location is healthy. * @throws IOException * @throws InterruptedException */ public boolean verifyMetaRegionLocation(final long timeout) throws InterruptedException, IOException { AdminService.BlockingInterface service = null; try { service = waitForMetaServerConnection(timeout); } catch (NotAllMetaRegionsOnlineException e) { // Pass } catch (ServerNotRunningYetException e) { // Pass -- remote server is not up so can't be carrying root } catch (UnknownHostException e) { // Pass -- server name doesn't resolve so it can't be assigned anything. } catch (RegionServerStoppedException e) { // Pass -- server name sends us to a server that is dying or already dead. } return (service == null) ? false : verifyRegionLocation(service, this.metaRegionTracker.getMetaRegionLocation(), META_REGION_NAME); } public HConnection getConnection() { return this.connection; } @Override public String toString() { return "CatalogTracker{" + "connection=" + connection + ", zookeeper=" + zookeeper + ", metaRegionTracker=" + metaRegionTracker + ", stopped=" + stopped + '}'; } }