HBase region server write-ahead log (HLog.java)
/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Syncable;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Metadata;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.compress.DefaultCodec;
/**
 * HLog stores all the edits to the HStore.
 *
 * It performs logfile-rolling, so external callers are not aware that the
 * underlying file is being rolled.
 *
 * <p>A single HLog is used by several HRegions simultaneously.
 *
 * <p>Each HRegion is identified by a unique integer id. HRegions do not need
 * to declare themselves before using the HLog; they simply include their
 * HRegion-id in the <code>append</code> or <code>completeCacheFlush</code>
 * calls.
 *
 * <p>An HLog consists of multiple on-disk files, which have a chronological
 * order. As data is flushed to other (better) on-disk structures, the log
 * becomes obsolete. We can destroy all the log messages for a given
 * HRegion-id up to the most-recent CACHEFLUSH message from that HRegion.
 *
 * <p>It's only practical to delete entire files. Thus, we delete an entire
 * on-disk file F when all of the messages in F have a log-sequence-id that's
 * older (smaller) than the most-recent CACHEFLUSH message for every HRegion
 * that has a message in F.
 *
 * <p>Synchronized methods can never execute in parallel. However, between the
 * start of a cache flush and its completion point, appends are allowed but
 * log rolling is not. To prevent log rolling from taking place during this
 * period, a separate reentrant lock is used.
 */
public class HLog implements HConstants, Syncable {
  static final Log LOG = LogFactory.getLog(HLog.class);
  private static final String HLOG_DATFILE = "hlog.dat.";
  static final byte[] METAFAMILY = Bytes.toBytes("METAFAMILY");
  static final byte[] METAROW = Bytes.toBytes("METAROW");
  private final FileSystem fs;
  private final Path dir;
  private final Configuration conf;
  private final LogRollListener listener;
  private final long optionalFlushInterval;
  private final long blocksize;
  private final int flushlogentries;
  private final AtomicInteger unflushedEntries = new AtomicInteger(0);
  private volatile long lastLogFlushTime;
  private final boolean append;
  private final Method syncfs;
  private final static Object[] NO_ARGS = new Object[] {};

  // Current log file.
  SequenceFile.Writer writer;

  // Map of all log files but the current one.
  final SortedMap<Long, Path> outputfiles =
    Collections.synchronizedSortedMap(new TreeMap<Long, Path>());

  // Map of regions to the first sequence/edit id in their memstore.
  private final ConcurrentSkipListMap<byte[], Long> lastSeqWritten =
    new ConcurrentSkipListMap<byte[], Long>(Bytes.BYTES_COMPARATOR);

  private volatile boolean closed = false;
  private final AtomicLong logSeqNum = new AtomicLong(0);
  private volatile long filenum = -1;
  private final AtomicInteger numEntries = new AtomicInteger(0);

  // Size of edits written so far. Used when deciding when to rotate logs.
  private final AtomicLong editsSize = new AtomicLong(0);

  // If bigger than this size, roll the log.
  private final long logrollsize;

  // This lock prevents starting a log roll during a cache flush.
  // synchronized is insufficient because a cache flush spans two method calls.
  private final Lock cacheFlushLock = new ReentrantLock();

  // We synchronize on updateLock to prevent updates and to prevent a log roll
  // during an update.
  private final Object updateLock = new Object();

  private final boolean enabled;

  /*
   * If more than this many logs, force a flush of the oldest region so its
   * oldest edit goes to disk. If there are too many logs and we crash, replay
   * will take forever. Keeps the number of logs tidy.
   */
  private final int maxLogs;

  static byte[] COMPLETE_CACHE_FLUSH;
  static {
    try {
      COMPLETE_CACHE_FLUSH = "HBASE::CACHEFLUSH".getBytes(UTF8_ENCODING);
    } catch (UnsupportedEncodingException e) {
      assert (false);
    }
  }

  // For measuring latency of writes
  private static volatile long writeOps;
  private static volatile long writeTime;
  // For measuring latency of syncs
  private static volatile long syncOps;
  private static volatile long syncTime;

  public static long getWriteOps() {
    long ret = writeOps;
    writeOps = 0;
    return ret;
  }

  public static long getWriteTime() {
    long ret = writeTime;
    writeTime = 0;
    return ret;
  }

  public static long getSyncOps() {
    long ret = syncOps;
    syncOps = 0;
    return ret;
  }

  public static long getSyncTime() {
    long ret = syncTime;
    syncTime = 0;
    return ret;
  }
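  // --------------------------------------------------------------------------
  // Illustrative sketch (not part of the original class): the read-then-reset
  // getters above are not atomic -- a concurrent writer can bump the counter
  // between the read and the reset, losing an increment. If atomicity
  // mattered, an AtomicLong with getAndSet(0) would close the gap. The
  // MetricsSketch class below is hypothetical, for illustration only.
  // --------------------------------------------------------------------------
  static final class MetricsSketch {
    private static final AtomicLong writeOpsAtomic = new AtomicLong(0);

    static void recordWrite() {
      writeOpsAtomic.incrementAndGet();
    }

    /** Returns the count accumulated since the last call, atomically. */
    static long getAndResetWriteOps() {
      return writeOpsAtomic.getAndSet(0);
    }
  }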
  /**
   * Create an edit log at the given <code>dir</code> location.
   *
   * You should never have to load an existing log. If there is a log at
   * startup, it should have already been processed and deleted by the time
   * the HLog object is started up.
   *
   * @param fs
   * @param dir
   * @param conf
   * @param listener
   * @throws IOException
   */
  public HLog(final FileSystem fs, final Path dir,
      final HBaseConfiguration conf, final LogRollListener listener)
      throws IOException {
    super();
    this.fs = fs;
    this.dir = dir;
    this.conf = conf;
    this.listener = listener;
    this.flushlogentries =
      conf.getInt("hbase.regionserver.flushlogentries", 100);
    this.blocksize = conf.getLong("hbase.regionserver.hlog.blocksize",
      this.fs.getDefaultBlockSize());
    // Roll at 95% of block size.
    float multi = conf.getFloat("hbase.regionserver.logroll.multiplier", 0.95f);
    this.logrollsize = (long) (this.blocksize * multi);
    this.optionalFlushInterval =
      conf.getLong("hbase.regionserver.optionallogflushinterval", 10 * 1000);
    this.lastLogFlushTime = System.currentTimeMillis();
    if (fs.exists(dir)) {
      throw new IOException("Target HLog directory already exists: " + dir);
    }
    fs.mkdirs(dir);
    this.maxLogs = conf.getInt("hbase.regionserver.maxlogs", 32);
    this.enabled = conf.getBoolean("hbase.regionserver.hlog.enabled", true);
    LOG.info("HLog configuration: blocksize=" + this.blocksize +
      ", rollsize=" + this.logrollsize + ", enabled=" + this.enabled +
      ", flushlogentries=" + this.flushlogentries +
      ", optionallogflushinterval=" + this.optionalFlushInterval + "ms");
    rollWriter();
    // Test if syncfs is available.
    this.append = isAppend(conf);
    Method m = null;
    if (this.append) {
      try {
        m = this.writer.getClass().getMethod("syncFs", new Class<?>[] {});
        LOG.debug("Using syncFs--hadoop-4379");
      } catch (SecurityException e) {
        throw new IOException("Failed test for syncfs", e);
      } catch (NoSuchMethodException e) {
        // This can happen
        LOG.info("syncFs--hadoop-4379 not available");
      }
    }
    this.syncfs = m;
  }

  /**
   * @return Current state of the monotonically increasing file id.
   */
  public long getFilenum() {
    return this.filenum;
  }

  /**
   * Get the compression type for the hlog files.
   * @param c Configuration to use.
   * @return the kind of compression to use
   */
  static CompressionType getCompressionType(final Configuration c) {
    // Compression makes no sense for the commit log. Always return NONE.
    return CompressionType.NONE;
  }

  /**
   * Called by HRegionServer when it opens a new region, to ensure that log
   * sequence numbers are always greater than the latest sequence number of
   * the region being brought on-line.
   *
   * @param newvalue We'll set the log edit/sequence number to this value if
   * it is greater than the current value.
   */
  void setSequenceNumber(final long newvalue) {
    for (long id = this.logSeqNum.get();
        id < newvalue && !this.logSeqNum.compareAndSet(id, newvalue);
        id = this.logSeqNum.get()) {
      // This could spin on occasion, but better the occasional spin than
      // locking every increment of the sequence number.
      LOG.debug("Change sequence number from " + logSeqNum + " to " + newvalue);
    }
  }

  /**
   * @return log sequence number
   */
  public long getSequenceNumber() {
    return logSeqNum.get();
  }
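  // --------------------------------------------------------------------------
  // Illustrative sketch (not part of the original class): the CAS loop in
  // setSequenceNumber above implements a lock-free "monotonic maximum". The
  // hypothetical helper below shows the same pattern in isolation: the value
  // only ever moves forward, and concurrent callers never block each other.
  // --------------------------------------------------------------------------
  static final class MonotonicMaxSketch {
    private final AtomicLong value = new AtomicLong(0);

    /** Raise the value to at least {@code candidate}; never lowers it. */
    void advanceTo(final long candidate) {
      for (long cur = value.get();
          cur < candidate && !value.compareAndSet(cur, candidate);
          cur = value.get()) {
        // Another thread moved the value; re-read and retry. The loop exits
        // as soon as the stored value is >= candidate.
      }
    }

    long get() {
      return value.get();
    }
  }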
  /**
   * Roll the log writer. That is, start writing log messages to a new file.
   *
   * Because a log cannot be rolled during a cache flush, and a cache flush
   * spans two method calls, a special lock needs to be obtained so that a
   * cache flush cannot start when the log is being rolled and the log cannot
   * be rolled during a cache flush.
   *
   * <p>Note that this method cannot be synchronized because it is possible
   * that startCacheFlush runs, obtaining the cacheFlushLock, then this method
   * starts, obtaining the lock on this but blocking on the cacheFlushLock;
   * completeCacheFlush could then be called, which would wait for the lock on
   * this and consequently never release the cacheFlushLock.
   *
   * @return If lots of logs, flush the returned regions so next time through
   * we can clean logs. Returns null if nothing to flush.
   * @throws FailedLogCloseException
   * @throws IOException
   */
  public byte[][] rollWriter() throws FailedLogCloseException, IOException {
    // Return if nothing to flush.
    if (this.writer != null && this.numEntries.get() <= 0) {
      return null;
    }
    byte[][] regionsToFlush = null;
    this.cacheFlushLock.lock();
    try {
      if (closed) {
        return regionsToFlush;
      }
      synchronized (updateLock) {
        // Clean up current writer.
        Path oldFile = cleanupCurrentWriter(this.filenum);
        this.filenum = System.currentTimeMillis();
        Path newPath = computeFilename(this.filenum);
        this.writer = createWriter(newPath);
        LOG.info((oldFile != null ?
          "Roll " + FSUtils.getPath(oldFile) + ", entries=" +
          this.numEntries.get() + ", calcsize=" + this.editsSize.get() +
          ", filesize=" + this.fs.getFileStatus(oldFile).getLen() + ". " : "") +
          "New hlog " + FSUtils.getPath(newPath));
        // Can we delete any of the old log files?
        if (this.outputfiles.size() > 0) {
          if (this.lastSeqWritten.size() <= 0) {
            LOG.debug("Last sequence written is empty. Deleting all old hlogs");
            // No new writes have come in since all regions were flushed (and
            // removed from the lastSeqWritten map), so we can remove all but
            // the currently open log file.
            for (Map.Entry<Long, Path> e : this.outputfiles.entrySet()) {
              deleteLogFile(e.getValue(), e.getKey());
            }
            this.outputfiles.clear();
          } else {
            regionsToFlush = cleanOldLogs();
          }
        }
        this.numEntries.set(0);
        this.editsSize.set(0);
        updateLock.notifyAll();
      }
    } finally {
      this.cacheFlushLock.unlock();
    }
    return regionsToFlush;
  }

  protected SequenceFile.Writer createWriter(Path path) throws IOException {
    return createWriter(path, HLogKey.class, KeyValue.class);
  }

  protected SequenceFile.Writer createWriter(Path path,
      Class<? extends HLogKey> keyClass, Class<? extends KeyValue> valueClass)
      throws IOException {
    return SequenceFile.createWriter(this.fs, this.conf, path, keyClass,
      valueClass, fs.getConf().getInt("io.file.buffer.size", 4096),
      fs.getDefaultReplication(), this.blocksize,
      SequenceFile.CompressionType.NONE, new DefaultCodec(), null,
      new Metadata());
  }
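  // --------------------------------------------------------------------------
  // Illustrative sketch (not part of the original class): the rollWriter
  // javadoc above describes why a plain synchronized method would deadlock.
  // The hypothetical FlushRollSketch below shows the protocol in isolation:
  // a ReentrantLock is held across the two halves of a flush, so a roll that
  // arrives in between blocks on lock() instead of interleaving.
  // --------------------------------------------------------------------------
  static final class FlushRollSketch {
    private final Lock flushLock = new ReentrantLock();

    void startFlush() {
      flushLock.lock();      // held until completeFlush() or abortFlush()
    }

    void completeFlush() {
      flushLock.unlock();
    }

    void roll() {
      flushLock.lock();      // waits out any in-progress flush
      try {
        // ... swap writers here; no flush can straddle this point ...
      } finally {
        flushLock.unlock();
      }
    }
  }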
  /*
   * Clean up old commit logs.
   * @return If lots of logs, flush the returned region so next time through
   * we can clean logs. Returns null if nothing to flush.
   * @throws IOException
   */
  private byte[][] cleanOldLogs() throws IOException {
    Long oldestOutstandingSeqNum = getOldestOutstandingSeqNum();
    // Get the set of all log files whose final ID is older than or equal to
    // the oldest pending region operation.
    TreeSet<Long> sequenceNumbers = new TreeSet<Long>(this.outputfiles
      .headMap(Long.valueOf(oldestOutstandingSeqNum.longValue() + 1L))
      .keySet());
    // Now remove old log files (if any).
    int logsToRemove = sequenceNumbers.size();
    if (logsToRemove > 0) {
      if (LOG.isDebugEnabled()) {
        // Find the associated region; helps debugging.
        byte[] oldestRegion = getOldestRegion(oldestOutstandingSeqNum);
        LOG.debug("Found " + logsToRemove + " hlogs to remove out of total " +
          this.outputfiles.size() + "; oldest outstanding seqnum is " +
          oldestOutstandingSeqNum + " from region " +
          Bytes.toString(oldestRegion));
      }
      for (Long seq : sequenceNumbers) {
        deleteLogFile(this.outputfiles.remove(seq), seq);
      }
    }
    // If too many log files, figure out which regions we need to flush.
    byte[][] regions = null;
    int logCount = this.outputfiles.size() - logsToRemove;
    if (logCount > this.maxLogs && this.outputfiles != null &&
        this.outputfiles.size() > 0) {
      regions = findMemstoresWithEditsOlderThan(this.outputfiles.firstKey(),
        this.lastSeqWritten);
      StringBuilder sb = new StringBuilder();
      for (int i = 0; i < regions.length; i++) {
        if (i > 0) sb.append(", ");
        sb.append(Bytes.toStringBinary(regions[i]));
      }
      LOG.info("Too many hlogs: logs=" + logCount + ", maxlogs=" +
        this.maxLogs + "; forcing flush of " + regions.length +
        " region(s): " + sb.toString());
    }
    return regions;
  }

  /**
   * Return regions (memstores) that have edits that are less than the passed
   * <code>oldestWALseqid</code>.
   * @param oldestWALseqid
   * @param regionsToSeqids
   * @return All regions whose seqid is < <code>oldestWALseqid</code> (not
   * necessarily in order). Null if no regions found.
   */
  static byte[][] findMemstoresWithEditsOlderThan(final long oldestWALseqid,
      final Map<byte[], Long> regionsToSeqids) {
    // This method is static so it can be unit tested more easily.
    List<byte[]> regions = null;
    for (Map.Entry<byte[], Long> e : regionsToSeqids.entrySet()) {
      if (e.getValue().longValue() < oldestWALseqid) {
        if (regions == null) regions = new ArrayList<byte[]>();
        regions.add(e.getKey());
      }
    }
    return regions == null ? null :
      regions.toArray(new byte[][] { HConstants.EMPTY_BYTE_ARRAY });
  }

  /*
   * @return Logs older than this id are safe to remove.
   */
  private Long getOldestOutstandingSeqNum() {
    return Collections.min(this.lastSeqWritten.values());
  }

  private byte[] getOldestRegion(final Long oldestOutstandingSeqNum) {
    byte[] oldestRegion = null;
    for (Map.Entry<byte[], Long> e : this.lastSeqWritten.entrySet()) {
      if (e.getValue().longValue() == oldestOutstandingSeqNum.longValue()) {
        oldestRegion = e.getKey();
        break;
      }
    }
    return oldestRegion;
  }

  /*
   * Cleans up the current writer, closing it and adding it to outputfiles.
   * Presumes we're operating inside an updateLock scope.
   * @return Path to the now-closed writer's file, or null if none.
   * @throws IOException
   */
  private Path cleanupCurrentWriter(final long currentfilenum)
      throws IOException {
    Path oldFile = null;
    if (this.writer != null) {
      // Close the current writer; rollWriter will create a new one.
      try {
        this.writer.close();
      } catch (IOException e) {
        // Failed close of log file. Means we're losing edits. For now, shut
        // ourselves down to minimize loss. The alternative is to try and
        // keep going. See HBASE-930.
        FailedLogCloseException flce =
          new FailedLogCloseException("#" + currentfilenum);
        flce.initCause(e);
        throw flce;
      }
      if (currentfilenum >= 0) {
        oldFile = computeFilename(currentfilenum);
        this.outputfiles.put(Long.valueOf(this.logSeqNum.get() - 1), oldFile);
      }
    }
    return oldFile;
  }

  private void deleteLogFile(final Path p, final Long seqno)
      throws IOException {
    LOG.info("removing old hlog file " + FSUtils.getPath(p) +
      " whose highest sequence/edit id is " + seqno);
    this.fs.delete(p, true);
  }
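  // --------------------------------------------------------------------------
  // Illustrative sketch (not part of the original class): cleanOldLogs above
  // relies on SortedMap.headMap to find removable files. Given logs keyed by
  // the highest sequence id they contain, it selects every key strictly below
  // (oldestOutstandingSeqNum + 1). The hypothetical, self-contained helper
  // below isolates that selection step.
  // --------------------------------------------------------------------------
  static final class DeletableLogsSketch {
    /**
     * @param logsByHighestSeq map of highest-seq-id-in-file to file path
     * @param oldestOutstandingSeqNum oldest edit still only in a memstore
     * @return keys of the files the cleaner would treat as removable
     */
    static TreeSet<Long> deletable(
        final SortedMap<Long, Path> logsByHighestSeq,
        final long oldestOutstandingSeqNum) {
      // headMap's bound is exclusive, so the +1 makes the result include keys
      // up to and including oldestOutstandingSeqNum, matching the source
      // comment ("older than or equal to").
      return new TreeSet<Long>(logsByHighestSeq
        .headMap(Long.valueOf(oldestOutstandingSeqNum + 1L)).keySet());
    }
  }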
  /**
   * This is a convenience method that computes a new filename with a given
   * file-number.
   * @param fn
   * @return Path
   */
  public Path computeFilename(final long fn) {
    if (fn < 0) return null;
    return new Path(dir, HLOG_DATFILE + fn);
  }

  /**
   * Shut down the log and delete the log directory.
   *
   * @throws IOException
   */
  public void closeAndDelete() throws IOException {
    close();
    fs.delete(dir, true);
  }

  /**
   * Shut down the log.
   *
   * @throws IOException
   */
  public void close() throws IOException {
    cacheFlushLock.lock();
    try {
      synchronized (updateLock) {
        this.closed = true;
        if (LOG.isDebugEnabled()) {
          LOG.debug("closing hlog writer in " + this.dir.toString());
        }
        this.writer.close();
        updateLock.notifyAll();
      }
    } finally {
      cacheFlushLock.unlock();
    }
  }

  /**
   * Append an entry to the log.
   *
   * @param regionInfo
   * @param logEdit
   * @param now Time of this edit write.
   * @throws IOException
   */
  public void append(HRegionInfo regionInfo, KeyValue logEdit, final long now)
      throws IOException {
    byte[] regionName = regionInfo.getRegionName();
    byte[] tableName = regionInfo.getTableDesc().getName();
    this.append(regionInfo, makeKey(regionName, tableName, -1, now), logEdit);
  }

  /**
   * @param regionName
   * @param tableName
   * @param seqnum
   * @param now
   * @return New log key.
   */
  protected HLogKey makeKey(byte[] regionName, byte[] tableName, long seqnum,
      long now) {
    return new HLogKey(regionName, tableName, seqnum, now);
  }

  /**
   * Append an entry to the log.
   *
   * @param regionInfo
   * @param logKey
   * @param logEdit
   * @throws IOException
   */
  public void append(HRegionInfo regionInfo, HLogKey logKey, KeyValue logEdit)
      throws IOException {
    if (this.closed) {
      throw new IOException("Cannot append; log is closed");
    }
    byte[] regionName = regionInfo.getRegionName();
    synchronized (updateLock) {
      long seqNum = obtainSeqNum();
      logKey.setLogSeqNum(seqNum);
      // The 'lastSeqWritten' map holds the sequence number of the oldest
      // write for each region (i.e. the first edit added to the particular
      // memstore). When the cache is flushed, the entry for the region being
      // flushed is removed if the sequence number of the flush is greater
      // than or equal to the value in lastSeqWritten.
      this.lastSeqWritten.putIfAbsent(regionName, Long.valueOf(seqNum));
      boolean sync = regionInfo.isMetaRegion() || regionInfo.isRootRegion();
      doWrite(logKey, logEdit, sync);
      this.numEntries.incrementAndGet();
      updateLock.notifyAll();
    }
    if (this.editsSize.get() > this.logrollsize) {
      if (listener != null) {
        listener.logRollRequested();
      }
    }
  }
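  // --------------------------------------------------------------------------
  // Illustrative sketch (not part of the original class): the putIfAbsent
  // call above is what makes lastSeqWritten track the OLDEST unflushed edit
  // per region -- only the first append after a flush wins; later appends
  // leave the entry alone. A hypothetical, minimal rendering of the same
  // bookkeeping:
  // --------------------------------------------------------------------------
  static final class OldestEditTrackerSketch {
    private final ConcurrentSkipListMap<byte[], Long> oldest =
      new ConcurrentSkipListMap<byte[], Long>(Bytes.BYTES_COMPARATOR);

    void onAppend(final byte[] region, final long seqNum) {
      // No-op if an older unflushed edit is already recorded for the region.
      oldest.putIfAbsent(region, Long.valueOf(seqNum));
    }

    void onFlushComplete(final byte[] region, final long flushSeqNum) {
      Long first = oldest.get(region);
      // Drop the entry only if the flush covered everything recorded so far.
      if (first != null && flushSeqNum >= first.longValue()) {
        oldest.remove(region);
      }
    }
  }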
  /**
   * Append a set of edits to the log. Log edits are keyed by regionName,
   * rowname, and log-sequence-id.
   *
   * Later, if we sort by these keys, we obtain all the relevant edits for a
   * given key-range of the HRegion (TODO). Any edits that do not have a
   * matching COMPLETE_CACHEFLUSH message can be discarded.
   *
   * <p>Logs cannot be restarted once closed, or once the HLog process dies.
   * Each time the HLog starts, it must create a new log. This means that
   * other systems should process the log appropriately upon each startup
   * (and prior to initializing HLog).
   *
   * synchronized prevents appends during the completion of a cache flush or
   * for the duration of a log roll.
   *
   * @param regionName
   * @param tableName
   * @param edits
   * @param sync
   * @param now
   * @throws IOException
   */
  public void append(byte[] regionName, byte[] tableName,
      List<KeyValue> edits, boolean sync, final long now) throws IOException {
    if (this.closed) {
      throw new IOException("Cannot append; log is closed");
    }
    long[] seqNum = obtainSeqNum(edits.size());
    synchronized (this.updateLock) {
      // The 'lastSeqWritten' map holds the sequence number of the oldest
      // write for each region (i.e. the first edit added to the particular
      // memstore). When the cache is flushed, the entry for the region being
      // flushed is removed if the sequence number of the flush is greater
      // than or equal to the value in lastSeqWritten.
      this.lastSeqWritten.putIfAbsent(regionName, Long.valueOf(seqNum[0]));
      int counter = 0;
      for (KeyValue kv : edits) {
        HLogKey logKey = makeKey(regionName, tableName, seqNum[counter++], now);
        doWrite(logKey, kv, sync);
        this.numEntries.incrementAndGet();
      }
      updateLock.notifyAll();
    }
    if (this.editsSize.get() > this.logrollsize) {
      requestLogRoll();
    }
  }

  public void sync() throws IOException {
    lastLogFlushTime = System.currentTimeMillis();
    if (this.append && syncfs != null) {
      try {
        this.syncfs.invoke(this.writer, NO_ARGS);
      } catch (Exception e) {
        throw new IOException("Reflection", e);
      }
    } else {
      this.writer.sync();
    }
    this.unflushedEntries.set(0);
    syncTime += System.currentTimeMillis() - lastLogFlushTime;
    syncOps++;
  }

  void optionalSync() {
    if (!this.closed) {
      long now = System.currentTimeMillis();
      synchronized (updateLock) {
        if (((now - this.optionalFlushInterval) > this.lastLogFlushTime) &&
            this.unflushedEntries.get() > 0) {
          try {
            sync();
          } catch (IOException e) {
            LOG.error("Error flushing hlog", e);
          }
        }
      }
      long took = System.currentTimeMillis() - now;
      if (took > 1000) {
        LOG.warn(Thread.currentThread().getName() + " took " + took +
          "ms optional sync'ing hlog; editcount=" + this.numEntries.get());
      }
    }
  }

  private void requestLogRoll() {
    if (this.listener != null) {
      this.listener.logRollRequested();
    }
  }

  private void doWrite(HLogKey logKey, KeyValue logEdit, boolean sync)
      throws IOException {
    if (!this.enabled) {
      return;
    }
    try {
      this.editsSize.addAndGet(logKey.heapSize() + logEdit.heapSize());
      long now = System.currentTimeMillis();
      this.writer.append(logKey, logEdit);
      long took = System.currentTimeMillis() - now;
      writeTime += took;
      writeOps++;
      if (took > 1000) {
        LOG.warn(Thread.currentThread().getName() + " took " + took +
          "ms appending an edit to hlog; editcount=" + this.numEntries.get());
      }
      if (sync || this.unflushedEntries.incrementAndGet() >= flushlogentries) {
        sync();
      }
    } catch (IOException e) {
      LOG.fatal("Could not append. Requesting close of hlog", e);
      requestLogRoll();
      throw e;
    }
  }

  /** @return How many items have been added to the log */
  int getNumEntries() {
    return numEntries.get();
  }

  /**
   * Obtain a log sequence number.
   */
  private long obtainSeqNum() {
    return this.logSeqNum.incrementAndGet();
  }

  /** @return the number of log files in use */
  int getNumLogFiles() {
    return outputfiles.size();
  }

  /*
   * Obtain a specified number of sequence numbers.
   *
   * @param num number of sequence numbers to obtain
   * @return array of sequence numbers
   */
  private long[] obtainSeqNum(int num) {
    long[] results = new long[num];
    for (int i = 0; i < num; i++) {
      results[i] = this.logSeqNum.incrementAndGet();
    }
    return results;
  }
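  // --------------------------------------------------------------------------
  // Illustrative sketch (not part of the original class): sync() above calls
  // SequenceFile.Writer#syncFs reflectively because the method only exists on
  // Hadoop builds carrying the HADOOP-4379 append patch. The hypothetical
  // helper below isolates the probe-once, invoke-cached-Method pattern.
  // --------------------------------------------------------------------------
  static final class ReflectiveSyncSketch {
    private final Method syncMethod;   // null when syncFs is unavailable

    ReflectiveSyncSketch(final SequenceFile.Writer w) {
      Method m = null;
      try {
        // Probe once, up front; invoking a cached Method is cheap.
        m = w.getClass().getMethod("syncFs", new Class<?>[] {});
      } catch (NoSuchMethodException e) {
        // Older Hadoop: fall back to plain sync().
      }
      this.syncMethod = m;
    }

    void sync(final SequenceFile.Writer w) throws IOException {
      if (syncMethod != null) {
        try {
          syncMethod.invoke(w, new Object[] {});
        } catch (Exception e) {
          throw new IOException("Reflection", e);
        }
      } else {
        w.sync();
      }
    }
  }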
  /**
   * By acquiring a log sequence ID, we can allow log messages to continue
   * while we flush the cache.
   *
   * Acquire a lock so that we do not roll the log between the start and
   * completion of a cache-flush. Otherwise the log-seq-id for the flush will
   * not appear in the correct logfile.
   *
   * @return sequence ID to pass
   * {@link #completeCacheFlush(byte[], byte[], long)}
   * @see #completeCacheFlush(byte[], byte[], long)
   * @see #abortCacheFlush()
   */
  long startCacheFlush() {
    this.cacheFlushLock.lock();
    return obtainSeqNum();
  }

  /**
   * Complete the cache flush.
   *
   * Protected by cacheFlushLock.
   *
   * @param regionName
   * @param tableName
   * @param logSeqId
   * @throws IOException
   */
  void completeCacheFlush(final byte[] regionName, final byte[] tableName,
      final long logSeqId) throws IOException {
    try {
      if (this.closed) {
        return;
      }
      synchronized (updateLock) {
        long now = System.currentTimeMillis();
        this.writer.append(makeKey(regionName, tableName, logSeqId, now),
          completeCacheFlushLogEdit());
        writeTime += System.currentTimeMillis() - now;
        writeOps++;
        this.numEntries.incrementAndGet();
        Long seq = this.lastSeqWritten.get(regionName);
        if (seq != null && logSeqId >= seq.longValue()) {
          this.lastSeqWritten.remove(regionName);
        }
        updateLock.notifyAll();
      }
    } finally {
      this.cacheFlushLock.unlock();
    }
  }

  private KeyValue completeCacheFlushLogEdit() {
    return new KeyValue(METAROW, METAFAMILY, null, System.currentTimeMillis(),
      COMPLETE_CACHE_FLUSH);
  }

  /**
   * Abort a cache flush. Call if the flush fails. Note that the only recovery
   * for an aborted flush currently is a restart of the regionserver, so the
   * snapshot content dropped by the failure gets restored to the memstore.
   */
  void abortCacheFlush() {
    this.cacheFlushLock.unlock();
  }

  /**
   * @param family
   * @return true if the column is a meta column
   */
  public static boolean isMetaFamily(byte[] family) {
    return Bytes.equals(METAFAMILY, family);
  }

  /**
   * Split up a bunch of regionserver commit log files that are no longer
   * being written to, into new files, one per region, for the region to
   * replay on startup. Delete the old log files when finished.
   *
   * @param rootDir qualified root directory of the HBase instance
   * @param srcDir Directory of log files to split: e.g.
   * <code>${ROOTDIR}/log_HOST_PORT</code>
   * @param fs FileSystem
   * @param conf HBaseConfiguration
   * @throws IOException
   */
  public static List<Path> splitLog(final Path rootDir, final Path srcDir,
      final FileSystem fs, final HBaseConfiguration conf) throws IOException {
    long millis = System.currentTimeMillis();
    List<Path> splits = null;
    if (!fs.exists(srcDir)) {
      // Nothing to do.
      return splits;
    }
    FileStatus[] logfiles = fs.listStatus(srcDir);
    if (logfiles == null || logfiles.length == 0) {
      // Nothing to do.
      return splits;
    }
    LOG.info("Splitting " + logfiles.length + " hlog(s) in " +
      srcDir.toString());
    splits = splitLog(rootDir, logfiles, fs, conf);
    try {
      fs.delete(srcDir, true);
    } catch (IOException e) {
      e = RemoteExceptionHandler.checkIOException(e);
      IOException io = new IOException("Cannot delete: " + srcDir);
      io.initCause(e);
      throw io;
    }
    long endMillis = System.currentTimeMillis();
    LOG.info("hlog file splitting completed in " + (endMillis - millis) +
      " millis for " + srcDir.toString());
    return splits;
  }

  // Private immutable datastructure to hold a Writer and its Path.
  private final static class WriterAndPath {
    final Path p;
    final SequenceFile.Writer w;

    WriterAndPath(final Path p, final SequenceFile.Writer w) {
      this.p = p;
      this.w = w;
    }
  }

  @SuppressWarnings("unchecked")
  static Class<? extends HLogKey> getKeyClass(HBaseConfiguration conf) {
    return (Class<? extends HLogKey>) conf.getClass(
      "hbase.regionserver.hlog.keyclass", HLogKey.class);
  }
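  // --------------------------------------------------------------------------
  // Illustrative sketch (not part of the original class): log splitting below
  // reads mixed-region WAL files and buckets entries per region before
  // rewriting them. The hypothetical helper shows the bucketing step on its
  // own: a TreeMap ordered by Bytes.BYTES_COMPARATOR, one queue per region.
  // --------------------------------------------------------------------------
  static final class RegionBucketsSketch {
    final Map<byte[], LinkedList<HLogEntry>> buckets =
      new TreeMap<byte[], LinkedList<HLogEntry>>(Bytes.BYTES_COMPARATOR);

    void add(final HLogKey key, final KeyValue edit) {
      byte[] regionName = key.getRegionName();
      LinkedList<HLogEntry> queue = buckets.get(regionName);
      if (queue == null) {
        queue = new LinkedList<HLogEntry>();
        buckets.put(regionName, queue);
      }
      // push() adds at the head, so replay must walk the list tail-first to
      // see the oldest entry first (as the splitter's writer threads do).
      queue.push(new HLogEntry(edit, key));
    }
  }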
  static HLogKey newKey(HBaseConfiguration conf) throws IOException {
    Class<? extends HLogKey> keyClass = getKeyClass(conf);
    try {
      return keyClass.newInstance();
    } catch (InstantiationException e) {
      throw new IOException("cannot create hlog key");
    } catch (IllegalAccessException e) {
      throw new IOException("cannot create hlog key");
    }
  }

  /*
   * @param rootDir
   * @param logfiles
   * @param fs
   * @param conf
   * @throws IOException
   * @return List of splits made.
   */
  private static List<Path> splitLog(final Path rootDir,
      final FileStatus[] logfiles, final FileSystem fs,
      final HBaseConfiguration conf) throws IOException {
    final Map<byte[], WriterAndPath> logWriters = Collections
      .synchronizedMap(new TreeMap<byte[], WriterAndPath>(
        Bytes.BYTES_COMPARATOR));
    List<Path> splits = null;
    // Number of threads to use when splitting to rewrite the logs. More
    // means faster but bigger memory consumption.
    int logWriterThreads =
      conf.getInt("hbase.regionserver.hlog.splitlog.writer.threads", 3);
    // Number of logs to read concurrently when splitting. More means faster
    // but bigger memory consumption.
    int concurrentLogReads =
      conf.getInt("hbase.regionserver.hlog.splitlog.reader.threads", 3);
    // Is append supported?
    boolean append = isAppend(conf);
    try {
      int maxSteps = Double.valueOf(Math.ceil((logfiles.length * 1.0) /
        concurrentLogReads)).intValue();
      for (int step = 0; step < maxSteps; step++) {
        final Map<byte[], LinkedList<HLogEntry>> logEntries =
          new TreeMap<byte[], LinkedList<HLogEntry>>(Bytes.BYTES_COMPARATOR);
        // Stop at logfiles.length when it's the last step.
        int endIndex = step == maxSteps - 1 ? logfiles.length :
          step * concurrentLogReads + concurrentLogReads;
        for (int i = (step * concurrentLogReads); i < endIndex; i++) {
          // Check for a possibly empty file. With appends, currently Hadoop
          // reports a zero length even if the file has been sync'd. Revisit
          // if HADOOP-4751 is committed.
          long length = logfiles[i].getLen();
          if (LOG.isDebugEnabled()) {
            LOG.debug("Splitting hlog " + (i + 1) + " of " + logfiles.length +
              ": " + logfiles[i].getPath() + ", length=" +
              logfiles[i].getLen());
          }
          recoverLog(fs, logfiles[i].getPath(), append);
          SequenceFile.Reader in = null;
          int count = 0;
          try {
            in = new SequenceFile.Reader(fs, logfiles[i].getPath(), conf);
            try {
              HLogKey key = newKey(conf);
              KeyValue val = new KeyValue();
              while (in.next(key, val)) {
                byte[] regionName = key.getRegionName();
                LinkedList<HLogEntry> queue = logEntries.get(regionName);
                if (queue == null) {
                  queue = new LinkedList<HLogEntry>();
                  LOG.debug("Adding queue for " +
                    Bytes.toStringBinary(regionName));
                  logEntries.put(regionName, queue);
                }
                HLogEntry hle = new HLogEntry(val, key);
                queue.push(hle);
                count++;
                // Make the key and value new each time; otherwise the same
                // instance is used over and over.
                key = newKey(conf);
                val = new KeyValue();
              }
              LOG.debug("Pushed=" + count + " entries from " +
                logfiles[i].getPath());
            } catch (IOException e) {
              LOG.debug("IOE Pushed=" + count + " entries from " +
                logfiles[i].getPath());
              e = RemoteExceptionHandler.checkIOException(e);
              if (!(e instanceof EOFException)) {
                LOG.warn("Exception processing " + logfiles[i].getPath() +
                  " -- continuing. Possible DATA LOSS!", e);
              }
            }
          } catch (IOException e) {
            if (length <= 0) {
              LOG.warn("Empty hlog, continuing: " + logfiles[i] + " count=" +
                count, e);
              continue;
            }
            throw e;
          } finally {
            try {
              if (in != null) {
                in.close();
              }
            } catch (IOException e) {
              LOG.warn("Close in finally threw exception -- continuing", e);
            }
            // Delete the input file now so we do not replay its edits. We
            // could have gotten here because of an exception; if so, there is
            // probably nothing we can do about it. Replaying it could work,
            // but we could also be stuck replaying forever. Just continue,
            // though we may have lost some edits.
            fs.delete(logfiles[i].getPath(), true);
          }
        }
        ExecutorService threadPool =
          Executors.newFixedThreadPool(logWriterThreads);
        for (final byte[] key : logEntries.keySet()) {
          Thread thread = new Thread(Bytes.toStringBinary(key)) {
            @Override
            public void run() {
              LinkedList<HLogEntry> entries = logEntries.get(key);
              LOG.debug("Thread got " + entries.size() + " to process");
              long threadTime = System.currentTimeMillis();
              try {
                int count = 0;
                // Items were pushed onto the head of the linked list, so pull
                // them off the tail to replay the oldest entries first.
                for (ListIterator<HLogEntry> i =
                    entries.listIterator(entries.size()); i.hasPrevious();) {
                  HLogEntry logEntry = i.previous();
                  WriterAndPath wap = logWriters.get(key);
                  if (wap == null) {
                    Path logfile = new Path(HRegion.getRegionDir(
                      HTableDescriptor.getTableDir(rootDir,
                        logEntry.getKey().getTablename()),
                      HRegionInfo.encodeRegionName(key)),
                      HREGION_OLDLOGFILE_NAME);
                    Path oldlogfile = null;
                    SequenceFile.Reader old = null;
                    if (fs.exists(logfile)) {
                      FileStatus stat = fs.getFileStatus(logfile);
                      if (stat.getLen() <= 0) {
                        LOG.warn("Old hlog file " + logfile + " is zero " +
                          "length. Deleting existing file");
                        fs.delete(logfile, false);
                      } else {
                        LOG.warn("Old hlog file " + logfile + " already " +
                          "exists. Copying existing file to new file");
                        oldlogfile = new Path(logfile.toString() + ".old");
                        fs.rename(logfile, oldlogfile);
                        old = new SequenceFile.Reader(fs, oldlogfile, conf);
                      }
                    }
                    SequenceFile.Writer w = SequenceFile.createWriter(fs,
                      conf, logfile, getKeyClass(conf), KeyValue.class,
                      getCompressionType(conf));
                    wap = new WriterAndPath(logfile, w);
                    logWriters.put(key, wap);
                    if (LOG.isDebugEnabled()) {
                      LOG.debug("Creating new hlog file writer for path " +
                        logfile + " and region " + Bytes.toStringBinary(key));
                    }
                    if (old != null) {
                      // Copy from the existing log file.
                      HLogKey oldkey = newKey(conf);
                      KeyValue oldval = new KeyValue();
                      for (; old.next(oldkey, oldval); count++) {
                        if (LOG.isDebugEnabled() && count > 0 &&
                            count % 10000 == 0) {
                          LOG.debug("Copied " + count + " edits");
                        }
                        w.append(oldkey, oldval);
                      }
                      old.close();
                      fs.delete(oldlogfile, true);
                    }
                  }
                  wap.w.append(logEntry.getKey(), logEntry.getEdit());
                  count++;
                }
                if (LOG.isDebugEnabled()) {
                  LOG.debug("Applied " + count + " total edits to " +
                    Bytes.toStringBinary(key) + " in " +
                    (System.currentTimeMillis() - threadTime) + "ms");
                }
              } catch (IOException e) {
                e = RemoteExceptionHandler.checkIOException(e);
                LOG.warn("Exception while writing region " +
                  Bytes.toStringBinary(key) + " log " + e);
                e.printStackTrace();
              }
            }
          };
          threadPool.execute(thread);
        }
        threadPool.shutdown();
        // Wait for all threads to terminate.
        try {
          for (int i = 0; !threadPool.awaitTermination(5, TimeUnit.SECONDS);
              i++) {
            LOG.debug("Waiting for hlog writers to terminate, iteration #" + i);
          }
        } catch (InterruptedException ex) {
          LOG.warn("Hlog writers were interrupted, possible data loss!");
        }
      }
    } finally {
      splits = new ArrayList<Path>(logWriters.size());
      for (WriterAndPath wap : logWriters.values()) {
        wap.w.close();
        LOG.debug("Closed " + wap.p);
        splits.add(wap.p);
      }
    }
    return splits;
  }
  /**
   * @param conf
   * @return True if append is enabled and we have syncFs on our classpath.
   */
  private static boolean isAppend(final HBaseConfiguration conf) {
    boolean append = conf.getBoolean("dfs.support.append", false);
    if (append) {
      try {
        SequenceFile.Writer.class.getMethod("syncFs", new Class<?>[] {});
        append = true;
      } catch (SecurityException e) {
        // Leave append as configured.
      } catch (NoSuchMethodException e) {
        append = false;
      }
    }
    return append;
  }

  /**
   * Utility class that lets us keep track of an edit together with its key.
   * Only used when splitting logs.
   */
  public static class HLogEntry {
    private KeyValue edit;
    private HLogKey key;

    /**
     * Constructor for both params.
     * @param edit log's edit
     * @param key log's key
     */
    public HLogEntry(KeyValue edit, HLogKey key) {
      super();
      this.edit = edit;
      this.key = key;
    }

    /**
     * Gets the edit.
     * @return edit
     */
    public KeyValue getEdit() {
      return edit;
    }

    /**
     * Gets the key.
     * @return key
     */
    public HLogKey getKey() {
      return key;
    }

    @Override
    public String toString() {
      return this.key + "=" + this.edit;
    }
  }

  /**
   * Construct the HLog directory name.
   *
   * @param info HServerInfo for server
   * @return the HLog directory name
   */
  public static String getHLogDirectoryName(HServerInfo info) {
    return getHLogDirectoryName(info.getServerName());
  }

  /*
   * Recover the log. If append has been set, try to open the log in append
   * mode. Doing this, we get a hold of the file that a crashed writer was
   * writing to. Once we have it, close it; this will allow a subsequent
   * reader to see up to the last sync.
   * @param fs
   * @param p
   * @param append
   */
  private static void recoverLog(final FileSystem fs, final Path p,
      final boolean append) {
    if (!append) {
      return;
    }
    // Trying recovery.
    boolean recovered = false;
    while (!recovered) {
      try {
        FSDataOutputStream out = fs.append(p);
        out.close();
        recovered = true;
      } catch (IOException e) {
        LOG.info("Failed open for append, waiting on lease recovery: " + p, e);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ex) {
          // Ignore it and try again.
        }
      }
    }
    LOG.info("Past out lease recovery");
  }

  /**
   * Construct the HLog directory name.
   *
   * @param serverAddress
   * @param startCode
   * @return the HLog directory name
   */
  public static String getHLogDirectoryName(String serverAddress,
      long startCode) {
    if (serverAddress == null || serverAddress.length() == 0) {
      return null;
    }
    return getHLogDirectoryName(
      HServerInfo.getServerName(serverAddress, startCode));
  }

  /**
   * Construct the HLog directory name.
   *
   * @param serverName
   * @return the HLog directory name
   */
  public static String getHLogDirectoryName(String serverName) {
    StringBuilder dirName = new StringBuilder(HConstants.HREGION_LOGDIR_NAME);
    dirName.append("/");
    dirName.append(serverName);
    return dirName.toString();
  }

  private static void usage() {
    System.err.println("Usage: java " +
      "org.apache.hadoop.hbase.regionserver.HLog" +
      " {--dump <logfile>... | --split <logdir>...}");
  }
  /**
   * Pass one or more log file names and it will either dump out a text
   * version on <code>stdout</code> or split the specified log files.
   *
   * @param args
   * @throws IOException
   */
  public static void main(String[] args) throws IOException {
    if (args.length < 2) {
      usage();
      System.exit(-1);
    }
    boolean dump = true;
    if (args[0].compareTo("--dump") != 0) {
      if (args[0].compareTo("--split") == 0) {
        dump = false;
      } else {
        usage();
        System.exit(-1);
      }
    }
    HBaseConfiguration conf = new HBaseConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Path baseDir = new Path(conf.get(HBASE_DIR));
    for (int i = 1; i < args.length; i++) {
      Path logPath = new Path(args[i]);
      if (!fs.exists(logPath)) {
        throw new FileNotFoundException(args[i] + " does not exist");
      }
      if (dump) {
        if (!fs.isFile(logPath)) {
          throw new IOException(args[i] + " is not a file");
        }
        Reader log = new SequenceFile.Reader(fs, logPath, conf);
        try {
          HLogKey key = new HLogKey();
          KeyValue val = new KeyValue();
          while (log.next(key, val)) {
            System.out.println(key.toString() + " " + val.toString());
          }
        } finally {
          log.close();
        }
      } else {
        if (!fs.getFileStatus(logPath).isDir()) {
          throw new IOException(args[i] + " is not a directory");
        }
        splitLog(baseDir, logPath, fs, conf);
      }
    }
  }

  public static final long FIXED_OVERHEAD = ClassSize.align(
    ClassSize.OBJECT + (5 * ClassSize.REFERENCE) + ClassSize.ATOMIC_INTEGER +
    Bytes.SIZEOF_INT + (3 * Bytes.SIZEOF_LONG));
}
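// ----------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original file), kept as a comment
// so the source still compiles: how a caller might drive the HLog lifecycle.
// The path, the no-op LogRollListener, and the region/table names are
// hypothetical; in practice the class is managed by HRegionServer rather than
// used directly like this.
//
//   HBaseConfiguration conf = new HBaseConfiguration();
//   FileSystem fs = FileSystem.get(conf);
//   Path logDir = new Path("/hbase/log_example-host_60020");  // hypothetical
//   HLog log = new HLog(fs, logDir, conf, new LogRollListener() {
//     public void logRollRequested() {
//       // A real listener would schedule HLog.rollWriter() on a roll thread.
//     }
//   });
//   byte[] region = Bytes.toBytes("example-region");  // hypothetical
//   byte[] table = Bytes.toBytes("example-table");    // hypothetical
//   List<KeyValue> edits = new ArrayList<KeyValue>();
//   edits.add(new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("f"), null,
//     System.currentTimeMillis(), Bytes.toBytes("value")));
//   log.append(region, table, edits, true, System.currentTimeMillis());
//   log.sync();
//   log.closeAndDelete();
// ----------------------------------------------------------------------------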