org.apache.accumulo.server.tabletserver.Tablet.java Source code


Introduction

Here is the source code for org.apache.accumulo.server.tabletserver.Tablet.java. This class manages a single tablet on a tablet server: its in-memory maps, its data files, write-ahead log recovery, and minor and major compactions.
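
One detail called out in the source is worth a quick illustration before the listing: the comment on the MajorCompactionReason enum warns that its declaration order determines the order in which queued major compactions execute, because Java enums compare by ordinal. The following minimal sketch (not part of the Accumulo source) demonstrates that behavior; the PriorityQueue here is purely illustrative and stands in for whatever queue structure the tablet server actually uses.

import java.util.PriorityQueue;

public class CompactionOrderSketch {

    // mirrors Tablet.MajorCompactionReason - do not change the order
    enum MajorCompactionReason {
        USER, CHOP, NORMAL, IDLE
    }

    public static void main(String[] args) {
        // enums implement Comparable by ordinal, so a priority queue
        // drains them in declaration order
        PriorityQueue<MajorCompactionReason> queued = new PriorityQueue<MajorCompactionReason>();
        queued.add(MajorCompactionReason.IDLE);
        queued.add(MajorCompactionReason.USER);
        queued.add(MajorCompactionReason.NORMAL);

        while (!queued.isEmpty())
            System.out.println(queued.poll()); // prints USER, NORMAL, IDLE
    }
}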

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.server.tabletserver;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.accumulo.core.Constants;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.impl.ScannerImpl;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.conf.ConfigurationObserver;
import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.core.constraints.Violations;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Column;
import org.apache.accumulo.core.data.ColumnUpdate;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.KeyExtent;
import org.apache.accumulo.core.data.KeyValue;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.data.thrift.IterInfo;
import org.apache.accumulo.core.data.thrift.MapFileInfo;
import org.apache.accumulo.core.file.FileOperations;
import org.apache.accumulo.core.file.FileSKVIterator;
import org.apache.accumulo.core.iterators.IterationInterruptedException;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.IteratorUtil;
import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
import org.apache.accumulo.core.iterators.system.ColumnQualifierFilter;
import org.apache.accumulo.core.iterators.system.DeletingIterator;
import org.apache.accumulo.core.iterators.system.InterruptibleIterator;
import org.apache.accumulo.core.iterators.system.MultiIterator;
import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator;
import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator.DataSource;
import org.apache.accumulo.core.iterators.system.StatsIterator;
import org.apache.accumulo.core.iterators.system.VisibilityFilter;
import org.apache.accumulo.core.master.thrift.TabletLoadState;
import org.apache.accumulo.core.metadata.MetadataTable;
import org.apache.accumulo.core.metadata.RootTable;
import org.apache.accumulo.core.metadata.schema.DataFileValue;
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LogColumnFamily;
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ScanFileColumnFamily;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.core.security.ColumnVisibility;
import org.apache.accumulo.core.security.Credentials;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.accumulo.core.util.LocalityGroupUtil;
import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError;
import org.apache.accumulo.core.util.MapCounter;
import org.apache.accumulo.core.util.Pair;
import org.apache.accumulo.core.util.UtilWaitThread;
import org.apache.accumulo.fate.zookeeper.IZooReaderWriter;
import org.apache.accumulo.server.ServerConstants;
import org.apache.accumulo.server.client.HdfsZooInstance;
import org.apache.accumulo.server.conf.TableConfiguration;
import org.apache.accumulo.server.constraints.ConstraintChecker;
import org.apache.accumulo.server.fs.FileRef;
import org.apache.accumulo.server.fs.VolumeManager;
import org.apache.accumulo.server.fs.VolumeManager.FileType;
import org.apache.accumulo.server.fs.VolumeManagerImpl;
import org.apache.accumulo.server.master.state.TServerInstance;
import org.apache.accumulo.server.master.tableOps.CompactRange.CompactionIterators;
import org.apache.accumulo.server.problems.ProblemReport;
import org.apache.accumulo.server.problems.ProblemReports;
import org.apache.accumulo.server.problems.ProblemType;
import org.apache.accumulo.server.security.SystemCredentials;
import org.apache.accumulo.server.tabletserver.Compactor.CompactionCanceledException;
import org.apache.accumulo.server.tabletserver.Compactor.CompactionEnv;
import org.apache.accumulo.server.tabletserver.FileManager.ScanFileManager;
import org.apache.accumulo.server.tabletserver.InMemoryMap.MemoryIterator;
import org.apache.accumulo.server.tabletserver.TabletServer.TservConstraintEnv;
import org.apache.accumulo.server.tabletserver.TabletServerResourceManager.TabletResourceManager;
import org.apache.accumulo.server.tabletserver.TabletStatsKeeper.Operation;
import org.apache.accumulo.server.tabletserver.log.DfsLogger;
import org.apache.accumulo.server.tabletserver.log.MutationReceiver;
import org.apache.accumulo.server.tabletserver.mastermessage.TabletStatusMessage;
import org.apache.accumulo.server.tabletserver.metrics.TabletServerMinCMetrics;
import org.apache.accumulo.server.util.FileUtil;
import org.apache.accumulo.server.util.MetadataTableUtil;
import org.apache.accumulo.server.util.MetadataTableUtil.LogEntry;
import org.apache.accumulo.server.util.TabletOperations;
import org.apache.accumulo.server.zookeeper.ZooReaderWriter;
import org.apache.accumulo.start.classloader.vfs.AccumuloVFSClassLoader;
import org.apache.accumulo.trace.instrument.Span;
import org.apache.accumulo.trace.instrument.Trace;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.log4j.Logger;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;

/*
 * We need to be able to have the master tell a tablet server to
 * close this tablet, and the tablet server to handle all pending client reads
 * before closing.
 */

/**
 * This class provides an interface to read from a map file; it mostly takes care of reporting start and end keys.
 * 
 * It is needed because a single row extent can have multiple columns; this class manages all the columns (each handled by a store) for a single row extent.
 */

public class Tablet {

    enum MajorCompactionReason {
        // do not change the order, the order of this enum determines the order
        // in which queued major compactions are executed
        USER, CHOP, NORMAL, IDLE
    }

    enum MinorCompactionReason {
        USER, SYSTEM, CLOSE
    }

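    /**
     * Pairs a write-ahead log sequence number with the InMemoryMap it commits to. The session
     * counts in-flight commits so that minor compaction can wait for them to finish, and it tracks
     * the maximum committed timestamp so the tablet's time can be persisted to the metadata table.
     */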
    public class CommitSession {

        private int seq;
        private InMemoryMap memTable;
        private int commitsInProgress;
        private long maxCommittedTime = Long.MIN_VALUE;

        private CommitSession(int seq, InMemoryMap imm) {
            this.seq = seq;
            this.memTable = imm;
            commitsInProgress = 0;
        }

        public int getWALogSeq() {
            return seq;
        }

        private void decrementCommitsInProgress() {
            if (commitsInProgress < 1)
                throw new IllegalStateException("commitsInProgress = " + commitsInProgress);

            commitsInProgress--;
            if (commitsInProgress == 0)
                Tablet.this.notifyAll();
        }

        private void incrementCommitsInProgress() {
            if (commitsInProgress < 0)
                throw new IllegalStateException("commitsInProgress = " + commitsInProgress);

            commitsInProgress++;
        }

        private void waitForCommitsToFinish() {
            while (commitsInProgress > 0) {
                try {
                    Tablet.this.wait(50);
                } catch (InterruptedException e) {
                    log.warn(e, e);
                }
            }
        }

        public void abortCommit(List<Mutation> value) {
            Tablet.this.abortCommit(this, value);
        }

        public void commit(List<Mutation> mutations) {
            Tablet.this.commit(this, mutations);
        }

        public Tablet getTablet() {
            return Tablet.this;
        }

        public boolean beginUpdatingLogsUsed(ArrayList<DfsLogger> copy, boolean mincFinish) {
            return Tablet.this.beginUpdatingLogsUsed(memTable, copy, mincFinish);
        }

        public void finishUpdatingLogsUsed() {
            Tablet.this.finishUpdatingLogsUsed();
        }

        public int getLogId() {
            return logId;
        }

        public KeyExtent getExtent() {
            return extent;
        }

        private void updateMaxCommittedTime(long time) {
            maxCommittedTime = Math.max(time, maxCommittedTime);
        }

        private long getMaxCommittedTime() {
            if (maxCommittedTime == Long.MIN_VALUE)
                throw new IllegalStateException("Tried to read max committed time when it was never set");
            return maxCommittedTime;
        }

    }

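    /**
     * Manages the tablet's in-memory maps: the active memTable receiving writes, the otherMemTable
     * being minor compacted, and the deletingMemTable awaiting deletion once a minor compaction is
     * finalized. Each swap creates a new CommitSession and advances the sequence number by two,
     * presumably leaving the intervening sequence free for minor compaction markers in the log
     * (note the getWALogSeq() + 2 in bringMinorCompactionOnline).
     */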
    private class TabletMemory {
        private InMemoryMap memTable;
        private InMemoryMap otherMemTable;
        private InMemoryMap deletingMemTable;
        private int nextSeq = 1;
        private CommitSession commitSession;

        TabletMemory() {
            try {
                memTable = new InMemoryMap(acuTableConf);
            } catch (LocalityGroupConfigurationError e) {
                throw new RuntimeException(e);
            }
            commitSession = new CommitSession(nextSeq, memTable);
            nextSeq += 2;
        }

        InMemoryMap getMemTable() {
            return memTable;
        }

        InMemoryMap getMinCMemTable() {
            return otherMemTable;
        }

        CommitSession prepareForMinC() {
            if (otherMemTable != null) {
                throw new IllegalStateException();
            }

            if (deletingMemTable != null) {
                throw new IllegalStateException();
            }

            otherMemTable = memTable;
            try {
                memTable = new InMemoryMap(acuTableConf);
            } catch (LocalityGroupConfigurationError e) {
                throw new RuntimeException(e);
            }

            CommitSession oldCommitSession = commitSession;
            commitSession = new CommitSession(nextSeq, memTable);
            nextSeq += 2;

            tabletResources.updateMemoryUsageStats(memTable.estimatedSizeInBytes(),
                    otherMemTable.estimatedSizeInBytes());

            return oldCommitSession;
        }

        void finishedMinC() {

            if (otherMemTable == null) {
                throw new IllegalStateException();
            }

            if (deletingMemTable != null) {
                throw new IllegalStateException();
            }

            deletingMemTable = otherMemTable;

            otherMemTable = null;
            Tablet.this.notifyAll();
        }

        void finalizeMinC() {
            try {
                deletingMemTable.delete(15000);
            } finally {
                synchronized (Tablet.this) {
                    if (otherMemTable != null) {
                        throw new IllegalStateException();
                    }

                    if (deletingMemTable == null) {
                        throw new IllegalStateException();
                    }

                    deletingMemTable = null;

                    tabletResources.updateMemoryUsageStats(memTable.estimatedSizeInBytes(), 0);
                }
            }
        }

        boolean memoryReservedForMinC() {
            return otherMemTable != null || deletingMemTable != null;
        }

        void waitForMinC() {
            while (otherMemTable != null || deletingMemTable != null) {
                try {
                    Tablet.this.wait(50);
                } catch (InterruptedException e) {
                    log.warn(e, e);
                }
            }
        }

        void mutate(CommitSession cm, List<Mutation> mutations) {
            cm.memTable.mutate(mutations);
        }

        void updateMemoryUsageStats() {
            long other = 0;
            if (otherMemTable != null)
                other = otherMemTable.estimatedSizeInBytes();
            else if (deletingMemTable != null)
                other = deletingMemTable.estimatedSizeInBytes();

            tabletResources.updateMemoryUsageStats(memTable.estimatedSizeInBytes(), other);
        }

        List<MemoryIterator> getIterators() {
            List<MemoryIterator> toReturn = new ArrayList<MemoryIterator>(2);
            toReturn.add(memTable.skvIterator());
            if (otherMemTable != null)
                toReturn.add(otherMemTable.skvIterator());
            return toReturn;
        }

        void returnIterators(List<MemoryIterator> iters) {
            for (MemoryIterator iter : iters) {
                iter.close();
            }
        }

        public long getNumEntries() {
            if (otherMemTable != null)
                return memTable.getNumEntries() + otherMemTable.getNumEntries();
            return memTable.getNumEntries();
        }

        CommitSession getCommitSession() {
            return commitSession;
        }
    }

    private TabletMemory tabletMemory;

    private final TabletTime tabletTime;
    private long persistedTime;
    private final Object timeLock = new Object();

    private final Path location; // absolute path of this tablets dir
    private TServerInstance lastLocation;

    private Configuration conf;
    private VolumeManager fs;

    private TableConfiguration acuTableConf;

    private volatile boolean tableDirChecked = false;

    private AtomicLong dataSourceDeletions = new AtomicLong(0);
    private Set<ScanDataSource> activeScans = new HashSet<ScanDataSource>();

    private volatile boolean closing = false;
    private boolean closed = false;
    private boolean closeComplete = false;

    private long lastFlushID = -1;
    private long lastCompactID = -1;

    private KeyExtent extent;

    private TabletResourceManager tabletResources;
    final private DatafileManager datafileManager;
    private volatile boolean majorCompactionInProgress = false;
    private volatile boolean majorCompactionWaitingToStart = false;
    private Set<MajorCompactionReason> majorCompactionQueued = Collections
            .synchronizedSet(EnumSet.noneOf(MajorCompactionReason.class));
    private volatile boolean minorCompactionInProgress = false;
    private volatile boolean minorCompactionWaitingToStart = false;

    private boolean updatingFlushID = false;

    private AtomicReference<ConstraintChecker> constraintChecker = new AtomicReference<ConstraintChecker>();

    private final String tabletDirectory;

    private int writesInProgress = 0;

    private static final Logger log = Logger.getLogger(Tablet.class);
    public TabletStatsKeeper timer;

    private Rate queryRate = new Rate(0.2);
    private long queryCount = 0;

    private Rate queryByteRate = new Rate(0.2);
    private long queryBytes = 0;

    private Rate ingestRate = new Rate(0.2);
    private long ingestCount = 0;

    private Rate ingestByteRate = new Rate(0.2);
    private long ingestBytes = 0;

    private byte[] defaultSecurityLabel = new byte[0];

    private long lastMinorCompactionFinishTime;
    private long lastMapFileImportTime;

    private volatile long numEntries;
    private volatile long numEntriesInMemory;

    // a count of the amount of data read by the iterators
    private AtomicLong scannedCount = new AtomicLong(0);
    private Rate scannedRate = new Rate(0.2);

    private ConfigurationObserver configObserver;

    private TabletServer tabletServer;

    private final int logId;
    // ensure we only have one reader/writer of our bulk file notes at a time
    public final Object bulkFileImportLock = new Object();

    public int getLogId() {
        return logId;
    }

    public static class TabletClosedException extends RuntimeException {
        public TabletClosedException(Exception e) {
            super(e);
        }

        public TabletClosedException() {
            super();
        }

        private static final long serialVersionUID = 1L;
    }

    FileRef getNextMapFilename(String prefix) throws IOException {
        String extension = FileOperations.getNewFileExtension(tabletServer.getTableConfiguration(extent));
        checkTabletDir();
        return new FileRef(location.toString() + "/" + prefix + UniqueNameAllocator.getInstance().getNextName()
                + "." + extension);
    }

    private void checkTabletDir() throws IOException {
        if (!tableDirChecked) {
            checkTabletDir(this.location);
            tableDirChecked = true;
        }
    }

    private void checkTabletDir(Path tabletDir) throws IOException {

        FileStatus[] files = null;
        try {
            files = fs.listStatus(tabletDir);
        } catch (FileNotFoundException ex) {
            // ignored
        }

        if (files == null) {
            if (tabletDir.getName().startsWith("c-"))
                log.debug("Tablet " + extent + " had no dir, creating " + tabletDir); // it's a clone dir...
            else
                log.warn("Tablet " + extent + " had no dir, creating " + tabletDir);

            fs.mkdirs(tabletDir);
        }
    }

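    /**
     * Tracks the tablet's data files and their sizes, reference counts files reserved by scans so
     * they are not deleted while still in use, and hands files off to and from minor and major
     * compactions.
     */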
    class DatafileManager {
        // access to datafileSizes needs to be synchronized: see CompactionRunner#getNumFiles
        final private Map<FileRef, DataFileValue> datafileSizes = Collections
                .synchronizedMap(new TreeMap<FileRef, DataFileValue>());

        DatafileManager(SortedMap<FileRef, DataFileValue> datafileSizes) {
            for (Entry<FileRef, DataFileValue> datafiles : datafileSizes.entrySet())
                this.datafileSizes.put(datafiles.getKey(), datafiles.getValue());
        }

        FileRef mergingMinorCompactionFile = null;
        Set<FileRef> filesToDeleteAfterScan = new HashSet<FileRef>();
        Map<Long, Set<FileRef>> scanFileReservations = new HashMap<Long, Set<FileRef>>();
        MapCounter<FileRef> fileScanReferenceCounts = new MapCounter<FileRef>();
        long nextScanReservationId = 0;
        boolean reservationsBlocked = false;

        Set<FileRef> majorCompactingFiles = new HashSet<FileRef>();

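        /**
         * Snapshots the current file set under a new reservation id, incrementing a per-file scan
         * reference count. The matching returnFilesForScan call decrements those counts and removes
         * metadata scan references for files that were queued for deletion.
         */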
        Pair<Long, Map<FileRef, DataFileValue>> reserveFilesForScan() {
            synchronized (Tablet.this) {

                while (reservationsBlocked) {
                    try {
                        Tablet.this.wait(50);
                    } catch (InterruptedException e) {
                        log.warn(e, e);
                    }
                }

                Set<FileRef> absFilePaths = new HashSet<FileRef>(datafileSizes.keySet());

                long rid = nextScanReservationId++;

                scanFileReservations.put(rid, absFilePaths);

                Map<FileRef, DataFileValue> ret = new HashMap<FileRef, DataFileValue>();

                for (FileRef path : absFilePaths) {
                    fileScanReferenceCounts.increment(path, 1);
                    ret.put(path, datafileSizes.get(path));
                }

                return new Pair<Long, Map<FileRef, DataFileValue>>(rid, ret);
            }
        }

        void returnFilesForScan(Long reservationId) {

            final Set<FileRef> filesToDelete = new HashSet<FileRef>();

            synchronized (Tablet.this) {
                Set<FileRef> absFilePaths = scanFileReservations.remove(reservationId);

                if (absFilePaths == null)
                    throw new IllegalArgumentException("Unknown scan reservation id " + reservationId);

                boolean notify = false;
                for (FileRef path : absFilePaths) {
                    long refCount = fileScanReferenceCounts.decrement(path, 1);
                    if (refCount == 0) {
                        if (filesToDeleteAfterScan.remove(path))
                            filesToDelete.add(path);
                        notify = true;
                    } else if (refCount < 0)
                        throw new IllegalStateException("Scan ref count for " + path + " is " + refCount);
                }

                if (notify)
                    Tablet.this.notifyAll();
            }

            if (filesToDelete.size() > 0) {
                log.debug("Removing scan refs from metadata " + extent + " " + filesToDelete);
                MetadataTableUtil.removeScanFiles(extent, filesToDelete, SystemCredentials.get(),
                        tabletServer.getLock());
            }
        }

        private void removeFilesAfterScan(Set<FileRef> scanFiles) {
            if (scanFiles.size() == 0)
                return;

            Set<FileRef> filesToDelete = new HashSet<FileRef>();

            synchronized (Tablet.this) {
                for (FileRef path : scanFiles) {
                    if (fileScanReferenceCounts.get(path) == 0)
                        filesToDelete.add(path);
                    else
                        filesToDeleteAfterScan.add(path);
                }
            }

            if (filesToDelete.size() > 0) {
                log.debug("Removing scan refs from metadata " + extent + " " + filesToDelete);
                MetadataTableUtil.removeScanFiles(extent, filesToDelete, SystemCredentials.get(),
                        tabletServer.getLock());
            }
        }

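        /**
         * Waits up to maxWaitTime for the scan reference counts on the given files to reach zero,
         * optionally blocking new scan reservations while waiting. Returns the set of files still
         * in use when the wait ends.
         */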
        private TreeSet<FileRef> waitForScansToFinish(Set<FileRef> pathsToWaitFor, boolean blockNewScans,
                long maxWaitTime) {
            long startTime = System.currentTimeMillis();
            TreeSet<FileRef> inUse = new TreeSet<FileRef>();

            Span waitForScans = Trace.start("waitForScans");
            try {
                synchronized (Tablet.this) {
                    if (blockNewScans) {
                        if (reservationsBlocked)
                            throw new IllegalStateException();

                        reservationsBlocked = true;
                    }

                    for (FileRef path : pathsToWaitFor) {
                        while (fileScanReferenceCounts.get(path) > 0
                                && System.currentTimeMillis() - startTime < maxWaitTime) {
                            try {
                                Tablet.this.wait(100);
                            } catch (InterruptedException e) {
                                log.warn(e, e);
                            }
                        }
                    }

                    for (FileRef path : pathsToWaitFor) {
                        if (fileScanReferenceCounts.get(path) > 0)
                            inUse.add(path);
                    }

                    if (blockNewScans) {
                        reservationsBlocked = false;
                        Tablet.this.notifyAll();
                    }

                }
            } finally {
                waitForScans.stop();
            }
            return inUse;
        }

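        /**
         * Brings bulk-imported files online: verifies every file lives in a single bulk directory
         * under this table's directories, skips files already recorded as loaded for this
         * transaction id, optionally stamps them with the tablet's logical time, and then updates
         * the metadata table and the in-memory file sizes.
         */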
        public void importMapFiles(long tid, Map<FileRef, DataFileValue> pathsString, boolean setTime)
                throws IOException {

            String bulkDir = null;

            Map<FileRef, DataFileValue> paths = new HashMap<FileRef, DataFileValue>();
            for (Entry<FileRef, DataFileValue> entry : pathsString.entrySet())
                paths.put(entry.getKey(), entry.getValue());

            for (FileRef tpath : paths.keySet()) {

                boolean inTheRightDirectory = false;
                Path parent = tpath.path().getParent().getParent();
                for (String tablesDir : ServerConstants.getTablesDirs()) {
                    if (parent.equals(new Path(tablesDir, extent.getTableId().toString()))) {
                        inTheRightDirectory = true;
                        break;
                    }
                }
                if (!inTheRightDirectory) {
                    throw new IOException("Data file " + tpath + " not in table dirs");
                }

                if (bulkDir == null)
                    bulkDir = tpath.path().getParent().toString();
                else if (!bulkDir.equals(tpath.path().getParent().toString()))
                    throw new IllegalArgumentException("bulk files in different dirs " + bulkDir + " " + tpath);

            }

            if (extent.isRootTablet()) {
                throw new IllegalArgumentException("Can not import files to root tablet");
            }

            synchronized (bulkFileImportLock) {
                Credentials creds = SystemCredentials.get();
                Connector conn;
                try {
                    conn = HdfsZooInstance.getInstance().getConnector(creds.getPrincipal(), creds.getToken());
                } catch (Exception ex) {
                    throw new IOException(ex);
                }
                // Remove any bulk files we've previously loaded and compacted away
                List<FileRef> files = MetadataTableUtil.getBulkFilesLoaded(conn, extent, tid);

                for (FileRef file : files)
                    if (paths.keySet().remove(file.path()))
                        log.debug("Ignoring request to re-import a file already imported: " + extent + ": " + file);

                if (paths.size() > 0) {
                    long bulkTime = Long.MIN_VALUE;
                    if (setTime) {
                        for (DataFileValue dfv : paths.values()) {
                            long nextTime = tabletTime.getAndUpdateTime();
                            if (nextTime < bulkTime)
                                throw new IllegalStateException(
                                        "Time went backwards unexpectedly " + nextTime + " " + bulkTime);
                            bulkTime = nextTime;
                            dfv.setTime(bulkTime);
                        }
                    }

                    synchronized (timeLock) {
                        if (bulkTime > persistedTime)
                            persistedTime = bulkTime;

                        MetadataTableUtil.updateTabletDataFile(tid, extent, paths,
                                tabletTime.getMetadataValue(persistedTime), creds, tabletServer.getLock());
                    }
                }
            }

            synchronized (Tablet.this) {
                for (Entry<FileRef, DataFileValue> tpath : paths.entrySet()) {
                    if (datafileSizes.containsKey(tpath.getKey())) {
                        log.error("Adding file that is already in set " + tpath.getKey());
                    }
                    datafileSizes.put(tpath.getKey(), tpath.getValue());

                }

                tabletResources.importedMapFiles();

                computeNumEntries();
            }

            for (FileRef tpath : paths.keySet()) {
                log.log(TLevel.TABLET_HIST, extent + " import " + tpath + " " + paths.get(tpath));
            }
        }

        FileRef reserveMergingMinorCompactionFile() {
            if (mergingMinorCompactionFile != null)
                throw new IllegalStateException(
                        "Tried to reserve merging minor compaction file when already reserved: "
                                + mergingMinorCompactionFile);

            if (extent.isRootTablet())
                return null;

            int maxFiles = acuTableConf.getMaxFilesPerTablet();

            // when a major compaction is running and we are at max files, write out
            // one extra file... want to avoid the case where major compaction is
            // compacting everything except for the largest file, and therefore the
            // largest file is returned for merging.. the following check mostly
            // avoids this case, except for the case where major compactions fail or
            // are canceled
            if (majorCompactingFiles.size() > 0 && datafileSizes.size() == maxFiles)
                return null;

            if (datafileSizes.size() >= maxFiles) {
                // find the smallest file

                long min = Long.MAX_VALUE;
                FileRef minName = null;

                for (Entry<FileRef, DataFileValue> entry : datafileSizes.entrySet()) {
                    if (entry.getValue().getSize() < min && !majorCompactingFiles.contains(entry.getKey())) {
                        min = entry.getValue().getSize();
                        minName = entry.getKey();
                    }
                }

                if (minName == null)
                    return null;

                mergingMinorCompactionFile = minName;
                return minName;
            }

            return null;
        }

        void unreserveMergingMinorCompactionFile(FileRef file) {
            if ((file == null && mergingMinorCompactionFile != null)
                    || (file != null && mergingMinorCompactionFile == null) || (file != null
                            && mergingMinorCompactionFile != null && !file.equals(mergingMinorCompactionFile)))
                throw new IllegalStateException("Disagreement " + file + " " + mergingMinorCompactionFile);

            mergingMinorCompactionFile = null;
        }

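        /**
         * Makes the result of a minor compaction durable and visible: renames the temporary file
         * into place (retrying on failure), writes the new file and any merged-away file to the
         * metadata table while coordinating write-ahead log markers, and finally updates the
         * in-memory file list and commit state.
         */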
        void bringMinorCompactionOnline(FileRef tmpDatafile, FileRef newDatafile, FileRef absMergeFile,
                DataFileValue dfv, CommitSession commitSession, long flushId) throws IOException {

            IZooReaderWriter zoo = ZooReaderWriter.getRetryingInstance();
            if (extent.isRootTablet()) {
                try {
                    if (!zoo.isLockHeld(tabletServer.getLock().getLockID())) {
                        throw new IllegalStateException();
                    }
                } catch (Exception e) {
                    throw new IllegalStateException("Can not bring minor compaction online, lock not held", e);
                }
            }

            // rename before putting in metadata table, so files in metadata table should
            // always exist
            do {
                try {
                    if (dfv.getNumEntries() == 0) {
                        fs.deleteRecursively(tmpDatafile.path());
                    } else {
                        if (fs.exists(newDatafile.path())) {
                            log.warn("Target map file already exists " + newDatafile);
                            fs.deleteRecursively(newDatafile.path());
                        }

                        if (!fs.rename(tmpDatafile.path(), newDatafile.path())) {
                            throw new IOException("rename fails");
                        }
                    }
                    break;
                } catch (IOException ioe) {
                    log.warn("Tablet " + extent + " failed to rename " + newDatafile
                            + " after MinC, will retry in 60 secs...", ioe);
                    UtilWaitThread.sleep(60 * 1000);
                }
            } while (true);

            long t1, t2;

            // the code below always assumes the merged file is in use by scans... this must be done
            // because the in memory list of files is not updated until after the metadata table is,
            // therefore the file is available to scans until memory is updated, but we want to ensure
            // the file is not available for garbage collection... if memory were updated
            // before this point (like major compactions do), then the following code could wait
            // for scans to finish like major compactions do... we used to wait for scans to finish
            // here, but that was incorrect because a scan could start after waiting but before
            // memory was updated... assuming the file is always in use by scans leads to at most
            // one unneeded metadata update when it was not actually in use
            Set<FileRef> filesInUseByScans = Collections.emptySet();
            if (absMergeFile != null)
                filesInUseByScans = Collections.singleton(absMergeFile);

            // very important to write delete entries outside of log lock, because
            // this !METADATA write does not go up... it goes sideways or to itself
            if (absMergeFile != null)
                MetadataTableUtil.addDeleteEntries(extent, Collections.singleton(absMergeFile),
                        SystemCredentials.get());

            Set<String> unusedWalLogs = beginClearingUnusedLogs();
            try {
                // the order of writing to !METADATA and the walog is important in the face of machine/process failures:
                // we need to write to !METADATA before writing to the walog; when things are done in the reverse order,
                // data could be lost... the minor compaction start event should be written before the following metadata
                // write is made

                synchronized (timeLock) {
                    if (commitSession.getMaxCommittedTime() > persistedTime)
                        persistedTime = commitSession.getMaxCommittedTime();

                    String time = tabletTime.getMetadataValue(persistedTime);
                    MetadataTableUtil.updateTabletDataFile(extent, newDatafile, absMergeFile, dfv, time,
                            SystemCredentials.get(), filesInUseByScans, tabletServer.getClientAddressString(),
                            tabletServer.getLock(), unusedWalLogs, lastLocation, flushId);
                }

            } finally {
                finishClearingUnusedLogs();
            }

            do {
                try {
                    // the purpose of making this update use the new commit session, instead of the old one passed in,
                    // is that the new one will reference the logs used by current memory...

                    tabletServer.minorCompactionFinished(tabletMemory.getCommitSession(), newDatafile.toString(),
                            commitSession.getWALogSeq() + 2);
                    break;
                } catch (IOException e) {
                    log.error("Failed to write to write-ahead log " + e.getMessage() + " will retry", e);
                    UtilWaitThread.sleep(1 * 1000);
                }
            } while (true);

            synchronized (Tablet.this) {
                lastLocation = null;

                t1 = System.currentTimeMillis();
                if (datafileSizes.containsKey(newDatafile)) {
                    log.error("Adding file that is already in set " + newDatafile);
                }

                if (dfv.getNumEntries() > 0) {
                    datafileSizes.put(newDatafile, dfv);
                }

                if (absMergeFile != null) {
                    datafileSizes.remove(absMergeFile);
                }

                unreserveMergingMinorCompactionFile(absMergeFile);

                dataSourceDeletions.incrementAndGet();
                tabletMemory.finishedMinC();

                lastFlushID = flushId;

                computeNumEntries();
                t2 = System.currentTimeMillis();
            }

            // must do this after list of files in memory is updated above
            removeFilesAfterScan(filesInUseByScans);

            if (absMergeFile != null)
                log.log(TLevel.TABLET_HIST, extent + " MinC [" + absMergeFile + ",memory] -> " + newDatafile);
            else
                log.log(TLevel.TABLET_HIST, extent + " MinC [memory] -> " + newDatafile);
            log.debug(String.format("MinC finish lock %.2f secs %s", (t2 - t1) / 1000.0, getExtent().toString()));
            if (dfv.getSize() > acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD)) {
                log.debug(String.format(
                        "Minor Compaction wrote out file larger than split threshold.  split threshold = %,d  file size = %,d",
                        acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD), dfv.getSize()));
            }

        }

        public void reserveMajorCompactingFiles(Set<FileRef> files) {
            if (majorCompactingFiles.size() != 0)
                throw new IllegalStateException("Major compacting files not empty " + majorCompactingFiles);

            if (mergingMinorCompactionFile != null && files.contains(mergingMinorCompactionFile))
                throw new IllegalStateException("Major compaction tried to reserve file in use by minor compaction "
                        + mergingMinorCompactionFile);

            majorCompactingFiles.addAll(files);
        }

        public void clearMajorCompactingFile() {
            majorCompactingFiles.clear();
        }

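        /**
         * Installs the output of a major compaction. For normal tablets the new file is renamed
         * into place before the metadata table is updated; for the root tablet, whose file list is
         * derived from its directory, the old files are first renamed to delete+ markers so an
         * interrupted compaction can be cleaned up later.
         */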
        void bringMajorCompactionOnline(Set<FileRef> oldDatafiles, FileRef tmpDatafile, FileRef newDatafile,
                Long compactionId, DataFileValue dfv) throws IOException {
            long t1, t2;

            if (!extent.isRootTablet()) {

                if (fs.exists(newDatafile.path())) {
                    log.error("Target map file already exists " + newDatafile, new Exception());
                    throw new IllegalStateException("Target map file already exists " + newDatafile);
                }

                // rename before putting in metadata table, so files in metadata table should
                // always exist
                if (!fs.rename(tmpDatafile.path(), newDatafile.path()))
                    log.warn("Rename of " + tmpDatafile + " to " + newDatafile + " returned false");

                if (dfv.getNumEntries() == 0) {
                    fs.deleteRecursively(newDatafile.path());
                }
            }

            TServerInstance lastLocation = null;
            synchronized (Tablet.this) {

                t1 = System.currentTimeMillis();

                IZooReaderWriter zoo = ZooReaderWriter.getRetryingInstance();

                dataSourceDeletions.incrementAndGet();

                if (extent.isRootTablet()) {

                    waitForScansToFinish(oldDatafiles, true, Long.MAX_VALUE);

                    try {
                        if (!zoo.isLockHeld(tabletServer.getLock().getLockID())) {
                            throw new IllegalStateException();
                        }
                    } catch (Exception e) {
                        throw new IllegalStateException("Can not bring major compaction online, lock not held", e);
                    }

                    // mark files as ready for deletion, but
                    // do not delete them until we successfully
                    // rename the compacted map file, in case
                    // the system goes down

                    String compactName = newDatafile.path().getName();

                    for (FileRef ref : oldDatafiles) {
                        Path path = ref.path();
                        fs.rename(path, new Path(location + "/delete+" + compactName + "+" + path.getName()));
                    }

                    if (fs.exists(newDatafile.path())) {
                        log.error("Target map file already exists " + newDatafile, new Exception());
                        throw new IllegalStateException("Target map file already exists " + newDatafile);
                    }

                    if (!fs.rename(tmpDatafile.path(), newDatafile.path()))
                        log.warn("Rename of " + tmpDatafile + " to " + newDatafile + " returned false");

                    // start deleting files, if we do not finish they will be cleaned
                    // up later
                    for (FileRef ref : oldDatafiles) {
                        Path path = ref.path();
                        Path deleteFile = new Path(location + "/delete+" + compactName + "+" + path.getName());
                        if (acuTableConf.getBoolean(Property.GC_TRASH_IGNORE) || !fs.moveToTrash(deleteFile))
                            fs.deleteRecursively(deleteFile);
                    }
                }

                // atomically remove old files and add new file
                for (FileRef oldDatafile : oldDatafiles) {
                    if (!datafileSizes.containsKey(oldDatafile)) {
                        log.error("file does not exist in set " + oldDatafile);
                    }
                    datafileSizes.remove(oldDatafile);
                    majorCompactingFiles.remove(oldDatafile);
                }

                if (datafileSizes.containsKey(newDatafile)) {
                    log.error("Adding file that is already in set " + newDatafile);
                }

                if (dfv.getNumEntries() > 0) {
                    datafileSizes.put(newDatafile, dfv);
                }

                // could be used by a follow-on compaction in a multipass compaction
                majorCompactingFiles.add(newDatafile);

                computeNumEntries();

                lastLocation = Tablet.this.lastLocation;
                Tablet.this.lastLocation = null;

                if (compactionId != null)
                    lastCompactID = compactionId;

                t2 = System.currentTimeMillis();
            }

            if (!extent.isRootTablet()) {
                Set<FileRef> filesInUseByScans = waitForScansToFinish(oldDatafiles, false, 10000);
                if (filesInUseByScans.size() > 0)
                    log.debug("Adding scan refs to metadata " + extent + " " + filesInUseByScans);
                MetadataTableUtil.replaceDatafiles(extent, oldDatafiles, filesInUseByScans, newDatafile,
                        compactionId, dfv, SystemCredentials.get(), tabletServer.getClientAddressString(),
                        lastLocation, tabletServer.getLock());
                removeFilesAfterScan(filesInUseByScans);
            }

            log.debug(String.format("MajC finish lock %.2f secs", (t2 - t1) / 1000.0));
            log.log(TLevel.TABLET_HIST, extent + " MajC " + oldDatafiles + " --> " + newDatafile);
        }

        public SortedMap<FileRef, DataFileValue> getDatafileSizes() {
            synchronized (Tablet.this) {
                TreeMap<FileRef, DataFileValue> copy = new TreeMap<FileRef, DataFileValue>(datafileSizes);
                return Collections.unmodifiableSortedMap(copy);
            }
        }

        public Set<FileRef> getFiles() {
            synchronized (Tablet.this) {
                HashSet<FileRef> files = new HashSet<FileRef>(datafileSizes.keySet());
                return Collections.unmodifiableSet(files);
            }
        }

    }

    public Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm,
            SortedMap<Key, Value> tabletsKeyValues) throws IOException {
        this(tabletServer, location, extent, trm, CachedConfiguration.getInstance(), tabletsKeyValues);
        splitCreationTime = 0;
    }

    public Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm,
            SortedMap<FileRef, DataFileValue> datafiles, String time, long initFlushID, long initCompactID)
            throws IOException {
        this(tabletServer, location, extent, trm, CachedConfiguration.getInstance(), datafiles, time, initFlushID,
                initCompactID);
        splitCreationTime = System.currentTimeMillis();
    }

    private Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm,
            Configuration conf, SortedMap<Key, Value> tabletsKeyValues) throws IOException {
        this(tabletServer, location, extent, trm, conf, VolumeManagerImpl.get(), tabletsKeyValues);
    }

    static private final List<LogEntry> EMPTY = Collections.emptyList();

    private Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm,
            Configuration conf, SortedMap<FileRef, DataFileValue> datafiles, String time, long initFlushID,
            long initCompactID) throws IOException {
        this(tabletServer, location, extent, trm, conf, VolumeManagerImpl.get(), EMPTY, datafiles, time, null,
                new HashSet<FileRef>(), initFlushID, initCompactID);
    }

    private static String lookupTime(AccumuloConfiguration conf, KeyExtent extent,
            SortedMap<Key, Value> tabletsKeyValues) {
        SortedMap<Key, Value> entries;

        if (extent.isRootTablet()) {
            return null;
        } else {
            entries = new TreeMap<Key, Value>();
            Text rowName = extent.getMetadataEntry();
            for (Entry<Key, Value> entry : tabletsKeyValues.entrySet()) {
                if (entry.getKey().compareRow(rowName) == 0
                        && TabletsSection.ServerColumnFamily.TIME_COLUMN.hasColumns(entry.getKey())) {
                    entries.put(new Key(entry.getKey()), new Value(entry.getValue()));
                }
            }
        }

        // log.debug("extent : "+extent+"   entries : "+entries);

        if (entries.size() == 1)
            return entries.values().iterator().next().toString();
        return null;
    }

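    /**
     * Builds the tablet's initial file list. The root tablet lists its directory directly (with
     * cleanUpFiles handling leftover delete markers); all other tablets scan the metadata table
     * for their data file entries.
     */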
    private static SortedMap<FileRef, DataFileValue> lookupDatafiles(AccumuloConfiguration conf, VolumeManager fs,
            KeyExtent extent, SortedMap<Key, Value> tabletsKeyValues) throws IOException {

        TreeMap<FileRef, DataFileValue> datafiles = new TreeMap<FileRef, DataFileValue>();

        if (extent.isRootTablet()) { // the meta0 tablet
            Path location = new Path(ServerConstants.getRootTabletDir());
            location = location.makeQualified(fs.getDefaultVolume());
            // cleanUpFiles() has special handling for 'delete' files
            FileStatus[] files = fs.listStatus(location);
            Collection<String> goodPaths = cleanUpFiles(fs, files, true);
            for (String good : goodPaths) {
                Path path = new Path(good);
                String filename = path.getName();
                FileRef ref = new FileRef(location.toString() + "/" + filename, path);
                DataFileValue dfv = new DataFileValue(0, 0);
                datafiles.put(ref, dfv);
            }
        } else {

            Text rowName = extent.getMetadataEntry();

            String tableId = extent.isMeta() ? RootTable.ID : MetadataTable.ID;
            ScannerImpl mdScanner = new ScannerImpl(HdfsZooInstance.getInstance(), SystemCredentials.get(), tableId,
                    Authorizations.EMPTY);

            // Commented out because when no data file is present, each tablet will scan through the metadata table and return nothing
            // reduced batch size to improve performance
            // changed here after endKeys were implemented from 10 to 1000
            mdScanner.setBatchSize(1000);

            // leave these in, again, now using endKey for safety
            mdScanner.fetchColumnFamily(DataFileColumnFamily.NAME);

            mdScanner.setRange(new Range(rowName));

            for (Entry<Key, Value> entry : mdScanner) {

                if (entry.getKey().compareRow(rowName) != 0) {
                    break;
                }

                FileRef ref = new FileRef(entry.getKey().getColumnQualifier().toString(),
                        fs.getFullPath(entry.getKey()));
                datafiles.put(ref, new DataFileValue(entry.getValue().get()));
            }
        }
        return datafiles;
    }

    private static List<LogEntry> lookupLogEntries(KeyExtent ke, SortedMap<Key, Value> tabletsKeyValues) {
        List<LogEntry> logEntries = new ArrayList<LogEntry>();

        if (ke.isMeta()) {
            try {
                logEntries = MetadataTableUtil.getLogEntries(SystemCredentials.get(), ke);
            } catch (Exception ex) {
                throw new RuntimeException("Unable to read tablet log entries", ex);
            }
        } else {
            log.debug("Looking at metadata " + tabletsKeyValues);
            Text row = ke.getMetadataEntry();
            for (Entry<Key, Value> entry : tabletsKeyValues.entrySet()) {
                Key key = entry.getKey();
                if (key.getRow().equals(row)) {
                    if (key.getColumnFamily().equals(LogColumnFamily.NAME)) {
                        logEntries.add(MetadataTableUtil.entryFromKeyValue(key, entry.getValue()));
                    }
                }
            }
        }

        log.debug("got logs " + logEntries + " for " + ke);
        return logEntries;
    }

    private static Set<FileRef> lookupScanFiles(KeyExtent extent, SortedMap<Key, Value> tabletsKeyValues,
            VolumeManager fs) throws IOException {
        HashSet<FileRef> scanFiles = new HashSet<FileRef>();

        Text row = extent.getMetadataEntry();
        for (Entry<Key, Value> entry : tabletsKeyValues.entrySet()) {
            Key key = entry.getKey();
            if (key.getRow().equals(row) && key.getColumnFamily().equals(ScanFileColumnFamily.NAME)) {
                String meta = key.getColumnQualifier().toString();
                Path path = fs.getFullPath(extent.getTableId().toString(), meta);
                scanFiles.add(new FileRef(meta, path));
            }
        }

        return scanFiles;
    }

    private static long lookupFlushID(KeyExtent extent, SortedMap<Key, Value> tabletsKeyValues) {
        Text row = extent.getMetadataEntry();
        for (Entry<Key, Value> entry : tabletsKeyValues.entrySet()) {
            Key key = entry.getKey();
            if (key.getRow().equals(row) && TabletsSection.ServerColumnFamily.FLUSH_COLUMN
                    .equals(key.getColumnFamily(), key.getColumnQualifier()))
                return Long.parseLong(entry.getValue().toString());
        }

        return -1;
    }

    private static long lookupCompactID(KeyExtent extent, SortedMap<Key, Value> tabletsKeyValues) {
        Text row = extent.getMetadataEntry();
        for (Entry<Key, Value> entry : tabletsKeyValues.entrySet()) {
            Key key = entry.getKey();
            if (key.getRow().equals(row) && TabletsSection.ServerColumnFamily.COMPACT_COLUMN
                    .equals(key.getColumnFamily(), key.getColumnQualifier()))
                return Long.parseLong(entry.getValue().toString());
        }

        return -1;
    }

    private Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm,
            Configuration conf, VolumeManager fs, SortedMap<Key, Value> tabletsKeyValues) throws IOException {
        this(tabletServer, location, extent, trm, conf, fs, lookupLogEntries(extent, tabletsKeyValues),
                lookupDatafiles(tabletServer.getSystemConfiguration(), fs, extent, tabletsKeyValues),
                lookupTime(tabletServer.getSystemConfiguration(), extent, tabletsKeyValues),
                lookupLastServer(extent, tabletsKeyValues), lookupScanFiles(extent, tabletsKeyValues, fs),
                lookupFlushID(extent, tabletsKeyValues), lookupCompactID(extent, tabletsKeyValues));
    }

    private static TServerInstance lookupLastServer(KeyExtent extent, SortedMap<Key, Value> tabletsKeyValues) {
        for (Entry<Key, Value> entry : tabletsKeyValues.entrySet()) {
            if (entry.getKey().getColumnFamily().compareTo(TabletsSection.LastLocationColumnFamily.NAME) == 0) {
                return new TServerInstance(entry.getValue(), entry.getKey().getColumnQualifier());
            }
        }
        return null;
    }

    /**
     * Yet another constructor. This one allows us to avoid costly lookups into the metadata table if we already know the files we need, as at split time.
     */
    private Tablet(final TabletServer tabletServer, final Text location, final KeyExtent extent,
            final TabletResourceManager trm, final Configuration conf, final VolumeManager fs,
            final List<LogEntry> logEntries, final SortedMap<FileRef, DataFileValue> datafiles, String time,
            final TServerInstance lastLocation, Set<FileRef> scanFiles, long initFlushID, long initCompactID)
            throws IOException {
        Path locationPath;
        if (location.find(":") >= 0) {
            locationPath = new Path(location.toString());
        } else {
            locationPath = fs.getFullPath(FileType.TABLE, extent.getTableId().toString() + location.toString());
        }
        this.location = locationPath.makeQualified(fs.getFileSystemByPath(locationPath));
        this.lastLocation = lastLocation;
        this.tabletDirectory = location.toString();
        this.conf = conf;
        this.acuTableConf = tabletServer.getTableConfiguration(extent);

        this.fs = fs;
        this.extent = extent;
        this.tabletResources = trm;

        this.lastFlushID = initFlushID;
        this.lastCompactID = initCompactID;

        if (extent.isRootTablet()) {

            long rtime = Long.MIN_VALUE;
            for (FileRef ref : datafiles.keySet()) {
                Path path = ref.path();
                FileSystem ns = fs.getFileSystemByPath(path);
                FileSKVIterator reader = FileOperations.getInstance().openReader(path.toString(), true, ns,
                        ns.getConf(), tabletServer.getTableConfiguration(extent));
                long maxTime = -1;
                try {

                    while (reader.hasTop()) {
                        maxTime = Math.max(maxTime, reader.getTopKey().getTimestamp());
                        reader.next();
                    }

                } finally {
                    reader.close();
                }

                if (maxTime > rtime) {
                    time = TabletTime.LOGICAL_TIME_ID + "" + maxTime;
                    rtime = maxTime;
                }
            }
        }

        this.tabletServer = tabletServer;
        this.logId = tabletServer.createLogId(extent);

        this.timer = new TabletStatsKeeper();

        setupDefaultSecurityLabels(extent);

        tabletMemory = new TabletMemory();
        tabletTime = TabletTime.getInstance(time);
        persistedTime = tabletTime.getTime();

        acuTableConf.addObserver(configObserver = new ConfigurationObserver() {

            private void reloadConstraints() {
                constraintChecker.set(new ConstraintChecker(getTableConfiguration()));
            }

            @Override
            public void propertiesChanged() {
                reloadConstraints();

                try {
                    setupDefaultSecurityLabels(extent);
                } catch (Exception e) {
                    log.error("Failed to reload default security labels for extent: " + extent.toString());
                }
            }

            @Override
            public void propertyChanged(String prop) {
                if (prop.startsWith(Property.TABLE_CONSTRAINT_PREFIX.getKey()))
                    reloadConstraints();
                else if (prop.equals(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY.getKey())) {
                    try {
                        log.info("Default security labels changed for extent: " + extent.toString());
                        setupDefaultSecurityLabels(extent);
                    } catch (Exception e) {
                        log.error("Failed to reload default security labels for extent: " + extent.toString());
                    }
                }

            }

            @Override
            public void sessionExpired() {
                log.debug("Session expired, no longer updating per table props...");
            }

        });
        // Force a load of any per-table properties
        configObserver.propertiesChanged();

        tabletResources.setTablet(this, acuTableConf);
        if (!logEntries.isEmpty()) {
            log.info("Starting Write-Ahead Log recovery for " + this.extent);
            final long[] count = new long[2];
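            // count[0] tracks the number of mutations replayed from the write-ahead log;
            // count[1] tracks the maximum system-set timestamp observed during replay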
            final CommitSession commitSession = tabletMemory.getCommitSession();
            count[1] = Long.MIN_VALUE;
            try {
                Set<String> absPaths = new HashSet<String>();
                for (FileRef ref : datafiles.keySet())
                    absPaths.add(ref.path().toString());

                tabletServer.recover(this.tabletServer.getFileSystem(), this, logEntries, absPaths,
                        new MutationReceiver() {
                            @Override
                            public void receive(Mutation m) {
                                // LogReader.printMutation(m);
                                Collection<ColumnUpdate> muts = m.getUpdates();
                                for (ColumnUpdate columnUpdate : muts) {
                                    if (!columnUpdate.hasTimestamp()) {
                                        // if it is not a user set timestamp, it must have been set
                                        // by the system
                                        count[1] = Math.max(count[1], columnUpdate.getTimestamp());
                                    }
                                }
                                tabletMemory.mutate(commitSession, Collections.singletonList(m));
                                count[0]++;
                            }
                        });

                if (count[1] != Long.MIN_VALUE) {
                    tabletTime.useMaxTimeFromWALog(count[1]);
                }
                commitSession.updateMaxCommittedTime(tabletTime.getTime());

                tabletMemory.updateMemoryUsageStats();

                if (count[0] == 0) {
                    MetadataTableUtil.removeUnusedWALEntries(extent, logEntries, tabletServer.getLock());
                    logEntries.clear();
                }

            } catch (Throwable t) {
                if (acuTableConf.getBoolean(Property.TABLE_FAILURES_IGNORE)) {
                    log.warn("Error recovering from log files: ", t);
                } else {
                    throw new RuntimeException(t);
                }
            }
            // make some closed references that represent the recovered logs
            currentLogs = new HashSet<DfsLogger>();
            for (LogEntry logEntry : logEntries) {
                for (String log : logEntry.logSet) {
                    String[] parts = log.split("/", 2);
                    Path file = fs.getFullPath(FileType.WAL, parts[1]);
                    currentLogs.add(new DfsLogger(tabletServer.getServerConfig(), logEntry.server, file));
                }
            }

            log.info("Write-Ahead Log recovery complete for " + this.extent + " (" + count[0]
                    + " mutations applied, " + tabletMemory.getNumEntries() + " entries created)");
        }

        String contextName = acuTableConf.get(Property.TABLE_CLASSPATH);
        if (contextName != null && !contextName.equals("")) {
            // initialize context classloader, instead of possibly waiting for it to initialize for a scan
            // TODO this could hang, causing other tablets to fail to load - ACCUMULO-1292
            AccumuloVFSClassLoader.getContextManager().getClassLoader(contextName);
        }

        // do this last after tablet is completely setup because it
        // could cause major compaction to start
        datafileManager = new DatafileManager(datafiles);

        computeNumEntries();

        datafileManager.removeFilesAfterScan(scanFiles);

        // look for hints of a failure on the previous tablet server
        if (!logEntries.isEmpty() || needsMajorCompaction(MajorCompactionReason.NORMAL)) {
            // look for any temp files hanging around
            removeOldTemporaryFiles();
        }

        log.log(TLevel.TABLET_HIST, extent + " opened ");
    }

    private void removeOldTemporaryFiles() {
        // remove any temporary files created by a previous tablet server
        try {
            for (FileStatus tmp : fs.globStatus(new Path(location, "*_tmp"))) {
                try {
                    log.debug("Removing old temp file " + tmp.getPath());
                    fs.delete(tmp.getPath());
                } catch (IOException ex) {
                    log.error("Unable to remove old temp file " + tmp.getPath() + ": " + ex);
                }
            }
        } catch (IOException ex) {
            log.error("Error scanning for old temp files in " + location);
        }
    }

    private void setupDefaultSecurityLabels(KeyExtent extent) {
        if (extent.isMeta()) {
            defaultSecurityLabel = new byte[0];
        } else {
            try {
                ColumnVisibility cv = new ColumnVisibility(
                        acuTableConf.get(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY));
                this.defaultSecurityLabel = cv.getExpression();
            } catch (Exception e) {
                log.error(e, e);
                this.defaultSecurityLabel = new byte[0];
            }
        }
    }

    private static Collection<String> cleanUpFiles(VolumeManager fs, FileStatus[] files, boolean deleteTmp)
            throws IOException {
        /*
         * called in constructor and before major compactions
         */
        Collection<String> goodFiles = new ArrayList<String>(files.length);

        for (FileStatus file : files) {

            String path = file.getPath().toString();
            String filename = file.getPath().getName();

            // check for incomplete major compaction, this should only occur
            // for root tablet
            if (filename.startsWith("delete+")) {
                String expectedCompactedFile = path.substring(0, path.lastIndexOf("/delete+")) + "/"
                        + filename.split("\\+")[1];
                if (fs.exists(new Path(expectedCompactedFile))) {
                    // compaction finished, but did not finish deleting compacted files... so delete it
                    if (!fs.deleteRecursively(file.getPath()))
                        log.warn("Delete of file: " + file.getPath().toString() + " returned false");
                    continue;
                }
                // compaction did not finish, so put files back

                // reset path and filename for rest of loop
                filename = filename.split("\\+", 3)[2];
                path = path.substring(0, path.lastIndexOf("/delete+")) + "/" + filename;

                if (!fs.rename(file.getPath(), new Path(path)))
                    log.warn("Rename of " + file.getPath().toString() + " to " + path + " returned false");
            }

            if (filename.endsWith("_tmp")) {
                if (deleteTmp) {
                    log.warn("cleaning up old tmp file: " + path);
                    if (!fs.deleteRecursively(file.getPath()))
                        log.warn("Delete of tmp file: " + file.getPath().toString() + " return false");

                }
                continue;
            }

            if (!filename.startsWith(Constants.MAPFILE_EXTENSION + "_")
                    && !FileOperations.getValidExtensions().contains(filename.split("\\.")[1])) {
                log.error("unknown file in tablet" + path);
                continue;
            }

            goodFiles.add(path);
        }

        return goodFiles;
    }

    public static class KVEntry extends KeyValue {
        public KVEntry(Key k, Value v) {
            super(new Key(k), Arrays.copyOf(v.get(), v.get().length));
        }

        @Override
        public String toString() {
            return key.toString() + "=" + getValue();
        }

        int numBytes() {
            return key.getSize() + getValue().get().length;
        }

        int estimateMemoryUsed() {
            return key.getSize() + getValue().get().length + (9 * 32); // assume 9 objects with ~32 bytes of overhead each
        }
    }

    private LookupResult lookup(SortedKeyValueIterator<Key, Value> mmfi, List<Range> ranges,
            HashSet<Column> columnSet, ArrayList<KVEntry> results, long maxResultsSize) throws IOException {

        LookupResult lookupResult = new LookupResult();

        boolean exceededMemoryUsage = false;
        boolean tabletClosed = false;

        Set<ByteSequence> cfset = null;
        if (columnSet.size() > 0)
            cfset = LocalityGroupUtil.families(columnSet);
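
        // ranges are processed in order; once the memory cap is hit or the tablet closes,
        // every remaining range is added to unfinishedRanges so the client can retry it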

        for (Range range : ranges) {

            if (exceededMemoryUsage || tabletClosed) {
                lookupResult.unfinishedRanges.add(range);
                continue;
            }

            int entriesAdded = 0;

            try {
                if (cfset != null)
                    mmfi.seek(range, cfset, true);
                else
                    mmfi.seek(range, LocalityGroupUtil.EMPTY_CF_SET, false);

                while (mmfi.hasTop()) {
                    Key key = mmfi.getTopKey();

                    KVEntry kve = new KVEntry(key, mmfi.getTopValue());
                    results.add(kve);
                    entriesAdded++;
                    lookupResult.bytesAdded += kve.estimateMemoryUsed();
                    lookupResult.dataSize += kve.numBytes();

                    exceededMemoryUsage = lookupResult.bytesAdded > maxResultsSize;

                    if (exceededMemoryUsage) {
                        addUnfinishedRange(lookupResult, range, key, false);
                        break;
                    }

                    mmfi.next();
                }

            } catch (TooManyFilesException tmfe) {
                // treat this as a closed tablet, and let the client retry
                log.warn("Tablet " + getExtent() + " has too many files, batch lookup can not run");
                handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded);
                tabletClosed = true;
            } catch (IOException ioe) {
                if (shutdownInProgress()) {
                    // assume HDFS shutdown hook caused this exception
                    log.debug("IOException while shutdown in progress ", ioe);
                    handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded);
                    tabletClosed = true;
                } else {
                    throw ioe;
                }
            } catch (IterationInterruptedException iie) {
                if (isClosed()) {
                    handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded);
                    tabletClosed = true;
                } else {
                    throw iie;
                }
            } catch (TabletClosedException tce) {
                handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded);
                tabletClosed = true;
            }

        }

        return lookupResult;
    }

    private void handleTabletClosedDuringScan(ArrayList<KVEntry> results, LookupResult lookupResult,
            boolean exceededMemoryUsage, Range range, int entriesAdded) {
        if (exceededMemoryUsage)
            throw new IllegalStateException("tablet should not exceed memory usage or close, not both");

        if (entriesAdded > 0)
            addUnfinishedRange(lookupResult, range, results.get(results.size() - 1).key, false);
        else
            lookupResult.unfinishedRanges.add(range);

        lookupResult.closed = true;
    }

    private void addUnfinishedRange(LookupResult lookupResult, Range range, Key key, boolean inclusiveStartKey) {
        if (range.getEndKey() == null || key.compareTo(range.getEndKey()) < 0) {
            Range nlur = new Range(new Key(key), inclusiveStartKey, range.getEndKey(), range.isEndKeyInclusive());
            lookupResult.unfinishedRanges.add(nlur);
        }
    }

    public static interface KVReceiver {
        void receive(List<KVEntry> matches) throws IOException;
    }

    class LookupResult {
        List<Range> unfinishedRanges = new ArrayList<Range>();
        long bytesAdded = 0;
        long dataSize = 0;
        boolean closed = false;
    }

    public LookupResult lookup(List<Range> ranges, HashSet<Column> columns, Authorizations authorizations,
            ArrayList<KVEntry> results, long maxResultSize, List<IterInfo> ssiList,
            Map<String, Map<String, String>> ssio, AtomicBoolean interruptFlag) throws IOException {

        if (ranges.size() == 0) {
            return new LookupResult();
        }

        ranges = Range.mergeOverlapping(ranges);
        Collections.sort(ranges);

        Range tabletRange = extent.toDataRange();
        for (Range range : ranges) {
            // do a test to see if this range falls within the tablet, if it does not
            // then clip will throw an exception
            tabletRange.clip(range);
        }

        ScanDataSource dataSource = new ScanDataSource(authorizations, this.defaultSecurityLabel, columns, ssiList,
                ssio, interruptFlag);

        LookupResult result = null;

        try {
            SortedKeyValueIterator<Key, Value> iter = new SourceSwitchingIterator(dataSource);
            result = lookup(iter, ranges, columns, results, maxResultSize);
            return result;
        } catch (IOException ioe) {
            dataSource.close(true);
            throw ioe;
        } finally {
            // code is in the finally block because we always want
            // to return map files, even when an exception is thrown
            dataSource.close(false);

            synchronized (this) {
                queryCount += results.size();
                if (result != null)
                    queryBytes += result.dataSize;
            }
        }
    }

    private Batch nextBatch(SortedKeyValueIterator<Key, Value> iter, Range range, int num, Set<Column> columns)
            throws IOException {

        // log.info("In nextBatch..");

        List<KVEntry> results = new ArrayList<KVEntry>();
        Key key = null;

        Value value;
        long resultSize = 0L;
        long resultBytes = 0L;

        long maxResultsSize = acuTableConf.getMemoryInBytes(Property.TABLE_SCAN_MAXMEM);

        if (columns.size() == 0) {
            iter.seek(range, LocalityGroupUtil.EMPTY_CF_SET, false);
        } else {
            iter.seek(range, LocalityGroupUtil.families(columns), true);
        }
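
        // accumulate entries until the batch reaches the memory cap or the requested
        // count; continueKey records where the next read() should resume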

        Key continueKey = null;
        boolean skipContinueKey = false;

        boolean endOfTabletReached = false;
        while (iter.hasTop()) {

            value = iter.getTopValue();
            key = iter.getTopKey();

            KVEntry kvEntry = new KVEntry(key, value); // copies key and value
            results.add(kvEntry);
            resultSize += kvEntry.estimateMemoryUsed();
            resultBytes += kvEntry.numBytes();

            if (resultSize >= maxResultsSize || results.size() >= num) {
                continueKey = new Key(key);
                skipContinueKey = true;
                break;
            }

            iter.next();
        }

        if (!iter.hasTop()) {
            endOfTabletReached = true;
        }

        Batch retBatch = new Batch();
        retBatch.numBytes = resultBytes;

        if (!endOfTabletReached) {
            retBatch.continueKey = continueKey;
            retBatch.skipContinueKey = skipContinueKey;
        } else {
            retBatch.continueKey = null;
        }

        if (endOfTabletReached && results.size() == 0)
            retBatch.results = null;
        else
            retBatch.results = results;

        return retBatch;
    }

    /**
     * Determine if a JVM shutdown is in progress.
     * 
     */
    private boolean shutdownInProgress() {
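        // Runtime.removeShutdownHook throws IllegalStateException once the JVM shutdown
        // sequence has begun, so attempting to remove a throwaway hook detects shutdown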
        try {
            Runtime.getRuntime().removeShutdownHook(new Thread(new Runnable() {
                @Override
                public void run() {
                }
            }));
        } catch (IllegalStateException ise) {
            return true;
        }

        return false;
    }

    private class Batch {
        public boolean skipContinueKey;
        public List<KVEntry> results;
        public Key continueKey;
        public long numBytes;
    }

    Scanner createScanner(Range range, int num, Set<Column> columns, Authorizations authorizations,
            List<IterInfo> ssiList, Map<String, Map<String, String>> ssio, boolean isolated,
            AtomicBoolean interruptFlag) {
        // do a test to see if this range falls within the tablet, if it does not
        // then clip will throw an exception
        extent.toDataRange().clip(range);

        ScanOptions opts = new ScanOptions(num, authorizations, this.defaultSecurityLabel, columns, ssiList, ssio,
                interruptFlag, isolated);
        return new Scanner(range, opts);
    }

    class ScanBatch {
        boolean more;
        List<KVEntry> results;

        ScanBatch(List<KVEntry> results, boolean more) {
            this.results = results;
            this.more = more;
        }
    }

    class Scanner {

        private ScanOptions options;
        private Range range;
        private SortedKeyValueIterator<Key, Value> isolatedIter;
        private ScanDataSource isolatedDataSource;
        private boolean sawException = false;
        private boolean scanClosed = false;

        Scanner(Range range, ScanOptions options) {
            this.range = range;
            this.options = options;
        }

        synchronized ScanBatch read() throws IOException, TabletClosedException {

            if (sawException)
                throw new IllegalStateException("Tried to use scanner after exception occurred.");

            if (scanClosed)
                throw new IllegalStateException("Tried to use scanner after it was closed.");

            Batch results = null;

            ScanDataSource dataSource;
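
            // an isolated scan reuses a single data source (and its reserved files) across
            // read() calls; a non-isolated scan opens a fresh data source for every batch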

            if (options.isolated) {
                if (isolatedDataSource == null)
                    isolatedDataSource = new ScanDataSource(options);
                dataSource = isolatedDataSource;
            } else {
                dataSource = new ScanDataSource(options);
            }

            try {

                SortedKeyValueIterator<Key, Value> iter;

                if (options.isolated) {
                    if (isolatedIter == null)
                        isolatedIter = new SourceSwitchingIterator(dataSource, true);
                    else
                        isolatedDataSource.fileManager.reattach();
                    iter = isolatedIter;
                } else {
                    iter = new SourceSwitchingIterator(dataSource, false);
                }

                results = nextBatch(iter, range, options.num, options.columnSet);

                if (results.results == null) {
                    range = null;
                    return new ScanBatch(new ArrayList<Tablet.KVEntry>(), false);
                } else if (results.continueKey == null) {
                    return new ScanBatch(results.results, false);
                } else {
                    range = new Range(results.continueKey, !results.skipContinueKey, range.getEndKey(),
                            range.isEndKeyInclusive());
                    return new ScanBatch(results.results, true);
                }

            } catch (IterationInterruptedException iie) {
                sawException = true;
                if (isClosed())
                    throw new TabletClosedException(iie);
                else
                    throw iie;
            } catch (IOException ioe) {
                if (shutdownInProgress()) {
                    log.debug("IOException while shutdown in progress ", ioe);
                    throw new TabletClosedException(ioe); // assume IOException was caused by execution of HDFS shutdown hook
                }

                sawException = true;
                dataSource.close(true);
                throw ioe;
            } catch (RuntimeException re) {
                sawException = true;
                throw re;
            } finally {
                // code is in the finally block because we always want
                // to return map files, even when an exception is thrown
                if (!options.isolated)
                    dataSource.close(false);
                else if (dataSource.fileManager != null)
                    dataSource.fileManager.detach();

                synchronized (Tablet.this) {
                    if (results != null && results.results != null) {
                        long more = results.results.size();
                        queryCount += more;
                        queryBytes += results.numBytes;
                    }
                }
            }
        }

    // close and read are synchronized because we cannot call close on the data source while it is in use;
    // that could lead to the case where file iterators that are in use by a thread are returned
    // to the pool... this would be bad
        void close() {
            options.interruptFlag.set(true);
            synchronized (this) {
                scanClosed = true;
                if (isolatedDataSource != null)
                    isolatedDataSource.close(false);
            }
        }
    }

    static class ScanOptions {

        // scan options
        Authorizations authorizations;
        byte[] defaultLabels;
        Set<Column> columnSet;
        List<IterInfo> ssiList;
        Map<String, Map<String, String>> ssio;
        AtomicBoolean interruptFlag;
        int num;
        boolean isolated;

        ScanOptions(int num, Authorizations authorizations, byte[] defaultLabels, Set<Column> columnSet,
                List<IterInfo> ssiList, Map<String, Map<String, String>> ssio, AtomicBoolean interruptFlag,
                boolean isolated) {
            this.num = num;
            this.authorizations = authorizations;
            this.defaultLabels = defaultLabels;
            this.columnSet = columnSet;
            this.ssiList = ssiList;
            this.ssio = ssio;
            this.interruptFlag = interruptFlag;
            this.isolated = isolated;
        }

    }

    class ScanDataSource implements DataSource {

        // data source state
        private ScanFileManager fileManager;
        private SortedKeyValueIterator<Key, Value> iter;
        private long expectedDeletionCount;
        private List<MemoryIterator> memIters = null;
        private long fileReservationId;
        private AtomicBoolean interruptFlag;
        private StatsIterator statsIterator;

        ScanOptions options;

        ScanDataSource(Authorizations authorizations, byte[] defaultLabels, HashSet<Column> columnSet,
                List<IterInfo> ssiList, Map<String, Map<String, String>> ssio, AtomicBoolean interruptFlag) {
            expectedDeletionCount = dataSourceDeletions.get();
            this.options = new ScanOptions(-1, authorizations, defaultLabels, columnSet, ssiList, ssio,
                    interruptFlag, false);
            this.interruptFlag = interruptFlag;
        }

        ScanDataSource(ScanOptions options) {
            expectedDeletionCount = dataSourceDeletions.get();
            this.options = options;
            this.interruptFlag = options.interruptFlag;
        }

        @Override
        public DataSource getNewDataSource() {
            if (!isCurrent()) {
                // log.debug("Switching data sources during a scan");
                if (memIters != null) {
                    tabletMemory.returnIterators(memIters);
                    memIters = null;
                    datafileManager.returnFilesForScan(fileReservationId);
                    fileReservationId = -1;
                }

                if (fileManager != null)
                    fileManager.releaseOpenFiles(false);

                expectedDeletionCount = dataSourceDeletions.get();
                iter = null;

                return this;
            }
            return this;
        }

        @Override
        public boolean isCurrent() {
            return expectedDeletionCount == dataSourceDeletions.get();
        }

        @Override
        public SortedKeyValueIterator<Key, Value> iterator() throws IOException {
            if (iter == null)
                iter = createIterator();
            return iter;
        }

        private SortedKeyValueIterator<Key, Value> createIterator() throws IOException {

            Map<FileRef, DataFileValue> files;

            synchronized (Tablet.this) {

                if (memIters != null)
                    throw new IllegalStateException("Tried to create new scan iterator w/o releasing memory");

                if (Tablet.this.closed)
                    throw new TabletClosedException();

                if (interruptFlag.get())
                    throw new IterationInterruptedException(extent.toString() + " " + interruptFlag.hashCode());

                // only acquire the file manager when we know the tablet is open
                if (fileManager == null) {
                    fileManager = tabletResources.newScanFileManager();
                    activeScans.add(this);
                }

                if (fileManager.getNumOpenFiles() != 0)
                    throw new IllegalStateException("Tried to create new scan iterator w/o releasing files");

                // set this before trying to get iterators in case
                // getIterators() throws an exception
                expectedDeletionCount = dataSourceDeletions.get();

                memIters = tabletMemory.getIterators();
                Pair<Long, Map<FileRef, DataFileValue>> reservation = datafileManager.reserveFilesForScan();
                fileReservationId = reservation.getFirst();
                files = reservation.getSecond();
            }

            Collection<InterruptibleIterator> mapfiles = fileManager.openFiles(files, options.isolated);

            List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<SortedKeyValueIterator<Key, Value>>(
                    mapfiles.size() + memIters.size());

            iters.addAll(mapfiles);
            iters.addAll(memIters);

            for (SortedKeyValueIterator<Key, Value> skvi : iters)
                ((InterruptibleIterator) skvi).setInterruptFlag(interruptFlag);
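
            // assemble the scan-time iterator stack: merged file and in-memory iterators,
            // then layer on stats collection, delete suppression, column family skipping,
            // column qualifier filtering, and visibility filtering before loading the
            // table's configured scan iterators on top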

            MultiIterator multiIter = new MultiIterator(iters, extent);

            TabletIteratorEnvironment iterEnv = new TabletIteratorEnvironment(IteratorScope.scan, acuTableConf,
                    fileManager, files);

            statsIterator = new StatsIterator(multiIter, TabletServer.seekCount, scannedCount);

            DeletingIterator delIter = new DeletingIterator(statsIterator, false);

            ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(delIter);

            ColumnQualifierFilter colFilter = new ColumnQualifierFilter(cfsi, options.columnSet);

            VisibilityFilter visFilter = new VisibilityFilter(colFilter, options.authorizations,
                    options.defaultLabels);

            return iterEnv.getTopLevelIterator(IteratorUtil.loadIterators(IteratorScope.scan, visFilter, extent,
                    acuTableConf, options.ssiList, options.ssio, iterEnv));
        }

        private void close(boolean sawErrors) {

            if (memIters != null) {
                tabletMemory.returnIterators(memIters);
                memIters = null;
                datafileManager.returnFilesForScan(fileReservationId);
                fileReservationId = -1;
            }

            synchronized (Tablet.this) {
                activeScans.remove(this);
                if (activeScans.size() == 0)
                    Tablet.this.notifyAll();
            }

            if (fileManager != null) {
                fileManager.releaseOpenFiles(sawErrors);
                fileManager = null;
            }

            if (statsIterator != null) {
                statsIterator.report();
            }

        }

        public void interrupt() {
            interruptFlag.set(true);
        }

        @Override
        public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
            throw new UnsupportedOperationException();
        }

    }

    private DataFileValue minorCompact(Configuration conf, VolumeManager fs, InMemoryMap memTable,
            FileRef tmpDatafile, FileRef newDatafile, FileRef mergeFile, boolean hasQueueTime, long queued,
            CommitSession commitSession, long flushId, MinorCompactionReason mincReason) {
        boolean failed = false;
        long start = System.currentTimeMillis();
        timer.incrementStatusMinor();

        long count = 0;

        try {
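            // two traced phases: "write" flushes the in-memory map (merging mergeFile if
            // present) into tmpDatafile, and "bringOnline" swaps the finished file into the
            // tablet's file set via the datafile manager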
            Span span = Trace.start("write");
            CompactionStats stats;
            try {
                count = memTable.getNumEntries();

                DataFileValue dfv = null;
                if (mergeFile != null)
                    dfv = datafileManager.getDatafileSizes().get(mergeFile);

                MinorCompactor compactor = new MinorCompactor(conf, fs, memTable, mergeFile, dfv, tmpDatafile,
                        acuTableConf, extent, mincReason);
                stats = compactor.call();
            } finally {
                span.stop();
            }
            span = Trace.start("bringOnline");
            try {
                datafileManager.bringMinorCompactionOnline(tmpDatafile, newDatafile, mergeFile,
                        new DataFileValue(stats.getFileSize(), stats.getEntriesWritten()), commitSession, flushId);
            } finally {
                span.stop();
            }
            return new DataFileValue(stats.getFileSize(), stats.getEntriesWritten());
        } catch (Exception E) {
            failed = true;
            throw new RuntimeException(E);
        } catch (Error E) {
            // Weird errors like "OutOfMemoryError" when trying to create the thread for the compaction
            failed = true;
            throw new RuntimeException(E);
        } finally {
            try {
                tabletMemory.finalizeMinC();
            } catch (Throwable t) {
                log.error("Failed to free tablet memory", t);
            }

            if (!failed) {
                lastMinorCompactionFinishTime = System.currentTimeMillis();
            }
            if (tabletServer.mincMetrics.isEnabled())
                tabletServer.mincMetrics.add(TabletServerMinCMetrics.minc, (lastMinorCompactionFinishTime - start));
            if (hasQueueTime) {
                timer.updateTime(Operation.MINOR, queued, start, count, failed);
                if (tabletServer.mincMetrics.isEnabled())
                    tabletServer.mincMetrics.add(TabletServerMinCMetrics.queue, (start - queued));
            } else
                timer.updateTime(Operation.MINOR, start, count, failed);
        }
    }

    private class MinorCompactionTask implements Runnable {

        private long queued;
        private CommitSession commitSession;
        private DataFileValue stats;
        private FileRef mergeFile;
        private long flushId;
        private MinorCompactionReason mincReason;

        MinorCompactionTask(FileRef mergeFile, CommitSession commitSession, long flushId,
                MinorCompactionReason mincReason) {
            queued = System.currentTimeMillis();
            minorCompactionWaitingToStart = true;
            this.commitSession = commitSession;
            this.mergeFile = mergeFile;
            this.flushId = flushId;
            this.mincReason = mincReason;
        }

        @Override
        public void run() {
            minorCompactionWaitingToStart = false;
            minorCompactionInProgress = true;
            Span minorCompaction = Trace.on("minorCompaction");
            try {
                FileRef newMapfileLocation = getNextMapFilename(mergeFile == null ? "F" : "M");
                FileRef tmpFileRef = new FileRef(newMapfileLocation.path() + "_tmp");
                Span span = Trace.start("waitForCommits");
                synchronized (Tablet.this) {
                    commitSession.waitForCommitsToFinish();
                }
                span.stop();
                span = Trace.start("start");
                while (true) {
                    try {
                        // the purpose of the minor compaction start event is to keep track of the filename... in the case
                        // where the metadata table write for the minor compaction finishes and the process dies before
                        // writing the minor compaction finish event, the start event + filename in the metadata table will
                        // prevent recovery of duplicate data... the minor compaction start event can be written at any time
                        // before the metadata write for the minor compaction
                        tabletServer.minorCompactionStarted(commitSession, commitSession.getWALogSeq() + 1,
                                newMapfileLocation.path().toString());
                        break;
                    } catch (IOException e) {
                        log.warn("Failed to write to write ahead log " + e.getMessage(), e);
                    }
                }
                span.stop();
                span = Trace.start("compact");
                this.stats = minorCompact(conf, fs, tabletMemory.getMinCMemTable(), tmpFileRef, newMapfileLocation,
                        mergeFile, true, queued, commitSession, flushId, mincReason);
                span.stop();

                if (needsSplit()) {
                    tabletServer.executeSplit(Tablet.this);
                } else {
                    initiateMajorCompaction(MajorCompactionReason.NORMAL);
                }
            } catch (Throwable t) {
                log.error("Unknown error during minor compaction for extent: " + getExtent(), t);
                throw new RuntimeException(t);
            } finally {
                minorCompactionInProgress = false;
                minorCompaction.data("extent", extent.toString());
                minorCompaction.data("numEntries", Long.toString(this.stats.getNumEntries()));
                minorCompaction.data("size", Long.toString(this.stats.getSize()));
                minorCompaction.stop();
            }
        }
    }

    private synchronized MinorCompactionTask prepareForMinC(long flushId, MinorCompactionReason mincReason) {
        CommitSession oldCommitSession = tabletMemory.prepareForMinC();
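        // remember the logs that covered the memory being compacted as otherLogs;
        // currentLogs starts fresh for the new in-memory map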
        otherLogs = currentLogs;
        currentLogs = new HashSet<DfsLogger>();

        FileRef mergeFile = datafileManager.reserveMergingMinorCompactionFile();

        return new MinorCompactionTask(mergeFile, oldCommitSession, flushId, mincReason);

    }

    void flush(long tableFlushID) {
        boolean updateMetadata = false;
        boolean initiateMinor = false;

        try {

            synchronized (this) {

                // only one thread at a time may update the flush ID, to keep the metadata table and the tablet's in-memory state consistent
                if (updatingFlushID)
                    return;

                if (lastFlushID >= tableFlushID)
                    return;

                if (closing || closed || tabletMemory.memoryReservedForMinC())
                    return;

                if (tabletMemory.getMemTable().getNumEntries() == 0) {
                    lastFlushID = tableFlushID;
                    updatingFlushID = true;
                    updateMetadata = true;
                } else
                    initiateMinor = true;
            }

            if (updateMetadata) {
                Credentials creds = SystemCredentials.get();
                // if multiple threads were allowed to update this outside of a sync block, then it would be
                // a race condition
                MetadataTableUtil.updateTabletFlushID(extent, tableFlushID, creds, tabletServer.getLock());
            } else if (initiateMinor)
                initiateMinorCompaction(tableFlushID, MinorCompactionReason.USER);

        } finally {
            if (updateMetadata) {
                synchronized (this) {
                    updatingFlushID = false;
                    this.notifyAll();
                }
            }
        }

    }

    boolean initiateMinorCompaction(MinorCompactionReason mincReason) {
        if (isClosed()) {
            // don't bother trying to get the flush id if closed... the tablet could be closed after this check, but that is ok... just trying to cut down on unneeded log messages....
            return false;
        }

        // get the flush id before the new memmap is made available for write
        long flushId;
        try {
            flushId = getFlushID();
        } catch (NoNodeException e) {
            log.info("Asked to initiate MinC when there was no flush id " + getExtent() + " " + e.getMessage());
            return false;
        }
        return initiateMinorCompaction(flushId, mincReason);
    }

    boolean minorCompactNow(MinorCompactionReason mincReason) {
        long flushId;
        try {
            flushId = getFlushID();
        } catch (NoNodeException e) {
            log.info("Asked to initiate MinC when there was no flush id " + getExtent() + " " + e.getMessage());
            return false;
        }
        MinorCompactionTask mct = createMinorCompactionTask(flushId, mincReason);
        if (mct == null)
            return false;
        mct.run();
        return true;
    }

    boolean initiateMinorCompaction(long flushId, MinorCompactionReason mincReason) {
        MinorCompactionTask mct = createMinorCompactionTask(flushId, mincReason);
        if (mct == null)
            return false;
        tabletResources.executeMinorCompaction(mct);
        return true;
    }

    private MinorCompactionTask createMinorCompactionTask(long flushId, MinorCompactionReason mincReason) {
        MinorCompactionTask mct;
        long t1, t2;

        StringBuilder logMessage = null;

        try {
            synchronized (this) {
                t1 = System.currentTimeMillis();

                if (closing || closed || majorCompactionWaitingToStart || tabletMemory.memoryReservedForMinC()
                        || tabletMemory.getMemTable().getNumEntries() == 0 || updatingFlushID) {

                    logMessage = new StringBuilder();

                    logMessage.append(extent.toString());
                    logMessage.append(" closing " + closing);
                    logMessage.append(" closed " + closed);
                    logMessage.append(" majorCompactionWaitingToStart " + majorCompactionWaitingToStart);
                    if (tabletMemory != null)
                        logMessage.append(
                                " tabletMemory.memoryReservedForMinC() " + tabletMemory.memoryReservedForMinC());
                    if (tabletMemory != null && tabletMemory.getMemTable() != null)
                        logMessage.append(" tabletMemory.getMemTable().getNumEntries() "
                                + tabletMemory.getMemTable().getNumEntries());
                    logMessage.append(" updatingFlushID " + updatingFlushID);

                    return null;
                }
                // We're still recovering log entries
                if (datafileManager == null) {
                    logMessage = new StringBuilder();
                    logMessage.append(extent.toString());
                    logMessage.append(" datafileManager " + datafileManager);
                    return null;
                }

                mct = prepareForMinC(flushId, mincReason);
                t2 = System.currentTimeMillis();
            }
        } finally {
            // log outside of sync block
            if (logMessage != null && log.isDebugEnabled())
                log.debug(logMessage);
        }

        log.debug(String.format("MinC initiate lock %.2f secs", (t2 - t1) / 1000.0));
        return mct;
    }

    long getFlushID() throws NoNodeException {
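        // the flush ID is stored as a string under the table's ZooKeeper node; parse it as a long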
        try {
            String zTablePath = Constants.ZROOT + "/" + HdfsZooInstance.getInstance().getInstanceID()
                    + Constants.ZTABLES + "/" + extent.getTableId() + Constants.ZTABLE_FLUSH_ID;
            return Long.parseLong(new String(ZooReaderWriter.getRetryingInstance().getData(zTablePath, null)));
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        } catch (NumberFormatException nfe) {
            throw new RuntimeException(nfe);
        } catch (KeeperException ke) {
            if (ke instanceof NoNodeException) {
                throw (NoNodeException) ke;
            } else {
                throw new RuntimeException(ke);
            }
        }
    }

    long getCompactionCancelID() {
        String zTablePath = Constants.ZROOT + "/" + HdfsZooInstance.getInstance().getInstanceID()
                + Constants.ZTABLES + "/" + extent.getTableId() + Constants.ZTABLE_COMPACT_CANCEL_ID;

        try {
            return Long.parseLong(new String(ZooReaderWriter.getRetryingInstance().getData(zTablePath, null)));
        } catch (KeeperException e) {
            throw new RuntimeException(e);
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
    }

    Pair<Long, List<IteratorSetting>> getCompactionID() throws NoNodeException {
        try {
            String zTablePath = Constants.ZROOT + "/" + HdfsZooInstance.getInstance().getInstanceID()
                    + Constants.ZTABLES + "/" + extent.getTableId() + Constants.ZTABLE_COMPACT_ID;

            String[] tokens = new String(ZooReaderWriter.getRetryingInstance().getData(zTablePath, null))
                    .split(",");
            long compactID = Long.parseLong(tokens[0]);

            CompactionIterators iters = new CompactionIterators();

            if (tokens.length > 1) {
                Hex hex = new Hex();
                ByteArrayInputStream bais = new ByteArrayInputStream(
                        hex.decode(tokens[1].split("=")[1].getBytes()));
                DataInputStream dis = new DataInputStream(bais);

                try {
                    iters.readFields(dis);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }

                KeyExtent ke = new KeyExtent(extent.getTableId(), iters.getEndRow(), iters.getStartRow());

                if (!ke.overlaps(extent)) {
                    // only use iterators if compaction range overlaps
                    iters = new CompactionIterators();
                }
            }

            return new Pair<Long, List<IteratorSetting>>(compactID, iters.getIterators());
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        } catch (NumberFormatException nfe) {
            throw new RuntimeException(nfe);
        } catch (KeeperException ke) {
            if (ke instanceof NoNodeException) {
                throw (NoNodeException) ke;
            } else {
                throw new RuntimeException(ke);
            }
        } catch (DecoderException e) {
            throw new RuntimeException(e);
        }
    }

    public synchronized void waitForMinC() {
        tabletMemory.waitForMinC();
    }

    static class TConstraintViolationException extends Exception {
        private static final long serialVersionUID = 1L;
        private Violations violations;
        private List<Mutation> violators;
        private List<Mutation> nonViolators;
        private CommitSession commitSession;

        TConstraintViolationException(Violations violations, List<Mutation> violators, List<Mutation> nonViolators,
                CommitSession commitSession) {
            this.violations = violations;
            this.violators = violators;
            this.nonViolators = nonViolators;
            this.commitSession = commitSession;
        }

        Violations getViolations() {
            return violations;
        }

        List<Mutation> getViolators() {
            return violators;
        }

        List<Mutation> getNonViolators() {
            return nonViolators;
        }

        CommitSession getCommitSession() {
            return commitSession;
        }
    }

    private synchronized CommitSession finishPreparingMutations(long time) {
        if (writesInProgress < 0) {
            throw new IllegalStateException("waitingForLogs < 0 " + writesInProgress);
        }

        if (closed || tabletMemory == null) {
            // log.debug("tablet closed, can't commit");
            return null;
        }

        writesInProgress++;
        CommitSession commitSession = tabletMemory.getCommitSession();
        commitSession.incrementCommitsInProgress();
        commitSession.updateMaxCommittedTime(time);
        return commitSession;
    }

    public void checkConstraints() {
        ConstraintChecker cc = constraintChecker.get();

        if (cc.classLoaderChanged()) {
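            // the table's constraint classes may have been reloaded; build a fresh checker
            // and install it only if no other thread has already done so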
            ConstraintChecker ncc = new ConstraintChecker(getTableConfiguration());
            constraintChecker.compareAndSet(cc, ncc);
        }
    }

    public CommitSession prepareMutationsForCommit(TservConstraintEnv cenv, List<Mutation> mutations)
            throws TConstraintViolationException {

        ConstraintChecker cc = constraintChecker.get();

        List<Mutation> violators = null;
        Violations violations = new Violations();
        cenv.setExtent(extent);
        for (Mutation mutation : mutations) {
            Violations more = cc.check(cenv, mutation);
            if (more != null) {
                violations.add(more);
                if (violators == null)
                    violators = new ArrayList<Mutation>();
                violators.add(mutation);
            }
        }

        long time = tabletTime.setUpdateTimes(mutations);

        if (!violations.isEmpty()) {

            HashSet<Mutation> violatorsSet = new HashSet<Mutation>(violators);
            ArrayList<Mutation> nonViolators = new ArrayList<Mutation>();

            for (Mutation mutation : mutations) {
                if (!violatorsSet.contains(mutation)) {
                    nonViolators.add(mutation);
                }
            }

            CommitSession commitSession = null;

            if (nonViolators.size() > 0) {
                // if everything is a violation, then it is expected that
                // code calling this will not log or commit
                commitSession = finishPreparingMutations(time);
                if (commitSession == null)
                    return null;
            }

            throw new TConstraintViolationException(violations, violators, nonViolators, commitSession);
        }

        return finishPreparingMutations(time);
    }

    public synchronized void abortCommit(CommitSession commitSession, List<Mutation> value) {
        if (writesInProgress <= 0) {
            throw new IllegalStateException("waitingForLogs <= 0 " + writesInProgress);
        }

        if (closeComplete || tabletMemory == null) {
            throw new IllegalStateException("aborting commit when tablet is closed");
        }

        commitSession.decrementCommitsInProgress();
        writesInProgress--;
        if (writesInProgress == 0)
            this.notifyAll();
    }

    public void commit(CommitSession commitSession, List<Mutation> mutations) {

        int totalCount = 0;
        long totalBytes = 0;

        // write the mutation to the in memory table
        for (Mutation mutation : mutations) {
            totalCount += mutation.size();
            totalBytes += mutation.numBytes();
        }

        tabletMemory.mutate(commitSession, mutations);

        synchronized (this) {
            if (writesInProgress < 1) {
                throw new IllegalStateException(
                        "commiting mutations after logging, but not waiting for any log messages");
            }

            if (closed && closeComplete) {
                throw new IllegalStateException("tablet closed with outstanding messages to the logger");
            }

            tabletMemory.updateMemoryUsageStats();

            // decrement here in case an exception is thrown below
            writesInProgress--;
            if (writesInProgress == 0)
                this.notifyAll();

            commitSession.decrementCommitsInProgress();

            numEntries += totalCount;
            numEntriesInMemory += totalCount;
            ingestCount += totalCount;
            ingestBytes += totalBytes;
        }
    }

    /**
     * Closes the mapfiles associated with a Tablet. If saveState is true, a minor compaction is performed.
     */
    public void close(boolean saveState) throws IOException {
        initiateClose(saveState, false, false);
        completeClose(saveState, true);
    }

    void initiateClose(boolean saveState, boolean queueMinC, boolean disableWrites) {

        if (!saveState && queueMinC) {
            throw new IllegalArgumentException(
                    "Not saving state on close and requesting minor compactions queue does not make sense");
        }

        log.debug("initiateClose(saveState=" + saveState + " queueMinC=" + queueMinC + " disableWrites="
                + disableWrites + ") " + getExtent());

        MinorCompactionTask mct = null;

        synchronized (this) {
            if (closed || closing || closeComplete) {
                String msg = "Tablet " + getExtent() + " already";
                if (closed)
                    msg += " closed";
                if (closing)
                    msg += " closing";
                if (closeComplete)
                    msg += " closeComplete";
                throw new IllegalStateException(msg);
            }

            // enter the closing state, no splits, minor, or major compactions can start
            // should cause running major compactions to stop
            closing = true;
            this.notifyAll();

            // determines if inserts and queries can still continue while minor compacting
            closed = disableWrites;

            // wait for major compactions to finish, setting closing to
            // true should cause any running major compactions to abort
            while (majorCompactionInProgress) {
                try {
                    this.wait(50);
                } catch (InterruptedException e) {
                    log.error(e.toString());
                }
            }

            while (updatingFlushID) {
                try {
                    this.wait(50);
                } catch (InterruptedException e) {
                    log.error(e.toString());
                }
            }

            if (!saveState || tabletMemory.getMemTable().getNumEntries() == 0) {
                return;
            }

            tabletMemory.waitForMinC();

            try {
                mct = prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE);
            } catch (NoNodeException e) {
                throw new RuntimeException(e);
            }

            if (queueMinC) {
                tabletResources.executeMinorCompaction(mct);
                return;
            }

        }

        // do the minor compaction outside of the sync block so that the tablet can be read and written to while
        // the compaction runs
        mct.run();
    }

    private boolean closeCompleting = false;

    synchronized void completeClose(boolean saveState, boolean completeClose) throws IOException {

        if (!closing || closeComplete || closeCompleting) {
            throw new IllegalStateException("closing = " + closing + " closed = " + closed + " closeComplete = "
                    + closeComplete + " closeCompleting = " + closeCompleting);
        }

        log.debug("completeClose(saveState=" + saveState + " completeClose=" + completeClose + ") " + getExtent());

        // ensure this method is only called once, also guards against multiple
        // threads entering the method at the same time
        closeCompleting = true;
        closed = true;

        // modify dataSourceDeletions so scans will try to switch data sources and fail because the tablet is closed
        dataSourceDeletions.incrementAndGet();

        for (ScanDataSource activeScan : activeScans) {
            activeScan.interrupt();
        }

        // wait for reads and writes to complete
        while (writesInProgress > 0 || activeScans.size() > 0) {
            try {
                this.wait(50);
            } catch (InterruptedException e) {
                log.error(e.toString());
            }
        }

        tabletMemory.waitForMinC();

        if (saveState && tabletMemory.getMemTable().getNumEntries() > 0) {
            try {
                prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE).run();
            } catch (NoNodeException e) {
                throw new RuntimeException(e);
            }
        }

        if (saveState) {
            // at this point all tablet data is flushed, so do a consistency check
            RuntimeException err = null;
            for (int i = 0; i < 5; i++) {
                try {
                    closeConsistencyCheck();
                    err = null;
                    break; // check passed, no need to retry
                } catch (RuntimeException t) {
                    err = t;
                    log.error("Consistency check fails, retrying " + t);
                    UtilWaitThread.sleep(500);
                }
            }
            if (err != null) {
                ProblemReports.getInstance().report(new ProblemReport(extent.getTableId().toString(),
                        ProblemType.TABLET_LOAD, this.extent.toString(), err));
                log.error(
                        "Tablet close consistency check failed for " + this.extent + "; giving up and closing");
            }
        }

        try {
            tabletMemory.getMemTable().delete(0);
        } catch (Throwable t) {
            log.error("Failed to delete mem table : " + t.getMessage(), t);
        }

        tabletMemory = null;

        // close map files
        tabletResources.close();

        log.log(TLevel.TABLET_HIST, extent + " closed");

        acuTableConf.removeObserver(configObserver);

        closeComplete = completeClose;
    }

    private void closeConsistencyCheck() {

        if (tabletMemory.getMemTable().getNumEntries() != 0) {
            String msg = "Closed tablet " + extent + " has " + tabletMemory.getMemTable().getNumEntries()
                    + " entries in memory";
            log.error(msg);
            throw new RuntimeException(msg);
        }

        if (tabletMemory.memoryReservedForMinC()) {
            String msg = "Closed tablet " + extent + " has minor compacting memory";
            log.error(msg);
            throw new RuntimeException(msg);
        }

        try {
            Pair<List<LogEntry>, SortedMap<FileRef, DataFileValue>> fileLog = MetadataTableUtil
                    .getFileAndLogEntries(SystemCredentials.get(), extent);

            if (fileLog.getFirst().size() != 0) {
                String msg = "Closed tablet " + extent + " has walog entries in " + MetadataTable.NAME + " "
                        + fileLog.getFirst();
                log.error(msg);
                throw new RuntimeException(msg);
            }

            if (extent.isRootTablet()) {
                if (!fileLog.getSecond().keySet().equals(datafileManager.getDatafileSizes().keySet())) {
                    String msg = "Data file in " + RootTable.NAME + " differ from in memory data " + extent + "  "
                            + fileLog.getSecond().keySet() + "  " + datafileManager.getDatafileSizes().keySet();
                    log.error(msg);
                    throw new RuntimeException(msg);
                }
            } else {
                if (!fileLog.getSecond().equals(datafileManager.getDatafileSizes())) {
                    String msg = "Data files in " + MetadataTable.NAME + " differ from in-memory data for " + extent
                            + "  " + fileLog.getSecond() + "  " + datafileManager.getDatafileSizes();
                    log.error(msg);
                    throw new RuntimeException(msg);
                }
            }

        } catch (Exception e) {
            String msg = "Failed to do close consistency check for tablet " + extent;
            log.error(msg, e);
            throw new RuntimeException(msg, e);

        }

        if (otherLogs.size() != 0 || currentLogs.size() != 0) {
            String msg = "Closed tablet " + extent + " has walog entries in memory: currentLogs = " + currentLogs
                    + "  otherLogs = " + otherLogs;
            log.error(msg);
            throw new RuntimeException(msg);
        }

        // TODO check lastFlushID and lostCompactID - ACCUMULO-1290
    }

    /**
     * Returns a Path object representing the tablet's location on the DFS.
     * 
     * @return location
     */
    public Path getLocation() {
        return location;
    }

    private class CompactionRunner implements Runnable, Comparable<CompactionRunner> {

        long queued;
        long start;
        boolean failed = false;
        private MajorCompactionReason reason;

        public CompactionRunner(MajorCompactionReason reason) {
            queued = System.currentTimeMillis();
            this.reason = reason;
        }

        @Override
        public void run() {
            CompactionStats majCStats = null;

            if (tabletServer.isMajorCompactionDisabled()) {
                // this will make compaction tasks that were queued when shutdown was
                // initiated exit
                majorCompactionQueued.remove(reason);
                return;
            }

            try {
                timer.incrementStatusMajor();
                start = System.currentTimeMillis();
                majCStats = majorCompact(reason);

                // if there is more work to be done, queue another major compaction
                synchronized (Tablet.this) {
                    if (reason == MajorCompactionReason.NORMAL && needsMajorCompaction(reason))
                        initiateMajorCompaction(reason);
                }

            } catch (RuntimeException e) {
                // the failure was already logged inside majorCompact(); just record it
                failed = true;
            } finally {
                long count = 0;
                if (majCStats != null) {
                    count = majCStats.getEntriesRead();
                }

                timer.updateTime(Operation.MAJOR, queued, start, count, failed);
            }
        }

        // We used to synchronize on the Tablet before fetching this information,
        // but this method is called by the compaction queue thread to re-order the compactions.
        // The compaction queue holds a lock during this sort.
        // A thread can hold a tablet's lock while putting that tablet on the queue, so we can't lock the tablet
        // while pulling information used to sort the tablets in the queue, or we may get deadlocked.
        // See ACCUMULO-1110.
        private int getNumFiles() {
            return datafileManager.datafileSizes.size();
        }

        @Override
        public int compareTo(CompactionRunner o) {
            int cmp = reason.compareTo(o.reason);
            if (cmp != 0)
                return cmp;

            if (reason == MajorCompactionReason.USER || reason == MajorCompactionReason.CHOP) {
                // for these types of compactions, we want to do the oldest first
                cmp = Long.signum(queued - o.queued);
                if (cmp != 0)
                    return cmp;
            }

            return o.getNumFiles() - this.getNumFiles();
        }
    }
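    // Queue-ordering example: compareTo() above sorts queued runners first by the
    // declaration order of MajorCompactionReason, then (for USER and CHOP requests)
    // oldest first, and finally by descending file count, so of two otherwise equal
    // runners whose tablets hold 12 and 3 files, the 12-file tablet compacts first.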

    synchronized boolean initiateMajorCompaction(MajorCompactionReason reason) {

        if (closing || closed || !needsMajorCompaction(reason) || majorCompactionInProgress
                || majorCompactionQueued.contains(reason)) {
            return false;
        }

        majorCompactionQueued.add(reason);

        tabletResources.executeMajorCompaction(getExtent(), new CompactionRunner(reason));

        return true;
    }

    /**
     * Returns true if a major compaction should be performed on the tablet.
     * 
     */
    public boolean needsMajorCompaction(MajorCompactionReason reason) {
        if (majorCompactionInProgress)
            return false;
        if (reason == MajorCompactionReason.CHOP || reason == MajorCompactionReason.USER)
            return true;
        return tabletResources.needsMajorCompaction(datafileManager.getDatafileSizes(), reason);
    }

    private class CompactionTuple {
        private Map<FileRef, Long> filesToCompact;
        private boolean compactAll;

        public CompactionTuple(Map<FileRef, Long> filesToCompact, boolean doAll) {
            this.filesToCompact = filesToCompact;
            compactAll = doAll;
        }

        public Map<FileRef, Long> getFilesToCompact() {
            return filesToCompact;
        }

        public boolean getCompactAll() {
            return compactAll;
        }
    }

    /**
     * Returns the set of files that need to be compacted by the major compactor, or null if there is nothing to compact.
     */
    private CompactionTuple getFilesToCompact(MajorCompactionReason reason, Map<FileRef, Pair<Key, Key>> falks) {
        SortedMap<FileRef, DataFileValue> files = datafileManager.getDatafileSizes();

        Map<FileRef, Long> toCompact;
        if (reason == MajorCompactionReason.CHOP) {
            toCompact = findChopFiles(files, falks);
        } else {
            toCompact = tabletResources.findMapFilesToCompact(files, reason);
        }
        if (toCompact == null)
            return null;
        return new CompactionTuple(toCompact, toCompact.size() == files.size());
    }

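    /**
     * Opens each of the given files and records its first and last key. Used when planning a chop compaction to decide which files hold data outside
     * this tablet's range. Called without the tablet lock held, since opening a reader per file can be slow.
     */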
    private Map<FileRef, Pair<Key, Key>> getFirstAndLastKeys(SortedMap<FileRef, DataFileValue> files)
            throws IOException {
        FileOperations fileFactory = FileOperations.getInstance();

        Map<FileRef, Pair<Key, Key>> falks = new HashMap<FileRef, Pair<Key, Key>>();

        for (Entry<FileRef, DataFileValue> entry : files.entrySet()) {
            FileRef file = entry.getKey();
            FileSystem ns = fs.getFileSystemByPath(file.path());
            FileSKVIterator openReader = fileFactory.openReader(file.path().toString(), true, ns, ns.getConf(),
                    acuTableConf);
            try {
                Key first = openReader.getFirstKey();
                Key last = openReader.getLastKey();
                falks.put(file, new Pair<Key, Key>(first, last));
            } finally {
                openReader.close();
            }
        }
        return falks;
    }

    private Map<FileRef, Long> findChopFiles(SortedMap<FileRef, DataFileValue> files,
            Map<FileRef, Pair<Key, Key>> falks) {

        Map<FileRef, Long> result = new HashMap<FileRef, Long>();

        for (Entry<FileRef, DataFileValue> entry : files.entrySet()) {
            FileRef file = entry.getKey();

            Pair<Key, Key> pair = falks.get(file);
            if (pair == null) {
                // file was created or imported after we obtained the first and last keys... there
                // are a few options here... throw an exception which will cause the compaction to
                // retry and also cause an ugly error message that the admin has to ignore... could
                // go get the first and last key, but this code is called while the tablet lock
                // is held... or just compact the file....
                result.put(file, entry.getValue().getSize());
            } else {
                Key first = pair.getFirst();
                Key last = pair.getSecond();
                // If first and last are null, it's an empty file. Add it to the compact set so it goes away.
                if (first == null || last == null || !this.extent.contains(first.getRow())
                        || !this.extent.contains(last.getRow())) {
                    result.put(file, entry.getValue().getSize());
                }
            }
        }
        return result;

    }
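    // For example, if this tablet's extent is ("g", "m"] and a file's first key has row "e",
    // the file still holds data from outside the tablet (typically left over from a split)
    // and is selected for the chop; a file whose first and last rows both fall inside
    // ("g", "m"] is skipped.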

    /**
     * Returns the total size in bytes of the map files served by this tablet (map files only, not in-memory data).
     * 
     * @return size
     */
    public long estimateTabletSize() {
        long size = 0L;

        for (DataFileValue sz : datafileManager.getDatafileSizes().values())
            size += sz.getSize();

        return size;
    }

    private boolean sawBigRow = false;
    private long timeOfLastMinCWhenBigFreakinRowWasSeen = 0;
    private long timeOfLastImportWhenBigFreakinRowWasSeen = 0;
    private long splitCreationTime;

    private static class SplitRowSpec {
        double splitRatio;
        Text row;

        SplitRowSpec(double splitRatio, Text row) {
            this.splitRatio = splitRatio;
            this.row = row;
        }
    }

    private SplitRowSpec findSplitRow(Collection<FileRef> files) {

        // never split the root tablet
        // check if we already decided that we can never split
        // check to see if we're big enough to split

        long splitThreshold = acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD);
        if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) {
            return null;
        }

        // if we have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred since
        if (sawBigRow) {
            if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime
                    || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) {
                // a minor compaction or map file import has occurred... check again
                sawBigRow = false;
            } else {
                // nothing changed, do not split
                return null;
            }
        }

        SortedMap<Double, Key> keys = null;

        try {
            // we should make .25 below configurable
            keys = FileUtil.findMidPoint(fs, tabletServer.getSystemConfiguration(), extent.getPrevEndRow(),
                    extent.getEndRow(), files, .25);
        } catch (IOException e) {
            log.error("Failed to find midpoint: " + e.getMessage(), e);
            return null;
        }

        // check to see if one row takes up most of the tablet, in which case we cannot split
        try {

            Text lastRow;
            if (extent.getEndRow() == null) {
                Key lastKey = (Key) FileUtil.findLastKey(fs, tabletServer.getSystemConfiguration(), files);
                lastRow = lastKey.getRow();
            } else {
                lastRow = extent.getEndRow();
            }

            // check to see that the midPoint is not equal to the end key
            if (keys.get(.5).compareRow(lastRow) == 0) {
                if (keys.firstKey() < .5) {
                    Key candidate = keys.get(keys.firstKey());
                    if (candidate.compareRow(lastRow) != 0) {
                        // we should use this ratio in split size estimations
                        if (log.isTraceEnabled())
                            log.trace(String.format(
                                    "Splitting at %6.2f instead of .5, row at .5 is same as end row%n",
                                    keys.firstKey()));
                        return new SplitRowSpec(keys.firstKey(), candidate.getRow());
                    }

                }

                log.warn("Cannot split tablet " + extent + ", it contains a big row: " + lastRow);

                sawBigRow = true;
                timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime;
                timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime;

                return null;
            }
            Key mid = keys.get(.5);
            Text text = (mid == null) ? null : mid.getRow();
            SortedMap<Double, Key> firstHalf = keys.headMap(.5);
            if (firstHalf.size() > 0) {
                Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow();
                Text shorter = new Text();
                int trunc = longestCommonLength(text, beforeMid);
                shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1));
                text = shorter;
            }
            return new SplitRowSpec(.5, text);
        } catch (IOException e) {
            // don't split now, but check again later
            log.error("Failed to find last key: " + e.getMessage(), e);
            return null;
        }
    }

    private static int longestCommonLength(Text text, Text beforeMid) {
        int common = 0;
        while (common < text.getLength() && common < beforeMid.getLength()
                && text.getBytes()[common] == beforeMid.getBytes()[common]) {
            common++;
        }
        return common;
    }
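    // Illustrative sketch (not part of the original class): how findSplitRow() uses
    // longestCommonLength() to shorten the split row it returns. With a midpoint row of
    // "tomato" and "tom" as the row just before it, the common prefix is 3 bytes, so the
    // split row is truncated to the 4-byte prefix "toma", which still sorts strictly
    // between the two rows.
    private static Text exampleShortenedSplitRow() {
        Text midRow = new Text("tomato");
        Text beforeMid = new Text("tom");
        int trunc = longestCommonLength(midRow, beforeMid); // 3
        Text shorter = new Text();
        shorter.set(midRow.getBytes(), 0, Math.min(midRow.getLength(), trunc + 1));
        return shorter; // "toma"
    }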

    /**
     * Returns true if this tablet needs to be split
     * 
     */
    public synchronized boolean needsSplit() {
        boolean ret;

        if (closing || closed)
            ret = false;
        else
            ret = findSplitRow(datafileManager.getFiles()) != null;

        return ret;
    }

    // BEGIN PRIVATE METHODS RELATED TO MAJOR COMPACTION

    private boolean isCompactionEnabled() {
        return !closing && !tabletServer.isMajorCompactionDisabled();
    }

    private CompactionStats _majorCompact(MajorCompactionReason reason)
            throws IOException, CompactionCanceledException {

        boolean propagateDeletes;

        long t1, t2, t3;

        // acquire first and last key info outside of tablet lock
        Map<FileRef, Pair<Key, Key>> falks = null;
        if (reason == MajorCompactionReason.CHOP)
            falks = getFirstAndLastKeys(datafileManager.getDatafileSizes());

        Map<FileRef, Long> filesToCompact;

        int maxFilesToCompact = acuTableConf.getCount(Property.TSERV_MAJC_THREAD_MAXOPEN);

        CompactionStats majCStats = new CompactionStats();

        synchronized (this) {
            // plan all the work that needs to be done in the sync block... then do the actual work
            // outside the sync block

            t1 = System.currentTimeMillis();

            majorCompactionWaitingToStart = true;

            tabletMemory.waitForMinC();

            t2 = System.currentTimeMillis();

            majorCompactionWaitingToStart = false;
            notifyAll();

            if (extent.isRootTablet()) {
                // very important that we call this before doing major compaction,
            // otherwise deleted compacted files could possibly be brought back
                // at some point if the file they were compacted to was legitimately
                // removed by a major compaction
                cleanUpFiles(fs, fs.listStatus(this.location), false);
            }

            // getFilesToCompact() and cleanUpFiles() both
            // do dir listings, which means two calls to the namenode
            // we should refactor so that there is only one call
            CompactionTuple ret = getFilesToCompact(reason, falks);
            if (ret == null) {
                // nothing to compact
                return majCStats;
            }
            filesToCompact = ret.getFilesToCompact();

            // if not all files are being compacted, delete entries must be propagated
            propagateDeletes = !ret.getCompactAll();

            t3 = System.currentTimeMillis();

            datafileManager.reserveMajorCompactingFiles(filesToCompact.keySet());

        }

        try {

            log.debug(String.format("MajC initiate lock %.2f secs, wait %.2f secs", (t3 - t2) / 1000.0,
                    (t2 - t1) / 1000.0));

            Pair<Long, List<IteratorSetting>> compactionId = null;
            if (!propagateDeletes) {
                // compacting everything, so update the compaction id in !METADATA
                try {
                    compactionId = getCompactionID();
                } catch (NoNodeException e) {
                    throw new RuntimeException(e);
                }
            }

            List<IteratorSetting> compactionIterators = new ArrayList<IteratorSetting>();
            if (compactionId != null) {
                if (reason == MajorCompactionReason.USER) {
                    if (getCompactionCancelID() >= compactionId.getFirst()) {
                        // compaction was canceled
                        return majCStats;
                    }

                    synchronized (this) {
                        if (lastCompactID >= compactionId.getFirst())
                            // already compacted
                            return majCStats;
                    }
                }

                compactionIterators = compactionId.getSecond();
            }

            // need to handle case where only one file is being major compacted
            while (filesToCompact.size() > 0) {

                int numToCompact = maxFilesToCompact;

                if (filesToCompact.size() > maxFilesToCompact && filesToCompact.size() < 2 * maxFilesToCompact) {
                    // on the second-to-last compaction pass, compact the minimum number of files possible
                    numToCompact = filesToCompact.size() - maxFilesToCompact + 1;
                }

                Set<FileRef> smallestFiles = removeSmallest(filesToCompact, numToCompact);

                FileRef fileName = getNextMapFilename(
                        (filesToCompact.size() == 0 && !propagateDeletes) ? "A" : "C");
                FileRef compactTmpName = new FileRef(fileName.path().toString() + "_tmp");

                Span span = Trace.start("compactFiles");
                try {

                    CompactionEnv cenv = new CompactionEnv() {
                        @Override
                        public boolean isCompactionEnabled() {
                            return Tablet.this.isCompactionEnabled();
                        }

                        @Override
                        public IteratorScope getIteratorScope() {
                            return IteratorScope.majc;
                        }
                    };

                    HashMap<FileRef, DataFileValue> copy = new HashMap<FileRef, DataFileValue>(
                            datafileManager.getDatafileSizes());
                    if (!copy.keySet().containsAll(smallestFiles))
                        throw new IllegalStateException("Cannot find data file values for " + smallestFiles);

                    copy.keySet().retainAll(smallestFiles);

                    log.debug("Starting MajC " + extent + " (" + reason + ") " + copy.keySet() + " --> "
                            + compactTmpName + "  " + compactionIterators);

                    // always propagate deletes, unless this is the last batch
                    Compactor compactor = new Compactor(conf, fs, copy, null, compactTmpName,
                            filesToCompact.size() == 0 ? propagateDeletes : true, acuTableConf, extent, cenv,
                            compactionIterators, reason);

                    CompactionStats mcs = compactor.call();

                    span.data("files", "" + smallestFiles.size());
                    span.data("read", "" + mcs.getEntriesRead());
                    span.data("written", "" + mcs.getEntriesWritten());
                    majCStats.add(mcs);

                    datafileManager.bringMajorCompactionOnline(smallestFiles, compactTmpName, fileName,
                            filesToCompact.size() == 0 && compactionId != null ? compactionId.getFirst() : null,
                            new DataFileValue(mcs.getFileSize(), mcs.getEntriesWritten()));

                    // when major compaction produces a file w/ zero entries, it will be deleted... do not want
                    // to add the deleted file
                    if (filesToCompact.size() > 0 && mcs.getEntriesWritten() > 0) {
                        filesToCompact.put(fileName, mcs.getFileSize());
                    }
                } finally {
                    span.stop();
                }

            }

            return majCStats;
        } finally {
            synchronized (Tablet.this) {
                datafileManager.clearMajorCompactingFile();
            }
        }
    }

    private Set<FileRef> removeSmallest(Map<FileRef, Long> filesToCompact, int maxFilesToCompact) {
        // ensure this method works properly when multiple files have the same size

        PriorityQueue<Pair<FileRef, Long>> fileHeap = new PriorityQueue<Pair<FileRef, Long>>(filesToCompact.size(),
                new Comparator<Pair<FileRef, Long>>() {
                    @Override
                    public int compare(Pair<FileRef, Long> o1, Pair<FileRef, Long> o2) {
                        // compare the boxed sizes with compareTo() rather than ==, which would
                        // test object identity and misorder equal sizes; break ties on file name
                        int cmp = o1.getSecond().compareTo(o2.getSecond());
                        if (cmp == 0)
                            return o1.getFirst().compareTo(o2.getFirst());
                        return cmp;
                    }
                });

        for (Entry<FileRef, Long> entry : filesToCompact.entrySet()) {
            fileHeap.add(new Pair<FileRef, Long>(entry.getKey(), entry.getValue()));
        }

        Set<FileRef> smallestFiles = new HashSet<FileRef>();
        while (smallestFiles.size() < maxFilesToCompact && fileHeap.size() > 0) {
            Pair<FileRef, Long> pair = fileHeap.remove();
            filesToCompact.remove(pair.getFirst());
            smallestFiles.add(pair.getFirst());
        }

        return smallestFiles;
    }
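    // Illustrative sketch (not part of the original class): the batching arithmetic used by
    // _majorCompact() above. With maxFilesToCompact = 10 and 15 input files, the first pass
    // compacts the 6 smallest files (15 - 10 + 1) into one new file, leaving 9 + 1 = 10, and
    // the final pass compacts exactly 10 -- no pass ever opens more than maxFilesToCompact.
    private static int exampleNumToCompact(int numFiles, int maxFilesToCompact) {
        if (numFiles > maxFilesToCompact && numFiles < 2 * maxFilesToCompact)
            return numFiles - maxFilesToCompact + 1; // shrink this pass so the last one is full
        return maxFilesToCompact; // removeSmallest() stops early if fewer files remain
    }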

    // END PRIVATE METHODS RELATED TO MAJOR COMPACTION

    /**
     * Performs a major compaction on the tablet. If the tablet needs to split, no compaction is done and null is returned so the split can happen first.
     */
    private CompactionStats majorCompact(MajorCompactionReason reason) {

        CompactionStats majCStats = null;

        // Always trace majC
        Span span = Trace.on("majorCompaction");

        try {
            synchronized (this) {
                // check that compaction is still needed - defer to splitting
                majorCompactionQueued.remove(reason);

                if (closing || closed || !needsMajorCompaction(reason) || majorCompactionInProgress
                        || needsSplit()) {
                    return null;
                }

                majorCompactionInProgress = true;
            }

            majCStats = _majorCompact(reason);
            if (reason == MajorCompactionReason.CHOP) {
                MetadataTableUtil.chopped(getExtent(), this.tabletServer.getLock());
                tabletServer.enqueueMasterMessage(new TabletStatusMessage(TabletLoadState.CHOPPED, extent));
            }
        } catch (CompactionCanceledException mcce) {
            log.debug("Major compaction canceled, extent = " + getExtent());
            throw new RuntimeException(mcce);
        } catch (Throwable t) {
            log.error("MajC Failed, extent = " + getExtent());
            log.error(
                    "MajC Failed, message = " + (t.getMessage() == null ? t.getClass().getName() : t.getMessage()),
                    t);
            throw new RuntimeException(t);
        } finally {
            // ensure we always reset the boolean, even
            // when an exception is thrown
            synchronized (this) {
                majorCompactionInProgress = false;
                this.notifyAll();
            }

            Span curr = Trace.currentTrace();
            curr.data("extent", "" + getExtent());
            if (majCStats != null) {
                curr.data("read", "" + majCStats.getEntriesRead());
                curr.data("written", "" + majCStats.getEntriesWritten());
            }
            span.stop();
        }

        return majCStats;
    }

    /**
     * Returns a KeyExtent object representing this tablet's key range.
     * 
     * @return extent
     */
    public KeyExtent getExtent() {
        return extent;
    }

    private synchronized void computeNumEntries() {
        Collection<DataFileValue> vals = datafileManager.getDatafileSizes().values();

        long numEntries = 0;

        for (DataFileValue tableValue : vals) {
            numEntries += tableValue.getNumEntries();
        }

        this.numEntriesInMemory = tabletMemory.getNumEntries();
        numEntries += tabletMemory.getNumEntries();

        this.numEntries = numEntries;
    }

    public long getNumEntries() {
        return numEntries;
    }

    public long getNumEntriesInMemory() {
        return numEntriesInMemory;
    }

    public synchronized boolean isClosing() {
        return closing;
    }

    public synchronized boolean isClosed() {
        return closed;
    }

    public synchronized boolean isCloseComplete() {
        return closeComplete;
    }

    public boolean majorCompactionRunning() {
        return this.majorCompactionInProgress;
    }

    public boolean minorCompactionQueued() {
        return minorCompactionWaitingToStart;
    }

    public boolean minorCompactionRunning() {
        return minorCompactionInProgress;
    }

    public boolean majorCompactionQueued() {
        return majorCompactionQueued.size() > 0;
    }

    /**
     * Operations are disallowed while we split, which is OK since splitting is fast.
     * 
     * A minor compaction should have taken place before calling this, so there should be relatively little left to compact.
     * 
     * We just need to make sure major compactions aren't occurring; if we have the major compactor thread decide who needs splitting, we can avoid
     * synchronization issues with major compactions.
     */

    static class SplitInfo {
        String dir;
        SortedMap<FileRef, DataFileValue> datafiles;
        String time;
        long initFlushID;
        long initCompactID;

        SplitInfo(String d, SortedMap<FileRef, DataFileValue> dfv, String time, long initFlushID,
                long initCompactID) {
            this.dir = d;
            this.datafiles = dfv;
            this.time = time;
            this.initFlushID = initFlushID;
            this.initCompactID = initCompactID;
        }

    }

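    /**
     * Splits this tablet into two children. If sp is null, a split row near the midpoint of the tablet's data is chosen; otherwise sp is used as the
     * split row. Returns metadata for the two new tablets keyed by extent, or null if the split was aborted.
     */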
    public TreeMap<KeyExtent, SplitInfo> split(byte[] sp) throws IOException {

        if (sp != null && extent.getEndRow() != null && extent.getEndRow().equals(new Text(sp))) {
            throw new IllegalArgumentException("split row is the same as the tablet's end row");
        }

        if (extent.isRootTablet()) {
            String msg = "Cannot split root tablet";
            log.warn(msg);
            throw new RuntimeException(msg);
        }

        try {
            initiateClose(true, false, false);
        } catch (IllegalStateException ise) {
            log.debug("Tablet " + extent + " not splitting: " + ise.getMessage());
            return null;
        }

        // obtain this info outside of the sync block since it will involve opening
        // the map files... it is ok if the set of map files changes, because
        // this info is used for optimization... it is ok if map files are missing
        // from the set... can still query and insert into the tablet while this
        // map file operation is happening
        Map<FileRef, FileUtil.FileInfo> firstAndLastRows = FileUtil.tryToGetFirstAndLastRows(fs,
                tabletServer.getSystemConfiguration(), datafileManager.getFiles());

        synchronized (this) {
            // java needs tuples ...
            TreeMap<KeyExtent, SplitInfo> newTablets = new TreeMap<KeyExtent, SplitInfo>();

            long t1 = System.currentTimeMillis();

            // choose a split point
            SplitRowSpec splitPoint;
            if (sp == null)
                splitPoint = findSplitRow(datafileManager.getFiles());
            else {
                Text tsp = new Text(sp);
                splitPoint = new SplitRowSpec(
                        FileUtil.estimatePercentageLTE(fs, tabletServer.getSystemConfiguration(),
                                extent.getPrevEndRow(), extent.getEndRow(), datafileManager.getFiles(), tsp),
                        tsp);
            }

            if (splitPoint == null || splitPoint.row == null) {
                log.info("had to abort split because splitRow was null");
                closing = false;
                return null;
            }

            closed = true;
            completeClose(true, false);

            Text midRow = splitPoint.row;
            double splitRatio = splitPoint.splitRatio;

            KeyExtent low = new KeyExtent(extent.getTableId(), midRow, extent.getPrevEndRow());
            KeyExtent high = new KeyExtent(extent.getTableId(), extent.getEndRow(), midRow);

            String lowDirectory = TabletOperations.createTabletDirectory(fs, extent.getTableId().toString(),
                    midRow);

            // write new tablet information to MetadataTable
            SortedMap<FileRef, DataFileValue> lowDatafileSizes = new TreeMap<FileRef, DataFileValue>();
            SortedMap<FileRef, DataFileValue> highDatafileSizes = new TreeMap<FileRef, DataFileValue>();
            List<FileRef> highDatafilesToRemove = new ArrayList<FileRef>();

            MetadataTableUtil.splitDatafiles(extent.getTableId(), midRow, splitRatio, firstAndLastRows,
                    datafileManager.getDatafileSizes(), lowDatafileSizes, highDatafileSizes, highDatafilesToRemove);

            log.debug("Files for low split " + low + "  " + lowDatafileSizes.keySet());
            log.debug("Files for high split " + high + "  " + highDatafileSizes.keySet());

            String time = tabletTime.getMetadataValue();

            // it is possible that some of the bulk loading flags will be deleted after being read below because the bulk load
            // finishes... therefore a split could propagate load flags for a finished bulk load... there is a special iterator
            // on the !METADATA table to clean up this type of garbage
            Map<FileRef, Long> bulkLoadedFiles = MetadataTableUtil.getBulkFilesLoaded(SystemCredentials.get(),
                    extent);

            MetadataTableUtil.splitTablet(high, extent.getPrevEndRow(), splitRatio, SystemCredentials.get(),
                    tabletServer.getLock());
            MetadataTableUtil.addNewTablet(low, lowDirectory, tabletServer.getTabletSession(), lowDatafileSizes,
                    bulkLoadedFiles, SystemCredentials.get(), time, lastFlushID, lastCompactID,
                    tabletServer.getLock());
            MetadataTableUtil.finishSplit(high, highDatafileSizes, highDatafilesToRemove, SystemCredentials.get(),
                    tabletServer.getLock());

            log.log(TLevel.TABLET_HIST, extent + " split " + low + " " + high);

            newTablets.put(high,
                    new SplitInfo(tabletDirectory, highDatafileSizes, time, lastFlushID, lastCompactID));
            newTablets.put(low, new SplitInfo(lowDirectory, lowDatafileSizes, time, lastFlushID, lastCompactID));

            long t2 = System.currentTimeMillis();

            log.debug(String.format("offline split time : %6.2f secs", (t2 - t1) / 1000.0));

            closeComplete = true;

            return newTablets;
        }
    }

    public SortedMap<FileRef, DataFileValue> getDatafiles() {
        return datafileManager.getDatafileSizes();
    }

    public double queryRate() {
        return queryRate.rate();
    }

    public double queryByteRate() {
        return queryByteRate.rate();
    }

    public double ingestRate() {
        return ingestRate.rate();
    }

    public double ingestByteRate() {
        return ingestByteRate.rate();
    }

    public double scanRate() {
        return scannedRate.rate();
    }

    public long totalQueries() {
        return this.queryCount;
    }

    public long totalIngest() {
        return this.ingestCount;
    }

    // synchronized?
    public void updateRates(long now) {
        queryRate.update(now, queryCount);
        queryByteRate.update(now, queryBytes);
        ingestRate.update(now, ingestCount);
        ingestByteRate.update(now, ingestBytes);
        scannedRate.update(now, scannedCount.get());
    }

    public long getSplitCreationTime() {
        return splitCreationTime;
    }

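    /**
     * Brings bulk-loaded map files online for this tablet. The writesInProgress counter incremented here is the same one the close path waits on, so
     * the tablet cannot finish closing while an import is in flight.
     */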
    public void importMapFiles(long tid, Map<FileRef, MapFileInfo> fileMap, boolean setTime) throws IOException {
        Map<FileRef, DataFileValue> entries = new HashMap<FileRef, DataFileValue>(fileMap.size());

        for (FileRef path : fileMap.keySet()) {
            MapFileInfo mfi = fileMap.get(path);
            entries.put(path, new DataFileValue(mfi.estimatedSize, 0L));
        }

        // Clients time out and will think that this operation failed,
        // so don't do it if we spent too long waiting for the lock
        long now = System.currentTimeMillis();
        synchronized (this) {
            if (closed) {
                throw new IOException("tablet " + extent + " is closed");
            }

            // TODO check seems unneeded now - ACCUMULO-1291
            long lockWait = System.currentTimeMillis() - now;
            if (lockWait > tabletServer.getSystemConfiguration().getTimeInMillis(Property.GENERAL_RPC_TIMEOUT)) {
                throw new IOException("Timeout waiting " + (lockWait / 1000.) + " seconds to get tablet lock");
            }

            if (writesInProgress < 0)
                throw new IllegalStateException("writesInProgress < 0 " + writesInProgress);

            writesInProgress++;
        }

        try {
            datafileManager.importMapFiles(tid, entries, setTime);
            lastMapFileImportTime = System.currentTimeMillis();

            if (needsSplit()) {
                tabletServer.executeSplit(this);
            } else {
                initiateMajorCompaction(MajorCompactionReason.NORMAL);
            }
        } finally {
            synchronized (this) {
                if (writesInProgress < 1)
                    throw new IllegalStateException("writesInProgress < 1 " + writesInProgress);

                writesInProgress--;
                if (writesInProgress == 0)
                    this.notifyAll();
            }
        }
    }

    private Set<DfsLogger> currentLogs = new HashSet<DfsLogger>();

    public Set<String> getCurrentLogs() {
        Set<String> result = new HashSet<String>();
        synchronized (currentLogs) {
            for (DfsLogger log : currentLogs) {
                result.add(log.toString());
            }
        }
        return result;
    }

    private Set<String> beginClearingUnusedLogs() {
        Set<String> doomed = new HashSet<String>();

        ArrayList<String> otherLogsCopy = new ArrayList<String>();
        ArrayList<String> currentLogsCopy = new ArrayList<String>();

        // do not hold tablet lock while acquiring the log lock
        logLock.lock();

        synchronized (this) {
            if (removingLogs)
                throw new IllegalStateException("Attempted to clear logs while removal of logs is in progress");

            for (DfsLogger logger : otherLogs) {
                otherLogsCopy.add(logger.toString());
                doomed.add(logger.toString());
            }

            for (DfsLogger logger : currentLogs) {
                currentLogsCopy.add(logger.toString());
                doomed.remove(logger.toString());
            }

            otherLogs = Collections.emptySet();

            if (doomed.size() > 0)
                removingLogs = true;
        }

        // do debug logging outside tablet lock
        for (String logger : otherLogsCopy) {
            log.debug("Logs for memory compacted: " + getExtent() + " " + logger);
        }

        for (String logger : currentLogsCopy) {
            log.debug("Logs for current memory: " + getExtent() + " " + logger);
        }

        return doomed;
    }
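    // beginClearingUnusedLogs() returns with logLock still held; every call must be paired
    // with finishClearingUnusedLogs() below, which releases it.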

    private synchronized void finishClearingUnusedLogs() {
        removingLogs = false;
        logLock.unlock();
    }

    private Set<DfsLogger> otherLogs = Collections.emptySet();
    private boolean removingLogs = false;

    // this lock is basically used to synchronize writing of log info to !METADATA
    private final ReentrantLock logLock = new ReentrantLock();

    public synchronized int getLogCount() {
        return currentLogs.size();
    }

    private boolean beginUpdatingLogsUsed(InMemoryMap memTable, Collection<DfsLogger> more, boolean mincFinish) {

        boolean releaseLock = true;

        // do not hold tablet lock while acquiring the log lock
        logLock.lock();

        try {
            synchronized (this) {

                if (closed && closeComplete) {
                    throw new IllegalStateException("Cannot update logs of closed tablet " + extent);
                }

                boolean addToOther;

                if (memTable == tabletMemory.otherMemTable)
                    addToOther = true;
                else if (memTable == tabletMemory.memTable)
                    addToOther = false;
                else
                    throw new IllegalArgumentException("passed in memtable that is not in use");

                if (mincFinish) {
                    if (addToOther)
                        throw new IllegalStateException("Adding to other logs for mincFinish");
                    if (otherLogs.size() != 0)
                        throw new IllegalStateException(
                                "Expected other logs to be empty for mincFinish, but got " + otherLogs);

                    // when writing a minc finish event, there is no need to add the log to !METADATA
                    // if nothing has been logged for the tablet since the minor compaction started
                    if (currentLogs.size() == 0)
                        return false;
                }

                int numAdded = 0;
                int numContained = 0;
                for (DfsLogger logger : more) {
                    if (addToOther) {
                        if (otherLogs.add(logger))
                            numAdded++;

                        if (currentLogs.contains(logger))
                            numContained++;
                    } else {
                        if (currentLogs.add(logger))
                            numAdded++;

                        if (otherLogs.contains(logger))
                            numContained++;
                    }
                }

                if (numAdded > 0 && numAdded != more.size()) {
                    // expect to add all or none
                    throw new IllegalArgumentException(
                            "Added subset of logs " + extent + " " + more + " " + currentLogs);
                }

                if (numContained > 0 && numContained != more.size()) {
                    // expect to contain all or none
                    throw new IllegalArgumentException(
                            "Other logs contained subset of logs " + extent + " " + more + " " + otherLogs);
                }

                if (numAdded > 0 && numContained == 0) {
                    releaseLock = false;
                    return true;
                }

                return false;
            }
        } finally {
            if (releaseLock)
                logLock.unlock();
        }
    }
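    // Intended call pattern (sketch): a true return means logLock is still held, so the
    // caller must release it via finishUpdatingLogsUsed():
    //
    //   if (beginUpdatingLogsUsed(memTable, loggers, mincFinish)) {
    //       try {
    //           // ... write the new log entries to the !METADATA table ...
    //       } finally {
    //           finishUpdatingLogsUsed();
    //       }
    //   }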

    private void finishUpdatingLogsUsed() {
        logLock.unlock();
    }

    public synchronized void chopFiles() {
        initiateMajorCompaction(MajorCompactionReason.CHOP);
    }

    public void compactAll(long compactionId) {
        boolean updateMetadata = false;

        synchronized (this) {
            if (lastCompactID >= compactionId)
                return;

            if (closing || closed || majorCompactionQueued.contains(MajorCompactionReason.USER)
                    || majorCompactionInProgress)
                return;

            if (datafileManager.getDatafileSizes().size() == 0) {
                // no files, so just update the metadata table
                majorCompactionInProgress = true;
                updateMetadata = true;
                lastCompactID = compactionId;
            } else
                initiateMajorCompaction(MajorCompactionReason.USER);
        }

        if (updateMetadata) {
            try {
                // if multiple threads were allowed to update this outside of a sync block, then it would be
                // a race condition
                MetadataTableUtil.updateTabletCompactID(extent, compactionId, SystemCredentials.get(),
                        tabletServer.getLock());
            } finally {
                synchronized (this) {
                    majorCompactionInProgress = false;
                    this.notifyAll();
                }
            }
        }
    }

    public TableConfiguration getTableConfiguration() {
        return tabletServer.getTableConfiguration(extent);
    }
}