Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.accumulo.tserver.tablet; import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly; import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Optional; import java.util.PriorityQueue; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReentrantLock; import org.apache.accumulo.core.Constants; import org.apache.accumulo.core.client.Durability; import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.admin.CompactionStrategyConfig; import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.impl.DurabilityImpl; import org.apache.accumulo.core.client.impl.Tables; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.ConfigurationCopy; import org.apache.accumulo.core.conf.ConfigurationObserver; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.constraints.Violations; import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Column; import org.apache.accumulo.core.data.ColumnUpdate; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.data.impl.KeyExtent; import org.apache.accumulo.core.data.thrift.IterInfo; import org.apache.accumulo.core.data.thrift.MapFileInfo; import org.apache.accumulo.core.file.FileOperations; import org.apache.accumulo.core.file.FileSKVIterator; import org.apache.accumulo.core.iterators.IterationInterruptedException; import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope; import org.apache.accumulo.core.iterators.SortedKeyValueIterator; import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator; import 
org.apache.accumulo.core.master.thrift.BulkImportState; import org.apache.accumulo.core.master.thrift.TabletLoadState; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.core.protobuf.ProtobufUtil; import org.apache.accumulo.core.replication.ReplicationConfigurationUtil; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.security.ColumnVisibility; import org.apache.accumulo.core.tabletserver.log.LogEntry; import org.apache.accumulo.core.tabletserver.thrift.TabletStats; import org.apache.accumulo.core.trace.ProbabilitySampler; import org.apache.accumulo.core.trace.Span; import org.apache.accumulo.core.trace.Trace; import org.apache.accumulo.core.util.LocalityGroupUtil; import org.apache.accumulo.core.util.Pair; import org.apache.accumulo.core.util.ratelimit.RateLimiter; import org.apache.accumulo.server.ServerConstants; import org.apache.accumulo.server.conf.TableConfiguration; import org.apache.accumulo.server.fs.FileRef; import org.apache.accumulo.server.fs.VolumeManager; import org.apache.accumulo.server.fs.VolumeManager.FileType; import org.apache.accumulo.server.fs.VolumeUtil; import org.apache.accumulo.server.fs.VolumeUtil.TabletFiles; import org.apache.accumulo.server.master.state.TServerInstance; import org.apache.accumulo.server.master.tableOps.UserCompactionConfig; import org.apache.accumulo.server.metrics.Metrics; import org.apache.accumulo.server.problems.ProblemReport; import org.apache.accumulo.server.problems.ProblemReports; import org.apache.accumulo.server.problems.ProblemType; import org.apache.accumulo.server.replication.StatusUtil; import org.apache.accumulo.server.replication.proto.Replication.Status; import org.apache.accumulo.server.tablets.TabletTime; import org.apache.accumulo.server.tablets.UniqueNameAllocator; import org.apache.accumulo.server.util.FileUtil; import org.apache.accumulo.server.util.MasterMetadataUtil; import org.apache.accumulo.server.util.MetadataTableUtil; import org.apache.accumulo.server.util.ReplicationTableUtil; import org.apache.accumulo.server.zookeeper.ZooReaderWriter; import org.apache.accumulo.start.classloader.vfs.AccumuloVFSClassLoader; import org.apache.accumulo.tserver.ConditionCheckerContext.ConditionChecker; import org.apache.accumulo.tserver.InMemoryMap; import org.apache.accumulo.tserver.MinorCompactionReason; import org.apache.accumulo.tserver.TConstraintViolationException; import org.apache.accumulo.tserver.TLevel; import org.apache.accumulo.tserver.TabletServer; import org.apache.accumulo.tserver.TabletServerResourceManager.TabletResourceManager; import org.apache.accumulo.tserver.TabletStatsKeeper; import org.apache.accumulo.tserver.TabletStatsKeeper.Operation; import org.apache.accumulo.tserver.TooManyFilesException; import org.apache.accumulo.tserver.TservConstraintEnv; import org.apache.accumulo.tserver.compaction.CompactionPlan; import org.apache.accumulo.tserver.compaction.CompactionStrategy; import org.apache.accumulo.tserver.compaction.DefaultCompactionStrategy; import org.apache.accumulo.tserver.compaction.MajorCompactionReason; import org.apache.accumulo.tserver.compaction.MajorCompactionRequest; import org.apache.accumulo.tserver.compaction.WriteParameters; import org.apache.accumulo.tserver.constraints.ConstraintChecker; import org.apache.accumulo.tserver.log.DfsLogger; import org.apache.accumulo.tserver.log.MutationReceiver; import 
org.apache.accumulo.tserver.mastermessage.TabletStatusMessage; import org.apache.accumulo.tserver.metrics.TabletServerMinCMetrics; import org.apache.accumulo.tserver.tablet.Compactor.CompactionCanceledException; import org.apache.accumulo.tserver.tablet.Compactor.CompactionEnv; import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.binary.Hex; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.log4j.Logger; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.NoNodeException; import com.google.common.annotations.VisibleForTesting; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; /** * * Provide access to a single row range in a living TabletServer. * */ public class Tablet implements TabletCommitter { static private final Logger log = Logger.getLogger(Tablet.class); private final TabletServer tabletServer; private final KeyExtent extent; private final TabletResourceManager tabletResources; private final DatafileManager datafileManager; private final TableConfiguration tableConfiguration; private final String tabletDirectory; private final Path location; // absolute path of this tablets dir private final TabletMemory tabletMemory; private final TabletTime tabletTime; private final Object timeLock = new Object(); private long persistedTime; private TServerInstance lastLocation = null; private volatile boolean tableDirChecked = false; private final AtomicLong dataSourceDeletions = new AtomicLong(0); public long getDataSourceDeletions() { return dataSourceDeletions.get(); } private final Set<ScanDataSource> activeScans = new HashSet<ScanDataSource>(); private static enum CloseState { OPEN, CLOSING, CLOSED, COMPLETE } private volatile CloseState closeState = CloseState.OPEN; private boolean updatingFlushID = false; private long lastFlushID = -1; private long lastCompactID = -1; private static class CompactionWaitInfo { long flushID = -1; long compactionID = -1; } // stores info about user initiated major compaction that is waiting on a minor compaction to finish private final CompactionWaitInfo compactionWaitInfo = new CompactionWaitInfo(); static enum CompactionState { WAITING_TO_START, IN_PROGRESS }; private volatile CompactionState minorCompactionState = null; private volatile CompactionState majorCompactionState = null; private final Set<MajorCompactionReason> majorCompactionQueued = Collections .synchronizedSet(EnumSet.noneOf(MajorCompactionReason.class)); private final AtomicReference<ConstraintChecker> constraintChecker = new AtomicReference<ConstraintChecker>(); private int writesInProgress = 0; private final TabletStatsKeeper timer = new TabletStatsKeeper(); private final Rate queryRate = new Rate(0.95); private long queryCount = 0; private final Rate queryByteRate = new Rate(0.95); private long queryBytes = 0; private final Rate ingestRate = new Rate(0.95); private long ingestCount = 0; private final Rate ingestByteRate = new Rate(0.95); private long ingestBytes = 0; private byte[] defaultSecurityLabel = new byte[0]; private long lastMinorCompactionFinishTime = 0; private long lastMapFileImportTime = 0; private volatile long numEntries = 0; private volatile long numEntriesInMemory = 0; private final Rate scannedRate = new Rate(0.95); private final AtomicLong scannedCount = new AtomicLong(0); private final ConfigurationObserver configObserver; private final Cache<Long, 
List<FileRef>> bulkImported = CacheBuilder.newBuilder().build(); private final int logId; @Override public int getLogId() { return logId; } public static class LookupResult { public List<Range> unfinishedRanges = new ArrayList<Range>(); public long bytesAdded = 0; public long dataSize = 0; public boolean closed = false; } FileRef getNextMapFilename(String prefix) throws IOException { String extension = FileOperations.getNewFileExtension(tableConfiguration); checkTabletDir(); return new FileRef(location.toString() + "/" + prefix + UniqueNameAllocator.getInstance().getNextName() + "." + extension); } private void checkTabletDir() throws IOException { if (!tableDirChecked) { FileStatus[] files = null; try { files = getTabletServer().getFileSystem().listStatus(location); } catch (FileNotFoundException ex) { // ignored } if (files == null) { if (location.getName().startsWith(Constants.CLONE_PREFIX)) log.debug("Tablet " + extent + " had no dir, creating " + location); // its a clone dir... else log.warn("Tablet " + extent + " had no dir, creating " + location); getTabletServer().getFileSystem().mkdirs(location); } tableDirChecked = true; } } /** * Only visible for testing */ @VisibleForTesting protected Tablet(TabletTime tabletTime, String tabletDirectory, int logId, Path location, DatafileManager datafileManager, TabletServer tabletServer, TabletResourceManager tabletResources, TabletMemory tabletMemory, TableConfiguration tableConfiguration, KeyExtent extent, ConfigurationObserver configObserver) { this.tabletTime = tabletTime; this.tabletDirectory = tabletDirectory; this.logId = logId; this.location = location; this.datafileManager = datafileManager; this.tabletServer = tabletServer; this.tabletResources = tabletResources; this.tabletMemory = tabletMemory; this.tableConfiguration = tableConfiguration; this.extent = extent; this.configObserver = configObserver; this.splitCreationTime = 0; } public Tablet(final TabletServer tabletServer, final KeyExtent extent, final TabletResourceManager trm, TabletData data) throws IOException { this.tabletServer = tabletServer; this.extent = extent; this.tabletResources = trm; this.lastLocation = data.getLastLocation(); this.lastFlushID = data.getFlushID(); this.lastCompactID = data.getCompactID(); this.splitCreationTime = data.getSplitTime(); this.tabletTime = TabletTime.getInstance(data.getTime()); this.persistedTime = tabletTime.getTime(); this.logId = tabletServer.createLogId(extent); TableConfiguration tblConf = tabletServer.getTableConfiguration(extent); if (null == tblConf) { Tables.clearCache(tabletServer.getInstance()); tblConf = tabletServer.getTableConfiguration(extent); requireNonNull(tblConf, "Could not get table configuration for " + extent.getTableId()); } this.tableConfiguration = tblConf; // translate any volume changes VolumeManager fs = tabletServer.getFileSystem(); boolean replicationEnabled = ReplicationConfigurationUtil.isEnabled(extent, this.tableConfiguration); TabletFiles tabletPaths = new TabletFiles(data.getDirectory(), data.getLogEntris(), data.getDataFiles()); tabletPaths = VolumeUtil.updateTabletVolumes(tabletServer, tabletServer.getLock(), fs, extent, tabletPaths, replicationEnabled); // deal with relative path for the directory Path locationPath; if (tabletPaths.dir.contains(":")) { locationPath = new Path(tabletPaths.dir); } else { locationPath = tabletServer.getFileSystem().getFullPath(FileType.TABLE, extent.getTableId() + tabletPaths.dir); } this.location = locationPath; this.tabletDirectory = tabletPaths.dir; for 
(Entry<Long, List<FileRef>> entry : data.getBulkImported().entrySet()) { this.bulkImported.put(entry.getKey(), new CopyOnWriteArrayList<FileRef>(entry.getValue())); } setupDefaultSecurityLabels(extent); final List<LogEntry> logEntries = tabletPaths.logEntries; final SortedMap<FileRef, DataFileValue> datafiles = tabletPaths.datafiles; tableConfiguration.addObserver(configObserver = new ConfigurationObserver() { private void reloadConstraints() { log.debug("Reloading constraints for extent: " + extent); constraintChecker.set(new ConstraintChecker(tableConfiguration)); } @Override public void propertiesChanged() { reloadConstraints(); try { setupDefaultSecurityLabels(extent); } catch (Exception e) { log.error("Failed to reload default security labels for extent: " + extent.toString()); } } @Override public void propertyChanged(String prop) { if (prop.startsWith(Property.TABLE_CONSTRAINT_PREFIX.getKey())) reloadConstraints(); else if (prop.equals(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY.getKey())) { try { log.info("Default security labels changed for extent: " + extent.toString()); setupDefaultSecurityLabels(extent); } catch (Exception e) { log.error("Failed to reload default security labels for extent: " + extent.toString()); } } } @Override public void sessionExpired() { log.debug("Session expired, no longer updating per table props..."); } }); tableConfiguration.getNamespaceConfiguration().addObserver(configObserver); tabletMemory = new TabletMemory(this); // Force a load of any per-table properties configObserver.propertiesChanged(); if (!logEntries.isEmpty()) { log.info("Starting Write-Ahead Log recovery for " + this.extent); final AtomicLong entriesUsedOnTablet = new AtomicLong(0); // track max time from walog entries without timestamps final AtomicLong maxTime = new AtomicLong(Long.MIN_VALUE); final CommitSession commitSession = getTabletMemory().getCommitSession(); try { Set<String> absPaths = new HashSet<String>(); for (FileRef ref : datafiles.keySet()) absPaths.add(ref.path().toString()); tabletServer.recover(this.getTabletServer().getFileSystem(), extent, tableConfiguration, logEntries, absPaths, new MutationReceiver() { @Override public void receive(Mutation m) { // LogReader.printMutation(m); Collection<ColumnUpdate> muts = m.getUpdates(); for (ColumnUpdate columnUpdate : muts) { if (!columnUpdate.hasTimestamp()) { // if it is not a user set timestamp, it must have been set // by the system maxTime.set(Math.max(maxTime.get(), columnUpdate.getTimestamp())); } } getTabletMemory().mutate(commitSession, Collections.singletonList(m)); entriesUsedOnTablet.incrementAndGet(); } }); if (maxTime.get() != Long.MIN_VALUE) { tabletTime.useMaxTimeFromWALog(maxTime.get()); } commitSession.updateMaxCommittedTime(tabletTime.getTime()); if (entriesUsedOnTablet.get() == 0) { log.debug("No replayed mutations applied, removing unused entries for " + extent); MetadataTableUtil.removeUnusedWALEntries(getTabletServer(), extent, logEntries, tabletServer.getLock()); // No replication update to be made because the fact that this tablet didn't use any mutations // from the WAL implies nothing about use of this WAL by other tablets. Do nothing. logEntries.clear(); } else if (ReplicationConfigurationUtil.isEnabled(extent, tabletServer.getTableConfiguration(extent))) { // The logs are about to be re-used by this tablet, we need to record that they have data for this extent, // but that they may get more data. 
logEntries is not cleared which will cause the elements // in logEntries to be added to the currentLogs for this Tablet below. // // This update serves the same purpose as an update during a MinC. We know that the WAL was defined // (written when the WAL was opened) but this lets us know there are mutations written to this WAL // that could potentially be replicated. Because the Tablet is using this WAL, we can be sure that // the WAL isn't closed (WRT replication Status) and thus we're safe to update its progress. Status status = StatusUtil.openWithUnknownLength(); for (LogEntry logEntry : logEntries) { log.debug("Writing updated status to metadata table for " + logEntry.filename + " " + ProtobufUtil.toString(status)); ReplicationTableUtil.updateFiles(tabletServer, extent, logEntry.filename, status); } } } catch (Throwable t) { if (tableConfiguration.getBoolean(Property.TABLE_FAILURES_IGNORE)) { log.warn("Error recovering from log files: ", t); } else { throw new RuntimeException(t); } } // make some closed references that represent the recovered logs currentLogs = new ConcurrentSkipListSet<DfsLogger>(); for (LogEntry logEntry : logEntries) { currentLogs.add(new DfsLogger(tabletServer.getServerConfig(), logEntry.filename, logEntry.getColumnQualifier().toString())); } log.info("Write-Ahead Log recovery complete for " + this.extent + " (" + entriesUsedOnTablet.get() + " mutations applied, " + getTabletMemory().getNumEntries() + " entries created)"); } String contextName = tableConfiguration.get(Property.TABLE_CLASSPATH); if (contextName != null && !contextName.equals("")) { // initialize context classloader, instead of possibly waiting for it to initialize for a scan // TODO this could hang, causing other tablets to fail to load - ACCUMULO-1292 AccumuloVFSClassLoader.getContextManager().getClassLoader(contextName); } // do this last after tablet is completely setup because it // could cause major compaction to start datafileManager = new DatafileManager(this, datafiles); computeNumEntries(); getDatafileManager().removeFilesAfterScan(data.getScanFiles()); // look for hints of a failure on the previous tablet server if (!logEntries.isEmpty() || needsMajorCompaction(MajorCompactionReason.NORMAL)) { // look for any temp files hanging around removeOldTemporaryFiles(); } log.log(TLevel.TABLET_HIST, extent + " opened"); } private void removeOldTemporaryFiles() { // remove any temporary files created by a previous tablet server try { for (FileStatus tmp : getTabletServer().getFileSystem().globStatus(new Path(location, "*_tmp"))) { try { log.debug("Removing old temp file " + tmp.getPath()); getTabletServer().getFileSystem().delete(tmp.getPath()); } catch (IOException ex) { log.error("Unable to remove old temp file " + tmp.getPath() + ": " + ex); } } } catch (IOException ex) { log.error("Error scanning for old temp files in " + location); } } private void setupDefaultSecurityLabels(KeyExtent extent) { if (extent.isMeta()) { defaultSecurityLabel = new byte[0]; } else { try { ColumnVisibility cv = new ColumnVisibility( tableConfiguration.get(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY)); this.defaultSecurityLabel = cv.getExpression(); } catch (Exception e) { log.error(e, e); this.defaultSecurityLabel = new byte[0]; } } } private LookupResult lookup(SortedKeyValueIterator<Key, Value> mmfi, List<Range> ranges, HashSet<Column> columnSet, List<KVEntry> results, long maxResultsSize, long batchTimeOut) throws IOException { LookupResult lookupResult = new LookupResult(); boolean exceededMemoryUsage = false; 
boolean tabletClosed = false; Set<ByteSequence> cfset = null; if (columnSet.size() > 0) cfset = LocalityGroupUtil.families(columnSet); long returnTime = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(batchTimeOut); if (batchTimeOut <= 0 || batchTimeOut == Long.MAX_VALUE) { batchTimeOut = 0; } for (Range range : ranges) { boolean timesUp = batchTimeOut > 0 && System.nanoTime() > returnTime; if (exceededMemoryUsage || tabletClosed || timesUp) { lookupResult.unfinishedRanges.add(range); continue; } int entriesAdded = 0; try { if (cfset != null) mmfi.seek(range, cfset, true); else mmfi.seek(range, LocalityGroupUtil.EMPTY_CF_SET, false); while (mmfi.hasTop()) { Key key = mmfi.getTopKey(); KVEntry kve = new KVEntry(key, mmfi.getTopValue()); results.add(kve); entriesAdded++; lookupResult.bytesAdded += kve.estimateMemoryUsed(); lookupResult.dataSize += kve.numBytes(); exceededMemoryUsage = lookupResult.bytesAdded > maxResultsSize; timesUp = batchTimeOut > 0 && System.nanoTime() > returnTime; if (exceededMemoryUsage || timesUp) { addUnfinishedRange(lookupResult, range, key, false); break; } mmfi.next(); } } catch (TooManyFilesException tmfe) { // treat this as a closed tablet, and let the client retry log.warn("Tablet " + getExtent() + " has too many files, batch lookup can not run"); handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded); tabletClosed = true; } catch (IOException ioe) { if (shutdownInProgress()) { // assume HDFS shutdown hook caused this exception log.debug("IOException while shutdown in progress ", ioe); handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded); tabletClosed = true; } else { throw ioe; } } catch (IterationInterruptedException iie) { if (isClosed()) { handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded); tabletClosed = true; } else { throw iie; } } catch (TabletClosedException tce) { handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded); tabletClosed = true; } } return lookupResult; } private void handleTabletClosedDuringScan(List<KVEntry> results, LookupResult lookupResult, boolean exceededMemoryUsage, Range range, int entriesAdded) { if (exceededMemoryUsage) throw new IllegalStateException("tablet should not exceed memory usage or close, not both"); if (entriesAdded > 0) addUnfinishedRange(lookupResult, range, results.get(results.size() - 1).getKey(), false); else lookupResult.unfinishedRanges.add(range); lookupResult.closed = true; } private void addUnfinishedRange(LookupResult lookupResult, Range range, Key key, boolean inclusiveStartKey) { if (range.getEndKey() == null || key.compareTo(range.getEndKey()) < 0) { Range nlur = new Range(new Key(key), inclusiveStartKey, range.getEndKey(), range.isEndKeyInclusive()); lookupResult.unfinishedRanges.add(nlur); } } public void checkConditions(ConditionChecker checker, Authorizations authorizations, AtomicBoolean iFlag) throws IOException { ScanDataSource dataSource = new ScanDataSource(this, authorizations, this.defaultSecurityLabel, iFlag); try { SortedKeyValueIterator<Key, Value> iter = new SourceSwitchingIterator(dataSource); checker.check(iter); } catch (IOException ioe) { dataSource.close(true); throw ioe; } finally { // code in finally block because always want // to return mapfiles, even when exception is thrown dataSource.close(false); } } public LookupResult lookup(List<Range> ranges, HashSet<Column> columns, Authorizations authorizations, List<KVEntry> 
results, long maxResultSize, List<IterInfo> ssiList, Map<String, Map<String, String>> ssio, AtomicBoolean interruptFlag, SamplerConfiguration samplerConfig, long batchTimeOut, String classLoaderContext) throws IOException { if (ranges.size() == 0) { return new LookupResult(); } ranges = Range.mergeOverlapping(ranges); if (ranges.size() > 1) { Collections.sort(ranges); } Range tabletRange = extent.toDataRange(); for (Range range : ranges) { // do a test to see if this range falls within the tablet, if it does not // then clip will throw an exception tabletRange.clip(range); } ScanDataSource dataSource = new ScanDataSource(this, authorizations, this.defaultSecurityLabel, columns, ssiList, ssio, interruptFlag, samplerConfig, batchTimeOut, classLoaderContext); LookupResult result = null; try { SortedKeyValueIterator<Key, Value> iter = new SourceSwitchingIterator(dataSource); result = lookup(iter, ranges, columns, results, maxResultSize, batchTimeOut); return result; } catch (IOException ioe) { dataSource.close(true); throw ioe; } finally { // code in finally block because always want // to return mapfiles, even when exception is thrown dataSource.close(false); synchronized (this) { queryCount += results.size(); if (result != null) queryBytes += result.dataSize; } } } Batch nextBatch(SortedKeyValueIterator<Key, Value> iter, Range range, int num, Set<Column> columns, long batchTimeOut) throws IOException { // log.info("In nextBatch.."); long stopTime = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(batchTimeOut); if (batchTimeOut == Long.MAX_VALUE || batchTimeOut <= 0) { batchTimeOut = 0; } List<KVEntry> results = new ArrayList<KVEntry>(); Key key = null; Value value; long resultSize = 0L; long resultBytes = 0L; long maxResultsSize = tableConfiguration.getMemoryInBytes(Property.TABLE_SCAN_MAXMEM); if (columns.size() == 0) { iter.seek(range, LocalityGroupUtil.EMPTY_CF_SET, false); } else { iter.seek(range, LocalityGroupUtil.families(columns), true); } Key continueKey = null; boolean skipContinueKey = false; boolean endOfTabletReached = false; while (iter.hasTop()) { value = iter.getTopValue(); key = iter.getTopKey(); KVEntry kvEntry = new KVEntry(key, value); // copies key and value results.add(kvEntry); resultSize += kvEntry.estimateMemoryUsed(); resultBytes += kvEntry.numBytes(); boolean timesUp = batchTimeOut > 0 && System.nanoTime() >= stopTime; if (resultSize >= maxResultsSize || results.size() >= num || timesUp) { continueKey = new Key(key); skipContinueKey = true; break; } iter.next(); } if (iter.hasTop() == false) { endOfTabletReached = true; } if (endOfTabletReached) { continueKey = null; } if (endOfTabletReached && results.size() == 0) results = null; return new Batch(skipContinueKey, results, continueKey, resultBytes); } /** * Determine if a JVM shutdown is in progress. 
* */ boolean shutdownInProgress() { try { Runtime.getRuntime().removeShutdownHook(new Thread(new Runnable() { @Override public void run() { } })); } catch (IllegalStateException ise) { return true; } return false; } public Scanner createScanner(Range range, int num, Set<Column> columns, Authorizations authorizations, List<IterInfo> ssiList, Map<String, Map<String, String>> ssio, boolean isolated, AtomicBoolean interruptFlag, SamplerConfiguration samplerConfig, long batchTimeOut, String classLoaderContext) { // do a test to see if this range falls within the tablet, if it does not // then clip will throw an exception extent.toDataRange().clip(range); ScanOptions opts = new ScanOptions(num, authorizations, this.defaultSecurityLabel, columns, ssiList, ssio, interruptFlag, isolated, samplerConfig, batchTimeOut, classLoaderContext); return new Scanner(this, range, opts); } DataFileValue minorCompact(VolumeManager fs, InMemoryMap memTable, FileRef tmpDatafile, FileRef newDatafile, FileRef mergeFile, boolean hasQueueTime, long queued, CommitSession commitSession, long flushId, MinorCompactionReason mincReason) { boolean failed = false; long start = System.currentTimeMillis(); timer.incrementStatusMinor(); long count = 0; String oldName = Thread.currentThread().getName(); try { Thread.currentThread().setName("Minor compacting " + this.extent); Span span = Trace.start("write"); CompactionStats stats; try { count = memTable.getNumEntries(); DataFileValue dfv = null; if (mergeFile != null) dfv = getDatafileManager().getDatafileSizes().get(mergeFile); MinorCompactor compactor = new MinorCompactor(tabletServer, this, memTable, mergeFile, dfv, tmpDatafile, mincReason, tableConfiguration); stats = compactor.call(); } finally { span.stop(); } span = Trace.start("bringOnline"); try { getDatafileManager().bringMinorCompactionOnline(tmpDatafile, newDatafile, mergeFile, new DataFileValue(stats.getFileSize(), stats.getEntriesWritten()), commitSession, flushId); } finally { span.stop(); } return new DataFileValue(stats.getFileSize(), stats.getEntriesWritten()); } catch (Exception e) { failed = true; throw new RuntimeException(e); } catch (Error e) { // Weird errors like "OutOfMemoryError" when trying to create the thread for the compaction failed = true; throw new RuntimeException(e); } finally { Thread.currentThread().setName(oldName); try { getTabletMemory().finalizeMinC(); } catch (Throwable t) { log.error("Failed to free tablet memory", t); } if (!failed) { lastMinorCompactionFinishTime = System.currentTimeMillis(); } Metrics minCMetrics = getTabletServer().getMinCMetrics(); if (minCMetrics.isEnabled()) minCMetrics.add(TabletServerMinCMetrics.MINC, (lastMinorCompactionFinishTime - start)); if (hasQueueTime) { timer.updateTime(Operation.MINOR, queued, start, count, failed); if (minCMetrics.isEnabled()) minCMetrics.add(TabletServerMinCMetrics.QUEUE, (start - queued)); } else timer.updateTime(Operation.MINOR, start, count, failed); } } private synchronized MinorCompactionTask prepareForMinC(long flushId, MinorCompactionReason mincReason) { CommitSession oldCommitSession = getTabletMemory().prepareForMinC(); otherLogs = currentLogs; currentLogs = new ConcurrentSkipListSet<DfsLogger>(); FileRef mergeFile = null; if (mincReason != MinorCompactionReason.RECOVERY) { mergeFile = getDatafileManager().reserveMergingMinorCompactionFile(); } double tracePercent = tabletServer.getConfiguration().getFraction(Property.TSERV_MINC_TRACE_PERCENT); return new MinorCompactionTask(this, mergeFile, oldCommitSession, flushId, 
mincReason, tracePercent); } public void flush(long tableFlushID) { boolean updateMetadata = false; boolean initiateMinor = false; try { synchronized (this) { // only want one thing at a time to update flush ID to ensure that metadata table and tablet in memory state are consistent if (updatingFlushID) return; if (lastFlushID >= tableFlushID) return; if (isClosing() || isClosed() || getTabletMemory().memoryReservedForMinC()) return; if (getTabletMemory().getMemTable().getNumEntries() == 0) { lastFlushID = tableFlushID; updatingFlushID = true; updateMetadata = true; } else initiateMinor = true; } if (updateMetadata) { // if multiple threads were allowed to update this outside of a sync block, then it would be // a race condition MetadataTableUtil.updateTabletFlushID(extent, tableFlushID, tabletServer, getTabletServer().getLock()); } else if (initiateMinor) initiateMinorCompaction(tableFlushID, MinorCompactionReason.USER); } finally { if (updateMetadata) { synchronized (this) { updatingFlushID = false; this.notifyAll(); } } } } public boolean initiateMinorCompaction(MinorCompactionReason mincReason) { if (isClosed()) { // don't bother trying to get flush id if closed... could be closed after this check but that is ok... just trying to cut down on uneeded log messages.... return false; } // get the flush id before the new memmap is made available for write long flushId; try { flushId = getFlushID(); } catch (NoNodeException e) { log.info("Asked to initiate MinC when there was no flush id " + getExtent() + " " + e.getMessage()); return false; } return initiateMinorCompaction(flushId, mincReason); } public boolean minorCompactNow(MinorCompactionReason mincReason) { long flushId; try { flushId = getFlushID(); } catch (NoNodeException e) { log.info("Asked to initiate MinC when there was no flush id " + getExtent() + " " + e.getMessage()); return false; } MinorCompactionTask mct = createMinorCompactionTask(flushId, mincReason); if (mct == null) return false; mct.run(); return true; } boolean initiateMinorCompaction(long flushId, MinorCompactionReason mincReason) { MinorCompactionTask mct = createMinorCompactionTask(flushId, mincReason); if (mct == null) return false; getTabletResources().executeMinorCompaction(mct); return true; } private MinorCompactionTask createMinorCompactionTask(long flushId, MinorCompactionReason mincReason) { MinorCompactionTask mct; long t1, t2; StringBuilder logMessage = null; try { synchronized (this) { t1 = System.currentTimeMillis(); if (isClosing() || isClosed() || majorCompactionState == CompactionState.WAITING_TO_START || getTabletMemory().memoryReservedForMinC() || getTabletMemory().getMemTable().getNumEntries() == 0 || updatingFlushID) { logMessage = new StringBuilder(); logMessage.append(extent.toString()); logMessage.append(" closeState " + closeState); logMessage.append(" majorCompactionState " + majorCompactionState); if (getTabletMemory() != null) logMessage.append(" tabletMemory.memoryReservedForMinC() " + getTabletMemory().memoryReservedForMinC()); if (getTabletMemory() != null && getTabletMemory().getMemTable() != null) logMessage.append(" tabletMemory.getMemTable().getNumEntries() " + getTabletMemory().getMemTable().getNumEntries()); logMessage.append(" updatingFlushID " + updatingFlushID); return null; } mct = prepareForMinC(flushId, mincReason); t2 = System.currentTimeMillis(); } } finally { // log outside of sync block if (logMessage != null && log.isDebugEnabled()) log.debug(logMessage); } log.debug(String.format("MinC initiate lock %.2f secs", (t2 - 
t1) / 1000.0)); return mct; } public long getFlushID() throws NoNodeException { try { String zTablePath = Constants.ZROOT + "/" + tabletServer.getInstance().getInstanceID() + Constants.ZTABLES + "/" + extent.getTableId() + Constants.ZTABLE_FLUSH_ID; return Long.parseLong(new String(ZooReaderWriter.getInstance().getData(zTablePath, null), UTF_8)); } catch (InterruptedException e) { throw new RuntimeException(e); } catch (NumberFormatException nfe) { throw new RuntimeException(nfe); } catch (KeeperException ke) { if (ke instanceof NoNodeException) { throw (NoNodeException) ke; } else { throw new RuntimeException(ke); } } } long getCompactionCancelID() { String zTablePath = Constants.ZROOT + "/" + tabletServer.getInstance().getInstanceID() + Constants.ZTABLES + "/" + extent.getTableId() + Constants.ZTABLE_COMPACT_CANCEL_ID; try { return Long.parseLong(new String(ZooReaderWriter.getInstance().getData(zTablePath, null), UTF_8)); } catch (KeeperException e) { throw new RuntimeException(e); } catch (InterruptedException e) { throw new RuntimeException(e); } } public Pair<Long, UserCompactionConfig> getCompactionID() throws NoNodeException { try { String zTablePath = Constants.ZROOT + "/" + tabletServer.getInstance().getInstanceID() + Constants.ZTABLES + "/" + extent.getTableId() + Constants.ZTABLE_COMPACT_ID; String[] tokens = new String(ZooReaderWriter.getInstance().getData(zTablePath, null), UTF_8).split(","); long compactID = Long.parseLong(tokens[0]); UserCompactionConfig compactionConfig = new UserCompactionConfig(); if (tokens.length > 1) { Hex hex = new Hex(); ByteArrayInputStream bais = new ByteArrayInputStream( hex.decode(tokens[1].split("=")[1].getBytes(UTF_8))); DataInputStream dis = new DataInputStream(bais); try { compactionConfig.readFields(dis); } catch (IOException e) { throw new RuntimeException(e); } KeyExtent ke = new KeyExtent(extent.getTableId(), compactionConfig.getEndRow(), compactionConfig.getStartRow()); if (!ke.overlaps(extent)) { // only use iterators if compaction range overlaps compactionConfig = new UserCompactionConfig(); } } return new Pair<Long, UserCompactionConfig>(compactID, compactionConfig); } catch (InterruptedException e) { throw new RuntimeException(e); } catch (NumberFormatException nfe) { throw new RuntimeException(nfe); } catch (KeeperException ke) { if (ke instanceof NoNodeException) { throw (NoNodeException) ke; } else { throw new RuntimeException(ke); } } catch (DecoderException e) { throw new RuntimeException(e); } } private synchronized CommitSession finishPreparingMutations(long time) { if (writesInProgress < 0) { throw new IllegalStateException("waitingForLogs < 0 " + writesInProgress); } if (isClosed() || getTabletMemory() == null) { return null; } writesInProgress++; CommitSession commitSession = getTabletMemory().getCommitSession(); commitSession.incrementCommitsInProgress(); commitSession.updateMaxCommittedTime(time); return commitSession; } public void checkConstraints() { ConstraintChecker cc = constraintChecker.get(); if (cc.classLoaderChanged()) { ConstraintChecker ncc = new ConstraintChecker(tableConfiguration); constraintChecker.compareAndSet(cc, ncc); } } public CommitSession prepareMutationsForCommit(TservConstraintEnv cenv, List<Mutation> mutations) throws TConstraintViolationException { ConstraintChecker cc = constraintChecker.get(); List<Mutation> violators = null; Violations violations = new Violations(); cenv.setExtent(extent); for (Mutation mutation : mutations) { Violations more = cc.check(cenv, mutation); if (more != null) { 
violations.add(more); if (violators == null) violators = new ArrayList<Mutation>(); violators.add(mutation); } } long time = tabletTime.setUpdateTimes(mutations); if (!violations.isEmpty()) { HashSet<Mutation> violatorsSet = new HashSet<Mutation>(violators); ArrayList<Mutation> nonViolators = new ArrayList<Mutation>(); for (Mutation mutation : mutations) { if (!violatorsSet.contains(mutation)) { nonViolators.add(mutation); } } CommitSession commitSession = null; if (nonViolators.size() > 0) { // if everything is a violation, then it is expected that // code calling this will not log or commit commitSession = finishPreparingMutations(time); if (commitSession == null) return null; } throw new TConstraintViolationException(violations, violators, nonViolators, commitSession); } return finishPreparingMutations(time); } @Override public synchronized void abortCommit(CommitSession commitSession, List<Mutation> value) { if (writesInProgress <= 0) { throw new IllegalStateException("waitingForLogs <= 0 " + writesInProgress); } if (isCloseComplete() || getTabletMemory() == null) { throw new IllegalStateException("aborting commit when tablet is closed"); } commitSession.decrementCommitsInProgress(); writesInProgress--; if (writesInProgress == 0) this.notifyAll(); } @Override public void commit(CommitSession commitSession, List<Mutation> mutations) { int totalCount = 0; long totalBytes = 0; // write the mutation to the in memory table for (Mutation mutation : mutations) { totalCount += mutation.size(); totalBytes += mutation.numBytes(); } getTabletMemory().mutate(commitSession, mutations); synchronized (this) { if (writesInProgress < 1) { throw new IllegalStateException( "commiting mutations after logging, but not waiting for any log messages"); } if (isCloseComplete()) { throw new IllegalStateException("tablet closed with outstanding messages to the logger"); } getTabletMemory().updateMemoryUsageStats(); // decrement here in case an exception is thrown below writesInProgress--; if (writesInProgress == 0) this.notifyAll(); commitSession.decrementCommitsInProgress(); numEntries += totalCount; numEntriesInMemory += totalCount; ingestCount += totalCount; ingestBytes += totalBytes; } } /** * Closes the mapfiles associated with a Tablet. If saveState is true, a minor compaction is performed. 
*/ public void close(boolean saveState) throws IOException { initiateClose(saveState, false, false); completeClose(saveState, true); } void initiateClose(boolean saveState, boolean queueMinC, boolean disableWrites) { if (!saveState && queueMinC) { throw new IllegalArgumentException( "Not saving state on close and requesting minor compactions queue does not make sense"); } log.debug("initiateClose(saveState=" + saveState + " queueMinC=" + queueMinC + " disableWrites=" + disableWrites + ") " + getExtent()); MinorCompactionTask mct = null; synchronized (this) { if (isClosed() || isClosing()) { String msg = "Tablet " + getExtent() + " already " + closeState; throw new IllegalStateException(msg); } // enter the closing state, no splits, minor, or major compactions can start // should cause running major compactions to stop closeState = CloseState.CLOSING; this.notifyAll(); // determines if inserts and queries can still continue while minor compacting if (disableWrites) { closeState = CloseState.CLOSED; } // wait for major compactions to finish, setting closing to // true should cause any running major compactions to abort while (isMajorCompactionRunning()) { try { this.wait(50); } catch (InterruptedException e) { log.error(e.toString()); } } while (updatingFlushID) { try { this.wait(50); } catch (InterruptedException e) { log.error(e.toString()); } } if (!saveState || getTabletMemory().getMemTable().getNumEntries() == 0) { return; } getTabletMemory().waitForMinC(); try { mct = prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE); } catch (NoNodeException e) { throw new RuntimeException(e); } if (queueMinC) { getTabletResources().executeMinorCompaction(mct); return; } } // do minor compaction outside of synch block so that tablet can be read and written to while // compaction runs mct.run(); } private boolean closeCompleting = false; synchronized void completeClose(boolean saveState, boolean completeClose) throws IOException { if (!isClosing() || isCloseComplete() || closeCompleting) { throw new IllegalStateException("closeState = " + closeState); } log.debug("completeClose(saveState=" + saveState + " completeClose=" + completeClose + ") " + getExtent()); // ensure this method is only called once, also guards against multiple // threads entering the method at the same time closeCompleting = true; closeState = CloseState.CLOSED; // modify dataSourceDeletions so scans will try to switch data sources and fail because the tablet is closed dataSourceDeletions.incrementAndGet(); for (ScanDataSource activeScan : activeScans) { activeScan.interrupt(); } // wait for reads and writes to complete while (writesInProgress > 0 || activeScans.size() > 0) { try { this.wait(50); } catch (InterruptedException e) { log.error(e.toString()); } } getTabletMemory().waitForMinC(); if (saveState && getTabletMemory().getMemTable().getNumEntries() > 0) { try { prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE).run(); } catch (NoNodeException e) { throw new RuntimeException(e); } } if (saveState) { // at this point all tablet data is flushed, so do a consistency check RuntimeException err = null; for (int i = 0; i < 5; i++) { try { closeConsistencyCheck(); err = null; } catch (RuntimeException t) { err = t; log.error("Consistency check fails, retrying " + t); sleepUninterruptibly(500, TimeUnit.MILLISECONDS); } } if (err != null) { ProblemReports.getInstance(tabletServer).report(new ProblemReport(extent.getTableId(), ProblemType.TABLET_LOAD, this.extent.toString(), err)); log.error( "Tablet closed consistency 
check has failed for " + this.extent + " giving up and closing"); } } try { getTabletMemory().getMemTable().delete(0); } catch (Throwable t) { log.error("Failed to delete mem table : " + t.getMessage(), t); } getTabletMemory().close(); // close map files getTabletResources().close(); log.log(TLevel.TABLET_HIST, extent + " closed"); tableConfiguration.getNamespaceConfiguration().removeObserver(configObserver); tableConfiguration.removeObserver(configObserver); if (completeClose) closeState = CloseState.COMPLETE; } private void closeConsistencyCheck() { if (getTabletMemory().getMemTable().getNumEntries() != 0) { String msg = "Closed tablet " + extent + " has " + getTabletMemory().getMemTable().getNumEntries() + " entries in memory"; log.error(msg); throw new RuntimeException(msg); } if (getTabletMemory().memoryReservedForMinC()) { String msg = "Closed tablet " + extent + " has minor compacting memory"; log.error(msg); throw new RuntimeException(msg); } try { Pair<List<LogEntry>, SortedMap<FileRef, DataFileValue>> fileLog = MetadataTableUtil .getFileAndLogEntries(tabletServer, extent); if (fileLog.getFirst().size() != 0) { String msg = "Closed tablet " + extent + " has walog entries in " + MetadataTable.NAME + " " + fileLog.getFirst(); log.error(msg); throw new RuntimeException(msg); } if (extent.isRootTablet()) { if (!fileLog.getSecond().keySet().equals(getDatafileManager().getDatafileSizes().keySet())) { String msg = "Data file in " + RootTable.NAME + " differ from in memory data " + extent + " " + fileLog.getSecond().keySet() + " " + getDatafileManager().getDatafileSizes().keySet(); log.error(msg); throw new RuntimeException(msg); } } else { if (!fileLog.getSecond().equals(getDatafileManager().getDatafileSizes())) { String msg = "Data file in " + MetadataTable.NAME + " differ from in memory data " + extent + " " + fileLog.getSecond() + " " + getDatafileManager().getDatafileSizes(); log.error(msg); throw new RuntimeException(msg); } } } catch (Exception e) { String msg = "Failed to do close consistency check for tablet " + extent; log.error(msg, e); throw new RuntimeException(msg, e); } if (otherLogs.size() != 0 || currentLogs.size() != 0) { String msg = "Closed tablet " + extent + " has walog entries in memory currentLogs = " + currentLogs + " otherLogs = " + otherLogs; log.error(msg); throw new RuntimeException(msg); } // TODO check lastFlushID and lostCompactID - ACCUMULO-1290 } /** * Returns a Path object representing the tablet's location on the DFS. * * @return location */ public Path getLocation() { return location; } public synchronized boolean initiateMajorCompaction(MajorCompactionReason reason) { if (isClosing() || isClosed() || !needsMajorCompaction(reason) || isMajorCompactionRunning() || majorCompactionQueued.contains(reason)) { return false; } majorCompactionQueued.add(reason); getTabletResources().executeMajorCompaction(getExtent(), new CompactionRunner(this, reason)); return false; } /** * Returns true if a major compaction should be performed on the tablet. * */ public boolean needsMajorCompaction(MajorCompactionReason reason) { if (isMajorCompactionRunning()) return false; if (reason == MajorCompactionReason.CHOP || reason == MajorCompactionReason.USER) return true; return getTabletResources().needsMajorCompaction(getDatafileManager().getDatafileSizes(), reason); } /** * Returns an int representing the total block size of the files served by this tablet. 
* * @return size */ // this is the size of just the files public long estimateTabletSize() { long size = 0L; for (DataFileValue sz : getDatafileManager().getDatafileSizes().values()) size += sz.getSize(); return size; } private boolean sawBigRow = false; private long timeOfLastMinCWhenBigFreakinRowWasSeen = 0; private long timeOfLastImportWhenBigFreakinRowWasSeen = 0; private final long splitCreationTime; private SplitRowSpec findSplitRow(Collection<FileRef> files) { // never split the root tablet // check if we already decided that we can never split // check to see if we're big enough to split long splitThreshold = tableConfiguration.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD); long maxEndRow = tableConfiguration.getMemoryInBytes(Property.TABLE_MAX_END_ROW_SIZE); if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) { return null; } // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred. if (sawBigRow) { if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) { // a minor compaction or map file import has occurred... check again sawBigRow = false; } else { // nothing changed, do not split return null; } } SortedMap<Double, Key> keys = null; try { // we should make .25 below configurable keys = FileUtil.findMidPoint(getTabletServer().getFileSystem(), getTabletServer().getConfiguration(), extent.getPrevEndRow(), extent.getEndRow(), FileUtil.toPathStrings(files), .25); } catch (IOException e) { log.error("Failed to find midpoint " + e.getMessage()); return null; } // check to see if one row takes up most of the tablet, in which case we can not split try { Text lastRow; if (extent.getEndRow() == null) { Key lastKey = (Key) FileUtil.findLastKey(getTabletServer().getFileSystem(), getTabletServer().getConfiguration(), files); lastRow = lastKey.getRow(); } else { lastRow = extent.getEndRow(); } // We expect to get a midPoint for this set of files. If we don't get one, we have a problem. final Key mid = keys.get(.5); if (null == mid) { throw new IllegalStateException("Could not determine midpoint for files"); } // check to see that the midPoint is not equal to the end key if (mid.compareRow(lastRow) == 0) { if (keys.firstKey() < .5) { Key candidate = keys.get(keys.firstKey()); if (candidate.getLength() > maxEndRow) { log.warn("Cannot split tablet " + extent + ", selected split point too long. 
Length : " + candidate.getLength()); sawBigRow = true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } if (candidate.compareRow(lastRow) != 0) { // we should use this ratio in split size estimations if (log.isTraceEnabled()) log.trace(String.format( "Splitting at %6.2f instead of .5, row at .5 is same as end row%n", keys.firstKey())); return new SplitRowSpec(keys.firstKey(), candidate.getRow()); } } log.warn("Cannot split tablet " + extent + " it contains a big row : " + lastRow); sawBigRow = true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } Text text = mid.getRow(); SortedMap<Double, Key> firstHalf = keys.headMap(.5); if (firstHalf.size() > 0) { Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow(); Text shorter = new Text(); int trunc = longestCommonLength(text, beforeMid); shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1)); text = shorter; } if (text.getLength() > maxEndRow) { log.warn("Cannot split tablet " + extent + ", selected split point too long. Length : " + text.getLength()); sawBigRow = true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } return new SplitRowSpec(.5, text); } catch (IOException e) { // don't split now, but check again later log.error("Failed to find lastkey " + e.getMessage()); return null; } } private static int longestCommonLength(Text text, Text beforeMid) { int common = 0; while (common < text.getLength() && common < beforeMid.getLength() && text.getBytes()[common] == beforeMid.getBytes()[common]) { common++; } return common; } private Map<FileRef, Pair<Key, Key>> getFirstAndLastKeys(SortedMap<FileRef, DataFileValue> allFiles) throws IOException { Map<FileRef, Pair<Key, Key>> result = new HashMap<FileRef, Pair<Key, Key>>(); FileOperations fileFactory = FileOperations.getInstance(); VolumeManager fs = getTabletServer().getFileSystem(); for (Entry<FileRef, DataFileValue> entry : allFiles.entrySet()) { FileRef file = entry.getKey(); FileSystem ns = fs.getVolumeByPath(file.path()).getFileSystem(); FileSKVIterator openReader = fileFactory.newReaderBuilder() .forFile(file.path().toString(), ns, ns.getConf()) .withTableConfiguration(this.getTableConfiguration()).seekToBeginning().build(); try { Key first = openReader.getFirstKey(); Key last = openReader.getLastKey(); result.put(file, new Pair<Key, Key>(first, last)); } finally { openReader.close(); } } return result; } List<FileRef> findChopFiles(KeyExtent extent, Map<FileRef, Pair<Key, Key>> firstAndLastKeys, Collection<FileRef> allFiles) throws IOException { List<FileRef> result = new ArrayList<FileRef>(); if (firstAndLastKeys == null) { result.addAll(allFiles); return result; } for (FileRef file : allFiles) { Pair<Key, Key> pair = firstAndLastKeys.get(file); if (pair == null) { // file was created or imported after we obtained the first and last keys... there // are a few options here... throw an exception which will cause the compaction to // retry and also cause ugly error message that the admin has to ignore... could // go get the first and last key, but this code is called while the tablet lock // is held... or just compact the file.... 
result.add(file); } else { Key first = pair.getFirst(); Key last = pair.getSecond(); // If first and last are null, it's an empty file. Add it to the compact set so it goes away. if ((first == null && last == null) || (first != null && !extent.contains(first.getRow())) || (last != null && !extent.contains(last.getRow()))) { result.add(file); } } } return result; } /** * Returns true if this tablet needs to be split * */ public synchronized boolean needsSplit() { if (isClosing() || isClosed()) return false; return findSplitRow(getDatafileManager().getFiles()) != null; } // BEGIN PRIVATE METHODS RELATED TO MAJOR COMPACTION private boolean isCompactionEnabled() { return !isClosing() && !getTabletServer().isMajorCompactionDisabled(); } private CompactionStats _majorCompact(MajorCompactionReason reason) throws IOException, CompactionCanceledException { long t1, t2, t3; Pair<Long, UserCompactionConfig> compactionId = null; CompactionStrategy strategy = null; Map<FileRef, Pair<Key, Key>> firstAndLastKeys = null; if (reason == MajorCompactionReason.USER) { try { compactionId = getCompactionID(); strategy = createCompactionStrategy(compactionId.getSecond().getCompactionStrategy()); } catch (NoNodeException e) { throw new RuntimeException(e); } } else if (reason == MajorCompactionReason.NORMAL || reason == MajorCompactionReason.IDLE) { strategy = Property.createTableInstanceFromPropertyName(tableConfiguration, Property.TABLE_COMPACTION_STRATEGY, CompactionStrategy.class, new DefaultCompactionStrategy()); strategy.init(Property.getCompactionStrategyOptions(tableConfiguration)); } else if (reason == MajorCompactionReason.CHOP) { firstAndLastKeys = getFirstAndLastKeys(getDatafileManager().getDatafileSizes()); } else { throw new IllegalArgumentException("Unknown compaction reason " + reason); } if (strategy != null) { MajorCompactionRequest request = new MajorCompactionRequest(extent, reason, getTabletServer().getFileSystem(), tableConfiguration); request.setFiles(getDatafileManager().getDatafileSizes()); strategy.gatherInformation(request); } Map<FileRef, DataFileValue> filesToCompact = null; int maxFilesToCompact = tableConfiguration.getCount(Property.TSERV_MAJC_THREAD_MAXOPEN); CompactionStats majCStats = new CompactionStats(); CompactionPlan plan = null; boolean propogateDeletes = false; boolean updateCompactionID = false; synchronized (this) { // plan all that work that needs to be done in the sync block... 
then do the actual work // outside the sync block t1 = System.currentTimeMillis(); majorCompactionState = CompactionState.WAITING_TO_START; getTabletMemory().waitForMinC(); t2 = System.currentTimeMillis(); majorCompactionState = CompactionState.IN_PROGRESS; notifyAll(); VolumeManager fs = getTabletServer().getFileSystem(); if (extent.isRootTablet()) { // very important that we call this before doing major compaction, // otherwise deleted compacted files could possible be brought back // at some point if the file they were compacted to was legitimately // removed by a major compaction RootFiles.cleanupReplacement(fs, fs.listStatus(this.location), false); } SortedMap<FileRef, DataFileValue> allFiles = getDatafileManager().getDatafileSizes(); List<FileRef> inputFiles = new ArrayList<FileRef>(); if (reason == MajorCompactionReason.CHOP) { // enforce rules: files with keys outside our range need to be compacted inputFiles.addAll(findChopFiles(extent, firstAndLastKeys, allFiles.keySet())); } else { MajorCompactionRequest request = new MajorCompactionRequest(extent, reason, fs, tableConfiguration); request.setFiles(allFiles); plan = strategy.getCompactionPlan(request); if (plan != null) { plan.validate(allFiles.keySet()); inputFiles.addAll(plan.inputFiles); } } if (inputFiles.isEmpty()) { if (reason == MajorCompactionReason.USER) { if (compactionId.getSecond().getIterators().isEmpty()) { log.debug("No-op major compaction by USER on 0 input files because no iterators present."); lastCompactID = compactionId.getFirst(); updateCompactionID = true; } else { log.debug("Major compaction by USER on 0 input files with iterators."); filesToCompact = new HashMap<>(); } } else { return majCStats; } } else { // If no original files will exist at the end of the compaction, we do not have to propogate deletes Set<FileRef> droppedFiles = new HashSet<>(); droppedFiles.addAll(inputFiles); if (plan != null) droppedFiles.addAll(plan.deleteFiles); propogateDeletes = !(droppedFiles.equals(allFiles.keySet())); log.debug("Major compaction plan: " + plan + " propogate deletes : " + propogateDeletes); filesToCompact = new HashMap<>(allFiles); filesToCompact.keySet().retainAll(inputFiles); getDatafileManager().reserveMajorCompactingFiles(filesToCompact.keySet()); } t3 = System.currentTimeMillis(); } try { log.debug(String.format("MajC initiate lock %.2f secs, wait %.2f secs", (t3 - t2) / 1000.0, (t2 - t1) / 1000.0)); if (updateCompactionID) { MetadataTableUtil.updateTabletCompactID(extent, compactionId.getFirst(), tabletServer, getTabletServer().getLock()); return majCStats; } if (!propogateDeletes && compactionId == null) { // compacting everything, so update the compaction id in metadata try { compactionId = getCompactionID(); if (compactionId.getSecond().getCompactionStrategy() != null) { compactionId = null; // TODO maybe return unless chop? 
    try {
      log.debug(String.format("MajC initiate lock %.2f secs, wait %.2f secs", (t3 - t2) / 1000.0,
          (t2 - t1) / 1000.0));

      if (updateCompactionID) {
        MetadataTableUtil.updateTabletCompactID(extent, compactionId.getFirst(), tabletServer,
            getTabletServer().getLock());
        return majCStats;
      }

      if (!propogateDeletes && compactionId == null) {
        // compacting everything, so update the compaction id in metadata
        try {
          compactionId = getCompactionID();
          if (compactionId.getSecond().getCompactionStrategy() != null) {
            compactionId = null;
            // TODO maybe return unless chop?
          }
        } catch (NoNodeException e) {
          throw new RuntimeException(e);
        }
      }

      List<IteratorSetting> compactionIterators = new ArrayList<IteratorSetting>();
      if (compactionId != null) {
        if (reason == MajorCompactionReason.USER) {
          if (getCompactionCancelID() >= compactionId.getFirst()) {
            // compaction was canceled
            return majCStats;
          }
          compactionIterators = compactionId.getSecond().getIterators();

          synchronized (this) {
            if (lastCompactID >= compactionId.getFirst())
              // already compacted
              return majCStats;
          }
        }
      }
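      /*
       * The loop below compacts at most TSERV_MAJC_THREAD_MAXOPEN files per pass, feeding each
       * pass's output file back into the remaining set. Worked example, assuming
       * maxFilesToCompact = 10 and 13 input files: since 13 > 10 and 13 < 20, the first pass
       * compacts only 13 - 10 + 1 = 4 files, leaving 9 originals plus 1 output = 10 files; the
       * second (final) pass then compacts exactly 10 files into one. Without the adjustment the
       * first pass would compact 10 files and the last pass would wastefully compact only 4.
       */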
      // need to handle case where only one file is being major compacted
      // ACCUMULO-3645 run loop at least once, even if filesToCompact.isEmpty()
      do {
        int numToCompact = maxFilesToCompact;

        if (filesToCompact.size() > maxFilesToCompact
            && filesToCompact.size() < 2 * maxFilesToCompact) {
          // on the second to last compaction pass, compact the minimum amount of files possible
          numToCompact = filesToCompact.size() - maxFilesToCompact + 1;
        }

        Set<FileRef> smallestFiles = removeSmallest(filesToCompact, numToCompact);

        FileRef fileName = getNextMapFilename(
            (filesToCompact.size() == 0 && !propogateDeletes) ? "A" : "C");
        FileRef compactTmpName = new FileRef(fileName.path().toString() + "_tmp");

        AccumuloConfiguration tableConf = createTableConfiguration(tableConfiguration, plan);

        Span span = Trace.start("compactFiles");
        try {

          CompactionEnv cenv = new CompactionEnv() {
            @Override
            public boolean isCompactionEnabled() {
              return Tablet.this.isCompactionEnabled();
            }

            @Override
            public IteratorScope getIteratorScope() {
              return IteratorScope.majc;
            }

            @Override
            public RateLimiter getReadLimiter() {
              return getTabletServer().getMajorCompactionReadLimiter();
            }

            @Override
            public RateLimiter getWriteLimiter() {
              return getTabletServer().getMajorCompactionWriteLimiter();
            }
          };

          HashMap<FileRef,DataFileValue> copy = new HashMap<FileRef,DataFileValue>(
              getDatafileManager().getDatafileSizes());
          if (!copy.keySet().containsAll(smallestFiles))
            throw new IllegalStateException("Cannot find data file values for " + smallestFiles);

          copy.keySet().retainAll(smallestFiles);

          log.debug("Starting MajC " + extent + " (" + reason + ") " + copy.keySet() + " --> "
              + compactTmpName + " " + compactionIterators);

          // always propagate deletes, unless last batch
          boolean lastBatch = filesToCompact.isEmpty();
          Compactor compactor = new Compactor(tabletServer, this, copy, null, compactTmpName,
              lastBatch ? propogateDeletes : true, cenv, compactionIterators, reason.ordinal(),
              tableConf);

          CompactionStats mcs = compactor.call();

          span.data("files", "" + smallestFiles.size());
          span.data("read", "" + mcs.getEntriesRead());
          span.data("written", "" + mcs.getEntriesWritten());
          majCStats.add(mcs);

          if (lastBatch && plan != null && plan.deleteFiles != null) {
            smallestFiles.addAll(plan.deleteFiles);
          }
          getDatafileManager().bringMajorCompactionOnline(smallestFiles, compactTmpName, fileName,
              filesToCompact.size() == 0 && compactionId != null ? compactionId.getFirst() : null,
              new DataFileValue(mcs.getFileSize(), mcs.getEntriesWritten()));

          // when major compaction produces a file w/ zero entries, it will be deleted... do not
          // want to add the deleted file
          if (filesToCompact.size() > 0 && mcs.getEntriesWritten() > 0) {
            filesToCompact.put(fileName,
                new DataFileValue(mcs.getFileSize(), mcs.getEntriesWritten()));
          }
        } finally {
          span.stop();
        }

      } while (filesToCompact.size() > 0);
      return majCStats;
    } finally {
      synchronized (Tablet.this) {
        getDatafileManager().clearMajorCompactingFile();
      }
    }
  }

  protected AccumuloConfiguration createTableConfiguration(TableConfiguration base,
      CompactionPlan plan) {
    if (plan == null || plan.writeParameters == null)
      return base;
    WriteParameters p = plan.writeParameters;
    ConfigurationCopy result = new ConfigurationCopy(base);
    if (p.getHdfsBlockSize() > 0)
      result.set(Property.TABLE_FILE_BLOCK_SIZE, "" + p.getHdfsBlockSize());
    if (p.getBlockSize() > 0)
      result.set(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE, "" + p.getBlockSize());
    if (p.getIndexBlockSize() > 0)
      result.set(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX, "" + p.getIndexBlockSize());
    if (p.getCompressType() != null)
      result.set(Property.TABLE_FILE_COMPRESSION_TYPE, p.getCompressType());
    if (p.getReplication() != 0)
      result.set(Property.TABLE_FILE_REPLICATION, "" + p.getReplication());
    return result;
  }

  private Set<FileRef> removeSmallest(Map<FileRef,DataFileValue> filesToCompact,
      int maxFilesToCompact) {
    // ensure this method works properly when multiple files have the same size

    // short-circuit; also handles zero files case
    if (filesToCompact.size() <= maxFilesToCompact) {
      Set<FileRef> smallestFiles = new HashSet<FileRef>(filesToCompact.keySet());
      filesToCompact.clear();
      return smallestFiles;
    }

    PriorityQueue<Pair<FileRef,Long>> fileHeap = new PriorityQueue<Pair<FileRef,Long>>(
        filesToCompact.size(), new Comparator<Pair<FileRef,Long>>() {
          @Override
          public int compare(Pair<FileRef,Long> o1, Pair<FileRef,Long> o2) {
            // compare boxed Longs with compareTo, not ==, which would test reference equality
            int cmp = o1.getSecond().compareTo(o2.getSecond());
            if (cmp == 0)
              return o1.getFirst().compareTo(o2.getFirst());
            return cmp;
          }
        });

    for (Iterator<Entry<FileRef,DataFileValue>> iterator = filesToCompact.entrySet()
        .iterator(); iterator.hasNext();) {
      Entry<FileRef,DataFileValue> entry = iterator.next();
      fileHeap.add(new Pair<FileRef,Long>(entry.getKey(), entry.getValue().getSize()));
    }

    Set<FileRef> smallestFiles = new HashSet<FileRef>();
    while (smallestFiles.size() < maxFilesToCompact && fileHeap.size() > 0) {
      Pair<FileRef,Long> pair = fileHeap.remove();
      filesToCompact.remove(pair.getFirst());
      smallestFiles.add(pair.getFirst());
    }

    return smallestFiles;
  }

  // END PRIVATE METHODS RELATED TO MAJOR COMPACTION
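  /*
   * removeSmallest() drains the N smallest files (by size, with the file reference as a
   * tie-breaker so ordering is deterministic) out of the supplied map. Illustrative behavior,
   * assuming four hypothetical files and maxFilesToCompact = 2:
   *
   *   {a.rf=30MB, b.rf=10MB, c.rf=20MB, d.rf=40MB}  -->  returns {b.rf, c.rf},
   *                                                      map is now {a.rf=30MB, d.rf=40MB}
   *
   * Note that the map is mutated in place; the caller's filesToCompact shrinks on every pass of
   * the compaction loop above.
   */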
  /**
   * Performs a major compaction on the tablet. If the tablet needs to be split, the compaction is
   * skipped and null is returned, since splitting takes priority.
   */
  CompactionStats majorCompact(MajorCompactionReason reason, long queued) {
    CompactionStats majCStats = null;
    boolean success = false;
    long start = System.currentTimeMillis();

    timer.incrementStatusMajor();

    synchronized (this) {
      // check that compaction is still needed - defer to splitting
      majorCompactionQueued.remove(reason);

      if (isClosing() || isClosed() || !needsMajorCompaction(reason) || isMajorCompactionRunning()
          || needsSplit()) {
        return null;
      }

      majorCompactionState = CompactionState.WAITING_TO_START;
    }

    Span span = null;

    try {
      double tracePercent = tabletServer.getConfiguration()
          .getFraction(Property.TSERV_MAJC_TRACE_PERCENT);
      ProbabilitySampler sampler = new ProbabilitySampler(tracePercent);
      span = Trace.on("majorCompaction", sampler);

      majCStats = _majorCompact(reason);
      if (reason == MajorCompactionReason.CHOP) {
        MetadataTableUtil.chopped(getTabletServer(), getExtent(), this.getTabletServer().getLock());
        getTabletServer()
            .enqueueMasterMessage(new TabletStatusMessage(TabletLoadState.CHOPPED, extent));
      }
      success = true;
    } catch (CompactionCanceledException cce) {
      log.debug("Major compaction canceled, extent = " + getExtent());
    } catch (IOException ioe) {
      log.error("MajC Failed, extent = " + getExtent(), ioe);
    } catch (RuntimeException e) {
      log.error("MajC Unexpected exception, extent = " + getExtent(), e);
    } finally {
      // ensure we always reset the state, even when an exception is thrown
      synchronized (this) {
        majorCompactionState = null;
        this.notifyAll();
      }

      if (span != null) {
        span.data("extent", "" + getExtent());
        if (majCStats != null) {
          span.data("read", "" + majCStats.getEntriesRead());
          span.data("written", "" + majCStats.getEntriesWritten());
        }
        span.stop();
      }
    }

    long count = 0;
    if (majCStats != null)
      count = majCStats.getEntriesRead();
    timer.updateTime(Operation.MAJOR, queued, start, count, !success);

    return majCStats;
  }

  @Override
  public KeyExtent getExtent() {
    return extent;
  }

  synchronized void computeNumEntries() {
    Collection<DataFileValue> vals = getDatafileManager().getDatafileSizes().values();

    long numEntries = 0;

    for (DataFileValue tableValue : vals) {
      numEntries += tableValue.getNumEntries();
    }

    this.numEntriesInMemory = getTabletMemory().getNumEntries();
    numEntries += getTabletMemory().getNumEntries();

    this.numEntries = numEntries;
  }

  public long getNumEntries() {
    return numEntries;
  }

  public long getNumEntriesInMemory() {
    return numEntriesInMemory;
  }

  public synchronized boolean isClosing() {
    return closeState == CloseState.CLOSING;
  }

  public synchronized boolean isClosed() {
    return closeState == CloseState.CLOSED || closeState == CloseState.COMPLETE;
  }

  public synchronized boolean isCloseComplete() {
    return closeState == CloseState.COMPLETE;
  }

  public boolean isMajorCompactionRunning() {
    return majorCompactionState != null;
  }

  public boolean isMinorCompactionQueued() {
    return minorCompactionState == CompactionState.WAITING_TO_START;
  }

  public boolean isMinorCompactionRunning() {
    return minorCompactionState == CompactionState.IN_PROGRESS;
  }

  public boolean isMajorCompactionQueued() {
    return majorCompactionQueued.size() > 0;
  }
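  /*
   * split() below rejects three cases up front: a split point equal to the tablet's end row (a
   * no-op that would create an empty tablet), a split point longer than TABLE_MAX_END_ROW_SIZE
   * (end rows are stored in the metadata table, so unbounded rows would bloat it), and any
   * attempt to split the root tablet. A hedged sketch of a caller handling these, where "tablet"
   * and "splitRow" are hypothetical locals:
   *
   *   try {
   *     TreeMap<KeyExtent,TabletData> parts = tablet.split(splitRow);   // null if aborted
   *   } catch (IllegalArgumentException | IOException e) {
   *     // bad split point: equal to the end row, or too long
   *   }
   */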
Length : " + sp.length; log.warn(msg); throw new IOException(msg); } if (extent.isRootTablet()) { String msg = "Cannot split root tablet"; log.warn(msg); throw new RuntimeException(msg); } try { initiateClose(true, false, false); } catch (IllegalStateException ise) { log.debug("File " + extent + " not splitting : " + ise.getMessage()); return null; } // obtain this info outside of synch block since it will involve opening // the map files... it is ok if the set of map files changes, because // this info is used for optimization... it is ok if map files are missing // from the set... can still query and insert into the tablet while this // map file operation is happening Map<FileRef, FileUtil.FileInfo> firstAndLastRows = FileUtil.tryToGetFirstAndLastRows( getTabletServer().getFileSystem(), getTabletServer().getConfiguration(), getDatafileManager().getFiles()); synchronized (this) { // java needs tuples ... TreeMap<KeyExtent, TabletData> newTablets = new TreeMap<KeyExtent, TabletData>(); long t1 = System.currentTimeMillis(); // choose a split point SplitRowSpec splitPoint; if (sp == null) splitPoint = findSplitRow(getDatafileManager().getFiles()); else { Text tsp = new Text(sp); splitPoint = new SplitRowSpec(FileUtil.estimatePercentageLTE(getTabletServer().getFileSystem(), getTabletServer().getConfiguration(), extent.getPrevEndRow(), extent.getEndRow(), FileUtil.toPathStrings(getDatafileManager().getFiles()), tsp), tsp); } if (splitPoint == null || splitPoint.row == null) { log.info("had to abort split because splitRow was null"); closeState = CloseState.OPEN; return null; } closeState = CloseState.CLOSING; completeClose(true, false); Text midRow = splitPoint.row; double splitRatio = splitPoint.splitRatio; KeyExtent low = new KeyExtent(extent.getTableId(), midRow, extent.getPrevEndRow()); KeyExtent high = new KeyExtent(extent.getTableId(), extent.getEndRow(), midRow); String lowDirectory = createTabletDirectory(getTabletServer().getFileSystem(), extent.getTableId(), midRow); // write new tablet information to MetadataTable SortedMap<FileRef, DataFileValue> lowDatafileSizes = new TreeMap<FileRef, DataFileValue>(); SortedMap<FileRef, DataFileValue> highDatafileSizes = new TreeMap<FileRef, DataFileValue>(); List<FileRef> highDatafilesToRemove = new ArrayList<FileRef>(); MetadataTableUtil.splitDatafiles(extent.getTableId(), midRow, splitRatio, firstAndLastRows, getDatafileManager().getDatafileSizes(), lowDatafileSizes, highDatafileSizes, highDatafilesToRemove); log.debug("Files for low split " + low + " " + lowDatafileSizes.keySet()); log.debug("Files for high split " + high + " " + highDatafileSizes.keySet()); String time = tabletTime.getMetadataValue(); MetadataTableUtil.splitTablet(high, extent.getPrevEndRow(), splitRatio, getTabletServer(), getTabletServer().getLock()); MasterMetadataUtil.addNewTablet(getTabletServer(), low, lowDirectory, getTabletServer().getTabletSession(), lowDatafileSizes, getBulkIngestedFiles(), time, lastFlushID, lastCompactID, getTabletServer().getLock()); MetadataTableUtil.finishSplit(high, highDatafileSizes, highDatafilesToRemove, getTabletServer(), getTabletServer().getLock()); log.log(TLevel.TABLET_HIST, extent + " split " + low + " " + high); newTablets.put(high, new TabletData(tabletDirectory, highDatafileSizes, time, lastFlushID, lastCompactID, lastLocation, getBulkIngestedFiles())); newTablets.put(low, new TabletData(lowDirectory, lowDatafileSizes, time, lastFlushID, lastCompactID, lastLocation, getBulkIngestedFiles())); long t2 = System.currentTimeMillis(); 
  public SortedMap<FileRef,DataFileValue> getDatafiles() {
    return getDatafileManager().getDatafileSizes();
  }

  public double queryRate() {
    return queryRate.rate();
  }

  public double queryByteRate() {
    return queryByteRate.rate();
  }

  public double ingestRate() {
    return ingestRate.rate();
  }

  public double ingestByteRate() {
    return ingestByteRate.rate();
  }

  public double scanRate() {
    return scannedRate.rate();
  }

  public long totalQueries() {
    return this.queryCount;
  }

  // synchronized?
  public void updateRates(long now) {
    queryRate.update(now, queryCount);
    queryByteRate.update(now, queryBytes);
    ingestRate.update(now, ingestCount);
    ingestByteRate.update(now, ingestBytes);
    scannedRate.update(now, scannedCount.get());
  }

  public long getSplitCreationTime() {
    return splitCreationTime;
  }
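  /*
   * importMapFiles() below makes bulk imports idempotent per transaction id: files already
   * imported under the same tid are silently dropped from the request, so a client retrying a
   * timed-out RPC cannot double-import. A minimal sketch of the dedup step, assuming a cache of
   * Long to List<FileRef> like the bulkImported field used here:
   *
   *   List<FileRef> already = bulkImported.getIfPresent(tid);
   *   if (already != null)
   *     fileMap.keySet().removeAll(already);   // retry-safe: skip what this tid already loaded
   */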
+ " seconds to get tablet lock"); } List<FileRef> alreadyImported = bulkImported.getIfPresent(tid); if (alreadyImported != null) { for (FileRef entry : alreadyImported) { if (fileMap.remove(entry) != null) { log.info("Ignoring import of bulk file already imported: " + entry); } } } if (fileMap.isEmpty()) { return; } if (writesInProgress < 0) { throw new IllegalStateException("writesInProgress < 0 " + writesInProgress); } writesInProgress++; } tabletServer.updateBulkImportState(files, BulkImportState.LOADING); try { getDatafileManager().importMapFiles(tid, entries, setTime); lastMapFileImportTime = System.currentTimeMillis(); if (needsSplit()) { getTabletServer().executeSplit(this); } else { initiateMajorCompaction(MajorCompactionReason.NORMAL); } } finally { synchronized (this) { if (writesInProgress < 1) throw new IllegalStateException("writesInProgress < 1 " + writesInProgress); writesInProgress--; if (writesInProgress == 0) this.notifyAll(); try { bulkImported.get(tid, new Callable<List<FileRef>>() { @Override public List<FileRef> call() throws Exception { return new ArrayList<FileRef>(); } }).addAll(fileMap.keySet()); } catch (Exception ex) { log.info(ex.toString(), ex); } tabletServer.removeBulkImportState(files); } } } private ConcurrentSkipListSet<DfsLogger> currentLogs = new ConcurrentSkipListSet<DfsLogger>(); // currentLogs may be updated while a tablet is otherwise locked public Set<DfsLogger> getCurrentLogFiles() { return new HashSet<DfsLogger>(currentLogs); } Set<String> beginClearingUnusedLogs() { Set<String> doomed = new HashSet<String>(); ArrayList<String> otherLogsCopy = new ArrayList<String>(); ArrayList<String> currentLogsCopy = new ArrayList<String>(); // do not hold tablet lock while acquiring the log lock logLock.lock(); synchronized (this) { if (removingLogs) throw new IllegalStateException("Attempted to clear logs when removal of logs in progress"); for (DfsLogger logger : otherLogs) { otherLogsCopy.add(logger.toString()); doomed.add(logger.getMeta()); } for (DfsLogger logger : currentLogs) { currentLogsCopy.add(logger.toString()); doomed.remove(logger.getMeta()); } otherLogs = Collections.emptySet(); if (doomed.size() > 0) removingLogs = true; } // do debug logging outside tablet lock for (String logger : otherLogsCopy) { log.debug("Logs for memory compacted: " + getExtent() + " " + logger.toString()); } for (String logger : currentLogsCopy) { log.debug("Logs for current memory: " + getExtent() + " " + logger); } for (String logger : doomed) { log.debug("Logs to be destroyed: " + getExtent() + " " + logger); } return doomed; } synchronized void finishClearingUnusedLogs() { removingLogs = false; logLock.unlock(); } private Set<DfsLogger> otherLogs = Collections.emptySet(); private boolean removingLogs = false; // this lock is basically used to synchronize writing of log info to metadata private final ReentrantLock logLock = new ReentrantLock(); public int getLogCount() { return currentLogs.size(); } // don't release the lock if this method returns true for success; instead, the caller should clean up by calling finishUpdatingLogsUsed() @Override public boolean beginUpdatingLogsUsed(InMemoryMap memTable, DfsLogger more, boolean mincFinish) { boolean releaseLock = true; // do not hold tablet lock while acquiring the log lock logLock.lock(); try { synchronized (this) { if (isCloseComplete()) { throw new IllegalStateException("Can not update logs of closed tablet " + extent); } boolean addToOther; if (memTable == getTabletMemory().getMinCMemTable()) addToOther = true; 
  // don't release the lock if this method returns true for success; instead, the caller should
  // clean up by calling finishUpdatingLogsUsed()
  @Override
  public boolean beginUpdatingLogsUsed(InMemoryMap memTable, DfsLogger more, boolean mincFinish) {
    boolean releaseLock = true;

    // do not hold tablet lock while acquiring the log lock
    logLock.lock();

    try {
      synchronized (this) {
        if (isCloseComplete()) {
          throw new IllegalStateException("Can not update logs of closed tablet " + extent);
        }

        boolean addToOther;

        if (memTable == getTabletMemory().getMinCMemTable())
          addToOther = true;
        else if (memTable == getTabletMemory().getMemTable())
          addToOther = false;
        else
          throw new IllegalArgumentException("passed in memtable that is not in use");

        if (mincFinish) {
          if (addToOther)
            throw new IllegalStateException("Adding to other logs for mincFinish");
          if (otherLogs.size() != 0)
            throw new IllegalStateException(
                "Expect other logs to be 0 when min finish, but its " + otherLogs);

          // when writing a minc finish event, there is no need to add the log to metadata
          // if nothing has been logged for the tablet since the minor compaction started
          if (currentLogs.size() == 0)
            return !releaseLock;
        }

        int numAdded = 0;
        int numContained = 0;
        if (addToOther) {
          if (otherLogs.add(more))
            numAdded++;

          if (currentLogs.contains(more))
            numContained++;
        } else {
          if (currentLogs.add(more))
            numAdded++;

          if (otherLogs.contains(more))
            numContained++;
        }

        if (numAdded > 0 && numAdded != 1) {
          // expect to add all or none
          throw new IllegalArgumentException(
              "Added subset of logs " + extent + " " + more + " " + currentLogs);
        }

        if (numContained > 0 && numContained != 1) {
          // expect to contain all or none
          throw new IllegalArgumentException(
              "Other logs contained subset of logs " + extent + " " + more + " " + otherLogs);
        }

        if (numAdded > 0 && numContained == 0) {
          releaseLock = false;
        }

        return !releaseLock;
      }
    } finally {
      if (releaseLock)
        logLock.unlock();
    }
  }

  @Override
  public void finishUpdatingLogsUsed() {
    logLock.unlock();
  }

  synchronized public void chopFiles() {
    initiateMajorCompaction(MajorCompactionReason.CHOP);
  }

  private CompactionStrategy createCompactionStrategy(CompactionStrategyConfig strategyConfig) {
    String context = tableConfiguration.get(Property.TABLE_CLASSPATH);
    String clazzName = strategyConfig.getClassName();
    try {
      Class<? extends CompactionStrategy> clazz;
      if (context != null && !context.equals(""))
        clazz = AccumuloVFSClassLoader.getContextManager().loadClass(context, clazzName,
            CompactionStrategy.class);
      else
        clazz = AccumuloVFSClassLoader.loadClass(clazzName, CompactionStrategy.class);
      CompactionStrategy strategy = clazz.newInstance();
      strategy.init(strategyConfig.getOptions());
      return strategy;
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  public void compactAll(long compactionId, UserCompactionConfig compactionConfig) {
    boolean updateMetadata = false;

    synchronized (this) {
      if (lastCompactID >= compactionId)
        return;

      if (isMinorCompactionRunning()) {
        // want to wait for running minc to finish before starting majc, see ACCUMULO-3041
        if (compactionWaitInfo.compactionID == compactionId) {
          if (lastFlushID == compactionWaitInfo.flushID)
            return;
        } else {
          compactionWaitInfo.compactionID = compactionId;
          compactionWaitInfo.flushID = lastFlushID;
          return;
        }
      }

      if (isClosing() || isClosed() || majorCompactionQueued.contains(MajorCompactionReason.USER)
          || isMajorCompactionRunning())
        return;

      CompactionStrategyConfig strategyConfig = compactionConfig.getCompactionStrategy();
      CompactionStrategy strategy = createCompactionStrategy(strategyConfig);

      MajorCompactionRequest request = new MajorCompactionRequest(extent,
          MajorCompactionReason.USER, getTabletServer().getFileSystem(), tableConfiguration);
      request.setFiles(getDatafileManager().getDatafileSizes());

      try {
        if (strategy.shouldCompact(request)) {
          initiateMajorCompaction(MajorCompactionReason.USER);
        } else {
          majorCompactionState = CompactionState.IN_PROGRESS;
          updateMetadata = true;
          lastCompactID = compactionId;
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
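    /*
     * When the strategy declines to compact, the compaction id still has to be recorded in the
     * metadata table so the user-requested compaction is considered complete. That metadata write
     * is an RPC, so it is performed below, outside the synchronized block; setting
     * majorCompactionState inside the block first is what stops a second thread from racing in
     * and starting a real compaction while the id is being written.
     */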
    if (updateMetadata) {
      try {
        // if multiple threads were allowed to update this outside of a sync block, then it would
        // be a race condition
        MetadataTableUtil.updateTabletCompactID(extent, compactionId, getTabletServer(),
            getTabletServer().getLock());
      } finally {
        synchronized (this) {
          majorCompactionState = null;
          this.notifyAll();
        }
      }
    }
  }

  @Override
  public TableConfiguration getTableConfiguration() {
    return tableConfiguration;
  }

  @Override
  public Durability getDurability() {
    return DurabilityImpl.fromString(getTableConfiguration().get(Property.TABLE_DURABILITY));
  }

  @Override
  public void updateMemoryUsageStats(long size, long mincSize) {
    getTabletResources().updateMemoryUsageStats(this, size, mincSize);
  }

  public long incrementDataSourceDeletions() {
    return dataSourceDeletions.incrementAndGet();
  }

  synchronized public void updateQueryStats(int size, long numBytes) {
    queryCount += size;
    queryBytes += numBytes;
  }

  TabletServer getTabletServer() {
    return tabletServer;
  }

  public void updatePersistedTime(long bulkTime, Map<FileRef,DataFileValue> paths, long tid) {
    synchronized (timeLock) {
      if (bulkTime > persistedTime)
        persistedTime = bulkTime;

      MetadataTableUtil.updateTabletDataFile(tid, extent, paths,
          tabletTime.getMetadataValue(persistedTime), getTabletServer(),
          getTabletServer().getLock());
    }
  }

  public void updateTabletDataFile(long maxCommittedTime, FileRef newDatafile, FileRef absMergeFile,
      DataFileValue dfv, Set<String> unusedWalLogs, Set<FileRef> filesInUseByScans, long flushId) {
    synchronized (timeLock) {
      if (maxCommittedTime > persistedTime)
        persistedTime = maxCommittedTime;

      String time = tabletTime.getMetadataValue(persistedTime);
      MasterMetadataUtil.updateTabletDataFile(getTabletServer(), extent, newDatafile, absMergeFile,
          dfv, time, filesInUseByScans, tabletServer.getClientAddressString(),
          tabletServer.getLock(), unusedWalLogs, lastLocation, flushId);
    }
  }

  TabletResourceManager getTabletResources() {
    return tabletResources;
  }

  DatafileManager getDatafileManager() {
    return datafileManager;
  }

  TabletMemory getTabletMemory() {
    return tabletMemory;
  }

  public long getAndUpdateTime() {
    return tabletTime.getAndUpdateTime();
  }

  public void flushComplete(long flushId) {
    lastLocation = null;
    dataSourceDeletions.incrementAndGet();
    tabletMemory.finishedMinC();
    lastFlushID = flushId;
    computeNumEntries();
  }

  public TServerInstance resetLastLocation() {
    TServerInstance result = lastLocation;
    lastLocation = null;
    return result;
  }

  synchronized public void addActiveScans(ScanDataSource scanDataSource) {
    activeScans.add(scanDataSource);
  }

  public int removeScan(ScanDataSource scanDataSource) {
    activeScans.remove(scanDataSource);
    return activeScans.size();
  }

  synchronized public void setLastCompactionID(Long compactionId) {
    if (compactionId != null)
      this.lastCompactID = compactionId;
  }

  public void removeMajorCompactionQueuedReason(MajorCompactionReason reason) {
    majorCompactionQueued.remove(reason);
  }

  public void minorCompactionWaitingToStart() {
    minorCompactionState = CompactionState.WAITING_TO_START;
  }

  public void minorCompactionStarted() {
    minorCompactionState = CompactionState.IN_PROGRESS;
  }

  public void minorCompactionComplete() {
    minorCompactionState = null;
  }

  public TabletStats getTabletStats() {
    return timer.getTabletStats();
  }

  public AtomicLong getScannedCounter() {
    return scannedCount;
  }
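  /*
   * createTabletDirectory() below picks a volume and then retries until the directory exists: the
   * default tablet (null end row) reuses Constants.DEFAULT_TABLET_LOCATION, while every other
   * tablet gets a fresh name from the UniqueNameAllocator so two splits can never collide on a
   * directory. The returned path is qualified against the chosen volume's FileSystem, roughly of
   * the form (directory name here is illustrative):
   *
   *   volume + "/" + tableId + "/t-0001234"   // generated name, qualified to hdfs://... form
   *
   * Transient HDFS errors are logged and retried after a 3 second sleep rather than propagated,
   * since a split cannot proceed without the directory.
   */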
  private static String createTabletDirectory(VolumeManager fs, String tableId, Text endRow) {
    String lowDirectory;

    UniqueNameAllocator namer = UniqueNameAllocator.getInstance();
    String volume = fs.choose(Optional.of(tableId), ServerConstants.getBaseUris())
        + Constants.HDFS_TABLES_DIR + Path.SEPARATOR;

    while (true) {
      try {
        if (endRow == null) {
          lowDirectory = Constants.DEFAULT_TABLET_LOCATION;
          Path lowDirectoryPath = new Path(volume + "/" + tableId + "/" + lowDirectory);
          if (fs.exists(lowDirectoryPath) || fs.mkdirs(lowDirectoryPath)) {
            FileSystem pathFs = fs.getVolumeByPath(lowDirectoryPath).getFileSystem();
            return lowDirectoryPath.makeQualified(pathFs.getUri(), pathFs.getWorkingDirectory())
                .toString();
          }
          log.warn("Failed to create " + lowDirectoryPath + " for unknown reason");
        } else {
          lowDirectory = "/" + Constants.GENERATED_TABLET_DIRECTORY_PREFIX + namer.getNextName();
          Path lowDirectoryPath = new Path(volume + "/" + tableId + "/" + lowDirectory);
          if (fs.exists(lowDirectoryPath))
            throw new IllegalStateException("Dir exists when it should not " + lowDirectoryPath);
          if (fs.mkdirs(lowDirectoryPath)) {
            FileSystem lowDirectoryFs = fs.getVolumeByPath(lowDirectoryPath).getFileSystem();
            return lowDirectoryPath
                .makeQualified(lowDirectoryFs.getUri(), lowDirectoryFs.getWorkingDirectory())
                .toString();
          }
        }
      } catch (IOException e) {
        log.warn(e);
      }

      log.warn("Failed to create dir for tablet in table " + tableId + " in volume " + volume
          + " ... will retry");
      sleepUninterruptibly(3, TimeUnit.SECONDS);
    }
  }

  public Map<Long,List<FileRef>> getBulkIngestedFiles() {
    return new HashMap<Long,List<FileRef>>(bulkImported.asMap());
  }

  public void cleanupBulkLoadedFiles(Set<Long> tids) {
    for (Long tid : tids) {
      bulkImported.invalidate(tid);
    }
  }
}
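/*
 * A hedged end-to-end sketch of how a tablet server thread might drive the lifecycle methods in
 * this class; "tablet" is a hypothetical local, not part of this file:
 *
 *   if (tablet.needsSplit()) {
 *     TreeMap<KeyExtent,TabletData> children = tablet.split(null);  // null = let the tablet pick
 *   } else {
 *     tablet.initiateMajorCompaction(MajorCompactionReason.NORMAL);
 *   }
 *
 * Splitting is always checked first, mirroring the "defer to splitting" check inside
 * majorCompact() itself.
 */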