org.apache.cassandra.db.Table.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.cassandra.db.Table.java.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.cassandra.db;

import java.io.IOError;
import java.lang.management.ManagementFactory;
import java.util.*;
import java.io.IOException;
import java.io.File;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.concurrent.Future;

import com.google.common.base.Function;
import com.google.common.collect.Iterables;

import org.apache.cassandra.config.*;
import org.apache.cassandra.db.commitlog.CommitLog;
import org.apache.cassandra.dht.LocalToken;
import org.apache.cassandra.io.sstable.SSTableDeletingReference;
import org.apache.cassandra.io.sstable.SSTableReader;
import org.apache.cassandra.io.util.FileUtils;

import javax.management.MBeanServer;
import javax.management.ObjectName;

import org.apache.commons.lang.ArrayUtils;

import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.db.filter.*;
import org.apache.cassandra.utils.FBUtilities;
import org.cliffc.high_scale_lib.NonBlockingHashMap;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Represents a keyspace ("Table" in this era's Cassandra terminology): the container for a set of
 * column families that share a name, on-disk data directories, and a periodic flush schedule.
 *
 * Exactly one Table instance exists per keyspace name; obtain it via {@link #open(String)}.
 * Writes enter through {@link #apply}, which appends to the commitlog and then updates the
 * relevant memtables while holding the read side of {@link #flusherLock}; memtables that filled
 * up during the write are flushed only after the lock is released.
 */
public class Table {
    /** Name of the keyspace holding Cassandra's internal bookkeeping data. */
    public static final String SYSTEM_TABLE = "system";

    private static final Logger logger = LoggerFactory.getLogger(Table.class);
    /* Per-keyspace subdirectory (under each data directory) that holds snapshots. */
    private static final String SNAPSHOT_SUBDIR_NAME = "snapshots";
    /* accesses to CFS.memtable should acquire this for thread safety.  only switchMemtable should acquire the writeLock. */
    static final ReentrantReadWriteLock flusherLock = new ReentrantReadWriteLock(true);

    /* Single shared timer driving the periodic memtable-expiration check for every keyspace. */
    private static Timer flushTimer = new Timer("FLUSH-TIMER");

    // This is a result of pushing down the point in time when storage directories get created.  It used to happen in
    // CassandraDaemon, but it is possible to call Table.open without a running daemon, so it made sense to ensure
    // proper directories here.
    static {
        try {
            DatabaseDescriptor.createAllDirectories();
        } catch (IOException ex) {
            // Directory creation failure is fatal: nothing in this class can work without data dirs.
            throw new RuntimeException(ex);
        }
    }

    /** Table objects, one per keyspace.  only one instance should ever exist for any given keyspace. */
    private static final Map<String, Table> instances = new NonBlockingHashMap<String, Table>();

    /* Table (keyspace) name. */
    public final String name;
    /* ColumnFamilyStore per column family, keyed by cf id.
     * NOTE(review): this is a plain HashMap, but dropCf/initCf/renameCf mutate it while apply()
     * reads it under only flusherLock's READ lock — confirm schema changes are externally
     * serialized against writes, otherwise this is a visibility/concurrency hazard. */
    public final Map<Integer, ColumnFamilyStore> columnFamilyStores = new HashMap<Integer, ColumnFamilyStore>(); // TODO make private again
    // cache application CFs since Range queries ask for them a _lot_
    // NOTE(review): not referenced anywhere in this file — possibly dead, or used via reflection; verify before removing.
    private SortedSet<String> applicationColumnFamilies;
    /* Periodic task (scheduled on flushTimer by the constructor) that flushes expired memtables;
     * cancelled by clear(). */
    private final TimerTask flushTask;
    /* Striped locks guarding the read-reconcile-apply-update sequence for secondary indexes in
     * apply(); a mutation key always maps to the same stripe (see indexLockFor). */
    private final Object[] indexLocks;

    /**
     * Returns the singleton Table for the given keyspace, creating it (directories, column family
     * stores, compacted-file cleanup) on first access.
     *
     * @param table keyspace name
     * @return the shared Table instance for that keyspace
     */
    public static Table open(String table) {
        Table tableInstance = instances.get(table);
        if (tableInstance == null) {
            // instantiate the Table.  we could use putIfAbsent but it's important to making sure it is only done once
            // per keyspace, so we synchronize and re-check before doing it.
            synchronized (Table.class) {
                tableInstance = instances.get(table);
                if (tableInstance == null) {
                    // do some housekeeping on the column families.
                    Collection<Runnable> systemTableUpdates = new ArrayList<Runnable>();
                    for (CFMetaData cfm : DatabaseDescriptor.getTableDefinition(table).cfMetaData().values()) {
                        ColumnFamilyStore.scrubDataDirectories(table, cfm.cfName);
                        systemTableUpdates.addAll(ColumnFamilyStore.deleteCompactedFiles(table, cfm.cfName));
                    }
                    tableInstance = new Table(table);
                    instances.put(table, tableInstance);

                    // run deferred system-table updates only after the instance is registered
                    for (Runnable r : systemTableUpdates)
                        r.run();
                }
            }
        }
        return tableInstance;
    }

    /**
     * Removes the Table instance for the given keyspace from the instance map and cancels its
     * periodic flush task.
     *
     * @param table keyspace name
     * @return the removed Table, or null if none was open
     */
    public static Table clear(String table) throws IOException {
        synchronized (Table.class) {
            Table t = instances.remove(table);
            if (t != null)
                t.flushTask.cancel();
            return t;
        }
    }

    /** @return an unmodifiable view of all column family stores in this keyspace. */
    public Collection<ColumnFamilyStore> getColumnFamilyStores() {
        return Collections.unmodifiableCollection(columnFamilyStores.values());
    }

    /**
     * Looks up the store for a column family by name.
     *
     * @throws IllegalArgumentException if no such table/cf pair is defined
     */
    public ColumnFamilyStore getColumnFamilyStore(String cfName) {
        Integer id = CFMetaData.getId(name, cfName);
        if (id == null)
            throw new IllegalArgumentException(String.format("Unknown table/cf pair (%s.%s)", name, cfName));
        return columnFamilyStores.get(id);
    }

    /**
     * Do a cleanup of keys that do not belong locally.
     *
     * @throws RuntimeException if invoked on the system keyspace
     */
    public void forceCleanup() {
        if (name.equals(SYSTEM_TABLE))
            throw new RuntimeException("Cleanup of the system table is neither necessary nor wise");

        for (ColumnFamilyStore cfStore : columnFamilyStores.values())
            cfStore.forceCleanup();
    }

    /**
     * Take a snapshot of the entire set of column families with a given timestamp.
     * 
     * @param clientSuppliedName the tag associated with the name of the snapshot.  This
     *                           value can be null.
     */
    public void snapshot(String clientSuppliedName) {
        // one shared name so all CFs in this keyspace land in the same snapshot
        String snapshotName = getTimestampedSnapshotName(clientSuppliedName);

        for (ColumnFamilyStore cfStore : columnFamilyStores.values()) {
            cfStore.snapshot(snapshotName);
        }
    }

    /**
     * @param clientSuppliedName optional tag; may be null or empty.
     * @return the current time in millis, with "-tag" appended when a non-empty tag was supplied
     */
    public static String getTimestampedSnapshotName(String clientSuppliedName) {
        String snapshotName = Long.toString(System.currentTimeMillis());
        if (clientSuppliedName != null && !clientSuppliedName.equals("")) {
            snapshotName = snapshotName + "-" + clientSuppliedName;
        }
        return snapshotName;
    }

    /**
     * Clear all the snapshots for a given table.
     * Recursively deletes the "snapshots" subdirectory under every configured data directory.
     */
    public void clearSnapshot() throws IOException {
        for (String dataDirPath : DatabaseDescriptor.getAllDataFileLocations()) {
            String snapshotPath = dataDirPath + File.separator + name + File.separator + SNAPSHOT_SUBDIR_NAME;
            File snapshotDir = new File(snapshotPath);
            if (snapshotDir.exists()) {
                if (logger.isDebugEnabled())
                    logger.debug("Removing snapshot directory " + snapshotPath);
                FileUtils.deleteRecursive(snapshotDir);
            }
        }
    }

    /*
     * This method is an ADMIN operation to force compaction
     * of all SSTables on disk.  Submission is asynchronous via the CompactionManager.
     */
    public void forceCompaction() {
        for (ColumnFamilyStore cfStore : columnFamilyStores.values())
            CompactionManager.instance.submitMajor(cfStore);
    }

    /**
     * @return A list of open SSTableReaders (TODO: ensure that the caller doesn't modify these).
     */
    public List<SSTableReader> getAllSSTables() {
        List<SSTableReader> list = new ArrayList<SSTableReader>();
        for (ColumnFamilyStore cfStore : columnFamilyStores.values())
            list.addAll(cfStore.getSSTables());
        return list;
    }

    /**
     * Private constructor — use {@link #open(String)}.  Creates per-keyspace data directories,
     * instantiates a ColumnFamilyStore for each defined column family, and schedules the
     * periodic memtable-expiration flush task.
     */
    private Table(String table) {
        name = table;
        // stripe count scales with write concurrency to keep index-lock contention low
        indexLocks = new Object[DatabaseDescriptor.getConcurrentWriters() * 8];
        for (int i = 0; i < indexLocks.length; i++)
            indexLocks[i] = new Object();
        // create data directories.
        for (String dataDir : DatabaseDescriptor.getAllDataFileLocations()) {
            try {
                String keyspaceDir = dataDir + File.separator + table;
                FileUtils.createDirectory(keyspaceDir);

                // remove the deprecated streaming directory.
                File streamingDir = new File(keyspaceDir, "stream");
                if (streamingDir.exists())
                    FileUtils.deleteRecursive(streamingDir);
            } catch (IOException ex) {
                throw new IOError(ex);
            }
        }

        // copy the metadata collection so concurrent schema changes can't upset iteration
        for (CFMetaData cfm : new ArrayList<CFMetaData>(
                DatabaseDescriptor.getTableDefinition(table).cfMetaData().values())) {
            ColumnFamilyStore cfs = ColumnFamilyStore.createColumnFamilyStore(table, cfm.cfName);
            columnFamilyStores.put(cfm.cfId, cfs);
        }

        // check 10x as often as the lifetime, so we can exceed lifetime by 10% at most
        // NOTE(review): a lifetime under 10 ms yields checkMs == 0, which Timer.schedule
        // rejects — confirm configuration enforces a sane minimum.
        int checkMs = DatabaseDescriptor.getMemtableLifetimeMS() / 10;
        flushTask = new TimerTask() {
            public void run() {
                for (ColumnFamilyStore cfs : columnFamilyStores.values()) {
                    cfs.forceFlushIfExpired();
                }
            }
        };
        flushTimer.schedule(flushTask, checkMs, checkMs);
    }

    /** removes a cf from internal structures (doesn't change disk files).
     *  Flushes the cf's memtable first so no buffered writes are lost, then unregisters its MBean.
     *
     *  @throws IOException wrapping any flush failure
     */
    public void dropCf(Integer cfId) throws IOException {
        assert columnFamilyStores.containsKey(cfId);
        ColumnFamilyStore cfs = columnFamilyStores.remove(cfId);
        if (cfs != null) {
            try {
                cfs.forceBlockingFlush();
            } catch (ExecutionException e) {
                throw new IOException(e);
            } catch (InterruptedException e) {
                // NOTE(review): the interrupt status is not restored here
                // (Thread.currentThread().interrupt()) — callers lose the interruption signal.
                throw new IOException(e);
            }

            cfs.unregisterMBean();
        }
    }

    /** adds a cf to internal structures (ends up creating disk files). */
    public void initCf(Integer cfId, String cfName) {
        assert !columnFamilyStores.containsKey(cfId) : String.format(
                "tried to init %s as %s, but already used by %s", cfName, cfId, columnFamilyStores.get(cfId));
        columnFamilyStores.put(cfId, ColumnFamilyStore.createColumnFamilyStore(name, cfName));
    }

    /** basically a combined drop and add: flushes the old store, then re-creates it under the new name. */
    public void renameCf(Integer cfId, String newName) throws IOException {
        dropCf(cfId);
        initCf(cfId, newName);
    }

    /**
     * Reads a single row through the filter's column family store.
     *
     * @return a Row whose columnFamily may be null if nothing matched
     */
    public Row getRow(QueryFilter filter) throws IOException {
        ColumnFamilyStore cfStore = getColumnFamilyStore(filter.getColumnFamilyName());
        ColumnFamily columnFamily = cfStore.getColumnFamily(filter);
        return new Row(filter.key, columnFamily);
    }

    /**
     * This method adds the row to the Commit Log associated with this table.
     * Once this happens the data associated with the individual column families
     * is also written to the column family store's memtable.
     *
     * For column families with indexed columns, the old indexed values are read, obsolete
     * updates dropped, the mutation applied, and index entries added/removed — all under a
     * per-key stripe lock so concurrent mutations of the same key can't interleave.
     * Memtables that became full are flushed only after flusherLock is released.
     */
    public void apply(RowMutation mutation, Object serializedMutation, boolean writeCommitLog) throws IOException {
        HashMap<ColumnFamilyStore, Memtable> memtablesToFlush = new HashMap<ColumnFamilyStore, Memtable>(2);

        // write the mutation to the commitlog and memtables
        flusherLock.readLock().lock();
        try {
            // commitlog first: a crash after this point can replay the mutation
            if (writeCommitLog)
                CommitLog.instance().add(mutation, serializedMutation);

            DecoratedKey key = StorageService.getPartitioner().decorateKey(mutation.key());
            for (ColumnFamily cf : mutation.getColumnFamilies()) {
                ColumnFamilyStore cfs = columnFamilyStores.get(cf.id());
                if (cfs == null) {
                    logger.error("Attempting to mutate non-existant column family " + cf.id());
                    continue;
                }

                // collect the indexed columns this mutation actually touches (lazily allocated)
                SortedSet<byte[]> mutatedIndexedColumns = null;
                for (byte[] column : cfs.getIndexedColumns()) {
                    if (cf.getColumnNames().contains(column)) {
                        if (mutatedIndexedColumns == null)
                            mutatedIndexedColumns = new TreeSet<byte[]>(FBUtilities.byteArrayComparator);
                        mutatedIndexedColumns.add(column);
                    }
                }

                if (mutatedIndexedColumns == null) {
                    // just update the actual value, no extra synchronization
                    applyCF(cfs, key, cf, memtablesToFlush);
                } else {
                    synchronized (indexLockFor(mutation.key())) {
                        // read old indexed values
                        QueryFilter filter = QueryFilter.getNamesFilter(key,
                                new QueryPath(cfs.getColumnFamilyName()), mutatedIndexedColumns);
                        ColumnFamily oldIndexedColumns = cfs.getColumnFamily(filter);

                        // ignore obsolete column updates
                        if (oldIndexedColumns != null) {
                            for (IColumn oldColumn : oldIndexedColumns) {
                                // if reconciliation keeps the old value, the update is a no-op:
                                // drop it from the mutation and from both index bookkeeping sets
                                if (cfs.metadata.reconciler
                                        .reconcile((Column) oldColumn, (Column) cf.getColumn(oldColumn.name()))
                                        .equals(oldColumn)) {
                                    cf.remove(oldColumn.name());
                                    mutatedIndexedColumns.remove(oldColumn.name());
                                    oldIndexedColumns.remove(oldColumn.name());
                                }
                            }
                        }

                        // apply the mutation
                        applyCF(cfs, key, cf, memtablesToFlush);

                        // add new index entries
                        for (byte[] columnName : mutatedIndexedColumns) {
                            IColumn column = cf.getColumn(columnName);
                            DecoratedKey<LocalToken> valueKey = cfs.getIndexKeyFor(columnName, column.value());
                            ColumnFamily cfi = cfs.newIndexedColumnFamily(columnName);
                            // index row: value -> set of row keys; the column value itself is empty
                            cfi.addColumn(new Column(mutation.key(), ArrayUtils.EMPTY_BYTE_ARRAY, column.clock()));
                            applyCF(cfs.getIndexedColumnFamilyStore(columnName), valueKey, cfi, memtablesToFlush);
                        }

                        // remove the old index entries
                        if (oldIndexedColumns != null) {
                            int localDeletionTime = (int) (System.currentTimeMillis() / 1000);
                            for (Map.Entry<byte[], IColumn> entry : oldIndexedColumns.getColumnsMap().entrySet()) {
                                byte[] columnName = entry.getKey();
                                IColumn column = entry.getValue();
                                DecoratedKey<LocalToken> valueKey = cfs.getIndexKeyFor(columnName, column.value());
                                ColumnFamily cfi = cfs.newIndexedColumnFamily(columnName);
                                cfi.deleteColumn(mutation.key(), localDeletionTime, column.clock());
                                applyCF(cfs.getIndexedColumnFamilyStore(columnName), valueKey, cfi,
                                        memtablesToFlush);
                            }
                        }
                    }
                }

                // keep the row cache in sync with the write
                ColumnFamily cachedRow = cfs.getRawCachedRow(key);
                if (cachedRow != null)
                    cachedRow.addAll(cf);
            }
        } finally {
            flusherLock.readLock().unlock();
        }

        // flush memtables that got filled up.  usually mTF will be empty and this will be a no-op
        for (Map.Entry<ColumnFamilyStore, Memtable> entry : memtablesToFlush.entrySet())
            entry.getKey().maybeSwitchMemtable(entry.getValue(), writeCommitLog);
    }

    /**
     * Applies a pre-built index row directly to an index column family store, under the same
     * flusherLock / per-row-key stripe-lock discipline as {@link #apply}; never writes the
     * commitlog (note the {@code false} passed to maybeSwitchMemtable).
     */
    public void applyIndexedCF(ColumnFamilyStore indexedCfs, DecoratedKey rowKey, DecoratedKey indexedKey,
            ColumnFamily indexedColumnFamily) {
        Memtable memtableToFlush;
        flusherLock.readLock().lock();
        try {
            synchronized (indexLockFor(rowKey.key)) {
                memtableToFlush = indexedCfs.apply(indexedKey, indexedColumnFamily);
            }
        } finally {
            flusherLock.readLock().unlock();
        }

        // flush outside the lock, mirroring apply()
        if (memtableToFlush != null)
            indexedCfs.maybeSwitchMemtable(memtableToFlush, false);
    }

    /** Maps a row key to its index-lock stripe; the same key always yields the same lock. */
    private Object indexLockFor(byte[] key) {
        return indexLocks[Math.abs(Arrays.hashCode(key) % indexLocks.length)];
    }

    /** Applies one cf's worth of a mutation; records the memtable for deferred flush if it filled up. */
    private static void applyCF(ColumnFamilyStore cfs, DecoratedKey key, ColumnFamily columnFamily,
            HashMap<ColumnFamilyStore, Memtable> memtablesToFlush) {
        Memtable memtableToFlush = cfs.apply(key, columnFamily);
        if (memtableToFlush != null)
            memtablesToFlush.put(cfs, memtableToFlush);
    }

    /**
     * Kicks off a flush of every column family store in this keyspace.
     *
     * @return futures for the flushes that were actually started (stores with nothing to flush
     *         return null and are skipped)
     */
    public List<Future<?>> flush() throws IOException {
        List<Future<?>> futures = new ArrayList<Future<?>>();
        for (Integer cfId : columnFamilyStores.keySet()) {
            Future<?> future = columnFamilyStores.get(cfId).forceFlush();
            if (future != null)
                futures.add(future);
        }
        return futures;
    }

    // for binary load path.  skips commitlog.
    // NOTE(review): cf ids are decoded from the column NAME bytes here; an unknown id would make
    // cfStore null and NPE on the next line — presumably the binary loader guarantees valid ids.
    void load(RowMutation rowMutation) throws IOException {
        DecoratedKey key = StorageService.getPartitioner().decorateKey(rowMutation.key());
        for (ColumnFamily columnFamily : rowMutation.getColumnFamilies()) {
            Collection<IColumn> columns = columnFamily.getSortedColumns();
            for (IColumn column : columns) {
                ColumnFamilyStore cfStore = columnFamilyStores.get(FBUtilities.byteArrayToInt(column.name()));
                cfStore.applyBinary(key, column.value());
            }
        }
    }

    /**
     * Picks a data directory with enough room for a compacted file of the given size.
     * If none qualifies, requests a GC (to unmap and delete compacted SSTables), waits,
     * and retries once.
     *
     * @return a usable directory path, or null if still none has enough space after the retry
     */
    public String getDataFileLocation(long expectedCompactedFileSize) {
        String path = DatabaseDescriptor.getDataFileLocationForTable(name, expectedCompactedFileSize);
        if (path == null) {
            // retry after GCing to force unmap of compacted SSTables so they can be deleted
            StorageService.instance.requestGC();
            try {
                Thread.sleep(SSTableDeletingReference.RETRY_DELAY * 2);
            } catch (InterruptedException e) {
                throw new AssertionError(e);
            }
            path = DatabaseDescriptor.getDataFileLocationForTable(name, expectedCompactedFileSize);
        }
        return path;
    }

    /** Builds the path {@code <dataDir>/<table>/snapshots/<snapshotName>}. */
    public static String getSnapshotPath(String dataDirPath, String tableName, String snapshotName) {
        return dataDirPath + File.separator + tableName + File.separator + SNAPSHOT_SUBDIR_NAME + File.separator
                + snapshotName;
    }

    /**
     * @return a lazy view over every configured keyspace; each Table is opened on demand
     *         as the iterable is traversed
     */
    public static Iterable<Table> all() {
        Function<String, Table> transformer = new Function<String, Table>() {
            public Table apply(String tableName) {
                return Table.open(tableName);
            }
        };
        return Iterables.transform(DatabaseDescriptor.getTables(), transformer);
    }

    /**
     * Performs a synchronous truncate operation, effectively deleting all data
     * from the column family cfname
     * @param cfname
     * @throws IOException
     * @throws ExecutionException
     * @throws InterruptedException
     */
    public void truncate(String cfname) throws InterruptedException, ExecutionException, IOException {
        logger.debug("Truncating...");
        ColumnFamilyStore cfs = getColumnFamilyStore(cfname);
        // truncate, blocking
        cfs.truncate().get();
        logger.debug("Truncation done.");
    }
}