org.apache.cassandra.io.sstable.SSTableWriter.java — source code

Java tutorial

Introduction

Below is the full source code for org.apache.cassandra.io.sstable.SSTableWriter.java.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.cassandra.io.sstable;

import java.io.*;
import java.util.Set;
import java.util.concurrent.ExecutionException;

import org.apache.commons.lang.ArrayUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.*;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.io.AbstractCompactedRow;
import org.apache.cassandra.io.util.BufferedRandomAccessFile;
import org.apache.cassandra.io.util.SegmentedFile;
import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.utils.BloomFilter;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.WrappedRunnable;

/**
 * Writes the on-disk components of an SSTable (data, index, and bloom filter).
 * Keys must be appended in strictly ascending order; the writer tracks the last
 * key written to enforce this. When writing is finished, call
 * {@link #closeAndOpenReader()} to sync the files, strip the temporary marker
 * from their names, and obtain an {@link SSTableReader} over the result.
 */
public class SSTableWriter extends SSTable {
    // final: an SLF4J logger is a constant and must never be reassigned.
    private static final Logger logger = LoggerFactory.getLogger(SSTableWriter.class);

    // Writes the index and bloom-filter components; nulled out after close.
    private IndexWriter iwriter;
    // Collects potential segment boundaries for the data file (mmap/buffered access).
    private SegmentedFile.Builder dbuilder;
    // The -Data component being written.
    private final BufferedRandomAccessFile dataFile;
    // Last key appended; used to enforce ascending key order.
    private DecoratedKey lastWrittenKey;

    /**
     * Creates a writer using the cluster's configured partitioner and the column
     * family metadata derived from {@code filename}.
     *
     * @param filename path of the (temporary) data component to create
     * @param keyCount estimated number of keys; sizes the bloom filter
     */
    public SSTableWriter(String filename, long keyCount) throws IOException {
        this(filename, keyCount, DatabaseDescriptor.getCFMetaData(Descriptor.fromFilename(filename)),
                StorageService.getPartitioner());
    }

    /**
     * Creates a writer with explicit metadata and partitioner.
     *
     * @param filename    path of the (temporary) data component to create
     * @param keyCount    estimated number of keys; sizes the bloom filter
     * @param metadata    column family metadata for the table being written
     * @param partitioner partitioner used to order and decode keys
     */
    public SSTableWriter(String filename, long keyCount, CFMetaData metadata, IPartitioner partitioner)
            throws IOException {
        super(filename, metadata, partitioner);
        iwriter = new IndexWriter(desc, partitioner, keyCount);
        dbuilder = SegmentedFile.getBuilder(DatabaseDescriptor.getDiskAccessMode());
        dataFile = new BufferedRandomAccessFile(getFilename(), "rw",
                DatabaseDescriptor.getInMemoryCompactionLimit());
    }

    /**
     * Validates {@code decoratedKey} before a row append and returns the data-file
     * position at which the row will start.
     *
     * @throws IOException if the key is null or not strictly ascending w.r.t. the
     *                     previously written key
     */
    private long beforeAppend(DecoratedKey decoratedKey) throws IOException {
        if (decoratedKey == null) {
            throw new IOException("Keys must not be null.");
        }
        if (lastWrittenKey != null && lastWrittenKey.compareTo(decoratedKey) > 0) {
            logger.info("Last written key : " + lastWrittenKey);
            logger.info("Current key : " + decoratedKey);
            logger.info("Writing into file " + getFilename());
            throw new IOException("Keys must be written in ascending order.");
        }
        // First row always starts at offset 0; later rows start at the current pointer.
        return (lastWrittenKey == null) ? 0 : dataFile.getFilePointer();
    }

    /**
     * Records bookkeeping for a row that was just written: remembers the key for
     * ordering checks, registers a segment boundary, and appends an index entry.
     */
    private void afterAppend(DecoratedKey decoratedKey, long dataPosition) throws IOException {
        lastWrittenKey = decoratedKey;

        if (logger.isTraceEnabled())
            logger.trace("wrote " + decoratedKey + " at " + dataPosition);
        dbuilder.addPotentialBoundary(dataPosition);
        iwriter.afterAppend(decoratedKey, dataPosition);
    }

    /**
     * Appends a pre-serialized (compacted) row. The row writes its own size header
     * via {@code row.write}, so no placeholder is needed here.
     */
    public void append(AbstractCompactedRow row) throws IOException {
        long currentPosition = beforeAppend(row.key);
        FBUtilities.writeShortByteArray(row.key.key, dataFile);
        row.write(dataFile);
        estimatedRowSize.add(dataFile.getFilePointer() - currentPosition);
        estimatedColumnCount.add(row.columnCount());
        afterAppend(row.key, currentPosition);
    }

    /**
     * Serializes and appends a {@link ColumnFamily} row. Because the serialized
     * size is not known up front, a placeholder size is written, then patched in
     * place once serialization is complete.
     */
    public void append(DecoratedKey decoratedKey, ColumnFamily cf) throws IOException {
        long startPosition = beforeAppend(decoratedKey);
        FBUtilities.writeShortByteArray(decoratedKey.key, dataFile);
        // write placeholder for the row size, since we don't know it yet
        long sizePosition = dataFile.getFilePointer();
        dataFile.writeLong(-1);
        // write out row data
        int columnCount = ColumnFamily.serializer().serializeWithIndexes(cf, dataFile);
        // seek back and write the row size (not including the size Long itself)
        long endPosition = dataFile.getFilePointer();
        dataFile.seek(sizePosition);
        dataFile.writeLong(endPosition - (sizePosition + 8));
        // finally, reset for next row
        dataFile.seek(endPosition);
        afterAppend(decoratedKey, startPosition);
        estimatedRowSize.add(endPosition - startPosition);
        estimatedColumnCount.add(columnCount);
    }

    /**
     * Appends a raw pre-serialized row value (e.g. received via streaming).
     * The value's length is written as the row-size header.
     */
    public void append(DecoratedKey decoratedKey, byte[] value) throws IOException {
        long currentPosition = beforeAppend(decoratedKey);
        FBUtilities.writeShortByteArray(decoratedKey.key, dataFile);
        assert value.length > 0;
        dataFile.writeLong(value.length);
        dataFile.write(value);
        afterAppend(decoratedKey, currentPosition);
    }

    /**
     * Finalizes all components and opens a reader over them, stamping the sstable
     * with the current time as its max data age.
     */
    public SSTableReader closeAndOpenReader() throws IOException {
        return closeAndOpenReader(System.currentTimeMillis());
    }

    /**
     * Finalizes all components and opens a reader over them.
     *
     * @param maxDataAge timestamp bound recorded on the resulting reader
     */
    public SSTableReader closeAndOpenReader(long maxDataAge) throws IOException {
        // index and filter
        iwriter.close();

        // main data
        dataFile.close(); // calls force

        // remove the 'tmp' marker from all components
        final Descriptor newdesc = rename(desc);

        // persist row/column statistics asynchronously on the post-flush executor
        Runnable runnable = new WrappedRunnable() {
            protected void runMayThrow() throws IOException {
                StatisticsTable.persistSSTableStatistics(newdesc, estimatedRowSize, estimatedColumnCount);
            }
        };
        ColumnFamilyStore.submitPostFlush(runnable);

        // finalize in-memory state for the reader
        SegmentedFile ifile = iwriter.builder.complete(newdesc.filenameFor(SSTable.COMPONENT_INDEX));
        SegmentedFile dfile = dbuilder.complete(newdesc.filenameFor(SSTable.COMPONENT_DATA));
        SSTableReader sstable = SSTableReader.internalOpen(newdesc, metadata, partitioner, ifile, dfile,
                iwriter.summary, iwriter.bf, maxDataAge, estimatedRowSize, estimatedColumnCount);
        // release writer state; this writer must not be used again
        iwriter = null;
        dbuilder = null;
        return sstable;
    }

    /**
     * Renames every component of {@code tmpdesc} to its non-temporary name and
     * returns the resulting descriptor. Rename failures are fatal (IOError).
     */
    static Descriptor rename(Descriptor tmpdesc) {
        Descriptor newdesc = tmpdesc.asTemporary(false);
        try {
            for (String component : components)
                FBUtilities.renameWithConfirm(tmpdesc.filenameFor(component), newdesc.filenameFor(component));
        } catch (IOException e) {
            throw new IOError(e);
        }
        return newdesc;
    }

    /** @return the current write position in the data file. */
    public long getFilePointer() {
        return dataFile.getFilePointer();
    }

    /**
     * @return An estimate of the number of keys contained in the given data file,
     *         computed as file length divided by the mean sampled row size.
     *         Returns 0 for an empty data file.
     */
    private static long estimateRows(Descriptor desc, BufferedRandomAccessFile dfile) throws IOException {
        // collect sizes for the first 1000 keys, or first 100 megabytes of data
        final int SAMPLES_CAP = 1000, BYTES_CAP = (int) Math.min(100000000, dfile.length());
        int keys = 0;
        long dataPosition = 0;
        while (dataPosition < BYTES_CAP && keys < SAMPLES_CAP) {
            dfile.seek(dataPosition);
            FBUtilities.readShortByteArray(dfile);
            long dataSize = SSTableReader.readRowSize(dfile, desc);
            dataPosition = dfile.getFilePointer() + dataSize;
            keys++;
        }
        dfile.seek(0);
        if (keys == 0)
            // empty data file: no samples were taken; avoid dividing by zero below
            return 0;
        return dfile.length() / (dataPosition / keys);
    }

    /**
     * If either of the index or filter files are missing, rebuilds both by
     * scanning the data file, re-applying secondary-index mutations along the way.
     * TODO: Builds most of the in-memory state of the sstable, but doesn't actually open it.
     */
    private static void maybeRecover(Descriptor desc) throws IOException {
        logger.debug("In maybeRecover with Descriptor {}", desc);
        File ifile = new File(desc.filenameFor(SSTable.COMPONENT_INDEX));
        File ffile = new File(desc.filenameFor(SSTable.COMPONENT_FILTER));
        if (ifile.exists() && ffile.exists())
            // nothing to do
            return;

        ColumnFamilyStore cfs = Table.open(desc.ksname).getColumnFamilyStore(desc.cfname);
        Set<byte[]> indexedColumns = cfs.getIndexedColumns();
        // remove existing files
        ifile.delete();
        ffile.delete();

        // open the data file for input, and an IndexWriter for output
        BufferedRandomAccessFile dfile = new BufferedRandomAccessFile(desc.filenameFor(SSTable.COMPONENT_DATA), "r",
                8 * 1024 * 1024);
        IndexWriter iwriter;
        long estimatedRows;
        try {
            estimatedRows = estimateRows(desc, dfile);
            iwriter = new IndexWriter(desc, StorageService.getPartitioner(), estimatedRows);
        } catch (IOException e) {
            dfile.close();
            throw e;
        }

        // build the index and filter
        long rows = 0;
        try {
            DecoratedKey key;
            long dataPosition = 0;
            while (dataPosition < dfile.length()) {
                key = SSTableReader.decodeKey(StorageService.getPartitioner(), desc,
                        FBUtilities.readShortByteArray(dfile));
                long dataSize = SSTableReader.readRowSize(dfile, desc);
                if (!indexedColumns.isEmpty()) {
                    // skip bloom filter and column index
                    dfile.readFully(new byte[dfile.readInt()]);
                    dfile.readFully(new byte[dfile.readInt()]);

                    // index the column data
                    ColumnFamily cf = ColumnFamily.create(desc.ksname, desc.cfname);
                    ColumnFamily.serializer().deserializeFromSSTableNoColumns(cf, dfile);
                    int columns = dfile.readInt();
                    for (int i = 0; i < columns; i++) {
                        IColumn iColumn = cf.getColumnSerializer().deserialize(dfile);
                        if (indexedColumns.contains(iColumn.name())) {
                            DecoratedKey valueKey = cfs.getIndexKeyFor(iColumn.name(), iColumn.value());
                            ColumnFamily indexedCf = cfs.newIndexedColumnFamily(iColumn.name());
                            indexedCf.addColumn(new Column(key.key, ArrayUtils.EMPTY_BYTE_ARRAY, iColumn.clock()));
                            logger.debug("adding indexed column row mutation for key {}", valueKey);
                            Table.open(desc.ksname).applyIndexedCF(cfs.getIndexedColumnFamilyStore(iColumn.name()),
                                    key, valueKey, indexedCf);
                        }
                    }
                }

                iwriter.afterAppend(key, dataPosition);
                // advance to the start of the next row (skip any unread row data)
                dataPosition = dfile.getFilePointer() + dataSize;
                dfile.seek(dataPosition);
                rows++;
            }

            // flush the secondary-index memtables populated above
            for (byte[] column : cfs.getIndexedColumns()) {
                try {
                    cfs.getIndexedColumnFamilyStore(column).forceBlockingFlush();
                } catch (ExecutionException e) {
                    throw new RuntimeException(e);
                } catch (InterruptedException e) {
                    throw new AssertionError(e);
                }
            }
        } finally {
            // close each resource independently so a failure closing one
            // does not leak the other
            try {
                dfile.close();
            } catch (IOException e) {
                logger.error("Failed to close data file during recovery of " + desc, e);
            }
            try {
                iwriter.close();
            } catch (IOException e) {
                logger.error("Failed to close index writer during recovery of " + desc, e);
            }
        }

        // SLF4J substitutes {} placeholders, not printf-style %s
        logger.debug("estimated row count was {} of real count", ((double) estimatedRows) / rows);
    }

    /**
     * Removes the given SSTable from temporary status and opens it, rebuilding the non-essential portions of the
     * file if necessary.
     */
    public static SSTableReader recoverAndOpen(Descriptor desc) throws IOException {
        if (!desc.isLatestVersion)
            // TODO: streaming between different versions will fail: need support for
            // recovering other versions to provide a stable streaming api
            throw new RuntimeException(String.format("Cannot recover SSTable with version %s (current version %s).",
                    desc.version, Descriptor.CURRENT_VERSION));

        maybeRecover(desc);
        return SSTableReader.open(rename(desc));
    }

    /**
     * Encapsulates writing the index and filter for an SSTable. The state of this object is not valid until it has been closed.
     */
    static class IndexWriter {
        private final BufferedRandomAccessFile indexFile;
        public final Descriptor desc;
        public final IPartitioner partitioner;
        // segment boundaries for the index file
        public final SegmentedFile.Builder builder;
        // sampled index entries kept in memory for binary search
        public final IndexSummary summary;
        // bloom filter over all appended keys
        public final BloomFilter bf;

        /**
         * @param keyCount estimated key count; sizes the bloom filter
         */
        IndexWriter(Descriptor desc, IPartitioner part, long keyCount) throws IOException {
            this.desc = desc;
            this.partitioner = part;
            indexFile = new BufferedRandomAccessFile(desc.filenameFor(SSTable.COMPONENT_INDEX), "rw",
                    8 * 1024 * 1024);
            builder = SegmentedFile.getBuilder(DatabaseDescriptor.getIndexAccessMode());
            summary = new IndexSummary();
            bf = BloomFilter.getFilter(keyCount, 15);
        }

        /**
         * Records {@code key -> dataPosition} in the index file, the bloom filter,
         * the in-memory summary, and the segment-boundary builder.
         */
        public void afterAppend(DecoratedKey key, long dataPosition) throws IOException {
            bf.add(key.key);
            long indexPosition = indexFile.getFilePointer();
            FBUtilities.writeShortByteArray(key.key, indexFile);
            indexFile.writeLong(dataPosition);
            if (logger.isTraceEnabled())
                logger.trace("wrote index of " + key + " at " + indexPosition);

            summary.maybeAddEntry(key, indexPosition);
            builder.addPotentialBoundary(indexPosition);
        }

        /**
         * Closes the index and bloomfilter, making the public state of this writer valid for consumption.
         */
        public void close() throws IOException {
            // bloom filter
            FileOutputStream fos = new FileOutputStream(desc.filenameFor(SSTable.COMPONENT_FILTER));
            DataOutputStream stream = new DataOutputStream(fos);
            BloomFilter.serializer().serialize(bf, stream);
            stream.flush();
            fos.getFD().sync(); // ensure the filter is durable before the index
            stream.close();

            // index
            indexFile.getChannel().force(true);
            indexFile.close();

            // finalize in-memory index state
            summary.complete();
        }
    }
}