org.stem.db.compaction.CompactionManager.java Source code

Introduction

Here is the source code for org.stem.db.compaction.CompactionManager.java. The class drives compaction on a Stem storage node: it walks every mount point, copies the live blobs from FULL FatFiles into a temporary FatFile, swaps that file in place of an original one, updates blob metadata in the MetaStore, and marks the drained originals as BLANK.

Source

/*
 * Copyright 2014 Alexey Plotnik
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.stem.db.compaction;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.stem.client.MetaStoreClient;
import org.stem.db.*;
import org.stem.domain.BlobDescriptor;
import org.stem.domain.ExtendedBlobDescriptor;
import org.stem.transport.ops.WriteBlobMessage;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Queue;

public class CompactionManager {

    private static final Logger logger = LoggerFactory.getLogger(CompactionManager.class);

    public static final CompactionManager instance;
    public static final String TMP_DIR = System.getProperty("java.io.tmpdir"); // TODO: move to StorageNodeDescriptor
    private float threshold = StorageNodeDescriptor.getCompactionThreshold();
    private static MetaStoreClient client;

    static {
        instance = new CompactionManager();
    }

    public CompactionManager() {
        client = new MetaStoreClient(StorageNodeDescriptor.cluster().getMetaStoreContactPoints());
        client.start(); // TODO: close ?
    }

    public void performMajorCompaction() {
        try {
            Collection<MountPoint> mountPoints = Layout.getInstance().getMountPoints().values();
            for (MountPoint mp : mountPoints) {
                performSinglePassCompaction(mp); // TODO: every disk should be compacted in a separate thread
            }
        } catch (Exception e) {
            throw new RuntimeException("Compaction was stopped unexpectedly", e);
        }
    }

    // TODO: update DataTracker
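    /*
     * Single pass over one mount point:
     *   1. Skip the disk unless the deleted-data ratio and the number of FULL
     *      candidate files both exceed their thresholds.
     *   2. Scan every candidate FatFile and copy live, consistent blobs into a
     *      temporary FatFile; deleted blobs and blobs with missing or
     *      inconsistent meta are dropped.
     *   3. Whenever the temporary file fills up, swap it in place of one of the
     *      drained originals and update the MetaStore.
     *   4. Write whatever is left in the temporary file back through
     *      StorageService and mark the remaining originals as BLANK.
     */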
    private void performSinglePassCompaction(MountPoint mp) throws IOException {
        // TODO: lock?
        if (!exceedThreshold(mp))
            return;

        // Get FULL files ready for compaction
        Collection<FatFile> scanReadyFFs = mp.findReadyForCompaction();

        if (!exceedCandidatesThreshold(scanReadyFFs))
            return;

        Queue<FatFile> originalFFs = new LinkedList<FatFile>();

        FatFile temporaryFF = null;
        int iterated = 0;
        int omitted = 0;
        for (FatFile currentFF : scanReadyFFs) {
            FFScanner scanner = new FFScanner(currentFF);

            while (scanner.hasNext()) {
                iterated += 1;
                Blob blob = scanner.next();
                String blobKey = Hex.encodeHexString(blob.key());
                if (blob.deleted()) {
                    omitted += 1;
                    mp.getDataTracker().removeDeletes(blob.key(), blob.size(), currentFF.id);
                    logger.info("key 0x{} omitted as deleted", blobKey);
                    continue;
                }

                // Compare the local view of the blob against the MetaStore's view;
                // blobs whose meta is missing or inconsistent are not carried over
                ExtendedBlobDescriptor localDescriptor = new ExtendedBlobDescriptor(blob.key(), blob.size(),
                        mp.uuid, blob.getDescriptor());
                ExtendedBlobDescriptor remoteDescriptor = client.readMeta(blob.key(), mp.uuid);
                if (null == remoteDescriptor) {
                    omitted += 1;
                    logger.info("key 0x{} omitted as no meta info", blobKey);
                    continue;
                }
                // Since writes are eventually consistent: if the blob has an invalid offset, just skip it
                if (!descriptorsAreConsistent(localDescriptor, remoteDescriptor)) {
                    omitted += 1;
                    logger.info("key 0x{} omitted as inconsistent meta", blobKey);
                    continue;
                }

                if (null == temporaryFF) {
                    temporaryFF = createTemporaryFF(currentFF.id);
                }

                if (temporaryFF.hasSpaceFor(blob)) {
                    BlobDescriptor descriptor = temporaryFF.writeBlob(blob); // TODO: hold descriptors for a subsequent MetaStore updates
                    logger.info("key 0x{} is written to temporaryFF", Hex.encodeHexString(blob.key()));
                    continue;
                }

                // If we get here, the blob could not be written because temporaryFF is full

                // mark temporaryFF FULL
                temporaryFF.writeIndex();
                temporaryFF.writeFullMarker();

                // Replace original FF with temporary FF
                FatFile originalFF = originalFFs.poll();
                replaceFF(originalFF, temporaryFF);
                updateMeta(originalFF, mp);
                markAllOriginalFFsAsBlank(originalFFs, mp);

                temporaryFF.close(); // TODO: this must be strictly synchronized
                FileUtils.forceDelete(new File(temporaryFF.getPath())); // remove file
                temporaryFF = null;

                // Once the temporary file has exceeded its capacity, create another one
                temporaryFF = createTemporaryFF(currentFF.id);
                // And write blob to it
                BlobDescriptor descriptor = temporaryFF.writeBlob(blob); // TODO: hold descriptors for a subsequent MetaStore updates
            }

            originalFFs.add(currentFF); // When compaction finishes, this file will be marked as BLANK
        }

        // All candidates have been iterated.
        // Write the rest of the temporary FatFile back through the StorageNode as usual and mark the iterated FFs as BLANK
        if (null != temporaryFF) {
            FFScanner scanner = new FFScanner(temporaryFF);
            int restBlobs = 0;
            while (scanner.hasNext()) {

                restBlobs += 1;
                Blob blob = scanner.next();
                WriteBlobMessage message = new WriteBlobMessage(mp.uuid, blob.key(), blob.data());// TODO: direct access to fields?
                mp.getDataTracker().remove(blob.key(), blob.size());

                // TODO: too heterogeneous. Should be Blob.Descriptor or something like that
                StorageService.instance.write(message);
                logger.info("key 0x{} moved", Hex.encodeHexString(blob.key()));
            }
            temporaryFF.close();
            FileUtils.forceDelete(new File(temporaryFF.getPath())); // remove file
            temporaryFF = null;
        }

        // Mark the rest of files as BLANK
        markAllOriginalFFsAsBlank(originalFFs, mp);
        if (null != temporaryFF) {
            FileUtils.forceDelete(new File(temporaryFF.getPath())); // remove file
            temporaryFF = null;
        }

        // TODO: delete temporary file
    }

    private static boolean descriptorsAreConsistent(ExtendedBlobDescriptor local, ExtendedBlobDescriptor remote) {
        return local.getDisk().equals(remote.getDisk()) && local.getBodyOffset() == remote.getBodyOffset()
                && local.getLength() == remote.getLength();
    }

    private static void markAllOriginalFFsAsBlank(Queue<FatFile> originalFFs, MountPoint mp) throws IOException {
        while (!originalFFs.isEmpty()) {
            FatFile ff = originalFFs.poll();
            ff.reallocate();
            StorageService.instance.submitFF(ff, mp); // TODO: inside we must re-count DataTracker
        }
    }

    private static boolean exceedThreshold(MountPoint mp) {
        long deletesSizeInBytes = mp.getDataTracker().getDeletesSizeInBytes();
        long totalSizeInBytes = mp.getDataTracker().getTotalSizeInBytes();

        if (0 == totalSizeInBytes)
            return false; // an empty disk has nothing to compact (also avoids a NaN ratio below)

        float ratio = (float) deletesSizeInBytes / totalSizeInBytes;

        if (ratio < StorageNodeDescriptor.getCompactionThreshold())
            return false;

        return true;
    }

    private boolean exceedCandidatesThreshold(Collection<FatFile> candidates) {
        if (candidates.isEmpty())
            return false;

        if (candidates.size() < 2)
            return false; // We may have a file ready for compaction, but with a single file there is nothing to merge

        return true;
    }

    private static void replaceFF(FatFile original, FatFile replacement) throws IOException {
        assert original.size() == replacement.size();
        // try-with-resources makes sure both streams are closed even if the copy fails
        try (FileInputStream in = new FileInputStream(replacement.getPath());
             FileOutputStream out = new FileOutputStream(original.getPath())) {
            IOUtils.copy(in, out);
        }
    }

    // TODO: the mount point should be obtainable from the FatFile instance, since the file is attached to its mount point
    private static void updateMeta(FatFile original, MountPoint mp) {
        // Update meta
        FFExtendedScanner scanner = new FFExtendedScanner(original);
        while (scanner.hasNext()) {
            ExtendedBlobDescriptor d = scanner.next();
            d.setDisk(mp.uuid);

            client.updateMeta(d.getKey(), d.getDisk(), original.id, d.getBodyOffset(), d.getLength());
        }
    }

    private static FatFile createTemporaryFF(int id) throws IOException {
        String path = TMP_DIR + File.separator + FatFileAllocator.buildFileName(id);
        File file = new File(path);
        FileUtils.deleteQuietly(file);
        FatFile ff = FatFileAllocator.create(path, StorageNodeDescriptor.getFatFileSizeInMb()); // TODO: check whether the file already exists

        //if (0 == ff.getPointer())  // This line and the same line in WriteController must be encapsulated in FatFile class
        ff.markActive();
        return ff;
    }
}
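
Usage

CompactionManager is exposed as a process-wide singleton. The sketch below is a hypothetical caller, not part of the original file; it assumes the surrounding Stem storage node has already initialized StorageNodeDescriptor, Layout and the MetaStore cluster, since both the constructor and the compaction pass depend on them.

// Hypothetical usage sketch: the singleton is created by the class's static
// initializer the first time CompactionManager is referenced.
CompactionManager manager = CompactionManager.instance;

// Compacts every mount point of this node, one after another, in the calling thread.
manager.performMajorCompaction();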