org.apache.jackrabbit.oak.plugins.segment.SegmentDataStoreBlobGCIT.java Source code

Introduction

Here is the source code for org.apache.jackrabbit.oak.plugins.segment.SegmentDataStoreBlobGCIT.java, an Apache Jackrabbit Oak integration test that exercises data store (blob) garbage collection on a SegmentNodeStore backed by a FileStore and a DataStoreBlobStore.
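
The test drives Oak's MarkSweepGarbageCollector directly rather than through JMX. As a quick orientation before the full listing, here is a minimal sketch of that invocation pattern, mirroring the init() and gcInternal() methods below; store stands for an already-built FileStore, and gcWorkDir is an illustrative name for a scratch directory:

    ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);
    MarkSweepGarbageCollector gc = new MarkSweepGarbageCollector(
            new SegmentBlobReferenceRetriever(store.getTracker()),  // mark: blob ids referenced from segments
            (GarbageCollectableBlobStore) store.getBlobStore(),     // sweep: chunk ids present in the data store
            executor,
            gcWorkDir,  // scratch directory for the mark/sweep state files
            2048,       // batch size used when deleting candidates
            0,          // max blob age in seconds; 0 makes every unreferenced blob eligible
            null);      // repository id, needed only when the data store is shared
    gc.collectGarbage(false);  // false = mark and sweep; with a large max age the sweep deletes nothing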

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.jackrabbit.oak.plugins.segment;

import static org.apache.commons.io.FileUtils.byteCountToDisplaySize;
import static org.apache.jackrabbit.oak.commons.FixturesHelper.Fixture.SEGMENT_MK;
import static org.apache.jackrabbit.oak.commons.FixturesHelper.getFixtures;
import static org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils.SharedStoreRecordType.REPOSITORY;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assume.assumeTrue;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.plugins.blob.BlobReferenceRetriever;
import org.apache.jackrabbit.oak.plugins.blob.GarbageCollectorFileState;
import org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector;
import org.apache.jackrabbit.oak.plugins.blob.SharedDataStore;
import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreUtils;
import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils;
import org.apache.jackrabbit.oak.plugins.identifier.ClusterRepositoryInfo;
import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy;
import org.apache.jackrabbit.oak.plugins.segment.file.FileStore;
import org.apache.jackrabbit.oak.spi.blob.BlobStore;
import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests for SegmentNodeStore DataStore GC
 */
@RunWith(Parameterized.class)
public class SegmentDataStoreBlobGCIT {
    private static final Logger log = LoggerFactory.getLogger(SegmentDataStoreBlobGCIT.class);

    private final boolean usePersistedMap;

    SegmentNodeStore nodeStore;
    FileStore store;
    DataStoreBlobStore blobStore;
    Date startDate;

    @Rule
    public TemporaryFolder folder = new TemporaryFolder(new File("target"));

    @BeforeClass
    public static void assumptions() {
        assumeTrue(getFixtures().contains(SEGMENT_MK));
    }

    @Parameterized.Parameters
    public static List<Boolean[]> fixtures() {
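        // Run each test twice: once with the compaction map persisted and once held in memory.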
        return ImmutableList.of(new Boolean[] { true }, new Boolean[] { false });
    }

    public SegmentDataStoreBlobGCIT(boolean usePersistedMap) {
        this.usePersistedMap = usePersistedMap;
    }

    protected SegmentNodeStore getNodeStore(BlobStore blobStore) throws IOException {
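        // Lazily build a FileStore-backed SegmentNodeStore wired to the given blob
        // store; the custom compaction strategy applies the compacted head immediately.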
        if (nodeStore == null) {
            FileStore.Builder builder = FileStore.builder(getWorkDir()).withBlobStore(blobStore)
                    .withMaxFileSize(256).withCacheSize(64).withMemoryMapping(false);
            store = builder.build();
            CompactionStrategy compactionStrategy = new CompactionStrategy(false, true,
                    CompactionStrategy.CleanupType.CLEAN_OLD, 0, CompactionStrategy.MEMORY_THRESHOLD_DEFAULT) {
                @Override
                public boolean compacted(@Nonnull Callable<Boolean> setHead) throws Exception {
                    return setHead.call();
                }
            };
            compactionStrategy.setPersistCompactionMap(usePersistedMap);
            store.setCompactionStrategy(compactionStrategy);
            nodeStore = SegmentNodeStore.builder(store).build();
        }
        return nodeStore;
    }

    private File getWorkDir() {
        return folder.getRoot();
    }

    public DataStoreState setUp() throws Exception {
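        // Build the test data set: inlined blobs that turn into segment garbage, plus
        // data store blobs whose parent nodes are partly deleted to create GC candidates.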
        if (blobStore == null) {
            blobStore = DataStoreUtils.getBlobStore(folder.newFolder());
        }
        nodeStore = getNodeStore(blobStore);
        startDate = new Date();

        NodeBuilder a = nodeStore.getRoot().builder();

        /* 1. Create garbage by creating in-lined blobs (size < 16KB) */
        int number = 4000;
        NodeBuilder content = a.child("content");
        for (int i = 0; i < number; i++) {
            NodeBuilder c = content.child("x" + i);
            for (int j = 0; j < 5; j++) {
                c.setProperty("p" + j, nodeStore.createBlob(randomStream(j, 16384)));
            }
        }
        nodeStore.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);

        final long dataSize = store.size();
        log.info("File store dataSize {}", byteCountToDisplaySize(dataSize));

        // 2. Now remove the blob properties again to generate garbage
        content = a.child("content");
        for (int i = 0; i < 2000; i++) {
            NodeBuilder c = content.child("x" + i);
            for (int j = 0; j < 5; j++) {
                c.removeProperty("p" + j);
            }
        }
        nodeStore.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);

        /* 3. Create and delete nodes with blobs stored in the data store */
        int maxDeleted = 5;
        int numBlobs = 10;
        List<Integer> processed = Lists.newArrayList();
        Random rand = new Random();
        for (int i = 0; i < maxDeleted; i++) {
            int n = rand.nextInt(numBlobs);
            if (!processed.contains(n)) {
                processed.add(n);
            }
        }

        DataStoreState state = new DataStoreState();
        for (int i = 0; i < numBlobs; i++) {
            SegmentBlob b = (SegmentBlob) nodeStore.createBlob(randomStream(i, 18342));
            Iterator<String> idIter = blobStore.resolveChunks(b.getBlobId());
            while (idIter.hasNext()) {
                String chunk = idIter.next();
                state.blobsAdded.add(chunk);
                if (!processed.contains(i)) {
                    state.blobsPresent.add(chunk);
                }
            }
            a.child("c" + i).setProperty("x", b);
        }

        nodeStore.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
        log.info("Created blobs : {}", state.blobsAdded.size());

        for (int id : processed) {
            delete("c" + id);
        }
        log.info("Deleted nodes : {}", processed.size());

        // Sleep a little so the changes become eligible for cleanup
        TimeUnit.MILLISECONDS.sleep(5);
        store.maybeCompact(false);
        store.cleanup();

        return state;
    }

    private HashSet<String> addInlined() throws Exception {
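        // These blobs are small enough to stay inlined in the node store rather than
        // the data store, so no chunk ids are added and the returned set stays empty.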
        HashSet<String> set = new HashSet<String>();
        NodeBuilder a = nodeStore.getRoot().builder();
        int number = 4;
        for (int i = 0; i < number; i++) {
            Blob b = nodeStore.createBlob(randomStream(i, 16514));
            a.child("cinline" + i).setProperty("x", b);
        }
        nodeStore.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
        return set;
    }

    private HashSet<String> addNodeSpecialChars() throws Exception {
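        // Node names containing quotes, newlines and backslashes must not corrupt the
        // blob references gathered during the mark phase.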
        List<String> specialCharSets = Lists.newArrayList("q\\%22afdg\\%22", "a\nbcd", "a\n\rabcd", "012\\efg");
        HashSet<String> set = new HashSet<String>();
        NodeBuilder a = nodeStore.getRoot().builder();
        for (int i = 0; i < specialCharSets.size(); i++) {
            SegmentBlob b = (SegmentBlob) nodeStore.createBlob(randomStream(i, 18432));
            NodeBuilder n = a.child("cspecial");
            n.child(specialCharSets.get(i)).setProperty("x", b);
            Iterator<String> idIter = blobStore.resolveChunks(b.getBlobId());
            set.addAll(Lists.newArrayList(idIter));
        }
        nodeStore.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
        return set;
    }

    private class DataStoreState {
        Set<String> blobsAdded = Sets.newHashSet();
        Set<String> blobsPresent = Sets.newHashSet();
    }

    private void delete(String nodeId) throws CommitFailedException {
        NodeBuilder builder = nodeStore.getRoot().builder();
        builder.child(nodeId).remove();

        nodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
    }

    @Test
    public void gc() throws Exception {
        DataStoreState state = setUp();
        log.info("{} blobs that should remain after gc : {}", state.blobsPresent.size(), state.blobsPresent);
        log.info("{} blobs for nodes which are deleted : {}", state.blobsPresent.size(), state.blobsPresent);
        Set<String> existingAfterGC = gcInternal(0);
        assertTrue(Sets.symmetricDifference(state.blobsPresent, existingAfterGC).isEmpty());
    }

    @Test
    public void noGc() throws Exception {
        DataStoreState state = setUp();
        log.info("{} blobs that should remain after gc : {}", state.blobsAdded.size(), state.blobsAdded);
        log.info("{} blobs for nodes which are deleted : {}", state.blobsPresent.size(), state.blobsPresent);
        Set<String> existingAfterGC = gcInternal(86400);
        assertTrue(Sets.symmetricDifference(state.blobsAdded, existingAfterGC).isEmpty());
    }

    @Test
    public void gcSpecialChar() throws Exception {
        DataStoreState state = setUp();
        Set<String> specialCharNodeBlobs = addNodeSpecialChars();
        state.blobsAdded.addAll(specialCharNodeBlobs);
        state.blobsPresent.addAll(specialCharNodeBlobs);
        Set<String> existingAfterGC = gcInternal(0);
        assertTrue(Sets.symmetricDifference(state.blobsPresent, existingAfterGC).isEmpty());
    }

    @Test
    public void consistencyCheckInit() throws Exception {
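        // checkConsistency() reports the number of referenced blobs missing from the
        // data store; a freshly populated repository should report none.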
        DataStoreState state = setUp();
        ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);
        MarkSweepGarbageCollector gcObj = init(86400, executor);
        long candidates = gcObj.checkConsistency();
        assertEquals(1, executor.getTaskCount());
        assertEquals(0, candidates);
    }

    @Test
    public void consistencyCheckWithGc() throws Exception {
        DataStoreState state = setUp();
        Set<String> existingAfterGC = gcInternal(0);
        assertTrue(Sets.symmetricDifference(state.blobsPresent, existingAfterGC).isEmpty());

        ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);
        MarkSweepGarbageCollector gcObj = init(86400, executor);
        long candidates = gcObj.checkConsistency();
        assertEquals(1, executor.getTaskCount());
        assertEquals(0, candidates);
    }

    @Test
    public void consistencyCheckWithRenegadeDelete() throws Exception {
        DataStoreState state = setUp();

        // Simulate faulty state by deleting some blobs directly
        Random rand = new Random(87);
        List<String> existing = Lists.newArrayList(state.blobsPresent);

        long count = blobStore.countDeleteChunks(ImmutableList.of(existing.get(rand.nextInt(existing.size()))), 0);

        ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);
        MarkSweepGarbageCollector gcObj = init(86400, executor);
        long candidates = gcObj.checkConsistency();
        assertEquals(1, executor.getTaskCount());
        assertEquals(count, candidates);
    }

    @Test
    public void gcLongRunningBlobCollection() throws Exception {
        DataStoreState state = setUp();
        log.info("{} Blobs added {}", state.blobsAdded.size(), state.blobsAdded);
        log.info("{} Blobs should be present {}", state.blobsPresent.size(), state.blobsPresent);

        ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);
        String repoId = null;
        if (SharedDataStoreUtils.isShared(store.getBlobStore())) {
            repoId = ClusterRepositoryInfo.getOrCreateId(nodeStore);
            ((SharedDataStore) store.getBlobStore()).addMetadataRecord(new ByteArrayInputStream(new byte[0]),
                    REPOSITORY.getNameFromId(repoId));
        }
        TestGarbageCollector gc = new TestGarbageCollector(new SegmentBlobReferenceRetriever(store.getTracker()),
                (GarbageCollectableBlobStore) store.getBlobStore(), executor, folder.newFolder().getAbsolutePath(),
                5, 5000, repoId);
        gc.collectGarbage(false);
        Set<String> existingAfterGC = iterate();
        log.info("{} Blobs existing after gc {}", existingAfterGC.size(), existingAfterGC);

        assertTrue(Sets.difference(state.blobsPresent, existingAfterGC).isEmpty());
        assertEquals(gc.additionalBlobs, Sets.symmetricDifference(state.blobsPresent, existingAfterGC));
    }

    @Test
    public void gcWithInlined() throws Exception {
        blobStore = new DataStoreBlobStore(DataStoreUtils.createFDS(new File(getWorkDir(), "datastore"), 16516));
        DataStoreState state = setUp();
        addInlined();
        log.info("{} blobs that should remain after gc : {}", state.blobsAdded.size(), state.blobsAdded);
        log.info("{} blobs for nodes which are deleted : {}", state.blobsPresent.size(), state.blobsPresent);
        Set<String> existingAfterGC = gcInternal(0);
        assertTrue(Sets.symmetricDifference(state.blobsPresent, existingAfterGC).isEmpty());
    }

    @Test
    public void consistencyCheckInlined() throws Exception {
        blobStore = new DataStoreBlobStore(DataStoreUtils.createFDS(new File(getWorkDir(), "datastore"), 16516));
        DataStoreState state = setUp();
        addInlined();
        ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);
        MarkSweepGarbageCollector gcObj = init(86400, executor);
        long candidates = gcObj.checkConsistency();
        assertEquals(1, executor.getTaskCount());
        assertEquals(0, candidates);
    }

    private Set<String> gcInternal(long maxBlobGcInSecs) throws Exception {
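        // Run a full mark-sweep pass and return the chunk ids that survive.
        // maxBlobGcInSecs = 0 makes every unreferenced blob immediately eligible,
        // while a large value such as 86400 protects everything created by the test.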
        ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);
        MarkSweepGarbageCollector gc = init(maxBlobGcInSecs, executor);
        gc.collectGarbage(false);

        assertEquals(0, executor.getTaskCount());
        Set<String> existingAfterGC = iterate();
        log.info("{} blobs existing after gc : {}", existingAfterGC.size(), existingAfterGC);
        return existingAfterGC;
    }

    private MarkSweepGarbageCollector init(long blobGcMaxAgeInSecs, ThreadPoolExecutor executor) throws Exception {
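        // When the data store is shared, register this repository's id so the
        // collector can account for references from all repositories.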
        String repoId = null;
        if (SharedDataStoreUtils.isShared(store.getBlobStore())) {
            repoId = ClusterRepositoryInfo.getOrCreateId(nodeStore);
            ((SharedDataStore) store.getBlobStore()).addMetadataRecord(new ByteArrayInputStream(new byte[0]),
                    REPOSITORY.getNameFromId(repoId));
        }
        MarkSweepGarbageCollector gc = new MarkSweepGarbageCollector(
                new SegmentBlobReferenceRetriever(store.getTracker()),
                (GarbageCollectableBlobStore) store.getBlobStore(), executor, folder.newFolder().getAbsolutePath(),
                2048, blobGcMaxAgeInSecs, repoId);
        return gc;
    }

    protected Set<String> iterate() throws Exception {
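        // Snapshot every chunk id currently present in the blob store.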
        Iterator<String> cur = blobStore.getAllChunkIds(0);

        Set<String> existing = Sets.newHashSet();
        while (cur.hasNext()) {
            existing.add(cur.next());
        }
        return existing;
    }

    @After
    public void close() throws Exception {
        if (store != null) {
            store.close();
        }
        DataStoreUtils.cleanup(blobStore.getDataStore(), startDate);
    }

    static InputStream randomStream(int seed, int size) {
        Random r = new Random(seed);
        byte[] data = new byte[size];
        r.nextBytes(data);
        return new ByteArrayInputStream(data);
    }

    /**
     * Waits for some time and adds additional blobs after the blob references have
     * been identified, to simulate a long-running blob id collection phase.
     */
    class TestGarbageCollector extends MarkSweepGarbageCollector {
        long maxLastModifiedInterval;
        String root;
        GarbageCollectableBlobStore blobStore;
        Set<String> additionalBlobs;

        public TestGarbageCollector(BlobReferenceRetriever marker, GarbageCollectableBlobStore blobStore,
                Executor executor, String root, int batchCount, long maxLastModifiedInterval,
                @Nullable String repositoryId) throws IOException {
            super(marker, blobStore, executor, root, batchCount, maxLastModifiedInterval, repositoryId);
            this.root = root;
            this.blobStore = blobStore;
            this.maxLastModifiedInterval = maxLastModifiedInterval;
            this.additionalBlobs = Sets.newHashSet();
        }

        @Override
        protected void markAndSweep(boolean markOnly) throws Exception {
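            // Mark, then create extra blobs, then sweep: blobs added during a
            // long-running collection must survive the sweep.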
            boolean threw = true;
            GarbageCollectorFileState fs = new GarbageCollectorFileState(root);
            try {
                Stopwatch sw = Stopwatch.createStarted();
                LOG.info("Starting Test Blob garbage collection");

                // Sleep slightly longer than the max last-modified interval so the valid blobs age past it
                Thread.sleep(maxLastModifiedInterval + 100);
                LOG.info("Slept {} to make blobs old", maxLastModifiedInterval + 100);

                long markStart = System.currentTimeMillis();
                mark(fs);
                LOG.info("Mark finished");

                additionalBlobs = createAdditional();

                if (!markOnly) {
                    Thread.sleep(maxLastModifiedInterval + 100);
                    LOG.info("Slept {} to make additional blobs old", maxLastModifiedInterval + 100);

                    long deleteCount = sweep(fs, markStart);
                    threw = false;

                    LOG.info("Blob garbage collection completed in {}. Number of blobs deleted [{}]", sw.toString(),
                            deleteCount, maxLastModifiedInterval);
                }
            } finally {
                if (!LOG.isTraceEnabled()) {
                    Closeables.close(fs, threw);
                }
            }
        }

        public HashSet<String> createAdditional() throws Exception {
            HashSet<String> blobSet = new HashSet<String>();
            NodeBuilder a = nodeStore.getRoot().builder();
            int number = 5;
            for (int i = 0; i < number; i++) {
                SegmentBlob b = (SegmentBlob) nodeStore.createBlob(randomStream(100 + i, 16516));
                a.child("cafter" + i).setProperty("x", b);
                Iterator<String> idIter = ((GarbageCollectableBlobStore) blobStore).resolveChunks(b.getBlobId());
                while (idIter.hasNext()) {
                    String chunk = idIter.next();
                    blobSet.add(chunk);
                }
            }
            log.info("{} Additional created {}", blobSet.size(), blobSet);

            nodeStore.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
            return blobSet;
        }
    }
}