com.uber.hoodie.io.strategy.TestHoodieCompactionStrategy.java Source code

Java tutorial

Introduction

Here is the source code for com.uber.hoodie.io.strategy.TestHoodieCompactionStrategy.java

Source

/*
 *  Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *           http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package com.uber.hoodie.io.strategy;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import com.beust.jcommander.internal.Lists;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.uber.hoodie.avro.model.HoodieCompactionOperation;
import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieLogFile;
import com.uber.hoodie.config.HoodieCompactionConfig;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.io.compact.strategy.BoundedIOCompactionStrategy;
import com.uber.hoodie.io.compact.strategy.DayBasedCompactionStrategy;
import com.uber.hoodie.io.compact.strategy.LogFileSizeBasedCompactionStrategy;
import com.uber.hoodie.io.compact.strategy.UnBoundedCompactionStrategy;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import org.junit.Assert;
import org.junit.Test;

/**
 * Tests for the compaction strategies' {@code orderAndFilter} behaviour
 * (unbounded, bounded IO, log-file-size based and day based).
 *
 * <p>Every test builds the same fixture of compaction operations from a map whose key is
 * handed to {@code TestHoodieDataFile.newDataFile} and whose values are handed to
 * {@code TestHoodieLogFile.newLogFile} (presumably file sizes in bytes), runs the strategy
 * and checks which operations were returned.
 */
public class TestHoodieCompactionStrategy {

    private static final long MB = 1024 * 1024L;
    private final String[] partitionPaths = { "2017/01/01", "2017/01/02", "2017/01/03" };

    /** The unbounded strategy must hand back the operations exactly as it received them. */
    @Test
    public void testUnBounded() {
        Map<Long, List<Long>> sizesMap = newSizesMap();
        UnBoundedCompactionStrategy strategy = new UnBoundedCompactionStrategy();
        HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp")
                .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).build())
                .build();
        List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap);
        List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations,
                new ArrayList<>());
        assertEquals("UnBounded should not re-order or filter", operations, returned);
    }

    /** With a 400 MB IO target, the bounded-IO strategy is expected to keep 2 operations (610 MB). */
    @Test
    public void testBoundedIOSimple() {
        Map<Long, List<Long>> sizesMap = newSizesMap();
        BoundedIOCompactionStrategy strategy = new BoundedIOCompactionStrategy();
        HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp")
                .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy)
                        .withTargetIOPerCompactionInMB(400).build())
                .build();
        List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap);
        List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations,
                new ArrayList<>());

        assertTrue("BoundedIOCompaction should have resulted in fewer compactions",
                returned.size() < operations.size());
        assertEquals("BoundedIOCompaction should have resulted in 2 compactions being chosen", 2, returned.size());
        // Sum of the TOTAL_IO_MB metric over the chosen operations
        assertEquals("Should choose the first 2 compactions which should result in a total IO of 610 MB", 610,
                totalIoMb(returned));
    }

    /** With a 400 MB IO target, the log-file-size strategy is expected to keep 1 operation (1204 MB). */
    @Test
    public void testLogFileSizeCompactionSimple() {
        Map<Long, List<Long>> sizesMap = newSizesMap();
        LogFileSizeBasedCompactionStrategy strategy = new LogFileSizeBasedCompactionStrategy();
        HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp")
                .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy)
                        .withTargetIOPerCompactionInMB(400).build())
                .build();
        List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap);
        List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations,
                new ArrayList<>());

        assertTrue("LogFileSizeBasedCompactionStrategy should have resulted in fewer compactions",
                returned.size() < operations.size());
        assertEquals("LogFileSizeBasedCompactionStrategy should have resulted in 1 compaction", 1, returned.size());
        // Sum of the TOTAL_IO_MB metric over the chosen operations
        assertEquals("Should choose a single compaction which should result in a total IO of 1204 MB", 1204,
                totalIoMb(returned));
    }

    /**
     * With one target partition, the day-based strategy is expected to keep the 2 operations
     * that the fixture pins to the same partition, ordered according to the strategy's comparator.
     */
    @Test
    public void testPartitionAwareCompactionSimple() {
        Map<Long, List<Long>> sizesMap = newSizesMap();

        Map<Long, String> keyToPartitionMap = ImmutableMap.<Long, String>builder()
                .put(120 * MB, partitionPaths[2])
                .put(110 * MB, partitionPaths[2])
                .put(100 * MB, partitionPaths[1])
                .put(90 * MB, partitionPaths[0])
                .build();

        DayBasedCompactionStrategy strategy = new DayBasedCompactionStrategy();
        HoodieWriteConfig writeConfig = HoodieWriteConfig
                .newBuilder().withPath("/tmp").withCompactionConfig(HoodieCompactionConfig.newBuilder()
                        .withCompactionStrategy(strategy).withTargetPartitionsPerDayBasedCompaction(1).build())
                .build();
        List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap,
                keyToPartitionMap);
        List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations,
                new ArrayList<>());

        assertTrue("DayBasedCompactionStrategy should have resulted in fewer compactions",
                returned.size() < operations.size());
        // JUnit's signature is (message, expected, actual)
        Assert.assertEquals("DayBasedCompactionStrategy should have resulted in 2 compactions", 2,
                returned.size());

        String firstPartition = returned.get(0).getPartitionPath();
        String lastPartition = returned.get(returned.size() - 1).getPartitionPath();
        int comparison = strategy.getComparator().compare(lastPartition, firstPartition);
        // Either the partition paths are sorted in descending order or they are equal
        assertTrue("DayBasedCompactionStrategy should sort partitions in descending order", comparison >= 0);
    }

    /**
     * Builds the fixture shared by all tests.
     *
     * <p>An insertion-ordered map is used because the operations are created in map iteration
     * order and the bounded-IO assertion refers to the "first 2" operations; this keeps that
     * order explicit instead of depending on hash iteration order.
     *
     * @return a new mutable map with four entries, in insertion order
     */
    private static Map<Long, List<Long>> newSizesMap() {
        Map<Long, List<Long>> sizesMap = Maps.newLinkedHashMap();
        sizesMap.put(120 * MB, Lists.newArrayList(60 * MB, 10 * MB, 80 * MB));
        sizesMap.put(110 * MB, Lists.newArrayList());
        sizesMap.put(100 * MB, Lists.newArrayList(MB));
        sizesMap.put(90 * MB, Lists.newArrayList(1024 * MB));
        return sizesMap;
    }

    /**
     * Sums the {@code TOTAL_IO_MB} metric, truncated to a long per operation, over the given operations.
     *
     * @param operations operations whose metrics contain {@code BoundedIOCompactionStrategy.TOTAL_IO_MB}
     * @return the sum, or 0 for an empty list
     */
    private static long totalIoMb(List<HoodieCompactionOperation> operations) {
        return operations.stream()
                .mapToLong(op -> op.getMetrics().get(BoundedIOCompactionStrategy.TOTAL_IO_MB).longValue())
                .sum();
    }

    /**
     * Creates one compaction operation per entry of {@code sizesMap}, assigning each entry a
     * randomly chosen partition path.
     *
     * @param config   write config whose compaction strategy captures the operation metrics
     * @param sizesMap fixture map as produced by {@link #newSizesMap()}
     * @return the operations, in the iteration order of {@code sizesMap}
     */
    private List<HoodieCompactionOperation> createCompactionOperations(HoodieWriteConfig config,
            Map<Long, List<Long>> sizesMap) {
        Random random = new Random();
        // nextInt's bound is exclusive, so the full array length makes every partition selectable
        Map<Long, String> keyToPartitionMap = sizesMap.entrySet().stream()
                .map(e -> Pair.of(e.getKey(), partitionPaths[random.nextInt(partitionPaths.length)]))
                .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
        return createCompactionOperations(config, sizesMap, keyToPartitionMap);
    }

    /**
     * Creates one compaction operation per entry of {@code sizesMap}.
     *
     * @param config            write config whose compaction strategy captures the operation metrics
     * @param sizesMap          fixture map; the key creates the data file, the values the log files
     * @param keyToPartitionMap partition path to use for each key of {@code sizesMap}
     * @return the operations, in the iteration order of {@code sizesMap}
     */
    private List<HoodieCompactionOperation> createCompactionOperations(HoodieWriteConfig config,
            Map<Long, List<Long>> sizesMap, Map<Long, String> keyToPartitionMap) {
        List<HoodieCompactionOperation> operations = new ArrayList<>(sizesMap.size());

        for (Map.Entry<Long, List<Long>> entry : sizesMap.entrySet()) {
            HoodieDataFile df = TestHoodieDataFile.newDataFile(entry.getKey());
            String partitionPath = keyToPartitionMap.get(entry.getKey());
            List<HoodieLogFile> logFiles = entry.getValue().stream().map(TestHoodieLogFile::newLogFile)
                    .collect(Collectors.toList());
            List<String> logFilePaths = logFiles.stream().map(s -> s.getPath().toString())
                    .collect(Collectors.toList());
            operations.add(new HoodieCompactionOperation(df.getCommitTime(), logFilePaths, df.getPath(),
                    df.getFileId(), partitionPath, config.getCompactionStrategy().captureMetrics(config,
                            Optional.of(df), partitionPath, logFiles)));
        }
        return operations;
    }
}