com.uber.hoodie.common.util.CompactionUtils.java Source code

Introduction

Here is the source code for com.uber.hoodie.common.util.CompactionUtils.java

Source

/*
 *  Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *           http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package com.uber.hoodie.common.util;

import com.uber.hoodie.avro.model.HoodieCompactionOperation;
import com.uber.hoodie.avro.model.HoodieCompactionPlan;
import com.uber.hoodie.common.model.CompactionOperation;
import com.uber.hoodie.common.model.FileSlice;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.exception.HoodieException;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.tuple.Pair;

/**
 * Helper class to generate a compaction plan from the FileGroup/FileSlice abstraction
 */
public class CompactionUtils {

    /**
     * Generate compaction operation from file-slice
     *
     * @param partitionPath          Partition path
     * @param fileSlice              File Slice
     * @param metricsCaptureFunction Metrics Capture function
     * @return Compaction Operation
     */
    public static HoodieCompactionOperation buildFromFileSlice(String partitionPath, FileSlice fileSlice,
            Optional<Function<Pair<String, FileSlice>, Map<String, Double>>> metricsCaptureFunction) {
        HoodieCompactionOperation.Builder builder = HoodieCompactionOperation.newBuilder();
        builder.setPartitionPath(partitionPath);
        builder.setFileId(fileSlice.getFileId());
        builder.setBaseInstantTime(fileSlice.getBaseInstantTime());
        builder.setDeltaFilePaths(
                fileSlice.getLogFiles().map(lf -> lf.getPath().toString()).collect(Collectors.toList()));
        if (fileSlice.getDataFile().isPresent()) {
            builder.setDataFilePath(fileSlice.getDataFile().get().getPath());
        }

        if (metricsCaptureFunction.isPresent()) {
            builder.setMetrics(metricsCaptureFunction.get().apply(Pair.of(partitionPath, fileSlice)));
        }
        return builder.build();
    }

    /**
     * Generate a compaction plan from file-slices
     *
     * @param partitionFileSlicePairs List of partition file-slice pairs
     * @param extraMetadata           Extra Metadata
     * @param metricsCaptureFunction  Metrics Capture function
     * @return Compaction Plan
     */
    public static HoodieCompactionPlan buildFromFileSlices(List<Pair<String, FileSlice>> partitionFileSlicePairs,
            Optional<Map<String, String>> extraMetadata,
            Optional<Function<Pair<String, FileSlice>, Map<String, Double>>> metricsCaptureFunction) {
        HoodieCompactionPlan.Builder builder = HoodieCompactionPlan.newBuilder();
        extraMetadata.ifPresent(m -> builder.setExtraMetadata(m));
        builder.setOperations(partitionFileSlicePairs.stream()
                .map(pfPair -> buildFromFileSlice(pfPair.getKey(), pfPair.getValue(), metricsCaptureFunction))
                .collect(Collectors.toList()));
        return builder.build();
    }
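
    // Example (hypothetical usage): given partition/file-slice pairs collected by the caller, a plan
    // with no extra metadata and no metrics capture could be built as:
    //   HoodieCompactionPlan plan =
    //       CompactionUtils.buildFromFileSlices(partitionFileSlicePairs, Optional.empty(), Optional.empty());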

    /**
     * Build the Avro-generated compaction operation payload from a compaction operation POJO, for serialization
     */
    public static HoodieCompactionOperation buildHoodieCompactionOperation(CompactionOperation op) {
        return HoodieCompactionOperation.newBuilder().setFileId(op.getFileId())
                .setBaseInstantTime(op.getBaseInstantTime()).setPartitionPath(op.getPartitionPath())
                .setDataFilePath(op.getDataFilePath().isPresent() ? op.getDataFilePath().get() : null)
                .setDeltaFilePaths(op.getDeltaFilePaths()).setMetrics(op.getMetrics()).build();
    }

    /**
     * Build compaction operation payload from the Avro version, for use in Spark executors
     *
     * @param hc HoodieCompactionOperation
     * @return Compaction Operation
     */
    public static CompactionOperation buildCompactionOperation(HoodieCompactionOperation hc) {
        return CompactionOperation.convertFromAvroRecordInstance(hc);
    }

    /**
     * Get all pending compaction plans along with their instants
     *
     * @param metaClient Hoodie Meta Client
     */
    public static List<Pair<HoodieInstant, HoodieCompactionPlan>> getAllPendingCompactionPlans(
            HoodieTableMetaClient metaClient) {
        List<HoodieInstant> pendingCompactionInstants = metaClient.getActiveTimeline()
                .filterPendingCompactionTimeline().getInstants().collect(Collectors.toList());
        return pendingCompactionInstants.stream().map(instant -> {
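            // The compaction plan is stored as auxiliary details of the corresponding
            // "compaction requested" instant; read and deserialize it from there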
            try {
                HoodieCompactionPlan compactionPlan = AvroUtils
                        .deserializeCompactionPlan(metaClient.getActiveTimeline()
                                .getInstantAuxiliaryDetails(
                                        HoodieTimeline.getCompactionRequestedInstant(instant.getTimestamp()))
                                .get());
                return Pair.of(instant, compactionPlan);
            } catch (IOException e) {
                throw new HoodieException(e);
            }
        }).collect(Collectors.toList());
    }

    /**
     * Get all file-ids with pending compaction operations and their target compaction instant time
     *
     * @param metaClient Hoodie Table Meta Client
     * @return Map of file-id to (target compaction instant time, compaction operation)
     */
    public static Map<String, Pair<String, HoodieCompactionOperation>> getAllPendingCompactionOperations(
            HoodieTableMetaClient metaClient) {
        List<Pair<HoodieInstant, HoodieCompactionPlan>> pendingCompactionPlanWithInstants = getAllPendingCompactionPlans(
                metaClient);

        Map<String, Pair<String, HoodieCompactionOperation>> fileIdToPendingCompactionWithInstantMap = new HashMap<>();
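        // Flatten every (instant, plan) pair into (file-id -> (instant-time, operation)) entries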
        pendingCompactionPlanWithInstants.stream().flatMap(instantPlanPair -> {
            HoodieInstant instant = instantPlanPair.getKey();
            HoodieCompactionPlan compactionPlan = instantPlanPair.getValue();
            List<HoodieCompactionOperation> ops = compactionPlan.getOperations();
            if (null != ops) {
                return ops.stream()
                        .map(op -> Pair.of(op.getFileId(), Pair.of(instant.getTimestamp(), op)));
            } else {
                return Stream.empty();
            }
        }).forEach(pair -> {
            // Defensive check to ensure a single file-id does not have more than one pending compaction
            if (fileIdToPendingCompactionWithInstantMap.containsKey(pair.getKey())) {
                String msg = "Hoodie File Id (" + pair.getKey()
                        + ") has more than 1 pending compaction. Instants: " + pair.getValue() + ", "
                        + fileIdToPendingCompactionWithInstantMap.get(pair.getKey());
                throw new IllegalStateException(msg);
            }
            fileIdToPendingCompactionWithInstantMap.put(pair.getKey(), pair.getValue());
        });
        return fileIdToPendingCompactionWithInstantMap;
    }
}
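
Example

Below is a minimal usage sketch, not part of the original file. It assumes the caller already has a HoodieTableMetaClient and a list of partition/file-slice pairs; the class name CompactionUtilsExample and the variable names are hypothetical:

import com.uber.hoodie.avro.model.HoodieCompactionOperation;
import com.uber.hoodie.avro.model.HoodieCompactionPlan;
import com.uber.hoodie.common.model.FileSlice;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.util.CompactionUtils;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.apache.commons.lang3.tuple.Pair;

public class CompactionUtilsExample {

    // Build a compaction plan with no extra metadata and no metrics capture
    public static HoodieCompactionPlan planCompaction(List<Pair<String, FileSlice>> partitionFileSlicePairs) {
        return CompactionUtils.buildFromFileSlices(partitionFileSlicePairs, Optional.empty(), Optional.empty());
    }

    // Print the instant time of the pending compaction (if any) scheduled for a given file id
    public static void printPendingCompaction(HoodieTableMetaClient metaClient, String fileId) {
        Map<String, Pair<String, HoodieCompactionOperation>> pending =
                CompactionUtils.getAllPendingCompactionOperations(metaClient);
        if (pending.containsKey(fileId)) {
            System.out.println("File " + fileId + " is scheduled for compaction at instant "
                    + pending.get(fileId).getKey());
        }
    }
}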