com.uber.hoodie.table.HoodieCopyOnWriteTable.java Source code

Introduction

Here is the source code for com.uber.hoodie.table.HoodieCopyOnWriteTable.java

Source

/*
 * Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *          http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.uber.hoodie.table;

import com.google.common.collect.Maps;
import com.google.common.hash.Hashing;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.avro.model.HoodieCompactionPlan;
import com.uber.hoodie.common.HoodieCleanStat;
import com.uber.hoodie.common.HoodieRollbackStat;
import com.uber.hoodie.common.model.HoodieCommitMetadata;
import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordLocation;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.model.HoodieWriteStat;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.common.util.queue.BoundedInMemoryExecutor;
import com.uber.hoodie.common.util.queue.BoundedInMemoryQueueConsumer;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieException;
import com.uber.hoodie.exception.HoodieIOException;
import com.uber.hoodie.exception.HoodieNotSupportedException;
import com.uber.hoodie.exception.HoodieUpsertException;
import com.uber.hoodie.func.CopyOnWriteLazyInsertIterable;
import com.uber.hoodie.func.ParquetReaderIterator;
import com.uber.hoodie.func.SparkBoundedInMemoryExecutor;
import com.uber.hoodie.io.HoodieCleanHelper;
import com.uber.hoodie.io.HoodieCreateHandle;
import com.uber.hoodie.io.HoodieMergeHandle;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.spark.Partitioner;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import scala.Option;
import scala.Tuple2;

/**
 * Implementation of a heavily read-optimized Hoodie Table where
 * <p>
 * INSERTS - Produce new files, block-aligned to the desired size, or merge with the smallest existing
 * file to expand it
 * <p>
 * UPDATES - Produce a new version of the file, replacing only the updated records with new values
 */
public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends HoodieTable<T> {

    private static final Logger logger = LogManager.getLogger(HoodieCopyOnWriteTable.class);

    public HoodieCopyOnWriteTable(HoodieWriteConfig config, JavaSparkContext jsc) {
        super(config, jsc);
    }

    private static PairFlatMapFunction<Iterator<Tuple2<String, String>>, String, PartitionCleanStat> deleteFilesFunc(
            HoodieTable table) {
        return (PairFlatMapFunction<Iterator<Tuple2<String, String>>, String, PartitionCleanStat>) iter -> {
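            // Runs once per Spark task: delete every (partitionPath, filePath) pair handed to this
            // task and accumulate the outcomes into one PartitionCleanStat per Hoodie partition path.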
            Map<String, PartitionCleanStat> partitionCleanStatMap = new HashMap<>();

            FileSystem fs = table.getMetaClient().getFs();
            while (iter.hasNext()) {
                Tuple2<String, String> partitionDelFileTuple = iter.next();
                String partitionPath = partitionDelFileTuple._1();
                String deletePathStr = partitionDelFileTuple._2();
                Boolean deletedFileResult = deleteFileAndGetResult(fs, deletePathStr);
                if (!partitionCleanStatMap.containsKey(partitionPath)) {
                    partitionCleanStatMap.put(partitionPath, new PartitionCleanStat(partitionPath));
                }
                PartitionCleanStat partitionCleanStat = partitionCleanStatMap.get(partitionPath);
                partitionCleanStat.addDeleteFilePatterns(deletePathStr);
                partitionCleanStat.addDeletedFileResult(deletePathStr, deletedFileResult);
            }

            return partitionCleanStatMap.entrySet().stream().map(e -> new Tuple2<>(e.getKey(), e.getValue()))
                    .collect(Collectors.toList()).iterator();
        };
    }

    private static PairFlatMapFunction<String, String, String> getFilesToDeleteFunc(HoodieTable table,
            HoodieWriteConfig config) {
        return (PairFlatMapFunction<String, String, String>) partitionPathToClean -> {
            HoodieCleanHelper cleaner = new HoodieCleanHelper(table, config);
            return cleaner.getDeletePaths(partitionPathToClean).stream()
                    .map(deleteFile -> new Tuple2<>(partitionPathToClean, deleteFile.toString())).iterator();
        };
    }

    private static Boolean deleteFileAndGetResult(FileSystem fs, String deletePathStr) throws IOException {
        Path deletePath = new Path(deletePathStr);
        logger.debug("Working on delete path :" + deletePath);
        boolean deleteResult = fs.delete(deletePath, false);
        if (deleteResult) {
            logger.debug("Cleaned file at path :" + deletePath);
        }
        return deleteResult;
    }

    @Override
    public Partitioner getUpsertPartitioner(WorkloadProfile profile) {
        if (profile == null) {
            throw new HoodieUpsertException("Need workload profile to construct the upsert partitioner.");
        }
        return new UpsertPartitioner(profile);
    }

    @Override
    public Partitioner getInsertPartitioner(WorkloadProfile profile) {
        return getUpsertPartitioner(profile);
    }

    @Override
    public boolean isWorkloadProfileNeeded() {
        return true;
    }

    @Override
    public HoodieCompactionPlan scheduleCompaction(JavaSparkContext jsc, String commitTime) {
        throw new HoodieNotSupportedException("Compaction is not supported from a CopyOnWrite table");
    }

    @Override
    public JavaRDD<WriteStatus> compact(JavaSparkContext jsc, String compactionInstantTime,
            HoodieCompactionPlan compactionPlan) {
        throw new HoodieNotSupportedException("Compaction is not supported from a CopyOnWrite table");
    }

    public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String fileId,
            Iterator<HoodieRecord<T>> recordItr) throws IOException {
        // these are updates
        HoodieMergeHandle upsertHandle = getUpdateHandle(commitTime, fileId, recordItr);
        return handleUpdateInternal(upsertHandle, commitTime, fileId);
    }

    public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String fileId,
            Map<String, HoodieRecord<T>> keyToNewRecords, Optional<HoodieDataFile> dataFileOpt) throws IOException {
        // these are updates
        HoodieMergeHandle upsertHandle = getUpdateHandle(commitTime, fileId, keyToNewRecords, dataFileOpt);
        return handleUpdateInternal(upsertHandle, commitTime, fileId);
    }

    protected Iterator<List<WriteStatus>> handleUpdateInternal(HoodieMergeHandle upsertHandle, String commitTime,
            String fileId) throws IOException {
        if (upsertHandle.getOldFilePath() == null) {
            throw new HoodieUpsertException(
                    "Error in finding the old file path at commit " + commitTime + " for fileId: " + fileId);
        } else {
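            // Stream the records of the existing parquet file through a bounded in-memory queue; the
            // UpdateHandler consumer hands each record to the HoodieMergeHandle, which combines it with
            // any queued update for the same key before writing out the new file version.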
            AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema());
            ParquetReader<IndexedRecord> reader = AvroParquetReader.builder(upsertHandle.getOldFilePath())
                    .withConf(getHadoopConf()).build();
            BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
            try {
                wrapper = new SparkBoundedInMemoryExecutor(config, new ParquetReaderIterator(reader),
                        new UpdateHandler(upsertHandle), x -> x);
                wrapper.execute();
            } catch (Exception e) {
                throw new HoodieException(e);
            } finally {
                reader.close();
                upsertHandle.close();
                if (null != wrapper) {
                    wrapper.shutdownNow();
                }
            }
        }

        //TODO(vc): This needs to be revisited
        if (upsertHandle.getWriteStatus().getPartitionPath() == null) {
            logger.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + ", "
                    + upsertHandle.getWriteStatus());
        }
        return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus())).iterator();
    }

    protected HoodieMergeHandle getUpdateHandle(String commitTime, String fileId,
            Iterator<HoodieRecord<T>> recordItr) {
        return new HoodieMergeHandle<>(config, commitTime, this, recordItr, fileId);
    }

    protected HoodieMergeHandle getUpdateHandle(String commitTime, String fileId,
            Map<String, HoodieRecord<T>> keyToNewRecords, Optional<HoodieDataFile> dataFileToBeMerged) {
        return new HoodieMergeHandle<>(config, commitTime, this, keyToNewRecords, fileId, dataFileToBeMerged);
    }

    public Iterator<List<WriteStatus>> handleInsert(String commitTime, Iterator<HoodieRecord<T>> recordItr)
            throws Exception {
        return new CopyOnWriteLazyInsertIterable<>(recordItr, config, commitTime, this);
    }

    public Iterator<List<WriteStatus>> handleInsert(String commitTime, String partitionPath, String fileId,
            Iterator<HoodieRecord<T>> recordItr) {
        HoodieCreateHandle createHandle = new HoodieCreateHandle(config, commitTime, this, partitionPath, fileId,
                recordItr);
        createHandle.write();
        return Collections.singletonList(Collections.singletonList(createHandle.close())).iterator();
    }

    @SuppressWarnings("unchecked")
    @Override
    public Iterator<List<WriteStatus>> handleUpsertPartition(String commitTime, Integer partition,
            Iterator recordItr, Partitioner partitioner) {
        UpsertPartitioner upsertPartitioner = (UpsertPartitioner) partitioner;
        BucketInfo binfo = upsertPartitioner.getBucketInfo(partition);
        BucketType btype = binfo.bucketType;
        try {
            if (btype.equals(BucketType.INSERT)) {
                return handleInsert(commitTime, recordItr);
            } else if (btype.equals(BucketType.UPDATE)) {
                return handleUpdate(commitTime, binfo.fileLoc, recordItr);
            } else {
                throw new HoodieUpsertException("Unknown bucketType " + btype + " for partition :" + partition);
            }
        } catch (Throwable t) {
            String msg = "Error upserting bucketType " + btype + " for partition :" + partition;
            logger.error(msg, t);
            throw new HoodieUpsertException(msg, t);
        }
    }

    @Override
    public Iterator<List<WriteStatus>> handleInsertPartition(String commitTime, Integer partition,
            Iterator recordItr, Partitioner partitioner) {
        return handleUpsertPartition(commitTime, partition, recordItr, partitioner);
    }

    /**
     * Performs cleaning of partition paths according to the cleaning policy and returns the clean
     * stats for each partition cleaned. Handles skew across partitions by making the individual file
     * to delete the unit of task distribution.
     *
     * @throws IllegalArgumentException if an unknown cleaning policy is provided
     */
    @Override
    public List<HoodieCleanStat> clean(JavaSparkContext jsc) {
        try {
            FileSystem fs = getMetaClient().getFs();
            List<String> partitionsToClean = FSUtils.getAllPartitionPaths(fs, getMetaClient().getBasePath(),
                    config.shouldAssumeDatePartitioning());
            logger.info(
                    "Partitions to clean up : " + partitionsToClean + ", with policy " + config.getCleanerPolicy());
            if (partitionsToClean.isEmpty()) {
                logger.info("Nothing to clean here mom. It is already clean");
                return Collections.emptyList();
            }
            return cleanPartitionPaths(partitionsToClean, jsc);
        } catch (IOException e) {
            throw new HoodieIOException("Failed to clean up after commit", e);
        }
    }

    /**
     * Common method used for cleaning out parquet files under a partition path during rollback of a
     * set of commits
     */
    protected Map<FileStatus, Boolean> deleteCleanedFiles(String partitionPath, List<String> commits)
            throws IOException {
        Map<FileStatus, Boolean> results = Maps.newHashMap();
        // PathFilter to get all parquet files written by the given commits, which need to be deleted
        PathFilter filter = (path) -> {
            if (path.toString().contains(".parquet")) {
                String fileCommitTime = FSUtils.getCommitTime(path.getName());
                return commits.contains(fileCommitTime);
            }
            return false;
        };
        deleteCleanedFiles(results, partitionPath, filter);
        return results;
    }

    /**
     * Common method used for cleaning out parquet files under a partition path during rollback of a
     * set of commits
     */
    protected Map<FileStatus, Boolean> deleteCleanedFiles(Map<FileStatus, Boolean> results, String partitionPath,
            PathFilter filter) throws IOException {
        logger.info("Cleaning path " + partitionPath);
        FileSystem fs = getMetaClient().getFs();
        FileStatus[] toBeDeleted = fs.listStatus(new Path(config.getBasePath(), partitionPath), filter);
        for (FileStatus file : toBeDeleted) {
            boolean success = fs.delete(file.getPath(), false);
            results.put(file, success);
            logger.info("Delete file " + file.getPath() + "\t" + success);
        }
        return results;
    }

    @Override
    public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, List<String> commits) throws IOException {
        String actionType = metaClient.getCommitActionType();
        HoodieActiveTimeline activeTimeline = this.getActiveTimeline();
        List<String> inflights = this.getInflightCommitTimeline().getInstants().map(HoodieInstant::getTimestamp)
                .collect(Collectors.toList());

        // Atomically unpublish all the commits
        commits.stream().filter(s -> !inflights.contains(s)).map(s -> new HoodieInstant(false, actionType, s))
                .forEach(activeTimeline::revertToInflight);
        logger.info("Unpublished " + commits);

        // delete all the data files for all these commits
        logger.info("Clean out all parquet files generated for commits: " + commits);
        List<HoodieRollbackStat> stats = jsc
                .parallelize(FSUtils.getAllPartitionPaths(metaClient.getFs(), getMetaClient().getBasePath(),
                        config.shouldAssumeDatePartitioning()))
                .map((Function<String, HoodieRollbackStat>) partitionPath -> {
                    // Scan this partition for files written by these commits and delete them
                    Map<FileStatus, Boolean> results = deleteCleanedFiles(partitionPath, commits);
                    return HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath)
                            .withDeletedFileResults(results).build();
                }).collect();

        // clean temporary data files
        cleanTemporaryDataFiles(jsc);

        // Remove the rolled back inflight commits
        commits.stream().map(s -> new HoodieInstant(true, actionType, s)).forEach(activeTimeline::deleteInflight);
        logger.info("Deleted inflight commits " + commits);
        return stats;
    }

    /**
     * Finalizes the written data files by renaming them from the temporary folder to their final
     * location, if the temporary folder is in use
     *
     * @param writeStatuses list of write results, each a (String, HoodieWriteStat) tuple
     * @return number of files finalized, or empty if no temporary folder is configured
     */
    @Override
    @SuppressWarnings("unchecked")
    public Optional<Integer> finalizeWrite(JavaSparkContext jsc, List writeStatuses) {
        if (!config.shouldUseTempFolderForCopyOnWrite()) {
            return Optional.empty();
        }

        // This is to rename each data file from temporary path to its final location
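        // Only stats that recorded a temporary path need a rename; files written directly to their
        // final location are left untouched.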
        List<Tuple2<String, Boolean>> results = jsc.parallelize(writeStatuses, config.getFinalizeWriteParallelism())
                .map(writeStatus -> {
                    Tuple2<String, HoodieWriteStat> writeStatTuple2 = (Tuple2<String, HoodieWriteStat>) writeStatus;
                    HoodieWriteStat writeStat = writeStatTuple2._2();
                    final FileSystem fs = getMetaClient().getFs();
                    final Path finalPath = new Path(config.getBasePath(), writeStat.getPath());

                    if (writeStat.getTempPath() != null) {
                        final Path tempPath = new Path(config.getBasePath(), writeStat.getTempPath());
                        boolean success;
                        try {
                            logger.info("Renaming temporary file: " + tempPath + " to " + finalPath);
                            success = fs.rename(tempPath, finalPath);
                        } catch (IOException e) {
                            throw new HoodieIOException("Failed to rename file: " + tempPath + " to " + finalPath);
                        }

                        if (!success) {
                            throw new HoodieIOException("Failed to rename file: " + tempPath + " to " + finalPath);
                        }
                    }

                    return new Tuple2<>(writeStat.getPath(), true);
                }).collect();

        // clean temporary data files
        cleanTemporaryDataFiles(jsc);

        return Optional.of(results.size());
    }

    /**
     * Cleans temporary data files produced by a previously failed commit or by retried Spark stages.
     */
    private void cleanTemporaryDataFiles(JavaSparkContext jsc) {
        if (!config.shouldUseTempFolderForCopyOnWrite()) {
            return;
        }

        final FileSystem fs = getMetaClient().getFs();
        final Path temporaryFolder = new Path(config.getBasePath(), HoodieTableMetaClient.TEMPFOLDER_NAME);
        try {
            if (!fs.exists(temporaryFolder)) {
                logger.info("Temporary folder does not exist: " + temporaryFolder);
                return;
            }
            List<FileStatus> fileStatusesList = Arrays.asList(fs.listStatus(temporaryFolder));
            List<Tuple2<String, Boolean>> results = jsc
                    .parallelize(fileStatusesList, config.getFinalizeWriteParallelism()).map(fileStatus -> {
                        FileSystem fs1 = getMetaClient().getFs();
                        boolean success = fs1.delete(fileStatus.getPath(), false);
                        logger.info("Deleting file in temporary folder" + fileStatus.getPath() + "\t" + success);
                        return new Tuple2<>(fileStatus.getPath().toString(), success);
                    }).collect();

            for (Tuple2<String, Boolean> result : results) {
                if (!result._2()) {
                    logger.info("Failed to delete file: " + result._1());
                    throw new HoodieIOException("Failed to delete file in temporary folder: " + result._1());
                }
            }
        } catch (IOException e) {
            throw new HoodieIOException("Failed to clean data files in temporary folder: " + temporaryFolder);
        }
    }

    private List<HoodieCleanStat> cleanPartitionPaths(List<String> partitionsToClean, JavaSparkContext jsc) {
        int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
        logger.info("Using cleanerParallelism: " + cleanerParallelism);
        List<Tuple2<String, PartitionCleanStat>> partitionCleanStats = jsc
                .parallelize(partitionsToClean, cleanerParallelism)
                .flatMapToPair(getFilesToDeleteFunc(this, config)).repartition(cleanerParallelism) // repartition to remove skews
                .mapPartitionsToPair(deleteFilesFunc(this)).reduceByKey(
                        // merge partition level clean stats below
                        (Function2<PartitionCleanStat, PartitionCleanStat, PartitionCleanStat>) (e1, e2) -> e1
                                .merge(e2))
                .collect();

        Map<String, PartitionCleanStat> partitionCleanStatsMap = partitionCleanStats.stream()
                .collect(Collectors.toMap(e -> e._1(), e -> e._2()));

        HoodieCleanHelper cleaner = new HoodieCleanHelper(this, config);
        // Return a HoodieCleanStat for each partition passed.
        return partitionsToClean.stream().map(partitionPath -> {
            PartitionCleanStat partitionCleanStat = (partitionCleanStatsMap.containsKey(partitionPath))
                    ? partitionCleanStatsMap.get(partitionPath)
                    : new PartitionCleanStat(partitionPath);
            return HoodieCleanStat.newBuilder().withPolicy(config.getCleanerPolicy())
                    .withPartitionPath(partitionPath)
                    .withEarliestCommitRetained(cleaner.getEarliestCommitToRetain())
                    .withDeletePathPattern(partitionCleanStat.deletePathPatterns)
                    .withSuccessfulDeletes(partitionCleanStat.successDeleteFiles)
                    .withFailedDeletes(partitionCleanStat.failedDeleteFiles).build();
        }).collect(Collectors.toList());
    }

    enum BucketType {
        UPDATE, INSERT
    }

    /**
     * Consumer that dequeues records from the bounded in-memory queue and writes them to the merge handle
     */
    private static class UpdateHandler extends BoundedInMemoryQueueConsumer<GenericRecord, Void> {

        private final HoodieMergeHandle upsertHandle;

        private UpdateHandler(HoodieMergeHandle upsertHandle) {
            this.upsertHandle = upsertHandle;
        }

        @Override
        protected void consumeOneRecord(GenericRecord record) {
            upsertHandle.write(record);
        }

        @Override
        protected void finish() {
        }

        @Override
        protected Void getResult() {
            return null;
        }
    }

    private static class PartitionCleanStat implements Serializable {

        private final String partitionPath;
        private final List<String> deletePathPatterns = new ArrayList<>();
        private final List<String> successDeleteFiles = new ArrayList<>();
        private final List<String> failedDeleteFiles = new ArrayList<>();

        private PartitionCleanStat(String partitionPath) {
            this.partitionPath = partitionPath;
        }

        private void addDeletedFileResult(String deletePathStr, Boolean deletedFileResult) {
            if (deletedFileResult) {
                successDeleteFiles.add(deletePathStr);
            } else {
                failedDeleteFiles.add(deletePathStr);
            }
        }

        private void addDeleteFilePatterns(String deletePathStr) {
            deletePathPatterns.add(deletePathStr);
        }

        private PartitionCleanStat merge(PartitionCleanStat other) {
            if (!this.partitionPath.equals(other.partitionPath)) {
                throw new RuntimeException(String.format("partitionPath is not a match: (%s, %s)", partitionPath,
                        other.partitionPath));
            }
            successDeleteFiles.addAll(other.successDeleteFiles);
            deletePathPatterns.addAll(other.deletePathPatterns);
            failedDeleteFiles.addAll(other.failedDeleteFiles);
            return this;
        }
    }

    /**
     * Helper class for a small file's location and its actual size on disk
     */
    class SmallFile implements Serializable {

        HoodieRecordLocation location;
        long sizeBytes;

        @Override
        public String toString() {
            final StringBuilder sb = new StringBuilder("SmallFile {");
            sb.append("location=").append(location).append(", ");
            sb.append("sizeBytes=").append(sizeBytes);
            sb.append('}');
            return sb.toString();
        }
    }

    /**
     * Helper class for an insert bucket along with the weight [0.0, 1.0] that defines the fraction of
     * incoming inserts that should be allocated to the bucket
     */
    class InsertBucket implements Serializable {

        int bucketNumber;
        // fraction of total inserts, that should go into this bucket
        double weight;

        @Override
        public String toString() {
            final StringBuilder sb = new StringBuilder("WorkloadStat {");
            sb.append("bucketNumber=").append(bucketNumber).append(", ");
            sb.append("weight=").append(weight);
            sb.append('}');
            return sb.toString();
        }
    }

    /**
     * Helper class for a bucket's type (INSERT or UPDATE) and its file location
     */
    class BucketInfo implements Serializable {

        BucketType bucketType;
        String fileLoc;

        @Override
        public String toString() {
            final StringBuilder sb = new StringBuilder("BucketInfo {");
            sb.append("bucketType=").append(bucketType).append(", ");
            sb.append("fileLoc=").append(fileLoc);
            sb.append('}');
            return sb.toString();
        }
    }

    /**
     * Packs incoming records to be upserted into buckets (1 bucket = 1 RDD partition)
     */
    class UpsertPartitioner extends Partitioner {

        /**
         * List of all small files collected across partition paths; inserts are packed into these to
         * grow them towards the target file size
         */
        List<SmallFile> smallFiles = new ArrayList<SmallFile>();
        /**
         * Total number of RDD partitions; determined by the number of buckets we want to pack the
         * incoming workload into
         */
        private int totalBuckets = 0;
        /**
         * Stat for the current workload. Helps in determining total inserts, upserts etc.
         */
        private WorkloadStat globalStat;
        /**
         * Helps decide which bucket an incoming update should go to.
         */
        private HashMap<String, Integer> updateLocationToBucket;
        /**
         * Helps us pack inserts into 1 or more buckets depending on number of incoming records.
         */
        private HashMap<String, List<InsertBucket>> partitionPathToInsertBuckets;
        /**
         * Remembers what type each bucket is for later.
         */
        private HashMap<Integer, BucketInfo> bucketInfoMap;

        UpsertPartitioner(WorkloadProfile profile) {
            updateLocationToBucket = new HashMap<>();
            partitionPathToInsertBuckets = new HashMap<>();
            bucketInfoMap = new HashMap<>();
            globalStat = profile.getGlobalStat();

            assignUpdates(profile);
            assignInserts(profile);

            logger.info("Total Buckets :" + totalBuckets + ", " + "buckets info => " + bucketInfoMap + ", \n"
                    + "Partition to insert buckets => " + partitionPathToInsertBuckets + ", \n"
                    + "UpdateLocations mapped to buckets =>" + updateLocationToBucket);
        }

        private void assignUpdates(WorkloadProfile profile) {
            // each update location gets a partition
            WorkloadStat gStat = profile.getGlobalStat();
            for (Map.Entry<String, Pair<String, Long>> updateLocEntry : gStat.getUpdateLocationToCount()
                    .entrySet()) {
                addUpdateBucket(updateLocEntry.getKey());
            }
        }

        private int addUpdateBucket(String fileLoc) {
            int bucket = totalBuckets;
            updateLocationToBucket.put(fileLoc, bucket);
            BucketInfo bucketInfo = new BucketInfo();
            bucketInfo.bucketType = BucketType.UPDATE;
            bucketInfo.fileLoc = fileLoc;
            bucketInfoMap.put(totalBuckets, bucketInfo);
            totalBuckets++;
            return bucket;
        }

        private void assignInserts(WorkloadProfile profile) {
            // for new inserts, compute buckets depending on how many records we have for each partition
            Set<String> partitionPaths = profile.getPartitionPaths();
            long averageRecordSize = averageBytesPerRecord();
            logger.info("AvgRecordSize => " + averageRecordSize);
            for (String partitionPath : partitionPaths) {
                WorkloadStat pStat = profile.getWorkloadStat(partitionPath);
                if (pStat.getNumInserts() > 0) {

                    List<SmallFile> smallFiles = getSmallFiles(partitionPath);
                    logger.info("For partitionPath : " + partitionPath + " Small Files => " + smallFiles);

                    long totalUnassignedInserts = pStat.getNumInserts();
                    List<Integer> bucketNumbers = new ArrayList<>();
                    List<Long> recordsPerBucket = new ArrayList<>();

                    // first try packing this into one of the smallFiles
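                    // A small file can absorb roughly (parquetMaxFileSize - currentSizeBytes) / averageRecordSize
                    // additional records before it reaches the configured target size.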
                    for (SmallFile smallFile : smallFiles) {
                        long recordsToAppend = Math.min(
                                (config.getParquetMaxFileSize() - smallFile.sizeBytes) / averageRecordSize,
                                totalUnassignedInserts);
                        if (recordsToAppend > 0 && totalUnassignedInserts > 0) {
                            // create a new bucket or re-use an existing bucket
                            int bucket;
                            if (updateLocationToBucket.containsKey(smallFile.location.getFileId())) {
                                bucket = updateLocationToBucket.get(smallFile.location.getFileId());
                                logger.info("Assigning " + recordsToAppend + " inserts to existing update bucket "
                                        + bucket);
                            } else {
                                bucket = addUpdateBucket(smallFile.location.getFileId());
                                logger.info(
                                        "Assigning " + recordsToAppend + " inserts to new update bucket " + bucket);
                            }
                            bucketNumbers.add(bucket);
                            recordsPerBucket.add(recordsToAppend);
                            totalUnassignedInserts -= recordsToAppend;
                        }
                    }

                    // if we have anything more, create new insert buckets, like normal
                    if (totalUnassignedInserts > 0) {
                        long insertRecordsPerBucket = config.getCopyOnWriteInsertSplitSize();
                        if (config.shouldAutoTuneInsertSplits()) {
                            insertRecordsPerBucket = config.getParquetMaxFileSize() / averageRecordSize;
                        }

                        int insertBuckets = (int) Math.max(totalUnassignedInserts / insertRecordsPerBucket, 1L);
                        logger.info("After small file assignment: unassignedInserts => " + totalUnassignedInserts
                                + ", totalInsertBuckets => " + insertBuckets + ", recordsPerBucket => "
                                + insertRecordsPerBucket);
                        for (int b = 0; b < insertBuckets; b++) {
                            bucketNumbers.add(totalBuckets);
                            recordsPerBucket.add(totalUnassignedInserts / insertBuckets);
                            BucketInfo bucketInfo = new BucketInfo();
                            bucketInfo.bucketType = BucketType.INSERT;
                            bucketInfoMap.put(totalBuckets, bucketInfo);
                            totalBuckets++;
                        }
                    }

                    // Go over all such buckets, and assign weights as per amount of incoming inserts.
                    List<InsertBucket> insertBuckets = new ArrayList<>();
                    for (int i = 0; i < bucketNumbers.size(); i++) {
                        InsertBucket bkt = new InsertBucket();
                        bkt.bucketNumber = bucketNumbers.get(i);
                        bkt.weight = (1.0 * recordsPerBucket.get(i)) / pStat.getNumInserts();
                        insertBuckets.add(bkt);
                    }
                    logger.info(
                            "Total insert buckets for partition path " + partitionPath + " => " + insertBuckets);
                    partitionPathToInsertBuckets.put(partitionPath, insertBuckets);
                }
            }
        }

        /**
         * Returns a list of small files in the given partition path
         */
        protected List<SmallFile> getSmallFiles(String partitionPath) {

            // smallFiles only for partitionPath
            List<SmallFile> smallFileLocations = new ArrayList<>();

            HoodieTimeline commitTimeline = getCompletedCommitTimeline();

            if (!commitTimeline.empty()) { // if we have some commits
                HoodieInstant latestCommitTime = commitTimeline.lastInstant().get();
                List<HoodieDataFile> allFiles = getROFileSystemView()
                        .getLatestDataFilesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp())
                        .collect(Collectors.toList());

                for (HoodieDataFile file : allFiles) {
                    if (file.getFileSize() < config.getParquetSmallFileLimit()) {
                        String filename = file.getFileName();
                        SmallFile sf = new SmallFile();
                        sf.location = new HoodieRecordLocation(FSUtils.getCommitTime(filename),
                                FSUtils.getFileId(filename));
                        sf.sizeBytes = file.getFileSize();
                        smallFileLocations.add(sf);
                        // Update the global small files list
                        smallFiles.add(sf);
                    }
                }
            }

            return smallFileLocations;
        }

        /**
         * Obtains the average record size based on records written during the last commit. Used for
         * estimating how many records fit into one file.
         */
        private long averageBytesPerRecord() {
            long avgSize = 0L;
            HoodieTimeline commitTimeline = metaClient.getActiveTimeline().getCommitTimeline()
                    .filterCompletedInstants();
            try {
                if (!commitTimeline.empty()) {
                    HoodieInstant latestCommitTime = commitTimeline.lastInstant().get();
                    HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
                            .fromBytes(commitTimeline.getInstantDetails(latestCommitTime).get());
                    avgSize = (long) Math.ceil((1.0 * commitMetadata.fetchTotalBytesWritten())
                            / commitMetadata.fetchTotalRecordsWritten());
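                    // Example: 1,073,741,824 bytes written over 10,000,000 records gives
                    // ceil(107.37) = 108 bytes per record.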
                }
            } catch (Throwable t) {
                // make this fail safe.
                logger.error("Error trying to compute average bytes/record ", t);
            }
            return avgSize <= 0L ? config.getCopyOnWriteRecordSizeEstimate() : avgSize;
        }

        public BucketInfo getBucketInfo(int bucketNumber) {
            return bucketInfoMap.get(bucketNumber);
        }

        public List<InsertBucket> getInsertBuckets(String partitionPath) {
            return partitionPathToInsertBuckets.get(partitionPath);
        }

        @Override
        public int numPartitions() {
            return totalBuckets;
        }

        @Override
        public int getPartition(Object key) {
            Tuple2<HoodieKey, Option<HoodieRecordLocation>> keyLocation = (Tuple2<HoodieKey, Option<HoodieRecordLocation>>) key;
            if (keyLocation._2().isDefined()) {
                HoodieRecordLocation location = keyLocation._2().get();
                return updateLocationToBucket.get(location.getFileId());
            } else {
                List<InsertBucket> targetBuckets = partitionPathToInsertBuckets
                        .get(keyLocation._1().getPartitionPath());
                // pick the target bucket to use based on the weights.
                double totalWeight = 0.0;
                final long totalInserts = Math.max(1, globalStat.getNumInserts());
                final long hashOfKey = Hashing.md5()
                        .hashString(keyLocation._1().getRecordKey(), StandardCharsets.UTF_8).asLong();
                final double r = 1.0 * Math.floorMod(hashOfKey, totalInserts) / totalInserts;
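                // r is a deterministic pseudo-random value in [0.0, 1.0) derived from the record key,
                // so the same key always maps to the same insert bucket; walking the cumulative bucket
                // weights distributes inserts in proportion to each bucket's share.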
                for (InsertBucket insertBucket : targetBuckets) {
                    totalWeight += insertBucket.weight;
                    if (r <= totalWeight) {
                        return insertBucket.bucketNumber;
                    }
                }
                // return first one, by default
                return targetBuckets.get(0).bucketNumber;
            }
        }
    }
}
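
A short usage sketch follows. It is illustrative only and rests on a few assumptions: a local Spark master, a hypothetical base path /tmp/hoodie/test_table whose .hoodie metadata directory has already been initialized, and default write-config settings. In practice the table is usually obtained through the Hoodie write client rather than constructed directly, so treat this as a minimal sketch of wiring the class up, not as the recommended entry point.

import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.table.HoodieCopyOnWriteTable;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class CopyOnWriteTableExample {

    public static void main(String[] args) {
        // Local Spark context; in a real deployment this is provided by the application.
        SparkConf sparkConf = new SparkConf().setAppName("hoodie-cow-example").setMaster("local[2]");
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);

        // Hypothetical base path; the dataset's .hoodie metadata folder is assumed to exist there.
        HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
                .withPath("/tmp/hoodie/test_table")
                .build();

        // Construct the copy-on-write table representation shown in the listing above.
        HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);

        // A copy-on-write table requires a workload profile before upserts can be partitioned,
        // and it rejects compaction scheduling outright (see scheduleCompaction above).
        System.out.println("Workload profile needed: " + table.isWorkloadProfileNeeded());

        jsc.stop();
    }
}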