com.uber.hoodie.cli.commands.FileSystemViewCommand.java Source code

Introduction

Here is the source code for com.uber.hoodie.cli.commands.FileSystemViewCommand.java. The class is a Spring Shell command component from the Hoodie (now Apache Hudi) CLI: it registers the show fsview all and show fsview latest commands, which print a table's file-system view as formatted tables.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.uber.hoodie.cli.commands;

import com.uber.hoodie.cli.HoodieCLI;
import com.uber.hoodie.cli.HoodiePrintHelper;
import com.uber.hoodie.cli.TableHeader;
import com.uber.hoodie.common.model.FileSlice;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.timeline.HoodieDefaultTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
import com.uber.hoodie.common.util.NumericUtils;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.BiPredicate;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliCommand;
import org.springframework.shell.core.annotation.CliOption;
import org.springframework.stereotype.Component;

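/**
 * CLI commands that render a Hudi table's file-system view: the grouping of
 * base (data) files and delta (log) files into file-groups and file-slices.
 */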
@Component
public class FileSystemViewCommand implements CommandMarker {

    @CliCommand(value = "show fsview all", help = "Show entire file-system view")
    public String showAllFileSlices(@CliOption(key = {
            "pathRegex" }, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*") String globRegex,
            @CliOption(key = {
                    "readOptimizedOnly" }, help = "Only display read-optimized view", unspecifiedDefaultValue = "false") boolean readOptimizedOnly,
            @CliOption(key = {
                    "maxInstant" }, help = "File-Slices upto this instant are displayed", unspecifiedDefaultValue = "") String maxInstant,
            @CliOption(key = {
                    "includeMax" }, help = "Include Max Instant", unspecifiedDefaultValue = "false") boolean includeMaxInstant,
            @CliOption(key = {
                    "includeInflight" }, help = "Include Inflight Instants", unspecifiedDefaultValue = "false") boolean includeInflight,
            @CliOption(key = {
                    "excludeCompaction" }, help = "Exclude compaction Instants", unspecifiedDefaultValue = "false") boolean excludeCompaction,
            @CliOption(key = {
                    "limit" }, help = "Limit rows to be displayed", unspecifiedDefaultValue = "-1") Integer limit,
            @CliOption(key = {
                    "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
            @CliOption(key = {
                    "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
            @CliOption(key = {
                    "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
            throws IOException {

        HoodieTableFileSystemView fsView = buildFileSystemView(globRegex, maxInstant, readOptimizedOnly,
                includeMaxInstant, includeInflight, excludeCompaction);
        List<Comparable[]> rows = new ArrayList<>();
        fsView.getAllFileGroups().forEach(fg -> fg.getAllFileSlices().forEach(fs -> {
            int idx = 0;
            // For ReadOptimized Views, do not display any delta-file related columns
            Comparable[] row = new Comparable[readOptimizedOnly ? 5 : 8];
            row[idx++] = fg.getPartitionPath();
            row[idx++] = fg.getFileGroupId().getFileId();
            row[idx++] = fs.getBaseInstantTime();
            row[idx++] = fs.getDataFile().isPresent() ? fs.getDataFile().get().getPath() : "";
            row[idx++] = fs.getDataFile().isPresent() ? fs.getDataFile().get().getFileSize() : -1;
            if (!readOptimizedOnly) {
                row[idx++] = fs.getLogFiles().count();
                row[idx++] = fs.getLogFiles().mapToLong(lf -> lf.getFileSize()).sum();
                row[idx++] = fs.getLogFiles().collect(Collectors.toList()).toString();
            }
            rows.add(row);
        }));
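        // Render the two size columns as human-readable byte counts.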
        Function<Object, String> converterFunction = entry -> NumericUtils
                .humanReadableByteCount((Double.valueOf(entry.toString())));
        Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
        fieldNameToConverterMap.put("Total Delta File Size", converterFunction);
        fieldNameToConverterMap.put("Data-File Size", converterFunction);

        TableHeader header = new TableHeader().addTableHeaderField("Partition").addTableHeaderField("FileId")
                .addTableHeaderField("Base-Instant").addTableHeaderField("Data-File")
                .addTableHeaderField("Data-File Size");
        if (!readOptimizedOnly) {
            header = header.addTableHeaderField("Num Delta Files").addTableHeaderField("Total Delta File Size")
                    .addTableHeaderField("Delta Files");
        }
        return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly,
                rows);
    }

    @CliCommand(value = "show fsview latest", help = "Show latest file-system view")
    public String showLatestFileSlices(
            @CliOption(key = {
                    "partitionPath" }, help = "A valid paritition path", mandatory = true) String partition,
            @CliOption(key = {
                    "readOptimizedOnly" }, help = "Only display read-optimized view", unspecifiedDefaultValue = "false") boolean readOptimizedOnly,
            @CliOption(key = {
                    "maxInstant" }, help = "File-Slices upto this instant are displayed", unspecifiedDefaultValue = "") String maxInstant,
            @CliOption(key = {
                    "merge" }, help = "Merge File Slices due to pending compaction", unspecifiedDefaultValue = "true") final boolean merge,
            @CliOption(key = {
                    "includeMax" }, help = "Include Max Instant", unspecifiedDefaultValue = "false") boolean includeMaxInstant,
            @CliOption(key = {
                    "includeInflight" }, help = "Include Inflight Instants", unspecifiedDefaultValue = "false") boolean includeInflight,
            @CliOption(key = {
                    "excludeCompaction" }, help = "Exclude compaction Instants", unspecifiedDefaultValue = "false") boolean excludeCompaction,
            @CliOption(key = {
                    "limit" }, help = "Limit rows to be displayed", unspecifiedDefaultValue = "-1") Integer limit,
            @CliOption(key = {
                    "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
            @CliOption(key = {
                    "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
            @CliOption(key = {
                    "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
            throws IOException {

        HoodieTableFileSystemView fsView = buildFileSystemView(partition, maxInstant, readOptimizedOnly,
                includeMaxInstant, includeInflight, excludeCompaction);
        List<Comparable[]> rows = new ArrayList<>();

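        // With merge enabled (the default), file-slices affected by pending compaction
        // are merged into the view; maxInstant defaults to the timestamp of the last
        // completed or compaction instant on the active timeline.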
        final Stream<FileSlice> fileSliceStream;
        if (!merge) {
            fileSliceStream = fsView.getLatestFileSlices(partition);
        } else {
            if (maxInstant.isEmpty()) {
                maxInstant = HoodieCLI.tableMetadata.getActiveTimeline().filterCompletedAndCompactionInstants()
                        .lastInstant().get().getTimestamp();
            }
            fileSliceStream = fsView.getLatestMergedFileSlicesBeforeOrOn(partition, maxInstant);
        }

        fileSliceStream.forEach(fs -> {
            int idx = 0;
            Comparable[] row = new Comparable[readOptimizedOnly ? 5 : 13];
            row[idx++] = partition;
            row[idx++] = fs.getFileId();
            row[idx++] = fs.getBaseInstantTime();
            row[idx++] = fs.getDataFile().isPresent() ? fs.getDataFile().get().getPath() : "";

            long dataFileSize = fs.getDataFile().isPresent() ? fs.getDataFile().get().getFileSize() : -1;
            row[idx++] = dataFileSize;

            if (!readOptimizedOnly) {
                row[idx++] = fs.getLogFiles().count();
                row[idx++] = fs.getLogFiles().mapToLong(lf -> lf.getFileSize()).sum();
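                // Split log-file sizes by whether the log file's base commit matches this
                // slice's base instant, i.e. scheduled vs. unscheduled for compaction.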
                long logFilesScheduledForCompactionTotalSize = fs.getLogFiles()
                        .filter(lf -> lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
                        .mapToLong(lf -> lf.getFileSize()).sum();
                row[idx++] = logFilesScheduledForCompactionTotalSize;

                long logFilesUnscheduledTotalSize = fs.getLogFiles()
                        .filter(lf -> !lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
                        .mapToLong(lf -> lf.getFileSize()).sum();
                row[idx++] = logFilesUnscheduledTotalSize;

                double logSelectedForCompactionToBaseRatio = dataFileSize > 0
                        ? logFilesScheduledForCompactionTotalSize / (dataFileSize * 1.0)
                        : -1;
                row[idx++] = logSelectedForCompactionToBaseRatio;
                double logUnscheduledToBaseRatio = dataFileSize > 0
                        ? logFilesUnscheduledTotalSize / (dataFileSize * 1.0)
                        : -1;
                row[idx++] = logUnscheduledToBaseRatio;

                row[idx++] = fs.getLogFiles().filter(lf -> lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
                        .collect(Collectors.toList()).toString();
                row[idx++] = fs.getLogFiles().filter(lf -> !lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
                        .collect(Collectors.toList()).toString();
            }
            rows.add(row);
        });

        Function<Object, String> converterFunction = entry -> NumericUtils
                .humanReadableByteCount((Double.valueOf(entry.toString())));
        Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
        fieldNameToConverterMap.put("Data-File Size", converterFunction);
        if (!readOptimizedOnly) {
            fieldNameToConverterMap.put("Total Delta Size", converterFunction);
            fieldNameToConverterMap.put("Delta Size - compaction scheduled", converterFunction);
            fieldNameToConverterMap.put("Delta Size - compaction unscheduled", converterFunction);
        }

        TableHeader header = new TableHeader().addTableHeaderField("Partition").addTableHeaderField("FileId")
                .addTableHeaderField("Base-Instant").addTableHeaderField("Data-File")
                .addTableHeaderField("Data-File Size");

        if (!readOptimizedOnly) {
            header = header.addTableHeaderField("Num Delta Files").addTableHeaderField("Total Delta Size")
                    .addTableHeaderField("Delta Size - compaction scheduled")
                    .addTableHeaderField("Delta Size - compaction unscheduled")
                    .addTableHeaderField("Delta To Base Ratio - compaction scheduled")
                    .addTableHeaderField("Delta To Base Ratio - compaction unscheduled")
                    .addTableHeaderField("Delta Files - compaction scheduled")
                    .addTableHeaderField("Delta Files - compaction unscheduled");
        }
        return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly,
                rows);
    }

    /**
     * Builds a file-system view restricted to the selected partitions and instants.
     *
     * @param globRegex path regex (relative to the table base-path) selecting partitions
     * @param maxInstant latest instant timestamp up to which file-slices are displayed ("" for no limit)
     * @param readOptimizedOnly restrict the timeline to the read-optimized (commit) view
     * @param includeMaxInstant whether maxInstant itself is included
     * @param includeInflight include inflight instants
     * @param excludeCompaction exclude compaction instants
     * @return a HoodieTableFileSystemView over the matching files and the filtered timeline
     * @throws IOException if globbing the file-system for partition files fails
     */
    private HoodieTableFileSystemView buildFileSystemView(String globRegex, String maxInstant,
            boolean readOptimizedOnly, boolean includeMaxInstant, boolean includeInflight,
            boolean excludeCompaction) throws IOException {
        HoodieTableMetaClient metaClient = new HoodieTableMetaClient(HoodieCLI.tableMetadata.getHadoopConf(),
                HoodieCLI.tableMetadata.getBasePath(), true);
        FileSystem fs = HoodieCLI.fs;
        String globPath = String.format("%s/%s/*", HoodieCLI.tableMetadata.getBasePath(), globRegex);
        FileStatus[] statuses = fs.globStatus(new Path(globPath));
        Stream<HoodieInstant> instantsStream = null;

        HoodieTimeline timeline = null;
        if (readOptimizedOnly) {
            timeline = metaClient.getActiveTimeline().getCommitTimeline();
        } else if (excludeCompaction) {
            timeline = metaClient.getActiveTimeline().getCommitsTimeline();
        } else {
            timeline = metaClient.getActiveTimeline().getCommitsAndCompactionTimeline();
        }

        if (!includeInflight) {
            timeline = timeline.filterCompletedInstants();
        }

        instantsStream = timeline.getInstants();

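        // Keep only instants up to maxInstant: inclusive when includeMaxInstant is set
        // (GREATER_OR_EQUAL), exclusive otherwise (GREATER); the predicate is applied
        // as predicate.test(maxInstant, instantTimestamp).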
        if (!maxInstant.isEmpty()) {
            final BiPredicate<String, String> predicate;
            if (includeMaxInstant) {
                predicate = HoodieTimeline.GREATER_OR_EQUAL;
            } else {
                predicate = HoodieTimeline.GREATER;
            }
            instantsStream = instantsStream.filter(is -> predicate.test(maxInstant, is.getTimestamp()));
        }

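        // The intersection cast makes the getInstantDetails method reference
        // Serializable for use inside the (serializable) timeline.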
        HoodieTimeline filteredTimeline = new HoodieDefaultTimeline(instantsStream,
                (Function<HoodieInstant, Optional<byte[]>> & Serializable) metaClient
                        .getActiveTimeline()::getInstantDetails);
        return new HoodieTableFileSystemView(metaClient, filteredTimeline, statuses);
    }
}
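
Example

Both commands run inside the interactive Hudi CLI shell once a table is connected. The option names come from the @CliOption keys above; the prompt, table name, and partition path below are illustrative only, not taken from the source:

hoodie:trips->show fsview all --pathRegex 2016/08/02 --limit 10
hoodie:trips->show fsview latest --partitionPath 2016/08/02 --merge true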