com.uber.hoodie.cli.commands.StatsCommand.java Source code


Introduction

Here is the source code for com.uber.hoodie.cli.commands.StatsCommand.java, a Spring Shell command component from the Hoodie CLI. It registers two commands: stats wa, which reports per-commit write amplification, and stats filesizes, which summarizes the size distribution of a table's data files.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.uber.hoodie.cli.commands;

import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;
import com.uber.hoodie.cli.HoodieCLI;
import com.uber.hoodie.cli.HoodiePrintHelper;
import com.uber.hoodie.cli.TableHeader;
import com.uber.hoodie.common.model.HoodieCommitMetadata;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.common.util.NumericUtils;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliAvailabilityIndicator;
import org.springframework.shell.core.annotation.CliCommand;
import org.springframework.shell.core.annotation.CliOption;
import org.springframework.stereotype.Component;

@Component
public class StatsCommand implements CommandMarker {

    // Size cap for each UniformReservoir backing the file-size histograms below.
    private static final int MAX_FILES = 1000000;

    @CliAvailabilityIndicator({ "stats wa" })
    public boolean isWriteAmpAvailable() {
        // Stats commands are only available once the CLI is connected to a table.
        return HoodieCLI.tableMetadata != null;
    }

    @CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were upserted to how many "
            + "records were actually written")
    public String writeAmplificationStats(
            @CliOption(key = {
                    "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
            @CliOption(key = {
                    "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
            @CliOption(key = {
                    "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
            @CliOption(key = {
                    "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
            throws IOException {

        long totalRecordsUpserted = 0;
        long totalRecordsWritten = 0;

        HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
        HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();

        List<Comparable[]> rows = new ArrayList<>();
        DecimalFormat df = new DecimalFormat("#.00");
        for (HoodieInstant commitTime : timeline.getInstants().collect(Collectors.toList())) {
            String waf = "0";
            HoodieCommitMetadata commit = HoodieCommitMetadata
                    .fromBytes(activeTimeline.getInstantDetails(commitTime).get(), HoodieCommitMetadata.class);
            if (commit.fetchTotalUpdateRecordsWritten() > 0) {
                // Double division avoids the precision loss of a float cast on large long counts.
                waf = df.format(
                        (double) commit.fetchTotalRecordsWritten() / commit.fetchTotalUpdateRecordsWritten());
            }
            rows.add(new Comparable[] { commitTime.getTimestamp(), commit.fetchTotalUpdateRecordsWritten(),
                    commit.fetchTotalRecordsWritten(), waf });
            totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten();
            totalRecordsWritten += commit.fetchTotalRecordsWritten();
        }
        String waf = "0";
        if (totalRecordsUpserted > 0) {
            waf = df.format((double) totalRecordsWritten / totalRecordsUpserted);
        }
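        // Illustrative example with hypothetical numbers: a commit that upserts
        // 100 records but must rewrite whole files containing 400 records yields
        // a write amplification factor of 400 / 100 = 4.00.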
        rows.add(new Comparable[] { "Total", totalRecordsUpserted, totalRecordsWritten, waf });

        TableHeader header = new TableHeader().addTableHeaderField("CommitTime")
                .addTableHeaderField("Total Upserted").addTableHeaderField("Total Written")
                .addTableHeaderField("Write Amplification Factor");
        return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
    }

    /**
     * Builds one table row from a file-size snapshot, in the column order used by
     * fileSizeStats: min, 10th percentile, median, mean, 95th percentile, max,
     * sample count, standard deviation.
     */
    private Comparable[] printFileSizeHistogram(String commitTime, Snapshot s) {
        return new Comparable[] { commitTime, s.getMin(), s.getValue(0.1), s.getMedian(), s.getMean(),
                s.get95thPercentile(), s.getMax(), s.size(), s.getStdDev() };
    }

    @CliCommand(value = "stats filesizes", help = "File Sizes. Display summary stats on sizes of files")
    public String fileSizeStats(@CliOption(key = {
            "partitionPath" }, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*") final String globRegex,
            @CliOption(key = {
                    "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
            @CliOption(key = {
                    "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
            @CliOption(key = {
                    "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
            @CliOption(key = {
                    "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
            throws IOException {

        FileSystem fs = HoodieCLI.fs;
        // Match all data files under the partitions selected by the glob, e.g. <basePath>/2016/08/02/*.
        String globPath = String.format("%s/%s/*", HoodieCLI.tableMetadata.getBasePath(), globRegex);
        FileStatus[] statuses = fs.globStatus(new Path(globPath));

        // Build per-commit and global file-size histograms (min, 10th, 50th, avg, 95th, max, count, stddev).
        Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
        Map<String, Histogram> commitHistoMap = new HashMap<>();
        for (FileStatus fileStatus : statuses) {
            String commitTime = FSUtils.getCommitTime(fileStatus.getPath().getName());
            long sz = fileStatus.getLen();
            commitHistoMap.computeIfAbsent(commitTime, k -> new Histogram(new UniformReservoir(MAX_FILES)))
                    .update(sz);
            globalHistogram.update(sz);
        }
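        // Note: UniformReservoir retains a uniform random sample of at most MAX_FILES
        // values, so percentiles are approximate once more files than that are observed.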

        List<Comparable[]> rows = new ArrayList<>();
        for (Map.Entry<String, Histogram> entry : commitHistoMap.entrySet()) {
            rows.add(printFileSizeHistogram(entry.getKey(), entry.getValue().getSnapshot()));
        }
        Snapshot s = globalHistogram.getSnapshot();
        rows.add(printFileSizeHistogram("ALL", s));

        // Render byte-valued columns human-readably; map keys must match header field names.
        Function<Object, String> converterFunction = entry ->
                NumericUtils.humanReadableByteCount(Double.parseDouble(entry.toString()));
        Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
        fieldNameToConverterMap.put("Min", converterFunction);
        fieldNameToConverterMap.put("10th", converterFunction);
        fieldNameToConverterMap.put("50th", converterFunction);
        fieldNameToConverterMap.put("Avg", converterFunction);
        fieldNameToConverterMap.put("95th", converterFunction);
        fieldNameToConverterMap.put("Max", converterFunction);
        fieldNameToConverterMap.put("StdDev", converterFunction);

        TableHeader header = new TableHeader().addTableHeaderField("CommitTime").addTableHeaderField("Min")
                .addTableHeaderField("10th").addTableHeaderField("50th").addTableHeaderField("Avg")
                .addTableHeaderField("95th").addTableHeaderField("Max").addTableHeaderField("NumFiles")
                .addTableHeaderField("StdDev");
        return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly,
                rows);
    }
}
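
Example

The stats filesizes command builds its summary rows from Codahale (Dropwizard) Metrics histograms. The following standalone sketch shows the same Histogram/Snapshot pattern with made-up file sizes in place of real FileStatus lengths; the class name and sample values are illustrative only, not part of the Hoodie CLI.

import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;

public class FileSizeHistogramSketch {
    public static void main(String[] args) {
        // Reservoir capped at 1000 samples here; StatsCommand uses MAX_FILES (1,000,000).
        Histogram histogram = new Histogram(new UniformReservoir(1000));

        // Made-up sizes in bytes, standing in for FileStatus.getLen() values.
        long[] fileSizes = { 10L * 1024 * 1024, 64L * 1024 * 1024, 128L * 1024 * 1024 };
        for (long sz : fileSizes) {
            histogram.update(sz);
        }

        // Snapshot exposes the same statistics StatsCommand prints per commit.
        Snapshot s = histogram.getSnapshot();
        System.out.println("min    = " + s.getMin());
        System.out.println("10th   = " + s.getValue(0.1));
        System.out.println("median = " + s.getMedian());
        System.out.println("mean   = " + s.getMean());
        System.out.println("95th   = " + s.get95thPercentile());
        System.out.println("max    = " + s.getMax());
        System.out.println("count  = " + s.size());
        System.out.println("stddev = " + s.getStdDev());
    }
}

From the hoodie-cli shell, the commands themselves are invoked as, for example, stats wa or stats filesizes --partitionPath 2016/08/02 --sortBy Max --desc true, following the @CliCommand and @CliOption declarations above.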