org.apache.parquet.tools.command.MetadataUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.parquet.tools.command.MetadataUtils.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.tools.command;

import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.FileMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
import org.apache.parquet.tools.util.PrettyPrintWriter;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

class MetadataUtils {
    static void showDetails(PrettyPrintWriter out, ParquetMetadata meta, boolean showOriginalTypes) {
        showDetails(out, meta.getFileMetaData(), showOriginalTypes);

        long i = 1;
        for (BlockMetaData bmeta : meta.getBlocks()) {
            out.println();
            showDetails(out, bmeta, i++);
        }
    }

    static void showDetails(PrettyPrintWriter out, FileMetaData meta, boolean showOriginalTypes) {
        out.format("creator: %s%n", meta.getCreatedBy());

        Map<String, String> extra = meta.getKeyValueMetaData();
        if (extra != null) {
            for (Map.Entry<String, String> entry : meta.getKeyValueMetaData().entrySet()) {
                out.print("extra: ");
                out.incrementTabLevel();
                out.format("%s = %s%n", entry.getKey(), entry.getValue());
                out.decrementTabLevel();
            }
        }

        out.println();
        out.format("file schema: %s%n", meta.getSchema().getName());
        out.rule('-');
        showDetails(out, meta.getSchema(), showOriginalTypes);
    }

    private static void showDetails(PrettyPrintWriter out, BlockMetaData meta, Long num) {
        long rows = meta.getRowCount();
        long tbs = meta.getTotalByteSize();
        long offset = meta.getStartingPos();

        out.format("row group%s: RC:%d TS:%d OFFSET:%d%n", (num == null ? "" : " " + num), rows, tbs, offset);
        out.rule('-');
        showDetails(out, meta.getColumns());
    }

    static void showDetails(PrettyPrintWriter out, List<ColumnChunkMetaData> ccmeta) {
        Map<String, Object> chunks = new LinkedHashMap<String, Object>();
        for (ColumnChunkMetaData cmeta : ccmeta) {
            String[] path = cmeta.getPath().toArray();

            Map<String, Object> current = chunks;
            for (int i = 0; i < path.length - 1; ++i) {
                String next = path[i];
                if (!current.containsKey(next)) {
                    current.put(next, new LinkedHashMap<String, Object>());
                }

                current = (Map<String, Object>) current.get(next);
            }

            current.put(path[path.length - 1], cmeta);
        }

        showColumnChunkDetails(out, chunks, 0);
    }

    private static void showColumnChunkDetails(PrettyPrintWriter out, Map<String, Object> current, int depth) {
        for (Map.Entry<String, Object> entry : current.entrySet()) {
            String name = Strings.repeat(".", depth) + entry.getKey();
            Object value = entry.getValue();

            if (value instanceof Map) {
                out.println(name + ": ");
                showColumnChunkDetails(out, (Map<String, Object>) value, depth + 1);
            } else {
                out.print(name + ": ");
                showDetails(out, (ColumnChunkMetaData) value, false);
            }
        }
    }

    private static void showDetails(PrettyPrintWriter out, ColumnChunkMetaData meta, boolean name) {
        long doff = meta.getDictionaryPageOffset();
        long foff = meta.getFirstDataPageOffset();
        long tsize = meta.getTotalSize();
        long usize = meta.getTotalUncompressedSize();
        long count = meta.getValueCount();
        double ratio = usize / (double) tsize;
        String encodings = Joiner.on(',').skipNulls().join(meta.getEncodings());

        if (name) {
            String path = Joiner.on('.').skipNulls().join(meta.getPath());
            out.format("%s: ", path);
        }

        out.format(" %s", meta.getType());
        out.format(" %s", meta.getCodec());
        out.format(" DO:%d", doff);
        out.format(" FPO:%d", foff);
        out.format(" SZ:%d/%d/%.2f", tsize, usize, ratio);
        out.format(" VC:%d", count);
        if (!encodings.isEmpty())
            out.format(" ENC:%s", encodings);
        Statistics<?> stats = meta.getStatistics();
        if (stats != null) {
            out.format(" ST:[%s]", stats);
        } else {
            out.format(" ST:[none]");
        }
        out.println();
    }

    static void showDetails(PrettyPrintWriter out, MessageType type, boolean showOriginalTypes) {
        List<String> cpath = new ArrayList<String>();
        for (Type ftype : type.getFields()) {
            showDetails(out, ftype, 0, type, cpath, showOriginalTypes);
        }
    }

    private static void showDetails(PrettyPrintWriter out, GroupType type, int depth, MessageType container,
            List<String> cpath, boolean showOriginalTypes) {
        String name = Strings.repeat(".", depth) + type.getName();
        Repetition rep = type.getRepetition();
        int fcount = type.getFieldCount();
        out.format("%s: %s F:%d%n", name, rep, fcount);

        cpath.add(type.getName());
        for (Type ftype : type.getFields()) {
            showDetails(out, ftype, depth + 1, container, cpath, showOriginalTypes);
        }
        cpath.remove(cpath.size() - 1);
    }

    private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container,
            List<String> cpath, boolean showOriginalTypes) {
        String name = Strings.repeat(".", depth) + type.getName();
        Repetition rep = type.getRepetition();
        PrimitiveTypeName ptype = type.getPrimitiveTypeName();

        out.format("%s: %s %s", name, rep, ptype);
        if (showOriginalTypes) {
            OriginalType otype;
            try {
                otype = type.getOriginalType();
            } catch (Exception e) {
                otype = null;
            }
            if (otype != null)
                out.format(" O:%s", otype);
        } else {
            LogicalTypeAnnotation ltype = type.getLogicalTypeAnnotation();
            if (ltype != null)
                out.format(" L:%s", ltype);
        }

        if (container != null) {
            cpath.add(type.getName());
            String[] paths = cpath.toArray(new String[cpath.size()]);
            cpath.remove(cpath.size() - 1);

            ColumnDescriptor desc = container.getColumnDescription(paths);

            int defl = desc.getMaxDefinitionLevel();
            int repl = desc.getMaxRepetitionLevel();
            out.format(" R:%d D:%d", repl, defl);
        }
        out.println();
    }

    private static void showDetails(PrettyPrintWriter out, Type type, int depth, MessageType container,
            List<String> cpath, boolean showOriginalTypes) {
        if (type instanceof GroupType) {
            showDetails(out, type.asGroupType(), depth, container, cpath, showOriginalTypes);
            return;
        } else if (type instanceof PrimitiveType) {
            showDetails(out, type.asPrimitiveType(), depth, container, cpath, showOriginalTypes);
            return;
        }
    }
}