com.moz.fiji.schema.tools.ToolUtils.java Source code

Java tutorial

Introduction

Here is the source code for com.moz.fiji.schema.tools.ToolUtils.java

Source

/**
 * (c) Copyright 2012 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.moz.fiji.schema.tools;

import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableMap;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.net.URLCodec;
import org.apache.hadoop.hbase.util.Bytes;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonParser.Feature;
import org.codehaus.jackson.map.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.moz.fiji.annotations.ApiAudience;
import com.moz.fiji.annotations.ApiStability;
import com.moz.fiji.schema.EntityId;
import com.moz.fiji.schema.EntityIdFactory;
import com.moz.fiji.schema.FijiCell;
import com.moz.fiji.schema.FijiColumnName;
import com.moz.fiji.schema.FijiDataRequest;
import com.moz.fiji.schema.FijiDataRequestBuilder;
import com.moz.fiji.schema.FijiRowData;
import com.moz.fiji.schema.avro.RowKeyFormat;
import com.moz.fiji.schema.avro.RowKeyFormat2;
import com.moz.fiji.schema.avro.SchemaType;
import com.moz.fiji.schema.layout.FijiTableLayout;
import com.moz.fiji.schema.layout.FijiTableLayout.LocalityGroupLayout.FamilyLayout;
import com.moz.fiji.schema.layout.FijiTableLayout.LocalityGroupLayout.FamilyLayout.ColumnLayout;
import com.moz.fiji.schema.util.ByteArrayFormatter;

/**
 * Utility class providing static methods used by command-line tools.
 */
@ApiAudience.Framework
@ApiStability.Evolving
public final class ToolUtils {

    /** Logger for this class; used to trace the columns added to data requests. */
    private static final Logger LOG = LoggerFactory.getLogger(ToolUtils.class);

    /** Utility class holding only static members: instantiation is disabled. */
    private ToolUtils() {
    }

    /**
     * Prefix to specify an HBase row key from the command-line, as in <code>"hbase=..."</code>.
     * The text following the prefix is parsed as a byte array specification.
     */
    public static final String HBASE_ROW_KEY_SPEC_PREFIX = "hbase=";

    /**
     * Optional prefix to specify a Fiji row key from the command-line, as in
     * <code>"fiji=..."</code>. Entity ID flags without the HBase prefix are handled as Fiji row keys
     * whether or not this prefix is present.
     */
    public static final String FIJI_ROW_KEY_SPEC_PREFIX = "fiji=";

    /**
     * Builds an entity ID from the value of an entity ID command-line flag.
     *
     * <p>Two kinds of specifications are understood:</p>
     * <ul>
     *   <li>HBase row keys, which must start with <code>"hbase="</code>; the remainder of the flag
     *     is parsed as a byte array specification.</li>
     *   <li>Fiji row keys, which may start with the optional <code>"fiji="</code> prefix. Any flag
     *     that does not start with <code>"hbase="</code> is handled as a Fiji row key.</li>
     * </ul>
     *
     * @param entityFlag Command-line flag specifying an entity ID. Must not be null.
     * @param layout Layout of the table describing the entity ID format.
     * @return the entity ID as specified in the flags.
     * @throws IOException on I/O error.
     */
    public static EntityId createEntityIdFromUserInputs(String entityFlag, FijiTableLayout layout)
            throws IOException {
        Preconditions.checkNotNull(entityFlag);

        final EntityIdFactory eidFactory = EntityIdFactory.getFactory(layout);

        if (!entityFlag.startsWith(HBASE_ROW_KEY_SPEC_PREFIX)) {
            // Fiji row key specification: drop the explicit prefix when the user supplied it.
            String rowKeySpec = entityFlag;
            if (rowKeySpec.startsWith(FIJI_ROW_KEY_SPEC_PREFIX)) {
                rowKeySpec = rowKeySpec.substring(FIJI_ROW_KEY_SPEC_PREFIX.length());
            }
            return parseFijiRowKey(rowKeySpec, eidFactory, layout);
        }

        // HBase row key specification: everything after the prefix describes the raw bytes.
        final int prefixLength = HBASE_ROW_KEY_SPEC_PREFIX.length();
        final byte[] rowKeyBytes = parseBytesFlag(entityFlag.substring(prefixLength));
        return eidFactory.getEntityIdFromHBaseRowKey(rowKeyBytes);
    }

    /**
     * Turns a Fiji row key specification given on the command-line into an entity ID.
     *
     * <p>The interpretation depends on the keys format declared by the table layout:</p>
     * <ul>
     *   <li>{@link RowKeyFormat} (deprecated): the specification is handed to the factory as is.</li>
     *   <li>{@link RowKeyFormat2} with RAW encoding: the specification is parsed as a byte array
     *     specification and used as the HBase row key.</li>
     *   <li>{@link RowKeyFormat2} with FORMATTED encoding: the specification is parsed as JSON.</li>
     * </ul>
     *
     * @param rowKeySpec Fiji row key specification.
     * @param factory Factory for entity IDs.
     * @param layout Layout of the table to parse the entity ID of.
     * @return the parsed entity ID.
     * @throws IOException on I/O error.
     */
    public static EntityId parseFijiRowKey(String rowKeySpec, EntityIdFactory factory, FijiTableLayout layout)
            throws IOException {

        final Object rowKeyFormat = layout.getDesc().getKeysFormat();

        if (rowKeyFormat instanceof RowKeyFormat) {
            // Former, deprecated, unformatted row key specification:
            return factory.getEntityId(rowKeySpec);
        }

        if (!(rowKeyFormat instanceof RowKeyFormat2)) {
            throw new RuntimeException(String.format("Unknown row key format: '%s'.", rowKeyFormat));
        }

        final RowKeyFormat2 format2 = (RowKeyFormat2) rowKeyFormat;
        switch (format2.getEncoding()) {
        case FORMATTED:
            return parseJsonFormattedKeySpec(rowKeySpec, format2, factory);
        case RAW:
            return factory.getEntityIdFromHBaseRowKey(parseBytesFlag(rowKeySpec));
        default:
            throw new RuntimeException(
                    String.format("Invalid layout for table '%s' with unsupported keys format: '%s'.",
                            layout.getName(), format2));
        }
    }

    /**
     * Extracts the Java value held by a JSON node used as a row key component.
     *
     * @param node JSON node: an int or long number, a string, or a JSON null.
     * @return the number value for an int or long node, the text for a string node, or null for
     *     a JSON null node.
     * @throws IOException if the JSON node is of any other kind.
     */
    private static Object getJsonStringOrIntValue(JsonNode node) throws IOException {
        final boolean isIntegral = node.isInt() || node.isLong();
        if (isIntegral) {
            return node.getNumberValue();
        }
        if (node.isTextual()) {
            return node.getTextValue();
        }
        if (node.isNull()) {
            return null;
        }
        final String message =
                String.format("Invalid JSON value: '%s', expecting string, int, long, or null.", node);
        throw new IOException(message);
    }

    /**
     * Builds an entity ID from the JSON description of a formatted row key.
     *
     * <p>The JSON is parsed leniently: comments, single quotes and unquoted field names are
     * accepted. A JSON array supplies one key component per element; an immediate JSON primitive
     * supplies a single component. JSON objects (maps) are not supported yet.</p>
     *
     * @param json JSON specification of the formatted row key.
     *     Either a JSON ordered array, a JSON map (object), or an immediate JSON primitive.
     * @param format Row key format specification from the table layout. Not consulted by the
     *     current implementation.
     * @param factory Entity ID factory.
     * @return the parsed entity ID.
     * @throws IOException on I/O error, including malformed JSON.
     */
    public static EntityId parseJsonFormattedKeySpec(String json, RowKeyFormat2 format, EntityIdFactory factory)
            throws IOException {
        try {
            final ObjectMapper jsonMapper = new ObjectMapper();
            final JsonParser lenientParser = new JsonFactory().createJsonParser(json);
            lenientParser.enable(Feature.ALLOW_COMMENTS);
            lenientParser.enable(Feature.ALLOW_SINGLE_QUOTES);
            lenientParser.enable(Feature.ALLOW_UNQUOTED_FIELD_NAMES);

            final JsonNode root = jsonMapper.readTree(lenientParser);

            if (root.isArray()) {
                // One row key component per array element, in array order.
                final Object[] components = new Object[root.size()];
                int index = 0;
                for (JsonNode element : root) {
                    components[index] = getJsonStringOrIntValue(element);
                    index += 1;
                }
                return factory.getEntityId(components);
            }

            if (root.isObject()) {
                // TODO: Implement map row key specifications:
                throw new RuntimeException("Map row key specifications are not implemented yet.");
            }

            // Immediate primitive: the key has a single component.
            return factory.getEntityId(getJsonStringOrIntValue(root));

        } catch (JsonParseException jpe) {
            throw new IOException(jpe);
        }
    }

    /** Prefix to specify a sequence of bytes in hexadecimal, as in: "hex:00dead88beefaa". */
    public static final String BYTES_SPEC_PREFIX_HEX = "hex:";

    /** Prefix to specify a sequence of bytes as a URL encoded string, as in: "url:URL%20encoded". */
    public static final String BYTES_SPEC_PREFIX_URL = "url:";

    /** Optional prefix to specify a sequence of bytes as UTF-8 text, as in: "utf8:some text". */
    public static final String BYTES_SPEC_PREFIX_UTF8 = "utf8:";

    // TODO: Support other encodings, e.g. base64?

    /**
     * Parses a command-line flag specifying a byte array.
     *
     * Valid specifications are:
     * <ul>
     *   <li> UTF-8 encoded strings, as in "utf8:encoded text".</li>
     *   <li> Hexadecimal sequence, with "hex:00dead88beefaa".</li>
     *   <li> URL encoded strings, as in "url:this%20is%20a%20URL".</li>
     * </ul>
     *
     * UTF-8 is the default, hence the "utf8:" prefix is optional unless there is an ambiguity with
     * other prefixes. In every case the prefix is part of the flag syntax only and is never
     * included in the decoded bytes.
     *
     * @param flag Command-line flag specification for a byte array. Must not be null.
     * @return the decoded byte array.
     * @throws IOException on decoding error, e.g. when a URL encoded specification is malformed.
     */
    public static byte[] parseBytesFlag(String flag) throws IOException {
        if (flag.startsWith(BYTES_SPEC_PREFIX_HEX)) {
            // Hexadecimal encoded byte array:
            return ByteArrayFormatter.parseHex(flag.substring(BYTES_SPEC_PREFIX_HEX.length()));

        } else if (flag.startsWith(BYTES_SPEC_PREFIX_URL)) {
            // URL encoded byte array.
            // The "url:" prefix must be stripped before decoding, as for the other encodings;
            // otherwise the prefix characters end up in the decoded bytes.
            final String urlSpec = flag.substring(BYTES_SPEC_PREFIX_URL.length());
            try {
                return URLCodec.decodeUrl(Bytes.toBytes(urlSpec));
            } catch (DecoderException de) {
                throw new IOException(de);
            }

        } else {
            // UTF-8 text, with or without the explicit prefix. The text is converted to bytes
            // as is; no escape sequence is interpreted here.
            final String spec = flag.startsWith(BYTES_SPEC_PREFIX_UTF8)
                    ? flag.substring(BYTES_SPEC_PREFIX_UTF8.length())
                    : flag;
            return Bytes.toBytes(spec);
        }
    }

    /**
     * Prints the cells of <code>row</code> for the given map-type families and group-type
     * columns, followed by an empty line.
     *
     * <p>Counter columns print their most recent cell only (when there is one); all other
     * columns print every timestamped value found in the row. A map-type family associated
     * with an empty list of qualifiers is printed in full.</p>
     *
     * @param row The row to read from.
     * @param mapTypeFamilies The map type families to print.
     * @param groupTypeColumns The group type columns to print.
     * @param printStream The stream to print to.
     * @throws IOException if there is an error retrieving data from the FijiRowData.
     */
    public static void printRow(FijiRowData row, Map<FamilyLayout, List<String>> mapTypeFamilies,
            Map<FamilyLayout, List<ColumnLayout>> groupTypeColumns, PrintStream printStream) throws IOException {

        // Map-type families.
        for (Entry<FamilyLayout, List<String>> familyEntry : mapTypeFamilies.entrySet()) {
            final FamilyLayout familyLayout = familyEntry.getKey();
            final boolean isCounterFamily =
                    familyLayout.getDesc().getMapSchema().getType() == SchemaType.COUNTER;
            final List<String> requestedQualifiers = familyEntry.getValue();
            final boolean wholeFamily = requestedQualifiers.isEmpty();
            final String mapFamilyName = familyLayout.getName();

            if (isCounterFamily && wholeFamily) {
                // Unqualified family of counters: print every qualifier present in the row.
                for (String qualifier : row.getQualifiers(mapFamilyName)) {
                    printLatestCounter(row, mapFamilyName, qualifier, printStream);
                }
            } else if (isCounterFamily) {
                // Qualified family of counters: print only the requested columns.
                for (String qualifier : requestedQualifiers) {
                    printLatestCounter(row, mapFamilyName, qualifier, printStream);
                }
            } else if (wholeFamily) {
                // Unqualified family of non-counters: print every qualifier present in the row.
                final NavigableMap<String, NavigableMap<Long, Object>> valuesByQualifier =
                        row.getValues(mapFamilyName);
                for (Entry<String, NavigableMap<Long, Object>> column : valuesByQualifier.entrySet()) {
                    printVersions(row, mapFamilyName, column.getKey(), column.getValue(), printStream);
                }
            } else {
                // Qualified family of non-counters: print only the requested columns.
                for (String qualifier : requestedQualifiers) {
                    final NavigableMap<Long, Object> versions = row.getValues(mapFamilyName, qualifier);
                    printVersions(row, mapFamilyName, qualifier, versions, printStream);
                }
            }
        }

        // Group-type families.
        for (Entry<FamilyLayout, List<ColumnLayout>> groupEntry : groupTypeColumns.entrySet()) {
            final String groupFamilyName = groupEntry.getKey().getName();
            for (ColumnLayout columnLayout : groupEntry.getValue()) {
                final FijiColumnName columnName =
                        FijiColumnName.create(groupFamilyName, columnLayout.getName());
                final boolean isCounterColumn =
                        columnLayout.getDesc().getColumnSchema().getType() == SchemaType.COUNTER;
                if (isCounterColumn) {
                    printLatestCounter(row, columnName.getFamily(), columnName.getQualifier(), printStream);
                } else {
                    final NavigableMap<Long, Object> versions =
                            row.getValues(columnName.getFamily(), columnName.getQualifier());
                    printVersions(row, columnName.getFamily(), columnName.getQualifier(), versions,
                            printStream);
                }
            }
        }
        printStream.println("");
    }

    /**
     * Prints the most recent cell of a counter column, if the row has one.
     *
     * @param row The row to read from.
     * @param family The family of the counter column.
     * @param qualifier The qualifier of the counter column.
     * @param printStream The stream to print to.
     * @throws IOException if there is an error retrieving data from the FijiRowData.
     */
    private static void printLatestCounter(FijiRowData row, String family, String qualifier,
            PrintStream printStream) throws IOException {
        final FijiCell<Long> latest = row.getMostRecentCell(family, qualifier);
        if (latest != null) {
            printCell(row.getEntityId(), latest, printStream);
        }
    }

    /**
     * Prints every timestamped value of a single column.
     *
     * @param row The row the values belong to.
     * @param family The family of the column.
     * @param qualifier The qualifier of the column.
     * @param versions The values of the column, keyed by timestamp.
     * @param printStream The stream to print to.
     * @throws IOException if there is an error retrieving data from the FijiRowData.
     */
    private static void printVersions(FijiRowData row, String family, String qualifier,
            NavigableMap<Long, Object> versions, PrintStream printStream) throws IOException {
        for (Entry<Long, Object> version : versions.entrySet()) {
            final long timestamp = version.getKey();
            printCell(row.getEntityId(), timestamp, family, qualifier, version.getValue(), printStream);
        }
    }

    /**
     * Writes one cell, described by its individual coordinates, to the print stream.
     *
     * <p>The output is two lines: the entity ID, timestamp and column name, then the indented
     * cell content.</p>
     *
     * @param entityId The entity id.
     * @param timestamp This timestamp of a FijiCell.
     * @param family The family of a FijiCell.
     * @param qualifier The qualifier of a FijiCell.
     * @param cellData The contents of a FijiCell.
     * @param printStream The stream to print to.
     */
    private static void printCell(EntityId entityId, Long timestamp, String family, String qualifier,
            Object cellData, PrintStream printStream) {
        final String shellEntityId = formatEntityId(entityId);
        printStream.format("entity-id=%s [%d] %s:%s%n                                 %s%n",
                shellEntityId, timestamp, family, qualifier, cellData);
    }

    /**
     * Writes one cell to the print stream, reading its timestamp, column and content from the
     * cell itself.
     *
     * <p>The output is two lines: the entity ID, timestamp and column name, then the indented
     * cell content.</p>
     *
     * @param entityId The entity id.
     * @param cell The FijiCell.
     * @param printStream The stream to print to.
     */
    private static void printCell(EntityId entityId, FijiCell<?> cell, PrintStream printStream) {
        final String shellEntityId = formatEntityId(entityId);
        printStream.format("entity-id=%s [%d] %s:%s%n                                 %s%n",
                shellEntityId, cell.getTimestamp(), cell.getColumn().getFamily(),
                cell.getColumn().getQualifier(), cell.getData());
    }

    /**
     * Returns the map-type families specified by <code>rawColumnNames</code>, each associated
     * with the qualifiers requested within that family.
     *
     * <p>If <code>rawColumnNames</code> is null or empty, then all map-type families of the layout
     * are returned, each with an empty list of qualifiers. Raw columns that belong to
     * group-type families are ignored.</p>
     *
     * @param rawColumnNames The raw columns supplied by the user. May be null or empty to select
     *     every map-type family.
     * @param layout The FijiTableLayout.
     * @return A map from the selected map-type families to the qualifiers requested in each.
     * @throws RuntimeException if a raw column names a family that does not exist in the layout.
     */
    public static Map<FamilyLayout, List<String>> getMapTypeFamilies(List<FijiColumnName> rawColumnNames,
            FijiTableLayout layout) {
        final Map<FamilyLayout, List<String>> familyMap = Maps.newHashMap();
        // Null is documented as "no restriction"; handle it like an empty list instead of
        // failing with a NullPointerException.
        if (null == rawColumnNames || rawColumnNames.isEmpty()) {
            for (FamilyLayout family : layout.getFamilies()) {
                if (family.isMapType()) {
                    familyMap.put(family, new ArrayList<String>());
                }
            }
        } else {
            for (FijiColumnName rawColumn : rawColumnNames) {
                final FamilyLayout family = layout.getFamilyMap().get(rawColumn.getFamily());
                if (null == family) {
                    throw new RuntimeException(String.format("No family '%s' in table '%s'.", rawColumn.getFamily(),
                            layout.getName()));
                }
                if (family.isMapType()) {
                    addColumn(family, rawColumn.getQualifier(), familyMap);
                }
            }
        }
        return familyMap;
    }

    /**
     * Returns the group-type columns specified by <code>rawColumnNames</code>, grouped by family.
     *
     * <p>If <code>rawColumnNames</code> is null or empty, then all columns in all group-type
     * families are returned. If a raw column specifies a group-type family, but no qualifier,
     * then each column in that family is returned. Raw columns that belong to map-type families
     * are ignored.</p>
     *
     * @param rawColumnNames The raw columns supplied by the user. May be null or empty to select
     *     every group-type column.
     * @param layout The FijiTableLayout.
     * @return The fully qualified columns specified by the raw columns, keyed by family.
     * @throws RuntimeException if a raw column names a family that does not exist in the layout,
     *     or a qualifier that does not exist in its group-type family.
     */
    public static Map<FamilyLayout, List<ColumnLayout>> getGroupTypeColumns(List<FijiColumnName> rawColumnNames,
            FijiTableLayout layout) {
        final Map<FamilyLayout, List<ColumnLayout>> familyMap = Maps.newHashMap();
        // Null is documented as "no restriction"; handle it like an empty list instead of
        // failing with a NullPointerException.
        if (null == rawColumnNames || rawColumnNames.isEmpty()) {
            for (FamilyLayout family : layout.getFamilies()) {
                if (family.isGroupType()) {
                    familyMap.put(family, Lists.newArrayList(family.getColumns()));
                }
            }
        } else {
            for (FijiColumnName rawColumn : rawColumnNames) {
                final FamilyLayout family = layout.getFamilyMap().get(rawColumn.getFamily());
                if (null == family) {
                    throw new RuntimeException(String.format("No family '%s' in table '%s'.", rawColumn.getFamily(),
                            layout.getName()));
                }
                if (family.isGroupType()) {
                    // We'll include it.  Is it fully qualified?
                    if (!rawColumn.isFullyQualified()) {
                        // User specified a group-type family, but no qualifier.  Include all qualifiers.
                        for (ColumnLayout column : family.getColumns()) {
                            addColumn(family, column, familyMap);
                        }
                    } else {
                        final ColumnLayout column = family.getColumnMap().get(rawColumn.getQualifier());
                        if (null == column) {
                            throw new RuntimeException(
                                    String.format("No column '%s' in table '%s'.", rawColumn, layout.getName()));
                        }
                        addColumn(family, column, familyMap);
                    }
                }
            }
        }
        return familyMap;
    }

    /**
     * Appends a column to the list registered for <code>family</code>, creating and registering
     * that list first when the family has none yet.
     *
     * @param family The family as a key.
     * @param column The column to add to the list of columns for this family.
     * @param familyColumnMap The map between families and lists of columns.
     */
    private static void addColumn(FamilyLayout family, ColumnLayout column,
            Map<FamilyLayout, List<ColumnLayout>> familyColumnMap) {
        List<ColumnLayout> columnsOfFamily = familyColumnMap.get(family);
        if (null == columnsOfFamily) {
            columnsOfFamily = new ArrayList<ColumnLayout>();
            familyColumnMap.put(family, columnsOfFamily);
        }
        columnsOfFamily.add(column);
    }

    /**
     * Registers <code>mapFamily</code> in the map if it is not there yet, then appends the
     * qualifier to the family's list of qualifiers.
     *
     * <p>A null qualifier only registers the family: nothing is appended to its list.</p>
     *
     * @param mapFamily The map family as a key.
     * @param qualifier The qualifier to add to the list of qualifiers for this map family.
     * @param familyQualifierMap The map between map families and lists of qualifiers.
     */
    private static void addColumn(FamilyLayout mapFamily, String qualifier,
            Map<FamilyLayout, List<String>> familyQualifierMap) {
        List<String> qualifiersOfFamily = familyQualifierMap.get(mapFamily);
        if (null == qualifiersOfFamily) {
            qualifiersOfFamily = new ArrayList<String>();
            familyQualifierMap.put(mapFamily, qualifiersOfFamily);
        }
        if (null == qualifier) {
            return;
        }
        qualifiersOfFamily.add(qualifier);
    }

    /**
     * Returns a FijiDataRequest covering the specified map-type families and group-type columns.
     *
     * <p>A map-type family associated with an empty list of qualifiers is requested as a whole
     * family; otherwise only the listed qualifiers are requested. Group-type columns are
     * requested one by one. All columns are added to a single columns definition configured with
     * <code>maxVersions</code>.</p>
     *
     * @param mapTypeFamilies The map type families to include, with the qualifiers of interest.
     * @param groupTypeColumns The family:qualifier map of group type columns to include.
     * @param maxVersions The max versions to include.
     * @param minTimestamp The min timestamp.
     * @param maxTimestamp The max timestamp.
     * @return The FijiDataRequest.
     */
    public static FijiDataRequest getDataRequest(Map<FamilyLayout, List<String>> mapTypeFamilies,
            Map<FamilyLayout, List<ColumnLayout>> groupTypeColumns, int maxVersions, long minTimestamp,
            long maxTimestamp) {
        final FijiDataRequestBuilder builder = FijiDataRequest.builder().withTimeRange(minTimestamp, maxTimestamp);

        final FijiDataRequestBuilder.ColumnsDef colBuilder = builder.newColumnsDef().withMaxVersions(maxVersions);

        for (Entry<FamilyLayout, List<String>> entry : mapTypeFamilies.entrySet()) {
            final String familyName = entry.getKey().getName();
            // If the map family is without qualifiers, add entire family.
            if (entry.getValue().isEmpty()) {
                // Parameterized logging: the message is only assembled when debug is enabled.
                LOG.debug("Adding family to data request: {}", familyName);
                colBuilder.addFamily(familyName);
            } else {
                // If the map family is with qualifiers, add only the columns of interest.
                for (String qualifier : entry.getValue()) {
                    LOG.debug("Adding column to data request: {}:{}", familyName, qualifier);
                    colBuilder.add(familyName, qualifier);
                }
            }
        }

        for (Entry<FamilyLayout, List<ColumnLayout>> entry : groupTypeColumns.entrySet()) {
            final String familyName = entry.getKey().getName();
            for (ColumnLayout column : entry.getValue()) {
                // Log the full family:qualifier name, as for map-type columns.
                LOG.debug("Adding column to data request: {}:{}", familyName, column.getName());
                colBuilder.add(familyName, column.getName());
            }
        }
        return builder.build();
    }

    /**
     * Renders an entity ID as text suitable for printing on the console.
     *
     * <p>Delegates to {@link EntityIdFactory#formatEntityId(EntityId)}.</p>
     *
     * @deprecated use {@link EntityId#toShellString()} instead.
     * @param eid Entity ID to format.
     * @return the formatted entity ID as a String to print on the console.
     */
    @Deprecated
    public static String formatEntityId(EntityId eid) {
        final String consoleText = EntityIdFactory.formatEntityId(eid);
        return consoleText;
    }
}