// Java tutorial
/**
 * (c) Copyright 2014 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.moz.fiji.schema.util;

import java.io.IOException;

import com.google.common.base.Preconditions;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.hbase.util.Bytes;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonParser.Feature;
import org.codehaus.jackson.map.ObjectMapper;

import com.moz.fiji.annotations.ApiAudience;
import com.moz.fiji.annotations.ApiStability;
import com.moz.fiji.schema.EntityId;
import com.moz.fiji.schema.EntityIdFactory;
import com.moz.fiji.schema.avro.ComponentType;
import com.moz.fiji.schema.avro.HashSpec;
import com.moz.fiji.schema.avro.RowKeyEncoding;
import com.moz.fiji.schema.avro.RowKeyFormat;
import com.moz.fiji.schema.avro.RowKeyFormat2;
import com.moz.fiji.schema.layout.FijiTableLayout;

/**
 * Container class for entity ids which can be backed as strings
 * (suitable for raw, hashed, hash-prefixed, and materialization-suppressed keys)
 * xor as a list of components
 * (suitable for formatted entity ids whose materialization is not suppressed).
 *
 * <p>Exactly one of the two backings is set on any instance; use {@link #hasComponents()}
 * to tell which.</p>
 */
@ApiAudience.Framework
@ApiStability.Evolving
public final class JsonEntityIdParser {
  /** Shared mapper used to read JSON trees and to render components back to JSON. */
  private static final ObjectMapper BASIC_MAPPER = new ObjectMapper();

  /** Prefix of a string entity id whose remainder is decoded with Bytes.toBytesBinary. */
  public static final String HBASE_ROW_KEY_PREFIX = "hbase=";

  /** Prefix of a string entity id whose remainder is an ASCII-encoded hex row key. */
  public static final String HBASE_HEX_ROW_KEY_PREFIX = "hbase_hex=";

  /** String backing of the eid; null when the eid is backed by components. */
  private final String mStringEntityId;

  /** Component backing of the eid; null when the eid is backed by a string. */
  private final Object[] mComponents;

  /** This field is only relevant w.r.t. wildcarded lists of components. */
  private final boolean mIsWildcarded;

  /** The table layout associated with this parser. */
  private final FijiTableLayout mLayout;

  /**
   * Private constructor for JsonEntityIdParser parametrized by a String.
   * Validates that the string is non-null and carries one of the row key prefixes.
   *
   * @param stringEntityId string representing the entity id.
   * @param layout is the table's layout.
   */
  private JsonEntityIdParser(final String stringEntityId, final FijiTableLayout layout) {
    // stringEntityId may not be null.
    Preconditions.checkNotNull(stringEntityId, "Incoming entity_id can't be null.");
    // stringEntityId must be prefixed by "hbase=" or "hbase_hex=".
    Preconditions.checkArgument(
        stringEntityId.startsWith(HBASE_ROW_KEY_PREFIX)
            || stringEntityId.startsWith(HBASE_HEX_ROW_KEY_PREFIX),
        "Incoming entity_id must start with hbase= or hbase_hex=");
    mComponents = null;
    mStringEntityId = stringEntityId;
    mIsWildcarded = false;
    mLayout = layout;
  }

  /**
   * Private constructor for JsonEntityIdParser parametrized by an array of components.
   * Validates that the array is non-null and non-empty.
   *
   * @param wildCarded if one of the components is a wildcard.
   *     The wildcarded flag is only applicable for a components array.
   * @param layout is the table's layout.
   * @param components of the formatted entity id.
   */
  private JsonEntityIdParser(
      final boolean wildCarded,
      final FijiTableLayout layout,
      final Object... components) {
    Preconditions.checkNotNull(components, "Entity ID components can't be null.");
    Preconditions.checkArgument(components.length > 0, "Must have at least one component.");
    mComponents = components;
    mStringEntityId = null;
    mIsWildcarded = wildCarded;
    mLayout = layout;
  }

  /**
   * Create JsonEntityIdParser from a string input, which can be a json string or a raw hbase
   * rowKey.
   * This method is used for entity ids specified from the URL.
   *
   * @param entityId string of the row.
   * @param layout of the table in which the entity id belongs.
   *     If null, then long components may not be recognized.
   * @return a properly constructed JsonEntityIdParser.
   * @throws IOException if JsonEntityIdParser can not be properly constructed.
   */
  public static JsonEntityIdParser create(final String entityId, final FijiTableLayout layout)
      throws IOException {
    Preconditions.checkNotNull(entityId, "Incoming entity_id can't be null.");
    if (entityId.startsWith(HBASE_ROW_KEY_PREFIX)
        || entityId.startsWith(HBASE_HEX_ROW_KEY_PREFIX)) {
      return new JsonEntityIdParser(entityId, layout);
    }

    // Anything without a row key prefix is parsed as (lenient) JSON.
    final JsonParser parser = new JsonFactory().createJsonParser(entityId)
        .enable(Feature.ALLOW_COMMENTS)
        .enable(Feature.ALLOW_SINGLE_QUOTES)
        .enable(Feature.ALLOW_UNQUOTED_FIELD_NAMES);
    final JsonNode node;
    try {
      node = BASIC_MAPPER.readTree(parser);
    } finally {
      // readTree(JsonParser) leaves the parser open; release it explicitly.
      parser.close();
    }
    return create(node, layout);
  }

  /**
   * Create JsonEntityIdParser from entity id and layout.
   *
   * @param entityId of the row.
   * @param layout of the table containing the row.
   * @return a properly constructed JsonEntityIdParser.
   * @throws IOException if JsonEntityIdParser can not be properly constructed.
   */
  public static JsonEntityIdParser create(final EntityId entityId, final FijiTableLayout layout)
      throws IOException {
    final Object keysFormat = layout.getDesc().getKeysFormat();
    final RowKeyEncoding encoding = getEncoding(keysFormat);

    // Either we are dealing with a hash_prefix entity_id in which case it's not wildcarded
    // and it's a single component entity_id. If we are dealing with a formatted entity_id
    // then either the materialization of the rowkey was suppressed (in which case it's as good
    // as a hashed rowkey) or it's a normal componentized rowkey.
    switch (encoding) {
      case HASH_PREFIX:
        return new JsonEntityIdParser(
            false, layout, Bytes.toString((byte[]) entityId.getComponentByIndex(0)));
      case FORMATTED: {
        final HashSpec hashSpec = ((RowKeyFormat2) keysFormat).getSalt();
        if (hashSpec.getSuppressKeyMaterialization()) {
          return new JsonEntityIdParser(
              String.format("hbase=%s", Bytes.toStringBinary(entityId.getHBaseRowKey())),
              layout);
        }
        // NOTE(review): the result of getComponents() is handed to an Object... parameter.
        // If it is a List rather than an Object[], it arrives as ONE component holding the
        // whole list, which would nest the array produced by getJsonEntityId() — confirm.
        return new JsonEntityIdParser(false, layout, entityId.getComponents());
      }
      default:
        // Treat all other formats as a raw/hashed key.
        return new JsonEntityIdParser(
            String.format("hbase=%s", Bytes.toStringBinary(entityId.getHBaseRowKey())),
            layout);
    }
  }

  /**
   * Gets row key encoding of a row key format.
   *
   * @param keysFormat row key format.
   * @return row key encoding.
   * @throws IOException if row key format is unrecognized (including null).
   */
  private static RowKeyEncoding getEncoding(final Object keysFormat) throws IOException {
    if (keysFormat instanceof RowKeyFormat) {
      return ((RowKeyFormat) keysFormat).getEncoding();
    } else if (keysFormat instanceof RowKeyFormat2) {
      return ((RowKeyFormat2) keysFormat).getEncoding();
    } else {
      // Guard against a null format so the documented IOException is thrown, not an NPE.
      final Object formatClass = (null == keysFormat) ? null : keysFormat.getClass();
      throw new IOException(String.format("Unrecognized row key format: %s", formatClass));
    }
  }

  /**
   * Gets the RowKeyFormat2 of the provided layout, if it exists. Otherwise, null.
   *
   * @param layout of the table to find the RowKeyFormat2. May be null.
   * @return the RowKeyFormat2 when the layout is non-null and its encoding is FORMATTED,
   *     null otherwise.
   * @throws IOException if the keys format can not be ascertained.
   */
  private static RowKeyFormat2 getRKF2(final FijiTableLayout layout) throws IOException {
    if (null != layout
        && RowKeyEncoding.FORMATTED == getEncoding(layout.getDesc().getKeysFormat())) {
      return (RowKeyFormat2) layout.getDesc().getKeysFormat();
    } else {
      return null;
    }
  }

  /**
   * Create JsonEntityIdParser from a JSON node.
   *
   * <p>Each element of the array becomes one component. An empty array element marks a
   * wildcard; a JSON null element is kept as a null component without marking the eid as
   * wildcarded. Numeric elements are widened to Long where the layout declares the
   * component at that position as LONG.</p>
   *
   * @param node is the JSON representation of the formatted entity_id.
   * @param layout of the table in which the entity id belongs.
   *     If null, then long components may not be recognized.
   * @return a properly constructed JsonEntityIdParser.
   * @throws IOException if JsonEntityIdParser can not be properly constructed.
   */
  public static JsonEntityIdParser create(final JsonNode node, final FijiTableLayout layout)
      throws IOException {
    final RowKeyFormat2 format = getRKF2(layout);

    // Disallow non-arrays. A null node is what the mapper yields for content-free input.
    if (null == node || !node.isArray()) {
      throw new IllegalArgumentException(
          "Provide components wrapped as a JSON array or provide the row key.");
    }

    final int size = node.size();
    final Object[] components = new Object[size];
    boolean wildCarded = false;
    for (int i = 0; i < size; i++) {
      final Object component = getNodeValue(node.get(i));
      if (WildcardSingleton.INSTANCE == component) {
        // Identity comparison: component may legitimately be null (JSON null).
        wildCarded = true;
        components[i] = null;
      } else if (null != component
          && null != format
          && ComponentType.LONG == format.getComponents().get(i).getType()) {
        components[i] = ((Number) component).longValue();
      } else {
        components[i] = component;
      }
    }
    return new JsonEntityIdParser(wildCarded, layout, components);
  }

  /**
   * Gets the array of components.
   *
   * @return array of components (the internal array, not a copy), or null if the eid is
   *     backed by a string.
   */
  public Object[] getComponents() {
    return mComponents;
  }

  /**
   * Are any of the components wildcarded...
   *
   * @return true iff at least one component is a wildcard (indicated by a null).
   */
  public boolean isWildcarded() {
    return mIsWildcarded;
  }

  /**
   * Gets the json node eid (which can be null if the eid was backed as a string).
   *
   * @return json node of the eid.
   */
  public JsonNode getJsonEntityId() {
    return BASIC_MAPPER.valueToTree(mComponents);
  }

  /**
   * Gets the string representation of the eid.
   *
   * @return string representation of eid, or null if the eid is backed by components.
   */
  public String getStringEntityId() {
    return mStringEntityId;
  }

  /**
   * Is the eid backed by a json or a string?
   *
   * @return true iff eid is backed by components (json node).
   */
  public boolean hasComponents() {
    return mComponents != null;
  }

  /**
   * Construct eid from this parser's components or string.
   * Formatted entity ids mustn't have wildcards in order to resolve.
   *
   * @return the eid.
   * @throws IOException if construction of eid fails due to incorrect user input.
   */
  public EntityId getEntityId() throws IOException {
    final EntityIdFactory factory = EntityIdFactory.getFactory(mLayout);
    if (!this.hasComponents()) {
      return factory.getEntityIdFromHBaseRowKey(parseBytes(getStringEntityId()));
    }
    if (this.isWildcarded()) {
      throw new IllegalArgumentException(
          "Entity id must be fully specified for resolution, i.e. without wildcards.");
    }
    return factory.getEntityId(mComponents);
  }

  /**
   * Gets byte array from string entity id given in "hbase=" or "hbase_hex=" format.
   *
   * @param stringEntityId representing the row to acquire byte array for.
   * @return byte array of entity id.
   * @throws IOException if the ASCII-encoded hex was improperly formed.
   */
  private static byte[] parseBytes(final String stringEntityId) throws IOException {
    if (stringEntityId.startsWith(HBASE_ROW_KEY_PREFIX)) {
      final String rowKeySubstring = stringEntityId.substring(HBASE_ROW_KEY_PREFIX.length());
      return Bytes.toBytesBinary(rowKeySubstring);
    } else if (stringEntityId.startsWith(HBASE_HEX_ROW_KEY_PREFIX)) {
      final String rowKeySubstring =
          stringEntityId.substring(HBASE_HEX_ROW_KEY_PREFIX.length());
      try {
        return Hex.decodeHex(rowKeySubstring.toCharArray());
      } catch (DecoderException de) {
        // Re-wrap decoder exception as IOException, keeping the original as the cause.
        throw new IOException(de.getMessage(), de);
      }
    } else {
      throw new IllegalArgumentException(
          "Passed string must be prefixed by hbase= or hbase_hex=.");
    }
  }

  /**
   * Converts a JSON string, integer, or wildcard (empty array)
   * node into a Java object (String, Integer, Long, WILDCARD, or null).
   *
   * @param node JSON string, integer numeric, or wildcard (empty array) node.
   * @return the JSON value, as a String, an Integer, a Long, a WILDCARD, or null.
   * @throws JsonParseException if the JSON node is not String, Integer, Long, WILDCARD, or null.
   */
  private static Object getNodeValue(JsonNode node) throws JsonParseException {
    // TODO: Write tests to distinguish integer and long components.
    if (node.isInt()) {
      return node.asInt();
    } else if (node.isLong()) {
      return node.asLong();
    } else if (node.isTextual()) {
      return node.asText();
    } else if (node.isArray() && node.size() == 0) {
      // An empty array token indicates a wildcard.
      return WildcardSingleton.INSTANCE;
    } else if (node.isNull()) {
      return null;
    } else {
      throw new JsonParseException(String.format(
          "Invalid JSON value: '%s', expecting string, int, long, null, or wildcard [].", node),
          null);
    }
  }

  /**
   * Singleton object to use to represent a wildcard.
   */
  private static enum WildcardSingleton {
    INSTANCE;
  }

  /**
   * Renders the eid as its JSON component array when backed by components,
   * or as its prefixed row key string otherwise.
   *
   * @return string form of the eid.
   */
  @Override
  public String toString() {
    if (this.hasComponents()) {
      return this.getJsonEntityId().toString();
    } else {
      return this.getStringEntityId();
    }
  }
}