mvm.rya.indexing.KeyParts.java Source code

Java tutorial

Introduction

Here is the source code for mvm.rya.indexing.KeyParts.java

Source

package mvm.rya.indexing;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;

import mvm.rya.indexing.accumulo.Md5Hash;
import mvm.rya.indexing.accumulo.StatementSerializer;

import org.apache.accumulo.core.data.Value;
import org.apache.commons.codec.binary.StringUtils;
import org.apache.hadoop.io.Text;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.impl.ContextStatementImpl;
import org.openrdf.model.impl.StatementImpl;
import org.openrdf.model.impl.URIImpl;

/**
 * Store and format the various temporal index keys.
 * Row keys are in these two forms, where [x] denotes x is optional:
 *       rowkey = constraintPrefix datetime
 *       rowkey = datetime 0x00 uniquesuffix
 *       constraintPrefix = 0x00 hash([subject][predicate])
 *       uniquesuffix = some bytes to make it unique, like hash(statement).
 * 
 * The instance is in one of two modes depending on the constructor:
 *       storage mode  -- construct with a triple statement, get an iterator of keys to store.  
 *       query mode     -- construct with a statement and query constraints, get the key prefix to search.
 * 
 * This has the flavor of an immutable object.
 * It is independent of the underlying database engine.
 * 
 * @author David.Lotts
 *
 */
public class KeyParts implements Iterable<KeyParts> {
    // Column qualifiers; each one names the index strategy used to build the row key.
    private static final String CQ_S_P_AT = "spo";
    private static final String CQ_P_AT = "po";
    private static final String CQ_S_AT = "so";
    private static final String CQ_O_AT = "o";
    public static final String CQ_BEGIN = "begin";
    public static final String CQ_END = "end";

    /**
     * A single zero byte. It is written in front of every subject/predicate hash and is also
     * used as the delimiter in front of the uniqueness suffix.
     * NOTE(review): the array is public and mutable; callers must not modify it.
     */
    public static final byte[] HASH_PREFIX = new byte[] { 0 };
    /**
     * A single byte with value 1. Not used inside this class.
     * NOTE(review): presumably the exclusive upper bound for ranges over hash-prefixed keys -- confirm with callers.
     */
    public static final byte[] HASH_PREFIX_FOLLOWING = new byte[] { 1 };

    /** Column family; null on an instance built with {@link #KeyParts(Statement, TemporalInstant)}. */
    public final Text cf;
    /** Column qualifier; null on an instance built with {@link #KeyParts(Statement, TemporalInstant)}. */
    public final Text cq;
    /** Query mode only: the hashed subject and/or predicate prefix; may be null when unconstrained. */
    public final Text constraintPrefix;
    /** Store mode only: the complete row key; set only on the instances produced by {@link #iterator()}. */
    final Text storeKey;
    final private TemporalInstant instant;
    final private Statement statement;
    final private boolean queryMode;

    /**
     * Query-mode constructor.
     *
     * @param constraintPrefix hashed subject/predicate prefix, or null when there is no such constraint
     * @param instant the instant appended to the prefix by {@link #getQueryKey()}
     * @param cf column family text
     * @param cq column qualifier text, one of the CQ_* strategy names
     */
    KeyParts(Text constraintPrefix, TemporalInstant instant, String cf, String cq) {
        this.queryMode = true; // query mode
        this.storeKey = null;
        this.statement = null;
        this.constraintPrefix = constraintPrefix;
        this.instant = instant;
        this.cf = new Text(cf);
        this.cq = new Text(cq);
    }

    /**
     * This is the value to index: the UTF-8 bytes of the serialized statement.
     * Only meaningful in store mode, where the statement is set.
     *
     * @return the serialized statement wrapped in a {@link Value}
     */
    public Value getValue() {
        assert statement != null;
        return new Value(StringUtils.getBytesUtf8(StatementSerializer.writeStatement(statement)));
    }

    /**
     * Store-mode constructor. The resulting instance carries no key of its own;
     * iterate over it to obtain one {@code KeyParts} per index strategy.
     *
     * @param statement the statement to index
     * @param instant2 the instant the statement is indexed under
     */
    public KeyParts(Statement statement, TemporalInstant instant2) {
        this.queryMode = false; // store mode
        this.storeKey = null;
        this.constraintPrefix = null;
        this.statement = statement;
        this.instant = instant2;
        this.cf = null;
        this.cq = null;
    }

    /**
     * Store-mode constructor for one fully built key, used by {@link #iterator()}.
     * The instant is not retained because it is already encoded in {@code keyText}.
     */
    private KeyParts(Text keyText, Text cf, Text cq, Statement statement) {
        this.queryMode = false; // store mode
        this.constraintPrefix = null;
        this.statement = statement;
        this.instant = null;
        this.storeKey = keyText;
        this.cf = cf;
        this.cq = cq;
    }

    /**
     * Produces the keys to store for this statement, one per index strategy, in the order
     * {@code o}, {@code spo}, {@code po}, {@code so}.
     *
     * @return an iterator over the store keys; {@code null} when this instance is in query mode
     *         and assertions are disabled (kept for backward compatibility)
     */
    @Override
    public Iterator<KeyParts> iterator() {
        assert !queryMode : "iterator for queryMode is not immplemented";
        if (queryMode) {
            return null;
        }

        final String[] strategies = new String[] { CQ_O_AT, CQ_S_P_AT, CQ_P_AT, CQ_S_AT }; // CQ_END?
        return new Iterator<KeyParts>() {
            private int nextStrategy = 0;

            @Override
            public boolean hasNext() {
                return nextStrategy < strategies.length;
            }

            /**
             * Builds the key for the next strategy.
             *
             * @throws NoSuchElementException when every strategy has already been returned
             */
            @Override
            public KeyParts next() {
                if (!hasNext()) {
                    throw new NoSuchElementException("Next passed end?  No such nextStrategy=" + nextStrategy);
                }
                assert (statement != null);
                // Advance only after capturing the index of the strategy being built.
                final int strategyIndex = nextStrategy++;
                final Text keyText = new Text();
                switch (strategyIndex) {
                case 0: // index o+hash(p+s)
                    assert (CQ_O_AT.equals(strategies[0]));
                    KeyParts.appendInstant(instant, keyText);
                    KeyParts.appendUniqueness(statement, keyText);
                    break;
                case 1: // index hash(s+p)+o
                    assert (CQ_S_P_AT.equals(strategies[1]));
                    KeyParts.appendSubjectPredicate(statement, keyText);
                    KeyParts.appendInstant(instant, keyText);
                    // appendUniqueness -- Not needed since it is already unique.
                    break;
                case 2: // index hash(p)+o
                    assert (CQ_P_AT.equals(strategies[2]));
                    KeyParts.appendPredicate(statement, keyText);
                    KeyParts.appendInstant(instant, keyText);
                    KeyParts.appendUniqueness(statement, keyText);
                    break;
                case 3: // index hash(s)+o
                    assert (CQ_S_AT.equals(strategies[3]));
                    KeyParts.appendSubject(statement, keyText);
                    KeyParts.appendInstant(instant, keyText);
                    KeyParts.appendUniqueness(statement, keyText);
                    break;
                default:
                    // Unreachable while the switch covers every entry of strategies.
                    throw new NoSuchElementException("Next passed end?  No such nextStrategy=" + strategyIndex);
                }
                // The column family is the serialized context; the qualifier names the strategy.
                return new KeyParts(keyText, new Text(StatementSerializer.writeContext(statement)),
                        new Text(strategies[strategyIndex]), statement);
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException("Remove not Implemented.");
            }
        };
    }

    /**
     * Returns a copy of the row key to store. Only valid on the instances produced by
     * {@link #iterator()}; on any other instance the store key is null and this call fails.
     *
     * @return the row key bytes
     */
    public byte[] getStoreKey() {
        assert !queryMode : "must be in store Mode, store keys are not initialized.";
        return this.storeKey.copyBytes();
    }

    /**
     * Query key is the prefix plus the datetime, but no uniqueness at the end.
     *
     * @return the row key for range queries, built from this instance's own instant.
     */
    public Text getQueryKey() {
        return getQueryKey(this.instant);
    }

    /**
     * Query key is the prefix plus the datetime, but no uniqueness at the end.
     *
     * @param theInstant the instant to append after the constraint prefix
     * @return the row key for range queries.
     */
    public Text getQueryKey(TemporalInstant theInstant) {
        assert queryMode : "must be in query Mode, query keys are not initialized.";
        Text keyText = new Text();
        if (constraintPrefix != null) {
            appendBytes(constraintPrefix.copyBytes(), keyText);
        }
        appendInstant(theInstant, keyText);
        return keyText;
    }

    /**
     * Human-readable summary. A missing prefix or instant is rendered as <code>{null}</code>;
     * the instant is always missing on the instances produced by {@link #iterator()}.
     */
    @Override
    public String toString() {
        // The conditional has type byte[], so this resolves to toHumanString(byte[]).
        final String instantText = toHumanString(instant == null ? null : instant.getAsKeyBytes());
        return "KeyParts [contraintPrefix=" + toHumanString(constraintPrefix) + ", instant=" + instantText
                + ", cf=" + cf + ", cq=" + cq + "]";
    }

    /** Appends the zero prefix byte and the hash of the serialized subject. */
    private static void appendSubject(Statement statement, Text keyText) {
        // Encode as UTF-8 like the sibling methods, so the hash does not depend on the platform charset.
        appendPrefixedHash(StatementSerializer.writeSubject(statement), keyText);
    }

    /** Appends the zero prefix byte and the hash of the serialized predicate. */
    private static void appendPredicate(Statement statement, Text keyText) {
        appendPrefixedHash(StatementSerializer.writePredicate(statement), keyText);
    }

    /** Appends the key bytes of the instant. */
    private static void appendInstant(TemporalInstant instant, Text keyText) {
        appendBytes(instant.getAsKeyBytes(), keyText);
    }

    /** Appends the zero prefix byte and the hash of the serialized subject and predicate. */
    private static void appendSubjectPredicate(Statement statement, Text keyText) {
        appendPrefixedHash(StatementSerializer.writeSubjectPredicate(statement), keyText);
    }

    /**
     * Hashes the UTF-8 bytes of a serialized term and appends {@link #HASH_PREFIX} followed by the hash.
     * The hash is computed before anything is appended, so a failure leaves the key untouched.
     *
     * @param serialized the serialized subject and/or predicate
     * @param keyText text to append to
     */
    private static void appendPrefixedHash(String serialized, Text keyText) {
        byte[] hashOfValue = uniqueFromValueForKey(new Value(StringUtils.getBytesUtf8(serialized)));
        appendBytes(HASH_PREFIX, keyText); // prefix the hash with a zero byte.
        appendBytes(hashOfValue, keyText);
    }

    /**
     * Append any byte array to a row key.
     * @param bytes append this
     * @param keyText text to append to
     */
    private static void appendBytes(byte[] bytes, Text keyText) {
        keyText.append(bytes, 0, bytes.length);
    }

    /**
     * Get a collision-unlikely hash and append it to the key after a zero delimiter byte,
     * so that two keys for the same statement are the same, and two different statements
     * that occur at the same time get different keys.
     * If the application uses a very large number of statements at the exact same time,
     * the md5 value might be upgraded to for example sha-1 to avoid collisions.
     * @param statement the statement whose serialized form is hashed
     * @param keyText text to append to
     */
    public static void appendUniqueness(Statement statement, Text keyText) {
        keyText.append(HASH_PREFIX, 0, 1); // delimiter
        Value statementValue = new Value(StringUtils.getBytesUtf8(StatementSerializer.writeStatement(statement)));
        byte[] hashOfValue = Md5Hash.md5Binary(statementValue);
        keyText.append(hashOfValue, 0, hashOfValue.length);
    }

    /**
     * Get a collision-unlikely hash to append to the key,
     * so that equal values give equal keys and different values give different keys.
     * @param value the bytes to hash
     * @return the binary md5 hash of the value
     */
    private static byte[] uniqueFromValueForKey(Value value) {
        return Md5Hash.md5Binary(value);
    }

    /**
     * List all the index keys to find for any query.  Set the strategy via the column qualifier, ex: CQ_S_P_AT.
     * Column Family (CF) is the context/named-graph, or the empty string when there is no context.
     * One key is produced per predicate when predicates are given; otherwise exactly one key.
     * @param queryInstant the instant every returned key is built around
     * @param contraints the subject, predicate and context constraints of the query
     * @return the query-mode keys to search
     */
    static public List<KeyParts> keyPartsForQuery(TemporalInstant queryInstant, StatementContraints contraints) {
        final List<KeyParts> keys = new LinkedList<KeyParts>();
        // Placeholder for statement positions that do not take part in the hash.
        final URI urlNull = new URIImpl("urn:null");
        final Resource currentContext = contraints.getContext();
        final String contextCf = (currentContext == null) ? "" : currentContext.toString();
        final boolean hasSubj = contraints.hasSubject();
        if (contraints.hasPredicates()) {
            final String strategyCq = hasSubj ? CQ_S_P_AT : CQ_P_AT;
            for (URI nextPredicate : contraints.getPredicates()) {
                Text contraintPrefix = new Text();
                Statement statement = new ContextStatementImpl(hasSubj ? contraints.getSubject() : urlNull,
                        nextPredicate, urlNull, currentContext);
                if (hasSubj) {
                    appendSubjectPredicate(statement, contraintPrefix);
                } else {
                    appendPredicate(statement, contraintPrefix);
                }
                keys.add(new KeyParts(contraintPrefix, queryInstant, contextCf, strategyCq));
            }
        } else if (hasSubj) { // and no predicates
            Text contraintPrefix = new Text();
            Statement statement = new StatementImpl(contraints.getSubject(), urlNull, urlNull);
            appendSubject(statement, contraintPrefix);
            keys.add(new KeyParts(contraintPrefix, queryInstant, contextCf, CQ_S_AT));
        } else {
            // No constraints except possibly a context/named-graph, handled by the CF
            keys.add(new KeyParts(null, queryInstant, contextCf, CQ_O_AT));
        }
        return keys;
    }

    /**
     * Convert a non-utf8 value to a string, showing unprintable bytes as {x} where x is hex.
     * @param value the value to render; may be null
     * @return Human readable representation.
     */
    public static String toHumanString(Value value) {
        return toHumanString(value == null ? null : value.get());
    }

    /**
     * Convert a non-utf8 text to a string, showing unprintable bytes as {x} where x is hex.
     * @param text the text to render; may be null
     * @return Human readable representation.
     */
    public static String toHumanString(Text text) {
        return toHumanString(text == null ? null : text.copyBytes());
    }

    /**
     * Convert a non-utf8 byte[] to a string. Bytes below 32 or above 0x7e (including all
     * negative byte values) are shown as lower-case hex in braces without zero padding,
     * literal braces are wrapped in braces, and every other byte is shown as its character.
     * @param bytes the bytes to render; may be null
     * @return Human readable representation, or <code>{null}</code> for a null array.
     */
    public static String toHumanString(byte[] bytes) {
        if (bytes == null) {
            return "{null}";
        }
        StringBuilder sb = new StringBuilder();
        for (byte b : bytes) {
            if ((b > 0x7e) || (b < 32)) {
                sb.append("{");
                sb.append(Integer.toHexString(b & 0xff)); // Lop off the sign extended ones.
                sb.append("}");
            } else if (b == '{' || b == '}') { // Escape the literal braces.
                sb.append("{");
                sb.append((char) b);
                sb.append("}");
            } else {
                sb.append((char) b);
            }
        }
        return sb.toString();
    }

}