org.trustedanalytics.atk.giraph.io.titan.formats.TitanVertexBuilder.java Source code

Java tutorial

Introduction

Here is the source code for org.trustedanalytics.atk.giraph.io.titan.formats.TitanVertexBuilder.java

Source

/*
// Copyright(c)2015 IntelCorporation
//
// LicensedundertheApacheLicense,Version2.0(the"License");
// youmaynotusethisfileexceptincompliancewiththeLicense.
// YoumayobtainacopyoftheLicenseat
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unlessrequiredbyapplicablelaworagreedtoinwriting,software
// distributedundertheLicenseisdistributedonan"ASIS"BASIS,
// WITHOUTWARRANTIESORCONDITIONSOFANYKIND,eitherexpressorimplied.
// SeetheLicenseforthespecificlanguagegoverningpermissionsand
// limitationsundertheLicense.
*/

package org.trustedanalytics.atk.giraph.io.titan.formats;

import org.trustedanalytics.atk.giraph.io.EdgeData4CFWritable;
import org.trustedanalytics.atk.giraph.io.VertexData4CFWritable;
import com.thinkaurelius.titan.core.TitanEdge;
import com.thinkaurelius.titan.core.TitanProperty;
import com.thinkaurelius.titan.hadoop.FaunusVertex;
import com.tinkerpop.blueprints.Direction;
import org.apache.commons.lang.StringUtils;
import org.apache.giraph.conf.StrConfOption;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.edge.EdgeFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.log4j.Logger;
import org.apache.mahout.math.DenseVector;

import java.util.*;

import static org.trustedanalytics.atk.giraph.io.titan.common.GiraphTitanConstants.*;
import static org.trustedanalytics.atk.giraph.io.titan.common.GiraphTitanConstants.EDGE_TYPE_PROPERTY_KEY;
import static org.trustedanalytics.atk.giraph.io.titan.common.GiraphTitanConstants.VERTEX_TYPE_PROPERTY_KEY;

public class TitanVertexBuilder {

    /** Enable vector value */
    protected boolean enableVectorValue = false;

    /** HashMap of configured vertex properties */
    protected final Map<String, Integer> vertexValuePropertyKeys;

    /** HashMap of configured edge properties */
    protected final Map<String, Integer> edgeValuePropertyKeys;

    /** HashSet of configured edge labels */
    protected final Map<String, Integer> edgeLabelKeys;

    /** Property key for Vertex Type */
    protected String vertexTypePropertyKey;

    /** Property key for Edge Type */
    protected String edgeTypePropertyKey;

    /** Regex for splitting delimited strings */
    protected String regexp = "[\\s,\\t]+";

    private static final Logger LOG = Logger.getLogger(TitanVertexBuilder.class);

    /**
     * Constructs a Titan/HBase vertex builder.
     *
     * Reads the Giraph configuration to extract the vertex and edge property names,
     * and edge labels needed by the builder.
     *
     * @param conf Giraph configuration file.
     */
    public TitanVertexBuilder(final Configuration conf) {
        this.vertexValuePropertyKeys = getPropertyKeyMap(conf, INPUT_VERTEX_VALUE_PROPERTY_KEY_LIST);
        this.edgeValuePropertyKeys = getPropertyKeyMap(conf, INPUT_EDGE_VALUE_PROPERTY_KEY_LIST);
        this.edgeLabelKeys = getPropertyKeyMap(conf, INPUT_EDGE_LABEL_LIST);
        this.enableVectorValue = Boolean.TRUE.toString().equals(VECTOR_VALUE.get(conf));
        this.vertexTypePropertyKey = VERTEX_TYPE_PROPERTY_KEY.get(conf);
        this.edgeTypePropertyKey = EDGE_TYPE_PROPERTY_KEY.get(conf);
    }

    /**
     * Constructs an iterator of the Titan vertex properties from a Faunus vertex.
     *
     * The vertex properties are filtered by property name.
     *
     * @param faunusVertex Titan/Hadoop (faunus) vertex
     * @return Titan vertex properties filtered by property name.
     */
    public Iterator<TitanProperty> buildTitanProperties(FaunusVertex faunusVertex) {
        ArrayList<TitanProperty> titanProperties = new ArrayList<>();

        if (vertexValuePropertyKeys != null) {
            for (String propertyKey : vertexValuePropertyKeys.keySet()) {
                Iterator<TitanProperty> propertyIterator = faunusVertex.getProperties(propertyKey).iterator();

                if (propertyIterator.hasNext()) {
                    titanProperties.add(propertyIterator.next());
                }
            }

        }
        return (titanProperties.iterator());
    }

    /**
     * Get CF vertex property type
     *
     * @param faunusVertex          Faunus (Titan/Hadoop) vertex
     * @param vertexTypePropertyKey Property name for vertex type
     * @return Vertex property type (Left or Right)
     * @throws IllegalArgumentException
     */
    public VertexData4CFWritable.VertexType getCFVertexTypeProperty(FaunusVertex faunusVertex,
            String vertexTypePropertyKey) throws IllegalArgumentException {
        VertexData4CFWritable.VertexType vertexType;
        Object vertexTypeObject = null;

        if (StringUtils.isNotBlank(vertexTypePropertyKey)) {
            vertexTypeObject = faunusVertex.getProperty(vertexTypePropertyKey);
        }

        if (vertexTypeObject != null) {
            String vertexTypeString = vertexTypeObject.toString().toLowerCase();
            if (vertexTypeString.equals(VERTEX_TYPE_LEFT)) {
                vertexType = VertexData4CFWritable.VertexType.User;
            } else if (vertexTypeString.equals(VERTEX_TYPE_RIGHT)) {
                vertexType = VertexData4CFWritable.VertexType.Item;
            } else {
                LOG.error("Vertex type string: %s isn't supported." + vertexTypeString);
                throw new IllegalArgumentException(
                        String.format("Vertex type string: %s isn't supported.", vertexTypeString));
            }
        } else {
            LOG.error(String.format("Vertex type property %s should not be null", vertexTypePropertyKey));
            throw new IllegalArgumentException(
                    String.format("Vertex type property %s should not be null", vertexTypePropertyKey));
        }
        return (vertexType);
    }

    /**
     * Get CF edge property type.
     *
     * @param titanEdge           Titan edge
     * @param edgeTypePropertyKey Edge type property name
     * @return Edge type
     */
    public EdgeData4CFWritable.EdgeType getCFEdgeTypeProperty(TitanEdge titanEdge, String edgeTypePropertyKey) {
        EdgeData4CFWritable.EdgeType edgeType;
        Object edgeTypeObject = null;

        if (StringUtils.isNotBlank(edgeTypePropertyKey)) {
            edgeTypeObject = titanEdge.getProperty(edgeTypePropertyKey);
        }

        if (edgeTypeObject != null) {
            String edgeTypeString = edgeTypeObject.toString().toLowerCase();

            if (edgeTypeString.equals(TYPE_TRAIN)) {
                edgeType = EdgeData4CFWritable.EdgeType.TRAIN;
            } else if (edgeTypeString.equals(TYPE_VALIDATE)) {
                edgeType = EdgeData4CFWritable.EdgeType.VALIDATE;
            } else if (edgeTypeString.equals(TYPE_TEST)) {
                edgeType = EdgeData4CFWritable.EdgeType.TEST;
            } else {
                LOG.error("Edge type string: %s isn't supported." + edgeTypeString);
                throw new IllegalArgumentException(
                        String.format("Edge type string: %s isn't supported.", edgeTypeString));
            }
        } else {
            LOG.error("Edge type: %s not found." + edgeTypePropertyKey);
            throw new IllegalArgumentException(String.format("Edge type: %s not found.", edgeTypePropertyKey));
        }
        return edgeType;
    }

    /**
     * Create Giraph edge from Titan edge
     *
     * @param faunusVertex Faunus (Titan/Hadoop) vertex
     * @param titanEdge    Titan edge
     * @param edgeType     Edge Type
     * @param propertyKey  Edge property name used to set edge value
     * @return Giraph edge
     */
    public Edge<LongWritable, EdgeData4CFWritable> getCFGiraphEdge(FaunusVertex faunusVertex, TitanEdge titanEdge,
            EdgeData4CFWritable.EdgeType edgeType, String propertyKey) {

        Object edgeValueObject = titanEdge.getProperty(propertyKey);
        double edgeValue = 1.0d;

        if (edgeValueObject == null) {
            // TODO - revisit
            throw new IllegalArgumentException("Edge did not have property named '" + propertyKey + "'");
        }

        try {
            edgeValue = Double.parseDouble(edgeValueObject.toString());
        } catch (NumberFormatException e) {
            LOG.warn("Unable to parse double value for property: " + edgeValueObject);
        }

        return EdgeFactory.create(new LongWritable(titanEdge.getOtherVertex(faunusVertex).getLongId()),
                new EdgeData4CFWritable(edgeType, edgeValue));
    }

    /**
     * Update vector values using Titan property value
     *
     * @param vector Mahout vector
     * @param titanProperty Titan property
     * @return Updated Mahout vector
     */
    public org.apache.mahout.math.Vector setVector(org.apache.mahout.math.Vector vector,
            TitanProperty titanProperty) {
        Object vertexValueObject = titanProperty.getValue();
        if (enableVectorValue) {
            //one property key has a vector as value
            //split by either space or comma or tab
            String[] valueString = vertexValueObject.toString().split(regexp);
            int size = valueString.length;
            double[] data = new double[size];
            vector = new DenseVector(data);
            for (int i = 0; i < valueString.length; i++) {
                vector.set(i, Double.parseDouble(valueString[i]));
            }
        } else {
            String propertyName = titanProperty.getPropertyKey().getName();
            int propertyIndex = vertexValuePropertyKeys.get(propertyName);
            double vertexValue = Double.parseDouble(vertexValueObject.toString());
            vector.set(propertyIndex, vertexValue);
        }
        return vector;
    }

    /**
     * Construct Titan edges filtered by edge label
     *
     * @param faunusVertex Titan/Hadoop (faunus) vertex
     * @return Titan edges filtered by edge label
     */
    public Iterator<TitanEdge> buildBlueprintsEdges(FaunusVertex faunusVertex) {

        if (edgeLabelKeys != null) {
            String[] edgeLabels = edgeLabelKeys.keySet().toArray(new String[edgeLabelKeys.keySet().size()]);
            Iterable<TitanEdge> titanEdges = faunusVertex.query().direction(Direction.OUT).labels(edgeLabels)
                    .titanEdges();
            return (titanEdges.iterator());
        } else {
            return (Collections.<TitanEdge>emptyList().iterator());
        }
    }

    /**
     * Get a Hashmap with name of configuration parameter, and corresponding index in list.
     *
     * @param conf Giraph configuration file.
     * @param confOption Configuration option with delimited list of property keys
     * @return Hashmap with name of configuration parameter, and corresponding index in list
     */
    private Map<String, Integer> getPropertyKeyMap(final Configuration conf, final StrConfOption confOption) {
        String propertyKeyString = confOption.get(conf).trim();
        Map<String, Integer> propertyKeyMap = new HashMap<>();

        if (StringUtils.isNotEmpty(propertyKeyString)) {
            String[] configList = confOption.get(conf).split(regexp);
            for (int i = 0; i < configList.length; i++) {
                propertyKeyMap.put(configList[i], i);
            }
        }

        return (propertyKeyMap);
    }
}