com.chinamobile.bcbsp.io.titan.TitanInputFormat.java Source code

Java tutorial

Introduction

Here is the source code for com.chinamobile.bcbsp.io.titan.TitanInputFormat.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.chinamobile.bcbsp.io.titan;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;

import com.chinamobile.bcbsp.io.InputFormat;
import com.chinamobile.bcbsp.io.RecordReader;
import com.chinamobile.bcbsp.util.BSPJob;
import com.tinkerpop.rexster.client.RexProException;
import com.tinkerpop.rexster.client.RexsterClient;
import com.tinkerpop.rexster.client.RexsterClientFactory;

/**
 * TitanInputFormat implements the input split strategy for a table in a Titan
 * database.
 *
 * <p>{@link #initialize(Configuration)} must be called before
 * {@link #getSplits(BSPJob)} or {@link #createRecordReader(InputSplit, BSPJob)};
 * it opens the Rexster client and the HBase table that the other methods use.
 * The number of regions of the HBase table is used as the target number of
 * splits, and the vertex ID range reported by Titan is divided into that many
 * contiguous sub-ranges.
 *
 * <p>NOTE(review): the Rexster client and the HTable opened by
 * {@code initialize} are never closed in this class; confirm whether the
 * framework releases them elsewhere.
 *
 * @author Zhicheng Liu 2013/4/25
 */
public class TitanInputFormat extends InputFormat<Text, Text> {
    /** Log handle. */
    private static final Log LOG = LogFactory.getLog(TitanInputFormat.class);
    /** HBase table whose region count decides the target number of splits. */
    private HTable hTable;
    /** Rexster client used to query the Titan database. */
    private RexsterClient client;
    /** Configuration handle, populated by {@link #initialize(Configuration)}. */
    private Configuration conf;

    /**
     * Creates and initializes the record reader for one split.
     *
     * @param input the split to read; must be a {@code TitanTableSplit}
     * @param job the job the reader is created for (not used here)
     * @return the initialized reader, or {@code null} if it could not be
     *         created (the failure is logged together with its cause)
     * @throws IOException declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public RecordReader<Text, Text> createRecordReader(InputSplit input, BSPJob job)
            throws IOException, InterruptedException {
        TitanTableSplit split = (TitanTableSplit) input;
        try {
            TitanRecordReader reader = new TitanRecordReader();
            reader.setClient(client);
            reader.setFirstVertexID(split.getFirstVertexID());
            reader.setLastVertexID(split.getLastVertexID());
            reader.init();
            return reader;
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Keep the interrupt visible to the caller even though the
                // failure itself is reported through the null return value.
                Thread.currentThread().interrupt();
            }
            // Log the cause as well; the bare message alone is not diagnosable.
            LOG.error("Can not create a RecordReader for Titan", e);
            return null;
        }
    }

    /**
     * Computes the input splits for the configured table in the Titan database.
     *
     * <p>All vertex IDs are fetched through the Rexster client, the smallest
     * and largest ID are determined, and the closed range between them is cut
     * into at most as many contiguous sub-ranges as the HBase table has
     * regions. Each sub-range becomes one {@code TitanTableSplit}.
     *
     * @param job the job the splits are computed for (not used here)
     * @return the list of splits (empty if the database reports no vertices),
     *         or {@code null} if the vertex IDs could not be obtained or parsed
     * @throws IOException if this input format has not been successfully
     *         initialized, or if HBase or the Rexster client fails with an
     *         I/O error
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public List<InputSplit> getSplits(BSPJob job) throws IOException, InterruptedException {
        if (hTable == null || client == null) {
            // initialize() only logs its failures, so fail here with a clear
            // message instead of a bare NullPointerException.
            throw new IOException(
                    "TitanInputFormat is not initialized: Titan client or HBase table is missing");
        }
        // A table reporting zero regions must not cause a division by zero.
        int splitNum = Math.max(1, hTable.getRegionLocations().size());

        long[] vertexIDs;
        try {
            // Parsing happens inside the try so that malformed IDs are actually
            // handled by the NumberFormatException branch below.
            vertexIDs = parseVertexIDs(client.execute("g.V.vertexID").toString());
        } catch (NumberFormatException e) {
            LOG.error("Can not get the number of vertex in database!", e);
            return null;
        } catch (RexProException e) {
            LOG.error("Client of database collapse!", e);
            return null;
        }

        ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
        if (vertexIDs.length == 0) {
            // Nothing to read: no vertices means no splits.
            return splits;
        }

        // Seed both extremes from the data; seeding them with 0 would hide the
        // real minimum whenever every ID is positive.
        long smallestVertexID = vertexIDs[0];
        long largestVertexID = vertexIDs[0];
        for (int i = 1; i < vertexIDs.length; i++) {
            if (vertexIDs[i] < smallestVertexID) {
                smallestVertexID = vertexIDs[i];
            }
            if (vertexIDs[i] > largestVertexID) {
                largestVertexID = vertexIDs[i];
            }
        }

        // Every split covers (blockLength + 1) consecutive IDs, which yields at
        // most splitNum splits over [smallestVertexID, largestVertexID].
        // NOTE(review): assumes largest - smallest fits in a long, i.e. the IDs
        // do not span both extremes of the long range.
        long blockLength = (largestVertexID - smallestVertexID) / splitNum;
        String tableName = conf.get("TITAN_INPUT_TABLE_NAME");
        long first = smallestVertexID;
        while (true) {
            // Written as a subtraction so that first + blockLength cannot overflow.
            long last = (largestVertexID - first > blockLength)
                    ? first + blockLength
                    : largestVertexID;
            TitanTableSplit split = new TitanTableSplit();
            split.setTableName(tableName);
            split.setFirstVertexID(first);
            split.setLastVertexID(last);
            splits.add(split);
            if (last == largestVertexID) {
                // Stop before incrementing so Long.MAX_VALUE cannot wrap around.
                break;
            }
            first = last + 1;
        }
        return splits;
    }

    /**
     * Parses the textual form of a list of vertex IDs, e.g. {@code "[4, 8, 12]"}.
     *
     * @param listText the list text; surrounding brackets are optional
     * @return the parsed IDs in their original order; empty for an empty list
     * @throws NumberFormatException if an element is not a valid long
     */
    private static long[] parseVertexIDs(String listText) {
        String body = listText.trim();
        if (body.startsWith("[")) {
            body = body.substring(1);
        }
        if (body.endsWith("]")) {
            body = body.substring(0, body.length() - 1);
        }
        body = body.trim();
        if (body.length() == 0) {
            return new long[0];
        }
        String[] tokens = body.split(",");
        long[] ids = new long[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            ids[i] = Long.parseLong(tokens[i].trim());
        }
        return ids;
    }

    /**
     * Prepares this input format: copies the given configuration, resolves the
     * Titan and HBase settings (with defaults), opens the Rexster client and
     * then the HBase table.
     *
     * <p>Failures are logged and leave this input format partially
     * initialized; if the Rexster client cannot be opened, the HBase table is
     * not opened either.
     *
     * @param configuration source of the {@code titan.*} and {@code hbase.*}
     *        settings (presumably loaded from "titan.xml", per the original
     *        comment; confirm against the caller)
     */
    @Override
    public void initialize(Configuration configuration) {
        conf = new Configuration(configuration);
        conf.set("TITAN_SERVER_ADDRESS", configuration.get("titan.server.address", "localhost"));
        conf.set("TITAN_INPUT_TABLE_NAME", configuration.get("titan.input.table.name", "graph"));
        conf.set("HBASE_MASTER_ADDRESS", configuration.get("hbase.master.address", "localhost"));
        conf.set("HBASE_INPUT_TABLE_NAME", configuration.get("hbase.input.table.name", "titan"));
        try {
            client = RexsterClientFactory.open(conf.get("TITAN_SERVER_ADDRESS"),
                    conf.get("TITAN_INPUT_TABLE_NAME"));
        } catch (Exception e) {
            LOG.error("The client of database can not obtain!", e);
            return;
        }
        conf = HBaseConfiguration.create(conf);
        try {
            hTable = new HTable(conf, conf.get("HBASE_INPUT_TABLE_NAME"));
        } catch (IOException e) {
            LOG.error("The table in HBase can not obtain", e);
        }
    }
}