com.chinamobile.bcbsp.fault.storage.Checkpoint.java Source code

Introduction

Here is the source code for com.chinamobile.bcbsp.fault.storage.Checkpoint.java, a BC-BSP class that writes and reads checkpoints for fault tolerance and staff migration.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.chinamobile.bcbsp.fault.storage;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.concurrent.ConcurrentLinkedQueue;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.util.ReflectionUtils;

import com.chinamobile.bcbsp.Constants;
import com.chinamobile.bcbsp.api.Vertex;
import com.chinamobile.bcbsp.bspstaff.Staff;
import com.chinamobile.bcbsp.comm.CommunicationFactory;
import com.chinamobile.bcbsp.comm.CommunicatorInterface;
import com.chinamobile.bcbsp.comm.GraphStaffHandler;
import com.chinamobile.bcbsp.comm.IMessage;
import com.chinamobile.bcbsp.graph.GraphDataFactory;
import com.chinamobile.bcbsp.graph.GraphDataInterface;
import com.chinamobile.bcbsp.io.OutputFormat;
import com.chinamobile.bcbsp.io.RecordWriter;
import com.chinamobile.bcbsp.io.TextBSPFileOutputFormat;
import com.chinamobile.bcbsp.io.db.TableOutputFormat;
import com.chinamobile.bcbsp.thirdPartyInterface.HDFS.BSPHdfs;
import com.chinamobile.bcbsp.thirdPartyInterface.HDFS.BSPoutHdfs;
import com.chinamobile.bcbsp.thirdPartyInterface.HDFS.impl.BSPHdfsImpl;
import com.chinamobile.bcbsp.thirdPartyInterface.HDFS.impl.BSPoutHdfsImpl;
import com.chinamobile.bcbsp.util.BSPJob;
import com.chinamobile.bcbsp.util.StaffAttemptID;

/**
 * Writes and reads checkpoints for fault tolerance and staff migration.
 * @author hadoop
 */
public class Checkpoint {
    /** Log handle for the Checkpoint class. */
    private static final Log LOG = LogFactory.getLog(Checkpoint.class);
    /** The vertex class of the job being checkpointed. */
    private Class<? extends Vertex<?, ?, ?>> vertexClass;

    /**
     * Constructs a Checkpoint and caches the vertex class of the job.
     * @param job
     *        job to checkpoint
     */
    public Checkpoint(BSPJob job) {
        vertexClass = job.getVertexClass();
    }

    /**
     * Writes a checkpoint of the graph data to HBase or HDFS, depending on
     * the job's checkpoint type.
     * @param graphData
     *        graph data to checkpoint
     * @param writePath
     *        checkpoint write path
     * @param job
     *        job to checkpoint
     * @param staff
     *        staff to checkpoint
     * @return true if the checkpoint was written successfully, false otherwise.
     *         Note: this should be reworked so that the detailed operations are
     *         sealed inside the graph data interface rather than exposed here.
     */
    @SuppressWarnings("unchecked")
    public boolean writeCheckPoint(GraphDataInterface graphData, Path writePath, BSPJob job, Staff staff)
            throws IOException {
        LOG.info("The init write path is : " + writePath.toString());
        try {
            OutputFormat outputformat = null;
            if (job.getCheckpointType().equals("HBase")) {
                createTable(job, staff.getStaffAttemptId());
                outputformat = ReflectionUtils.newInstance(TableOutputFormat.class, job.getConf());
                job.getConf().set(TableOutputFormat.OUTPUT_TABLE, staff.getStaffAttemptId().toString());
            } else if (job.getCheckpointType().equals("HDFS")) {
                outputformat = ReflectionUtils.newInstance(TextBSPFileOutputFormat.class, job.getConf());
            }
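            // If the checkpoint type is neither "HBase" nor "HDFS", outputformat
            // stays null; the resulting NullPointerException is caught below and
            // reported as a failed write.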
            outputformat.initialize(job.getConf());
            RecordWriter output = outputformat.getRecordWriter(job, staff.getStaffAttemptId(), writePath);
            graphData.saveAllVertices(output);
            output.close(job);
        } catch (Exception e) {
            LOG.error("Exception has happened and been catched!", e);
            return false;
        }
        return true;
    }

    /**
     * Reads a checkpoint back into a newly created graph data instance.
     * @param readPath
     *        checkpoint read path
     * @param job
     *        job to read the checkpoint for
     * @param staff
     *        staff that receives the graph data
     * @return the restored graph data
     */
    @SuppressWarnings("unchecked")
    public GraphDataInterface readCheckPoint(Path readPath, BSPJob job, Staff staff) {
        GraphDataInterface graphData = null;
        GraphDataFactory graphDataFactory = staff.getGraphDataFactory();
        int version = job.getGraphDataVersion();
        graphData = graphDataFactory.createGraphData(version, staff);
        Vertex vertexTmp = null;
        String s = null;
        if (job.getCheckpointType().equals("HBase")) {
            HTablePool pool = new HTablePool(job.getConf(), 1000);
            HTable table = (HTable) pool.getTable(staff.getStaffAttemptId().toString());
            try {
                ResultScanner rs = table.getScanner(new Scan());
                for (Result r : rs) {
                    KeyValue[] keyValue = r.raw();
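                    // Rebuild the serialized vertex: row key + KV_SPLIT_FLAG + first cell value.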
                    s = new String(r.getRow()) + Constants.KV_SPLIT_FLAG + new String(keyValue[0].getValue());
                    try {
                        vertexTmp = this.vertexClass.newInstance();
                        vertexTmp.fromString(s);
                    } catch (Exception e) {
                        throw new RuntimeException("[Checkpoint] caught: ", e);
                    }
                    LOG.info("vertexTmp = " + vertexTmp);
                    graphData.addForAll(vertexTmp);
                }
                rs.close();
            } catch (IOException e) {
                LOG.error("Exception has happened and been caught!", e);
            }

        } else if (job.getCheckpointType().equals("HDFS")) {
            LOG.info("The init read path is : " + readPath.toString());
            String uri = readPath.toString() + "/" + staff.getStaffAttemptId().toString() + "/checkpoint.cp";
            LOG.info("uri: " + uri);
            BSPHdfs hdfsCheckpoint = new BSPHdfsImpl();
            InputStream in = null;
            BufferedReader bis = null;
            try {
                in = hdfsCheckpoint.hdfsCheckpoint(uri, hdfsCheckpoint.getConf());
                bis = new BufferedReader(new InputStreamReader(new BufferedInputStream(in)));
                while ((s = bis.readLine()) != null) {
                    try {
                        vertexTmp = this.vertexClass.newInstance();
                        vertexTmp.fromString(s);
                    } catch (Exception e) {
                        throw new RuntimeException("[Checkpoint] caught: ", e);
                    }
                    graphData.addForAll(vertexTmp);
                }
                graphData.finishAdd();
                bis.close();
            } catch (IOException e) {
                throw new RuntimeException("Exception has happened and been caught!", e);
            }
        }
        return graphData;
    }

    /**
     * Writes the message and graph data information needed to
     * migrate a slow staff.
     * @param communicator
     *        communicator whose messages need to be backed up
     * @param graphData
     *        graph data to back up
     * @param writePath
     *        backup write path
     * @param job
     *        job to back up
     * @param graphStaffHandler
     *        handler of the staff's graph data
     * @param staff
     *        staff to back up
     * @param messagesQueue
     *        queue of serialized messages to write
     * @return true if the messages were written successfully (or there was
     *         nothing to write), false otherwise
     * @throws IOException
     *         if writing the messages fails
     * @throws InterruptedException
     *         if the write is interrupted
     * @author liuzhicheng
     */
    public boolean writeMessages(CommunicatorInterface communicator, GraphDataInterface graphData, Path writePath,
            BSPJob job, GraphStaffHandler graphStaffHandler, Staff staff,
            ConcurrentLinkedQueue<String> messagesQueue) throws IOException, InterruptedException {
        try {
            LOG.info("The init write path is : " + writePath.toString());
            BSPHdfs HDFSCheckpoint = new BSPHdfsImpl();
            BSPoutHdfs OUT = new BSPoutHdfsImpl();
            OUT.fsDataOutputStream(writePath, HDFSCheckpoint.getConf());
            StringBuilder sb = new StringBuilder();

            // Serialize every queued message, one per line.
            for (String message : messagesQueue) {
                LOG.info("Write messages " + message);
                sb.append(message);
                sb.append("\n");
            }
            OUT.writeBytes(sb.toString());
            OUT.flush();
            OUT.close();
        } catch (IOException e) {
            LOG.error("Exception has happened and been caught!", e);
            return false;
        }
        return true;
    }

    /**
     * Reads the backed-up message information.
     * @param readPath
     *        path of the message backup to read
     * @param job
     *        job whose message backup is read
     * @param staff
     *        staff that reads the message backup
     * @return a map from vertex ID to the list of its incoming messages
     */
    public Map<String, LinkedList<IMessage>> readMessages(Path readPath, BSPJob job, Staff staff) {
        LOG.info("The init read path is : " + readPath.toString());
        String uri = readPath.toString();
        InputStream in = null;
        BufferedReader bis = null;
        Map<String, LinkedList<IMessage>> incomedMessages = new HashMap<String, LinkedList<IMessage>>();
        CommunicationFactory.setMessageClass(job.getMessageClass());
        try {
            BSPHdfs bspRead = new BSPHdfsImpl();
            in = bspRead.hdfsCheckpoint(uri, bspRead.getConf());
            bis = new BufferedReader(new InputStreamReader(new BufferedInputStream(in)));
            String s = bis.readLine();
            if (s == null) {
                LOG.info("message file is empty! ");
            }
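            // Each line has the form: vertexID + MESSAGE_SPLIT + messages, where
            // the messages are separated by SPACE_SPLIT_FLAG and each message is
            // messageId + SPLIT_FLAG + content.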
            while (s != null) {
                try {
                    String[] msgInfos = s.split(Constants.MESSAGE_SPLIT);
                    if (msgInfos.length > 0) {
                        LOG.info("message is not empty!" + msgInfos.length);
                    }
                    String vertexID = msgInfos[0];
                    String[] msgs = msgInfos[1].split(Constants.SPACE_SPLIT_FLAG);
                    LinkedList<IMessage> list = new LinkedList<IMessage>();
                    for (int i = 0; i < msgs.length; i++) {
                        String[] msgss = msgs[i].split(Constants.SPLIT_FLAG);
                        IMessage msg = CommunicationFactory.createPagerankMessage();
                        if (msgss.length > 1) {
                            msg.setMessageId(Integer.parseInt(msgss[0]));
                            msg.setContent(Float.parseFloat(msgss[1]));
                        }
                        list.add(msg);
                    }
                    incomedMessages.put(vertexID, list);
                } catch (Exception e) {
                    throw new RuntimeException("[Checkpoint] caught: ", e);
                }
                s = bis.readLine();
            }
            bis.close();
        } catch (IOException e) {
            throw new RuntimeException("Exception has happened and been caught!", e);
        }
        return incomedMessages;
    }

    /**
     * Creates (or recreates) the HBase table that stores this staff's checkpoint.
     * The table is named after the staff attempt ID and has a single column
     * family, "BorderNode".
     * @param job
     *        job whose configuration supplies the ZooKeeper client port
     * @param staffId
     *        staff attempt ID used as the table name
     */
    private void createTable(BSPJob job, StaffAttemptID staffId) {
        LOG.info("create hbase table");
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.property.clientPort", job.getConf().get(Constants.ZOOKEPER_CLIENT_PORT));
        conf.set("hbase.zookeeper.quorum", "master");
        conf.set("hbase.master", "master:60000");

        String tableName = staffId.toString();
        String columnFamilyName = "BorderNode";

        try {
            HBaseAdmin admin = new HBaseAdmin(conf);
            if (admin.tableExists(tableName)) {
                admin.disableTable(tableName);
                admin.deleteTable(tableName);
            }
            HTableDescriptor descriptor = new HTableDescriptor(tableName);
            descriptor.addFamily(new HColumnDescriptor(columnFamilyName));
            admin.createTable(descriptor);
            admin.close();
        } catch (MasterNotRunningException e1) {
            LOG.error("HBase master is not running!", e1);
        } catch (ZooKeeperConnectionException e1) {
            LOG.error("Cannot connect to ZooKeeper!", e1);
        } catch (IOException e) {
            LOG.error("Exception has happened and been caught!", e);
        }
    }
}
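
Example

The following is a minimal usage sketch, not part of the original file. It assumes a BSPJob, a Staff, and a populated GraphDataInterface have been configured elsewhere, and that the helper class below lives in the same package as Checkpoint; the class and method names here are hypothetical.

import java.io.IOException;

import org.apache.hadoop.fs.Path;

import com.chinamobile.bcbsp.bspstaff.Staff;
import com.chinamobile.bcbsp.graph.GraphDataInterface;
import com.chinamobile.bcbsp.util.BSPJob;

/** Hypothetical helper that writes a checkpoint and reads it back. */
public class CheckpointRoundTrip {
    /**
     * Writes graphData to checkpointDir, then restores it.
     * For the "HDFS" checkpoint type, readCheckPoint resolves
     * checkpointDir + "/" + staffAttemptId + "/checkpoint.cp" internally,
     * so the same base path is passed to both calls.
     */
    public static GraphDataInterface roundTrip(BSPJob job, Staff staff,
            GraphDataInterface graphData, Path checkpointDir) throws IOException {
        Checkpoint checkpoint = new Checkpoint(job);
        if (!checkpoint.writeCheckPoint(graphData, checkpointDir, job, staff)) {
            // writeCheckPoint logs the underlying exception and returns false.
            throw new IOException("Checkpoint write failed for staff "
                + staff.getStaffAttemptId());
        }
        return checkpoint.readCheckPoint(checkpointDir, job, staff);
    }
}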