com.codefollower.lealone.omid.tso.TSOHandler.java Source code

Introduction

Here is the source code for com.codefollower.lealone.omid.tso.TSOHandler.java

Source

/**
 * Copyright (c) 2011 Yahoo! Inc. All rights reserved. 
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); 
 * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at 
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software 
 * distributed under the License is distributed on an "AS IS" BASIS, 
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and 
 * limitations under the License. See accompanying LICENSE file.
 */

package com.codefollower.lealone.omid.tso;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jboss.netty.buffer.ChannelBuffer;
import org.jboss.netty.channel.Channel;
import org.jboss.netty.channel.ChannelFuture;
import org.jboss.netty.channel.ChannelHandlerContext;
import org.jboss.netty.channel.ChannelStateEvent;
import org.jboss.netty.channel.Channels;
import org.jboss.netty.channel.ExceptionEvent;
import org.jboss.netty.channel.MessageEvent;
import org.jboss.netty.channel.SimpleChannelHandler;
import org.jboss.netty.channel.group.ChannelGroup;

import com.codefollower.lealone.omid.replication.SharedMessageBuffer.ReadingBuffer;
import com.codefollower.lealone.omid.tso.messages.AbortRequest;
import com.codefollower.lealone.omid.tso.messages.AbortedTransactionReport;
import com.codefollower.lealone.omid.tso.messages.CommitQueryRequest;
import com.codefollower.lealone.omid.tso.messages.CommitQueryResponse;
import com.codefollower.lealone.omid.tso.messages.CommitRequest;
import com.codefollower.lealone.omid.tso.messages.CommitResponse;
import com.codefollower.lealone.omid.tso.messages.FullAbortRequest;
import com.codefollower.lealone.omid.tso.messages.LargestDeletedTimestampReport;
import com.codefollower.lealone.omid.tso.messages.TimestampRequest;
import com.codefollower.lealone.omid.tso.messages.TimestampResponse;
import com.codefollower.lealone.omid.tso.persistence.LoggerException;
import com.codefollower.lealone.omid.tso.persistence.LoggerProtocol;
import com.codefollower.lealone.omid.tso.persistence.LoggerAsyncCallback.AddRecordCallback;
import com.codefollower.lealone.omid.tso.persistence.LoggerException.Code;

/**
 * ChannelHandler for the TSO Server.
 *
 */
public class TSOHandler extends SimpleChannelHandler {

    private static final Log LOG = LogFactory.getLog(TSOHandler.class);

    /*
     * Wrapper for Channel and Message
     */
    private static class ChannelAndMessage {
        final ChannelHandlerContext ctx;
        final TSOMessage msg;

        ChannelAndMessage(ChannelHandlerContext c, TSOMessage m) {
            ctx = c;
            msg = m;
        }
    }

    public static final AtomicLong commitCounter = new AtomicLong();
    public static final AtomicLong abortCounter = new AtomicLong();
    public static final AtomicLong commitQueryCounter = new AtomicLong();

    /**
     * Channel Group
     */
    private final ChannelGroup channelGroup;

    /**
     * Timestamp Oracle
     */
    private final TimestampOracle timestampOracle;

    /**
     * The wrapper for the shared state of TSO
     */
    private final TSOState sharedState;

    private final FlushThread flushThread;
    private final ScheduledExecutorService scheduledExecutor;
    private final int batchSize;

    private ScheduledFuture<?> flushFuture;
    private boolean finish;
    private List<ChannelAndMessage> nextBatch = new ArrayList<ChannelAndMessage>();

    private final Map<Channel, ReadingBuffer> messageBuffersMap = new ConcurrentHashMap<Channel, ReadingBuffer>();
    private final Object sharedMsgBufLock = new Object();
    private final ExecutorService executor = Executors.newSingleThreadExecutor();
    private final Object callbackLock = new Object();
    private final AddRecordCallback noCallback = new AddRecordCallback() {
        @Override
        public void addRecordComplete(int rc, Object ctx) {
        }
    };

    private final Runnable createAbortedSnapshotTask = new Runnable() {
        @Override
        public void run() {
            createAbortedSnapshot();
        }
    };

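    /**
     * Scheduled task that flushes any batch still pending when the flush timeout
     * expires, then reschedules itself.
     */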
    private final class FlushThread implements Runnable {
        @Override
        public void run() {
            if (finish) {
                return;
            }
            if (nextBatch.size() > 0) {
                synchronized (sharedState) {
                    if (nextBatch.size() > 0) {
                        addRecord();
                        if (flushFuture.cancel(false)) {
                            scheduleFlushThread();
                        }
                    }
                }
            }
            scheduleFlushThread();
        }
    }

    /**
     * Constructor
     * @param channelGroup group tracking all connected client channels
     * @param state        shared TSO state (timestamp oracle, WAL buffer, commit/abort maps)
     * @param batchSize    number of buffered WAL bytes that triggers an immediate flush
     */
    public TSOHandler(ChannelGroup channelGroup, TSOState state, int batchSize) {
        this.channelGroup = channelGroup;
        this.timestampOracle = state.getTimestampOracle();
        this.sharedState = state;

        this.flushThread = new FlushThread();
        this.scheduledExecutor = Executors.newSingleThreadScheduledExecutor(new ThreadFactory() {
            @Override
            public Thread newThread(Runnable r) {
                Thread t = new Thread(Thread.currentThread().getThreadGroup(), r);
                t.setDaemon(true);
                t.setName("Flush Thread");
                return t;
            }
        });
        this.batchSize = batchSize;
    }

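    /**
     * Writes a snapshot marker followed by the currently half-aborted transactions
     * to the WAL. Scheduled from the commit path once largestDeletedTimestamp has
     * advanced by more than TSOState.MAX_ITEMS since the previous snapshot.
     */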
    private void createAbortedSnapshot() {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream toWAL = new DataOutputStream(baos);

        long snapshot = sharedState.hashmap.getAndIncrementAbortedSnapshot();

        try {
            toWAL.writeByte(LoggerProtocol.SNAPSHOT);
            toWAL.writeLong(snapshot);
            for (AbortedTransaction aborted : sharedState.hashmap.halfAborted) {
                // ignore aborted transactions from last snapshot
                if (aborted.getSnapshot() < snapshot) {
                    toWAL.writeByte(LoggerProtocol.ABORT);
                    toWAL.writeLong(aborted.getStartTimestamp());
                }
            }
        } catch (IOException e) {
            // can't happen
            throw new RuntimeException(e);
        }

        sharedState.addRecord(baos.toByteArray(), noCallback, null);
    }

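    /**
     * Persists the batched WAL bytes; once the write completes successfully, the
     * queued client responses of the batch are written to their channels.
     * Called with the sharedState lock held.
     */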
    private void addRecord() {
        sharedState.addRecord(sharedState.baos.toByteArray(), new AddRecordCallback() {
            @Override
            public void addRecordComplete(int rc, Object ctx) {
                if (rc != Code.OK) {
                    LOG.warn("Write failed: " + LoggerException.getMessage(rc));
                } else {
                    synchronized (callbackLock) {
                        @SuppressWarnings("unchecked")
                        ArrayList<ChannelAndMessage> theBatch = (ArrayList<ChannelAndMessage>) ctx;
                        for (ChannelAndMessage cam : theBatch) {
                            Channels.write(cam.ctx, Channels.succeededFuture(cam.ctx.getChannel()), cam.msg);
                        }
                    }
                }
            }
        }, nextBatch);
        nextBatch = new ArrayList<ChannelAndMessage>(nextBatch.size() + 5);
        sharedState.baos.reset();
    }

    private void scheduleFlushThread() {
        flushFuture = scheduledExecutor.schedule(flushThread, TSOState.FLUSH_TIMEOUT, TimeUnit.MILLISECONDS);
    }

    public void start() {
        scheduleFlushThread();
    }

    public void stop() {
        finish = true;
    }

    /**
     * If write of a message was not possible before, we can do it here
     */
    @Override
    public void channelInterestChanged(ChannelHandlerContext ctx, ChannelStateEvent e) {
    }

    @Override
    public void channelConnected(ChannelHandlerContext ctx, ChannelStateEvent e) throws Exception {
        channelGroup.add(ctx.getChannel());
    }

    @Override
    public void channelDisconnected(ChannelHandlerContext ctx, ChannelStateEvent e) throws Exception {
        synchronized (sharedMsgBufLock) {
            sharedState.sharedMessageBuffer.removeReadingBuffer(ctx);
        }
        messageBuffersMap.remove(ctx.getChannel());
    }

    @Override
    public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) {
        LOG.warn("TSOHandler: Unexpected exception from downstream.", e.getCause());
        Channels.close(e.getChannel());
    }

    /**
 * Handle received messages
     */
    @Override
    public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) {
        Object msg = e.getMessage();
        if (msg instanceof TimestampRequest) {
            handle((TimestampRequest) msg, ctx);
        } else if (msg instanceof CommitRequest) {
            handle((CommitRequest) msg, ctx);
        } else if (msg instanceof FullAbortRequest) {
            handle((FullAbortRequest) msg, ctx);
        } else if (msg instanceof CommitQueryRequest) {
            handle((CommitQueryRequest) msg, ctx);
        } else if (msg instanceof AbortRequest) {
            handleHalfAbort(((AbortRequest) msg).startTimestamp);
        }
    }

    /**
     * Handle the TimestampRequest message.
     * The first request on a channel also bootstraps the client: a ReadingBuffer is
     * registered for the channel and the current Zipper state, the largest deleted
     * timestamp and the half-aborted transactions are replayed to it.
     */
    private void handle(TimestampRequest msg, ChannelHandlerContext ctx) {
        long timestamp;
        synchronized (sharedState) {
            try {
                timestamp = timestampOracle.next(sharedState.toWAL);
            } catch (IOException e) {
                LOG.error("failed to return the next timestamp", e);
                return;
            }
        }

        Channel channel = ctx.getChannel();
        ReadingBuffer readingBuffer = messageBuffersMap.get(channel);
        if (readingBuffer == null) {
            synchronized (sharedMsgBufLock) {
                readingBuffer = sharedState.sharedMessageBuffer.getReadingBuffer(ctx);
                messageBuffersMap.put(channel, readingBuffer);
                LOG.info("Channel connected: " + messageBuffersMap.size());
            }

            synchronized (sharedState) {
                synchronized (sharedMsgBufLock) {
                    channel.write(readingBuffer.getZipperState());
                    readingBuffer.initializeIndexes();
                }
            }
            channel.write(new LargestDeletedTimestampReport(sharedState.largestDeletedTimestamp));
            for (AbortedTransaction halfAborted : sharedState.hashmap.halfAborted) {
                channel.write(new AbortedTransactionReport(halfAborted.getStartTimestamp()));
            }
        }

        ChannelBuffer cb;
        ChannelFuture future = Channels.future(channel);
        synchronized (sharedMsgBufLock) {
            cb = readingBuffer.flush(future);
        }
        Channels.write(ctx, future, cb);
        Channels.write(channel, new TimestampResponse(timestamp));
    }

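    /**
     * Records a half-abort: bumps the abort counter, logs an ABORT record to the
     * WAL, updates the shared state and queues an abort report for broadcast.
     */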
    private void handleHalfAbort(long startTimestamp) {
        abortCounter.incrementAndGet();

        synchronized (sharedState) {
            DataOutputStream toWAL = sharedState.toWAL;
            try {
                toWAL.writeByte(LoggerProtocol.ABORT);
                toWAL.writeLong(startTimestamp);
            } catch (IOException e) {
                LOG.error("failed to write abort record to WAL", e);
            }
            sharedState.processHalfAbort(startTimestamp);
            synchronized (sharedMsgBufLock) {
                queueHalfAbort(startTimestamp);
            }
        }
    }

    /**
 * Handle the FullAbortRequest message
     */
    private void handle(FullAbortRequest msg, ChannelHandlerContext ctx) {
        synchronized (sharedState) {
            DataOutputStream toWAL = sharedState.toWAL;
            try {
                toWAL.writeByte(LoggerProtocol.FULL_ABORT);
                toWAL.writeLong(msg.startTimestamp);
            } catch (IOException e) {
                LOG.error("failed to write full abort record to WAL", e);
            }
            sharedState.processFullAbort(msg.startTimestamp);
        }
        synchronized (sharedMsgBufLock) {
            queueFullAbort(msg.startTimestamp);
        }
    }

    /**
     * Handle the CommitRequest message
     */
    private void handle(CommitRequest msg, ChannelHandlerContext ctx) {
        CommitResponse reply = new CommitResponse(msg.startTimestamp);
        DataOutputStream toWAL = sharedState.toWAL;
        synchronized (sharedState) {
            // 0. check if it should abort
            if (msg.startTimestamp < timestampOracle.first()) {
                reply.committed = false;
                LOG.warn("Aborting transaction after restarting TSO");
            } else if (msg.rows.length > 0 && msg.startTimestamp < sharedState.largestDeletedTimestamp) {
                // Too old and not read only
                reply.committed = false;// set as abort
                LOG.warn("Too old startTimestamp: ST " + msg.startTimestamp + " MAX "
                        + sharedState.largestDeletedTimestamp);
            } else {
                // 1. check the write-write conflicts
                for (RowKey r : msg.rows) {
                    long value = sharedState.hashmap.getLatestWriteForRow(r.hashCode());
                    if (value != 0 && value > msg.startTimestamp) {
                        reply.committed = false;// set as abort
                        break;
                    }
                }
            }

            if (reply.committed) {
                // 2. commit
                try {
                    long commitTimestamp = timestampOracle.next(toWAL);
                    sharedState.uncommited.commit(commitTimestamp);
                    sharedState.uncommited.commit(msg.startTimestamp);
                    reply.commitTimestamp = commitTimestamp;
                    if (msg.rows.length > 0) {
                        if (LOG.isTraceEnabled()) {
                            LOG.trace("Adding commit to WAL");
                        }
                        toWAL.writeByte(LoggerProtocol.COMMIT);
                        toWAL.writeLong(msg.startTimestamp);
                        toWAL.writeLong(commitTimestamp);

                        long oldLargestDeletedTimestamp = sharedState.largestDeletedTimestamp;

                        for (RowKey r : msg.rows) {
                            sharedState.hashmap.putLatestWriteForRow(r.hashCode(), commitTimestamp);
                        }

                        sharedState.largestDeletedTimestamp = sharedState.hashmap.getLargestDeletedTimestamp();
                        sharedState.processCommit(msg.startTimestamp, commitTimestamp);
                        if (sharedState.largestDeletedTimestamp > oldLargestDeletedTimestamp) {
                            toWAL.writeByte(LoggerProtocol.LARGEST_DELETED_TIMESTAMP);
                            toWAL.writeLong(sharedState.largestDeletedTimestamp);
                            Set<Long> toAbort = sharedState.uncommited
                                    .raiseLargestDeletedTransaction(sharedState.largestDeletedTimestamp);
                            if (LOG.isWarnEnabled() && !toAbort.isEmpty()) {
                                LOG.warn("Slow transactions after raising max: " + toAbort.size());
                            }
                            synchronized (sharedMsgBufLock) {
                                for (Long id : toAbort) {
                                    sharedState.hashmap.setHalfAborted(id);
                                    queueHalfAbort(id);
                                }
                                queueLargestDeletedTimestamp(sharedState.largestDeletedTimestamp);
                            }
                        }
                        if (sharedState.largestDeletedTimestamp > sharedState.previousLargestDeletedTimestamp
                                + TSOState.MAX_ITEMS) {
                            // schedule snapshot
                            executor.submit(createAbortedSnapshotTask);
                            sharedState.previousLargestDeletedTimestamp = sharedState.largestDeletedTimestamp;
                        }
                        synchronized (sharedMsgBufLock) {
                            queueCommit(msg.startTimestamp, commitTimestamp);
                        }
                    }
                } catch (IOException e) {
                    LOG.error("failed to handle CommitRequest", e);
                }
            } else { // add it to the aborted list
                handleHalfAbort(msg.startTimestamp);
            }

            commitCounter.incrementAndGet();
            nextBatch.add(new ChannelAndMessage(ctx, reply));

            if (sharedState.baos.size() >= batchSize) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Going to add record of size " + sharedState.baos.size());
                }

                addRecord();
            }
        }
    }

    /**
     * Handle the CommitQueryRequest message
     */
    private void handle(CommitQueryRequest msg, ChannelHandlerContext ctx) {
        commitQueryCounter.incrementAndGet();

        CommitQueryResponse reply = new CommitQueryResponse(msg.startTimestamp);
        reply.queryTimestamp = msg.queryTimestamp;
        synchronized (sharedState) {
            // 1. check the write-write conflicts
            long value;
            value = sharedState.hashmap.getCommittedTimestamp(msg.queryTimestamp);
            if (value != 0) { // it exists
                reply.commitTimestamp = value;
                reply.committed = value < msg.startTimestamp; // visible only if it committed before the querying transaction started
            } else if (sharedState.hashmap.isHalfAborted(msg.queryTimestamp))
                reply.committed = false;
            else if (sharedState.uncommited.isUncommited(msg.queryTimestamp))
                reply.committed = false;
            else
                reply.retry = true;
            // else if (sharedState.largestDeletedTimestamp >=
            // msg.queryTimestamp)
            // reply.committed = true;
            // TODO retry needed? Isn't it just fully aborted?

            ctx.getChannel().write(reply);

            // We send the message directly. If after a failure the state is
            // inconsistent we'll detect it
        }
    }

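    /*
     * The queue* helpers below write broadcast messages (commits, half/full aborts
     * and largestDeletedTimestamp updates) into the shared message buffer, which
     * the per-channel ReadingBuffers flush out to connected clients.
     */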
    private void queueCommit(long startTimestamp, long commitTimestamp) {
        sharedState.sharedMessageBuffer.writeCommit(startTimestamp, commitTimestamp);
    }

    private void queueHalfAbort(long startTimestamp) {
        sharedState.sharedMessageBuffer.writeHalfAbort(startTimestamp);
    }

    private void queueFullAbort(long startTimestamp) {
        sharedState.sharedMessageBuffer.writeFullAbort(startTimestamp);
    }

    private void queueLargestDeletedTimestamp(long largestDeletedTimestamp) {
        sharedState.sharedMessageBuffer.writeLargestDeletedTimestamp(largestDeletedTimestamp);
    }

}
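
Usage note

A handler like this sits at the tail of a Netty 3 pipeline, behind the TSO wire-format decoder and encoder, and a single instance is shared by all client channels (per-channel state lives in messageBuffersMap). The sketch below shows one plausible way to bootstrap it; it is only an illustration: the TSOHandlerBootstrapSketch class and startServer helper are hypothetical, a TSOState prepared elsewhere is assumed, and the codec step is left as a placeholder for whatever TSOMessage decoder/encoder the project actually provides.

import java.net.InetSocketAddress;
import java.util.concurrent.Executors;

import org.jboss.netty.bootstrap.ServerBootstrap;
import org.jboss.netty.channel.ChannelPipeline;
import org.jboss.netty.channel.ChannelPipelineFactory;
import org.jboss.netty.channel.Channels;
import org.jboss.netty.channel.group.ChannelGroup;
import org.jboss.netty.channel.group.DefaultChannelGroup;
import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory;

import com.codefollower.lealone.omid.tso.TSOHandler;
import com.codefollower.lealone.omid.tso.TSOState;

public class TSOHandlerBootstrapSketch {

    /** Starts a TSO server on the given port, using a TSOState prepared by the caller. */
    public static TSOHandler startServer(TSOState state, int port, int batchSize) {
        ChannelGroup channelGroup = new DefaultChannelGroup("TSO");
        // One handler instance is shared by every accepted channel.
        final TSOHandler handler = new TSOHandler(channelGroup, state, batchSize);
        handler.start(); // arms the periodic batch flush

        ServerBootstrap bootstrap = new ServerBootstrap(new NioServerSocketChannelFactory(
                Executors.newCachedThreadPool(), Executors.newCachedThreadPool()));
        bootstrap.setPipelineFactory(new ChannelPipelineFactory() {
            @Override
            public ChannelPipeline getPipeline() {
                ChannelPipeline pipeline = Channels.pipeline();
                // Placeholder: install the project's TSOMessage decoder/encoder here,
                // e.g. pipeline.addLast("decoder", ...) and pipeline.addLast("encoder", ...).
                pipeline.addLast("handler", handler);
                return pipeline;
            }
        });
        bootstrap.bind(new InetSocketAddress(port));
        return handler; // the caller should invoke handler.stop() on shutdown
    }
}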