org.apache.bookkeeper.bookie.Bookie.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.bookkeeper.bookie.Bookie.java

Source

/*
 * 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * 
 */

package org.apache.bookkeeper.bookie;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.bookkeeper.bookie.BookieException;
import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback;
import org.apache.log4j.Logger;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.ZooDefs.Ids;

/**
 * Implements a bookie.
 *
 */

public class Bookie extends Thread {
    HashMap<Long, LedgerDescriptor> ledgers = new HashMap<Long, LedgerDescriptor>();
    static Logger LOG = Logger.getLogger(Bookie.class);

    final File journalDirectory;

    final File ledgerDirectories[];

    // ZK registration path for this bookie
    static final String BOOKIE_REGISTRATION_PATH = "/ledgers/available/";
    static final String LEDGERS_PATH = "/ledgers";

    // ZooKeeper client instance for the Bookie
    ZooKeeper zk;

    // Running flag
    private volatile boolean running = false;

    public static class NoLedgerException extends IOException {
        private static final long serialVersionUID = 1L;
        private long ledgerId;

        public NoLedgerException(long ledgerId) {
            this.ledgerId = ledgerId;
        }

        public long getLedgerId() {
            return ledgerId;
        }
    }

    public static class NoEntryException extends IOException {
        private static final long serialVersionUID = 1L;
        private long ledgerId;
        private long entryId;

        public NoEntryException(long ledgerId, long entryId) {
            super("Entry " + entryId + " not found in " + ledgerId);
            this.ledgerId = ledgerId;
            this.entryId = entryId;
        }

        public long getLedger() {
            return ledgerId;
        }

        public long getEntry() {
            return entryId;
        }
    }

    EntryLogger entryLogger;
    LedgerCache ledgerCache;

    class SyncThread extends Thread {
        volatile boolean running = true;

        public SyncThread() {
            super("SyncThread");
        }

        @Override
        public void run() {
            while (running) {
                synchronized (this) {
                    try {
                        wait(100);
                        if (!entryLogger.testAndClearSomethingWritten()) {
                            continue;
                        }
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        continue;
                    }
                }
                lastLogMark.markLog();
                try {
                    ledgerCache.flushLedger(true);
                } catch (IOException e) {
                    e.printStackTrace();
                }
                try {
                    entryLogger.flush();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                lastLogMark.rollLog();
            }
        }
    }

    SyncThread syncThread = new SyncThread();

    public Bookie(int port, String zkServers, File journalDirectory, File ledgerDirectories[]) throws IOException {
        instantiateZookeeperClient(port, zkServers);
        this.journalDirectory = journalDirectory;
        this.ledgerDirectories = ledgerDirectories;
        entryLogger = new EntryLogger(ledgerDirectories);
        ledgerCache = new LedgerCache(ledgerDirectories);
        lastLogMark.readLog();
        final long markedLogId = lastLogMark.txnLogId;
        if (markedLogId > 0) {
            File logFiles[] = journalDirectory.listFiles();
            ArrayList<Long> logs = new ArrayList<Long>();
            for (File f : logFiles) {
                String name = f.getName();
                if (!name.endsWith(".txn")) {
                    continue;
                }
                String idString = name.split("\\.")[0];
                long id = Long.parseLong(idString, 16);
                if (id < markedLogId) {
                    continue;
                }
                logs.add(id);
            }
            Collections.sort(logs);
            if (logs.size() == 0 || logs.get(0) != markedLogId) {
                throw new IOException("Recovery log " + markedLogId + " is missing");
            }
            // TODO: When reading in the journal logs that need to be synced, we
            // should use BufferedChannels instead to minimize the amount of
            // system calls done.
            ByteBuffer lenBuff = ByteBuffer.allocate(4);
            ByteBuffer recBuff = ByteBuffer.allocate(64 * 1024);
            for (Long id : logs) {
                FileChannel recLog = openChannel(id);
                while (true) {
                    lenBuff.clear();
                    fullRead(recLog, lenBuff);
                    if (lenBuff.remaining() != 0) {
                        break;
                    }
                    lenBuff.flip();
                    int len = lenBuff.getInt();
                    if (len == 0) {
                        break;
                    }
                    recBuff.clear();
                    if (recBuff.remaining() < len) {
                        recBuff = ByteBuffer.allocate(len);
                    }
                    recBuff.limit(len);
                    if (fullRead(recLog, recBuff) != len) {
                        // This seems scary, but it just means that this is where we
                        // left off writing
                        break;
                    }
                    recBuff.flip();
                    long ledgerId = recBuff.getLong();
                    // XXX we net to make sure we set the master keys appropriately!
                    LedgerDescriptor handle = getHandle(ledgerId, false);
                    try {
                        recBuff.rewind();
                        handle.addEntry(recBuff);
                    } finally {
                        putHandle(handle);
                    }
                }
            }
        }
        setDaemon(true);
        LOG.debug("I'm starting a bookie with journal directory " + journalDirectory.getName());
        start();
        syncThread.start();
    }

    // Method to instantiate the ZooKeeper client for the Bookie.
    private void instantiateZookeeperClient(int port, String zkServers) throws IOException {
        if (zkServers == null) {
            LOG.warn(
                    "No ZK servers passed to Bookie constructor so BookKeeper clients won't know about this server!");
            zk = null;
            return;
        }
        // Create the ZooKeeper client instance
        zk = new ZooKeeper(zkServers, 10000, new Watcher() {
            @Override
            public void process(WatchedEvent event) {
                // TODO: handle session disconnects and expires
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Process: " + event.getType() + " " + event.getPath());
                }
            }
        });
        // Create the ZK ephemeral node for this Bookie.
        try {
            zk.create(BOOKIE_REGISTRATION_PATH + InetAddress.getLocalHost().getHostAddress() + ":" + port,
                    new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
        } catch (Exception e) {
            LOG.fatal("ZK exception registering ephemeral Znode for Bookie!", e);
            // Throw an IOException back up. This will cause the Bookie
            // constructor to error out. Alternatively, we could do a System
            // exit here as this is a fatal error.
            throw new IOException(e);
        }
    }

    private static int fullRead(FileChannel fc, ByteBuffer bb) throws IOException {
        int total = 0;
        while (bb.remaining() > 0) {
            int rc = fc.read(bb);
            if (rc <= 0) {
                return total;
            }
            total += rc;
        }
        return total;
    }

    private void putHandle(LedgerDescriptor handle) {
        synchronized (ledgers) {
            handle.decRef();
        }
    }

    private LedgerDescriptor getHandle(long ledgerId, boolean readonly, byte[] masterKey) throws IOException {
        LedgerDescriptor handle = null;
        synchronized (ledgers) {
            handle = ledgers.get(ledgerId);
            if (handle == null) {
                if (readonly) {
                    throw new NoLedgerException(ledgerId);
                }
                handle = createHandle(ledgerId, readonly);
                ledgers.put(ledgerId, handle);
                handle.setMasterKey(ByteBuffer.wrap(masterKey));
            }
            handle.incRef();
        }
        return handle;
    }

    private LedgerDescriptor getHandle(long ledgerId, boolean readonly) throws IOException {
        LedgerDescriptor handle = null;
        synchronized (ledgers) {
            handle = ledgers.get(ledgerId);
            if (handle == null) {
                if (readonly) {
                    throw new NoLedgerException(ledgerId);
                }
                handle = createHandle(ledgerId, readonly);
                ledgers.put(ledgerId, handle);
            }
            handle.incRef();
        }
        return handle;
    }

    private LedgerDescriptor createHandle(long ledgerId, boolean readOnly) throws IOException {
        return new LedgerDescriptor(ledgerId, entryLogger, ledgerCache);
    }

    static class QueueEntry {
        QueueEntry(ByteBuffer entry, long ledgerId, long entryId, WriteCallback cb, Object ctx) {
            this.entry = entry.duplicate();
            this.cb = cb;
            this.ctx = ctx;
            this.ledgerId = ledgerId;
            this.entryId = entryId;
        }

        ByteBuffer entry;

        long ledgerId;

        long entryId;

        WriteCallback cb;

        Object ctx;
    }

    LinkedBlockingQueue<QueueEntry> queue = new LinkedBlockingQueue<QueueEntry>();

    public final static long preAllocSize = 4 * 1024 * 1024;

    public final static ByteBuffer zeros = ByteBuffer.allocate(512);

    class LastLogMark {
        long txnLogId;
        long txnLogPosition;
        LastLogMark lastMark;

        LastLogMark(long logId, long logPosition) {
            this.txnLogId = logId;
            this.txnLogPosition = logPosition;
        }

        synchronized void setLastLogMark(long logId, long logPosition) {
            txnLogId = logId;
            txnLogPosition = logPosition;
        }

        synchronized void markLog() {
            lastMark = new LastLogMark(txnLogId, txnLogPosition);
        }

        synchronized void rollLog() {
            byte buff[] = new byte[16];
            ByteBuffer bb = ByteBuffer.wrap(buff);
            bb.putLong(txnLogId);
            bb.putLong(txnLogPosition);
            for (File dir : ledgerDirectories) {
                File file = new File(dir, "lastMark");
                try {
                    FileOutputStream fos = new FileOutputStream(file);
                    fos.write(buff);
                    fos.getChannel().force(true);
                    fos.close();
                } catch (IOException e) {
                    LOG.error("Problems writing to " + file, e);
                }
            }
        }

        synchronized void readLog() {
            byte buff[] = new byte[16];
            ByteBuffer bb = ByteBuffer.wrap(buff);
            for (File dir : ledgerDirectories) {
                File file = new File(dir, "lastMark");
                try {
                    FileInputStream fis = new FileInputStream(file);
                    fis.read(buff);
                    fis.close();
                    bb.clear();
                    long i = bb.getLong();
                    long p = bb.getLong();
                    if (i > txnLogId) {
                        txnLogId = i;
                    }
                    if (p > txnLogPosition) {
                        txnLogPosition = p;
                    }
                } catch (IOException e) {
                    LOG.error("Problems reading from " + file
                            + " (this is okay if it is the first time starting this bookie");
                }
            }
        }
    }

    private LastLogMark lastLogMark = new LastLogMark(0, 0);

    public boolean isRunning() {
        return running;
    }

    @Override
    public void run() {
        LinkedList<QueueEntry> toFlush = new LinkedList<QueueEntry>();
        ByteBuffer lenBuff = ByteBuffer.allocate(4);
        try {
            long logId = System.currentTimeMillis();
            FileChannel logFile = openChannel(logId);
            BufferedChannel bc = new BufferedChannel(logFile, 65536);
            zeros.clear();
            long nextPrealloc = preAllocSize;
            long lastFlushPosition = 0;
            logFile.write(zeros, nextPrealloc);
            running = true;
            // TODO: Currently, when we roll over the journal logs, the older
            // ones are never garbage collected. We should remove a journal log
            // once all of its entries have been synced with the entry logs.
            while (true) {
                QueueEntry qe = null;
                if (toFlush.isEmpty()) {
                    qe = queue.take();
                } else {
                    qe = queue.poll();
                    if (qe == null || bc.position() > lastFlushPosition + 512 * 1024) {
                        //logFile.force(false);
                        bc.flush(true);
                        lastFlushPosition = bc.position();
                        lastLogMark.setLastLogMark(logId, lastFlushPosition);
                        for (QueueEntry e : toFlush) {
                            e.cb.writeComplete(0, e.ledgerId, e.entryId, null, e.ctx);
                        }
                        toFlush.clear();
                    }
                }
                if (qe == null) {
                    continue;
                }
                lenBuff.clear();
                lenBuff.putInt(qe.entry.remaining());
                lenBuff.flip();
                //
                // we should be doing the following, but then we run out of
                // direct byte buffers
                // logFile.write(new ByteBuffer[] { lenBuff, qe.entry });
                bc.write(lenBuff);
                bc.write(qe.entry);
                if (bc.position() > nextPrealloc) {
                    nextPrealloc = (logFile.size() / preAllocSize + 1) * preAllocSize;
                    zeros.clear();
                    logFile.write(zeros, nextPrealloc);
                }
                toFlush.add(qe);
            }
        } catch (Exception e) {
            LOG.fatal("Bookie thread exiting", e);
        }
        running = false;
    }

    private FileChannel openChannel(long logId) throws FileNotFoundException {
        FileChannel logFile = new RandomAccessFile(new File(journalDirectory, Long.toHexString(logId) + ".txn"),
                "rw").getChannel();
        return logFile;
    }

    public void shutdown() throws InterruptedException {
        if (zk != null)
            zk.close();
        this.interrupt();
        this.join();
        syncThread.running = false;
        syncThread.join();
        for (LedgerDescriptor d : ledgers.values()) {
            d.close();
        }
    }

    public void addEntry(ByteBuffer entry, WriteCallback cb, Object ctx, byte[] masterKey)
            throws IOException, BookieException {
        long ledgerId = entry.getLong();
        LedgerDescriptor handle = getHandle(ledgerId, false, masterKey);

        if (!handle.cmpMasterKey(ByteBuffer.wrap(masterKey))) {
            throw BookieException.create(BookieException.Code.UnauthorizedAccessException);
        }
        try {
            entry.rewind();
            long entryId = handle.addEntry(entry);
            entry.rewind();
            if (LOG.isTraceEnabled()) {
                LOG.trace("Adding " + entryId + "@" + ledgerId);
            }
            queue.add(new QueueEntry(entry, ledgerId, entryId, cb, ctx));
        } finally {
            putHandle(handle);
        }
    }

    public ByteBuffer readEntry(long ledgerId, long entryId) throws IOException {
        LedgerDescriptor handle = getHandle(ledgerId, true);
        try {
            if (LOG.isTraceEnabled()) {
                LOG.trace("Reading " + entryId + "@" + ledgerId);
            }
            return handle.readEntry(entryId);
        } finally {
            putHandle(handle);
        }
    }

    // The rest of the code is test stuff
    static class CounterCallback implements WriteCallback {
        int count;

        synchronized public void writeComplete(int rc, long l, long e, InetSocketAddress addr, Object ctx) {
            count--;
            if (count == 0) {
                notifyAll();
            }
        }

        synchronized public void incCount() {
            count++;
        }

        synchronized public void waitZero() throws InterruptedException {
            while (count > 0) {
                wait();
            }
        }
    }

    /**
     * @param args
     * @throws IOException
     * @throws InterruptedException
     */
    public static void main(String[] args) throws IOException, InterruptedException, BookieException {
        Bookie b = new Bookie(5000, null, new File("/tmp"), new File[] { new File("/tmp") });
        CounterCallback cb = new CounterCallback();
        long start = System.currentTimeMillis();
        for (int i = 0; i < 100000; i++) {
            ByteBuffer buff = ByteBuffer.allocate(1024);
            buff.putLong(1);
            buff.putLong(i);
            buff.limit(1024);
            buff.position(0);
            cb.incCount();
            b.addEntry(buff, cb, null, new byte[0]);
        }
        cb.waitZero();
        long end = System.currentTimeMillis();
        System.out.println("Took " + (end - start) + "ms");
    }
}