org.paxle.data.db.impl.CommandDB.java Source code

Java tutorial

Introduction

Here is the source code for org.paxle.data.db.impl.CommandDB.java

Source

/**
 * This file is part of the Paxle project.
 * Visit http://www.paxle.net for more information.
 * Copyright 2007-2010 the original author or authors.
 *
 * Licensed under the terms of the Common Public License 1.0 ("CPL 1.0").
 * Any use, reproduction or distribution of this program constitutes the recipient's acceptance of this agreement.
 * The full license text is available under http://www.opensource.org/licenses/cpl1.0.txt
 * or in the file LICENSE.txt in the root directory of the Paxle distribution.
 *
 * Unless required by applicable law or agreed to in writing, this software is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

package org.paxle.data.db.impl;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Array;
import java.lang.reflect.Field;
import java.net.URI;
import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.ResourceBundle;

import net.sf.ehcache.Cache;
import net.sf.ehcache.CacheManager;
import net.sf.ehcache.Element;
import net.sf.ehcache.Status;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.hibernate.CacheMode;
import org.hibernate.FlushMode;
import org.hibernate.HibernateException;
import org.hibernate.Query;
import org.hibernate.ScrollMode;
import org.hibernate.ScrollableResults;
import org.hibernate.Session;
import org.hibernate.SessionFactory;
import org.hibernate.Transaction;
import org.hibernate.cfg.Configuration;
import org.onelab.filter.DynamicBloomFilter;
import org.onelab.filter.Key;
import org.osgi.framework.Constants;
import org.osgi.service.event.Event;
import org.osgi.service.event.EventHandler;
import org.osgi.service.monitor.Monitorable;
import org.osgi.service.monitor.StatusVariable;
import org.paxle.core.data.IDataProvider;
import org.paxle.core.data.IDataSink;
import org.paxle.core.doc.CommandEvent;
import org.paxle.core.doc.ICommand;
import org.paxle.core.doc.ICommandProfile;
import org.paxle.core.doc.ICommandTracker;
import org.paxle.core.doc.IDocumentFactory;
import org.paxle.data.db.ICommandDB;
import org.paxle.data.db.URIQueueEntry;

public class CommandDB
        implements IDataProvider<ICommand>, IDataSink<URIQueueEntry>, ICommandDB, EventHandler, Monitorable {

    /** Name of the directory (inside the folder given by the <code>paxle.data</code> system property) holding the double URLs cache */
    private static final String CACHE_DIR = "double-urls-caches";
    /** Name of the file the {@link #bloomFilter} is serialized into */
    private static final String BLOOM_CACHE_FILE = "doubleURLsCache.ser";
    /** Name under which the {@link #urlExistsCache} is registered at the {@link #manager cache-manager} */
    private static final String EHCACHE_NAME = "DoubleURLCache";

    // NOTE(review): not referenced in the visible part of this file; presumably
    // the maximum idle time (ms) of the writer thread - confirm
    private static final int MAX_IDLE_SLEEP = 60000;

    /* ======================================================================
     * MONITORABLE CONSTANTS
     * ====================================================================== */
    /**
     * {@link Constants#SERVICE_PID} used to register the {@link Monitorable} interface
     */
    public static final String PID = "org.paxle.data.cmddb";

    /**
     * Name of the status-variable reporting the total size
     * @see #totalSize()
     */
    private static final String MONITOR_TOTAL_SIZE = "size.total";

    /**
     * Name of the status-variable reporting the number of enqueued commands
     * @see #enqueuedSize()
     */
    private static final String MONITOR_ENQUEUED_SIZE = "size.enqueued";

    /**
     * The names of all {@link StatusVariable status-variables} supported by this {@link Monitorable}
     */
    private static final HashSet<String> VAR_NAMES = new HashSet<String>(
            Arrays.asList(new String[] { MONITOR_TOTAL_SIZE, MONITOR_ENQUEUED_SIZE }));

    /**
     * Descriptions of all {@link StatusVariable status-variables} supported by this {@link Monitorable},
     * keyed by the variable name
     */
    private final ResourceBundle rb = ResourceBundle.getBundle("OSGI-INF/l10n/CommandDB");

    /** Name of the charset used to convert locations into {@link Key bloom-filter keys} */
    private static final String UTF8 = "UTF-8";
    /**
     * The cachemanager to use
     */
    private CacheManager manager = null;

    /**
     * An in-memory cache holding the locations of commands that were recently
     * fetched from the queue, see {@link #isKnownInCache(URI)}
     */
    private Cache urlExistsCache = null;

    /**
     * Component to track {@link ICommand commands}
     */
    private ICommandTracker commandTracker;

    /**
     * A factory class to create new Commands
     */
    private IDocumentFactory commandFactory;

    /**
     * A {@link IDataSink data-sink} to write the loaded {@link ICommand commands} out
     */
    private IDataSink<ICommand> sink = null;

    /**
     * A {@link Thread thread} to read {@link ICommand commands} from the database
     * and write it into the {@link #sink data-sink}.
     */
    private Writer writerThread = null;

    /**
     * A {@link Thread thread} to populate the double URLs cache from database.
     * Only created if no serialized cache was found on startup.
     */
    private PopulateThread populateThread = null;

    /**
     * The logger
     */
    private Log logger = LogFactory.getLog(this.getClass());

    /**
     * The hibernate {@link SessionFactory}
     */
    private SessionFactory sessionFactory;

    /**
     * The currently used db configuration
     */
    private Configuration config;

    /** Set to <code>true</code> as soon as {@link #close()} has run, whether it succeeded or not */
    private boolean closed = false;

    /**
     * A bloom filter (the "double URLs cache") holding all known URLs
     */
    private DynamicBloomFilter bloomFilter = null;

    /**
     * Total number of {@link URI} known to this DB
     */
    private volatile long cntTotal;

    /**
     * Number of {@link URI} that are enqueued for processing
     */
    private volatile long cntCrawlerQueue;

    /**
     * Creates a command-DB that uses the hibernate configuration as is,
     * i.e. without merging any additional properties into it.
     *
     * @param configURL the location of the hibernate configuration file
     * @param mappings the locations of the hibernate mapping files
     * @param commandTracker component to track {@link ICommand commands}
     * @param commandFactory factory used to create new commands
     */
    public CommandDB(URL configURL, List<URL> mappings, ICommandTracker commandTracker,
            IDocumentFactory commandFactory) {
        this(configURL, mappings, (Properties) null, commandTracker, commandFactory);
    }

    /**
     * Creates and fully initializes the command-DB: configures hibernate, builds the
     * {@link SessionFactory}, determines the current DB sizes, creates (but does not start)
     * the writer thread and sets up the caches used to detect already known URLs.
     *
     * @param configURL the location of the hibernate configuration file, must not be <code>null</code>
     * @param mappings the locations of the hibernate mapping files, must not be <code>null</code>
     * @param extraProperties additional properties to merge into the hibernate configuration,
     *        may be <code>null</code>
     * @param commandTracker component to track {@link ICommand commands}
     * @param commandFactory factory used to create new commands
     * @throws NullPointerException if <code>configURL</code> or <code>mappings</code> is <code>null</code>
     * @throws RuntimeException wrapping any other error that occurs during initialization
     */
    public CommandDB(URL configURL, List<URL> mappings, Properties extraProperties, ICommandTracker commandTracker,
            IDocumentFactory commandFactory) {
        if (configURL == null)
            throw new NullPointerException("The URL to the hibernate config file is null.");
        if (mappings == null)
            throw new NullPointerException("The list of mapping files was null.");

        try {
            this.commandTracker = commandTracker;
            this.commandFactory = commandFactory;

            /* ===========================================================================
             * Init Hibernate
             * =========================================================================== */
            try {
                // NOTE(review): presumably required so that hibernate resolves its classes and
                // resources via the class loader of this bundle - confirm.
                // The previous context class loader is not restored afterwards.
                Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());

                // Read the hibernate configuration from *.cfg.xml
                this.logger.info(String.format("Loading DB configuration from URL '%s'.", configURL));
                this.config = new Configuration().configure(configURL);

                // register an interceptor (required to support our interface-based command model)
                this.config.setInterceptor(new InterfaceInterceptor(this.commandFactory));

                // merge with additional properties
                if (extraProperties != null) {
                    this.config.addProperties(extraProperties);
                }

                // post-processing of read properties
                ConnectionUrlTool.postProcessProperties(this.config);

                // load the various mapping files
                for (URL mapping : mappings) {
                    if (this.logger.isDebugEnabled())
                        this.logger.debug(String.format("Loading mapping file from URL '%s'.", mapping));
                    this.config.addURL(mapping);
                }

                // String[] sql = this.config.generateSchemaCreationScript( new org.hibernate.dialect.DerbyDialect());

                // create the session factory
                this.sessionFactory = this.config.buildSessionFactory();
            } catch (Throwable ex) {
                // Make sure you log the exception, as it might be swallowed
                this.logger.error("Initial SessionFactory creation failed.", ex);
                // this error is caught (and logged once more) by the outer catch-block below,
                // so callers see it wrapped into a RuntimeException
                throw new ExceptionInInitializerError(ex);
            }
            // best effort: create indexes on the location columns
            this.optimizeDbSchema();
            // initialize the counters from the current DB content
            cntTotal = this.totalSize();
            cntCrawlerQueue = this.size("enqueued");
            logger.info("command-db size: " + cntTotal + ", to crawl: " + cntCrawlerQueue);

            /* ===========================================================================
             * Init Reader/Writer Threads
             * =========================================================================== */
            // the thread is only created here, it is started via start()
            this.writerThread = new Writer();

            /* ===========================================================================
             * Init Cache
             * =========================================================================== */
            // configure caching manager
            this.manager = CacheManager.getInstance();

            // init a new cache
            // NOTE(review): per the Ehcache API the arguments should be (name, maxElementsInMemory,
            // overflowToDisk, eternal, timeToLiveSeconds, timeToIdleSeconds), i.e. at most 100000
            // in-memory entries living 60 min. and idling 30 min. at most - confirm
            this.urlExistsCache = new Cache(EHCACHE_NAME, 100000, false, false, 60 * 60, 30 * 60);
            this.manager.addCache(this.urlExistsCache);

            // init/open the double URLs cache, initializes the bloom-filter
            openDoubleURLSet();
        } catch (Throwable e) {
            this.logger.error(
                    String.format("Unexpected '%s' while initializing the command-DB.", e.getClass().getName()), e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Extracts the location of the database from the configured JDBC connection URL.
     * <p>
     * The location is the part between the last colon of the URL prefix and the first
     * semicolon (start of the URL attributes), or the end of the URL if there are no
     * attributes, e.g. <code>jdbc:derby:/data/db;create=true</code> results in
     * <code>/data/db</code>.
     *
     * @return the location part of the <code>connection.url</code> property
     */
    public String getDatabaseLocation() {
        final String connection = config.getProperty("connection.url");

        // the location ends where the URL attributes start
        final int semicolon = connection.indexOf(';');
        final int end = (semicolon == -1) ? connection.length() : semicolon;

        // Only colons in front of the attributes are of interest. Searching the whole
        // string would find colons within attribute values, resulting in a start
        // index beyond the end index.
        final int start = connection.lastIndexOf(':', end - 1) + 1;

        return connection.substring(start, end);
    }

    /* =========================================================================
     * Monitorable support
     * ========================================================================= */

    /**
     * Looks up the localized description of a status-variable.
     *
     * @param id the name of the status-variable
     * @return the description found in the resource-bundle for the given name
     * @throws IllegalArgumentException if the name does not denote a supported status-variable
     * @see Monitorable#getDescription(String)
     */
    public String getDescription(String id) throws IllegalArgumentException {
        if (VAR_NAMES.contains(id)) {
            return this.rb.getString(id);
        }

        throw new IllegalArgumentException("Invalid Status Variable name " + id);
    }

    /**
     * Returns the current value of the status-variable with the given name.
     * <p>
     * As a {@link StatusVariable} is created from an <code>int</code> here, sizes larger than
     * {@link Integer#MAX_VALUE} are reported as {@link Integer#MAX_VALUE} instead of
     * silently wrapping around.
     *
     * @param id the name of the status-variable
     * @return the status-variable holding the current value
     * @throws IllegalArgumentException if the name does not denote a supported status-variable
     * @see Monitorable#getStatusVariable(String)
     */
    public StatusVariable getStatusVariable(String id) throws IllegalArgumentException {
        if (!VAR_NAMES.contains(id)) {
            throw new IllegalArgumentException("Invalid Status Variable name " + id);
        }

        long rawValue = -1;
        if (id.equals(MONITOR_TOTAL_SIZE)) {
            rawValue = this.size();
        } else if (id.equals(MONITOR_ENQUEUED_SIZE)) {
            rawValue = enqueuedSize();
        }

        // saturate instead of truncating the upper bits of the long value
        final int value = (int) Math.min(rawValue, Integer.MAX_VALUE);
        return new StatusVariable(id, StatusVariable.CM_CC, value);
    }

    /**
     * Lists the names of all status-variables supported by this component.
     *
     * @return a new array containing all supported status-variable names
     * @see Monitorable#getStatusVariableNames()
     */
    public String[] getStatusVariableNames() {
        final String[] names = new String[VAR_NAMES.size()];
        return VAR_NAMES.toArray(names);
    }

    /**
     * Tells whether change notifications are sent for the given status-variable.
     * No variable of this component triggers such notifications.
     *
     * @param id the name of the status-variable
     * @return always <code>false</code>
     * @throws IllegalArgumentException if the name does not denote a supported status-variable
     * @see Monitorable#notifiesOnChange(String)
     */
    public boolean notifiesOnChange(String id) throws IllegalArgumentException {
        // reject unknown names just like getDescription() and getStatusVariable() do
        if (!VAR_NAMES.contains(id)) {
            throw new IllegalArgumentException("Invalid Status Variable name " + id);
        }

        return false;
    }

    /**
     * Resetting status-variables is not supported by this component.
     *
     * @param id the name of the status-variable
     * @return always <code>false</code>, as the variable was not reset
     * @throws IllegalArgumentException if the name does not denote a supported status-variable
     * @see Monitorable#resetStatusVariable(String)
     */
    public boolean resetStatusVariable(String id) throws IllegalArgumentException {
        // reject unknown names just like getDescription() and getStatusVariable() do
        if (!VAR_NAMES.contains(id)) {
            throw new IllegalArgumentException("Invalid Status Variable name " + id);
        }

        return false;
    }

    /* =========================================================================
     * Management for the double URLs cache
     * ========================================================================= */

    /**
     * Writes the bloom filter backing the double URLs cache into its file
     * within the cache directory.
     *
     * @throws IOException if the file could not be opened, written or closed
     */
    private void closeDoubleURLSet() throws IOException {
        final long startTime = System.currentTimeMillis();
        final File cacheFile = new File(getCreateCacheDir(), BLOOM_CACHE_FILE);
        final OutputStream rawOut = new FileOutputStream(cacheFile);
        DataOutputStream out = null;
        try {
            out = new DataOutputStream(new BufferedOutputStream(rawOut));
            bloomFilter.write(out);
            out.flush();

            // the duration is measured before the (reflection based) size lookup
            final long duration = System.currentTimeMillis() - startTime;
            final int entries = cacheSize();
            logger.info("Flushed double URLs cache (" + entries + " entries) to disk in " + duration + " ms");
        } finally {
            // closing the outermost stream that exists closes the file as well
            if (out != null) {
                out.close();
            } else {
                rawOut.close();
            }
        }
    }

    /**
     * Determines the cache directory, which is located inside the folder denoted by
     * the <code>paxle.data</code> system property. A missing directory is created.
     * If creating it fails, an error is logged and the directory is returned nevertheless.
     *
     * @return the cache directory
     */
    private File getCreateCacheDir() {
        final File cacheDir = new File(System.getProperty("paxle.data") + File.separatorChar + CACHE_DIR);

        final boolean missing = !cacheDir.exists();
        if (missing && !cacheDir.mkdirs()) {
            this.logger.error("Unable to create cache-directory: " + cacheDir);
        }

        return cacheDir;
    }

    /**
     * Initializes the {@link #bloomFilter} backing the double URLs cache.
     * <p>
     * If a serialized bloom filter is found in the cache directory it is loaded from there.
     * The file is deleted after reading it, whether reading succeeded or not.
     * <p>
     * If no file is found, or if the file can not be read (e.g. because it is corrupt),
     * an empty bloom filter is created and the {@link PopulateThread} is started to fill
     * it from the database.
     *
     * @throws IOException declared for compatibility, read errors lead to a re-population
     */
    private void openDoubleURLSet() throws IOException {
        final File serializedFile = new File(getCreateCacheDir(), BLOOM_CACHE_FILE);

        if (serializedFile.canRead() && serializedFile.isFile()) {
            logger.info(String.format("Serialized double URL set found, reading %d bytes ...",
                    Long.valueOf(serializedFile.length())));

            boolean loaded = false;
            InputStream fileIs = null;
            try {
                fileIs = new FileInputStream(serializedFile);
                final DataInputStream dataIs = new DataInputStream(new BufferedInputStream(fileIs));

                // only publish the filter after it was read completely
                final DynamicBloomFilter filter = new DynamicBloomFilter();
                filter.readFields(dataIs);
                bloomFilter = filter;
                loaded = true;
            } catch (Exception e) {
                // A truncated or corrupt file may cause I/O as well as runtime errors.
                // Neither should prevent the DB from starting, as the cache can be rebuilt.
                logger.error(String.format(
                        "Unexpected '%s' while reading the serialized double URL set, populating cache from DB instead.",
                        e.getClass().getName()), e);
            } finally {
                if (fileIs != null)
                    try {
                        fileIs.close();
                    } catch (IOException e) {
                        this.logger.error("Unable to close bloom-filter file: " + serializedFile, e);
                    }
                if (!serializedFile.delete()) {
                    this.logger.error("Unable to delete bloom-filter file: " + serializedFile);
                }
            }

            if (loaded)
                return;
        } else {
            logger.info(
                    "Serialized double URL set not found, populating cache from DB (this may take some time) ...");
        }

        bloomFilter = new DynamicBloomFilter(1437764, 10, 100000); // creating a maximum false positive rate of 0.1 %
        populateThread = new PopulateThread();
        populateThread.start();
    }

    /**
     * A {@link Thread thread} that fills the {@link #bloomFilter} with the locations
     * of all commands stored in the database.
     */
    private class PopulateThread extends Thread {

        public PopulateThread() {
            super("DoubleURLCachePopulater");
        }

        @Override
        public void run() {
            final long time = System.currentTimeMillis();
            final long count = populateDoubleURLSet();
            logger.info("Initialized the double URL cache with " + count + " entries in "
                    + ((System.currentTimeMillis() - time) / 1000) + " seconds");
        }

        /**
         * Reads the locations of all enqueued and crawled commands from the database and
         * adds them to the bloom filter. Stops early if this thread gets interrupted.
         * Errors are logged and terminate the loop.
         *
         * @return the number of locations added to the bloom filter
         */
        private long populateDoubleURLSet() {
            Session session = null;
            Transaction transaction = null;
            ScrollableResults sr = null;
            long count = 0;

            try {
                session = sessionFactory.openSession();
                session.setFlushMode(FlushMode.COMMIT);
                session.setCacheMode(CacheMode.IGNORE);
                transaction = session.beginTransaction();

                // the key object is re-used for all locations
                final Key key = new Key();
                final DynamicBloomFilter bf = bloomFilter;

                final long start = System.currentTimeMillis();
                long time = start;
                long lastCount = 0L;

                final Query query = session.createSQLQuery("SELECT location FROM EnqueuedCommand " + "UNION ALL "
                        + "SELECT location FROM CrawledCommand ").setReadOnly(true);

                sr = query.scroll(ScrollMode.FORWARD_ONLY);

                // loop through the available commands
                while (sr.next() && !super.isInterrupted()) {
                    String locationStr = (String) sr.get()[0];
                    key.set(locationStr.getBytes(UTF8), 1.0);
                    bf.add(key);
                    count++;

                    // report the progress every 250000 locations
                    if (count % 250000 == 0) {
                        final long now = System.currentTimeMillis();
                        final long last = time;
                        final long totalMs = now - start;
                        final long deltaMs = now - last;
                        final long deltaCount = count - lastCount;
                        final long totalCountLeft = cntTotal - count;
                        final int etaSec = (int) (((double) totalMs / count) * totalCountLeft / 1000);
                        logger.info(String.format(
                                "Populated URL-cache with %,d URIs in %d seconds (%,d/sec), %,d to go, ETA: %02d:%02d",
                                Long.valueOf(count), Long.valueOf(totalMs / 1000L),
                                Integer.valueOf((int) (deltaCount / (double) deltaMs * 1000.0)),
                                Long.valueOf(totalCountLeft), Integer.valueOf(etaSec / 60),
                                Integer.valueOf(etaSec % 60)));
                        lastCount = count;
                        time = now;
                    }
                }

                // release the cursor before finishing the transaction
                sr.close();
                sr = null;

                transaction.commit();
            } catch (Exception e) {
                if (transaction != null && transaction.isActive())
                    transaction.rollback();
                logger.error(String.format("Unexpected '%s' while populating the double URLs cache from the DB.",
                        e.getClass().getName()), e);
            } finally {
                // closing the cursor if this was not possible above
                if (sr != null)
                    try {
                        sr.close();
                    } catch (Exception e) {
                        logger.error(
                                String.format("Unexpected '%s' while closing result set.", e.getClass().getName()),
                                e);
                    }

                // closing session
                if (session != null)
                    try {
                        session.close();
                    } catch (Exception e) {
                        logger.error(
                                String.format("Unexpected '%s' while closing session.", e.getClass().getName()), e);
                    }
            }
            return count;
        }
    }

    /**
     * Returns the size of the double URLs cache.
     * <p>
     * <i>Implementation note</i>: Since the {@link DynamicBloomFilter} does not freely communicate
     * the required data, it is retrieved via reflection. This method should therefore not be
     * called too frequently.
     * @return the number of {@link Key}s contained in {@link #bloomFilter} or <code>-1</code>
     *         if the size could not be determined.
     */
    private int cacheSize() {
        try {
            final Field matrix = DynamicBloomFilter.class.getDeclaredField("matrix");
            final Field currentNbRecord = DynamicBloomFilter.class.getDeclaredField("currentNbRecord");
            final Field nr = DynamicBloomFilter.class.getDeclaredField("nr");
            matrix.setAccessible(true);
            currentNbRecord.setAccessible(true);
            nr.setAccessible(true);
            return (((Integer) currentNbRecord.get(bloomFilter)).intValue()
                    + ((Integer) nr.get(bloomFilter)).intValue() * (Array.getLength(matrix.get(bloomFilter)) - 1));
        } catch (Throwable e) {
            // the size is used for informational purposes only, therefore the error is just reported
            this.logger.warn(String.format("Unexpected '%s' while determining the size of the double URLs cache.",
                    e.getClass().getName()), e);
        }
        return -1;
    }

    /**
     * Tests whether the given {@link URI} is contained in the double URLs cache.
     * <p>
     * This is a convenience method for testing a single URI. If many URIs have to be
     * tested it is recommended to access the {@link #bloomFilter} directly and to re-use
     * one {@link Key} object via <code>key.set([...], 1.0)</code>, so that no new
     * {@link Key} has to be created for each {@link URI}.
     *
     * @param location the {@link URI} to check
     * @return <code>false</code> if the given location has not previously been added to the cache,
     *         <code>true</code> otherwise. May also return <code>true</code> if the {@link URI}
     *         has not been added previously. See the description of {@link DynamicBloomFilter} for details.
     * @see DynamicBloomFilter
     */
    final boolean isKnownInDoubleURLs(final URI location) {
        final byte[] rawLocation;
        try {
            rawLocation = location.toString().getBytes(UTF8);
        } catch (UnsupportedEncodingException e) {
            /* every JVM is expected to support UTF-8 */
            throw new RuntimeException(e);
        }

        return this.bloomFilter.membershipTest(new Key(rawLocation));
    }

    /**
     * Adds the given {@link URI} to the double URLs cache.
     * <p>
     * This is a convenience method for adding a single URI. If many URIs have to be
     * added it is recommended to access the {@link #bloomFilter} directly and to re-use
     * one {@link Key} object via <code>key.set([...], 1.0)</code>, so that no new
     * {@link Key} has to be created for each {@link URI}.
     *
     * @param location the {@link URI} to put into the cache
     * @see DynamicBloomFilter
     */
    private final void putInDoubleURLs(final URI location) {
        final byte[] rawLocation;
        try {
            rawLocation = location.toString().getBytes(UTF8);
        } catch (UnsupportedEncodingException e) {
            /* every JVM is expected to support UTF-8 */
            throw new RuntimeException(e);
        }

        bloomFilter.add(new Key(rawLocation));
    }

    /* =========================================================================
     * Command management
     * ========================================================================= */

    /**
     * Speed optimize the table scheme dependent on the database.
     * This should make stuff faster, but every database should work without having this function called!
     * <p>
     * For Derby and H2 an index is created on the location column of both command tables.
     * Each index is created independently of the other one: a failing statement (typically
     * because the index was created by a previous run already) does not prevent the
     * remaining statements from being executed. All errors are logged but never propagated.
     */
    private void optimizeDbSchema() {
        /* Logging the query plan is disabled because it seems to cause NPEs in derby, see
         * https://issues.apache.org/jira/browse/DERBY-3197?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#action_12543729
        System.setProperty("derby.language.logQueryPlan", "true");
         */
        final String[] statements = {
                "CREATE INDEX ENQUEUED_LOCATION_IDX on EnqueuedCommand (location)",
                "CREATE INDEX CRAWLED_LOCATION_IDX on CrawledCommand (location)" };

        Connection c = null;
        try {
            final Properties props = this.config.getProperties();
            final String dbDriver = props.getProperty("connection.driver_class");
            final boolean isH2 = "org.h2.Driver".equals(dbDriver);
            final boolean isDerby = "org.apache.derby.jdbc.EmbeddedDriver".equals(dbDriver);
            if (!isH2 && !isDerby) {
                // no optimization known for this database
                return;
            }

            if (isH2) {
                c = DriverManager.getConnection(props.getProperty("connection.url"),
                        props.getProperty("connection.username"), props.getProperty("connection.password"));
            } else {
                c = DriverManager.getConnection(props.getProperty("connection.url"));
            }

            // create the indexes on command-location
            for (String sql : statements) {
                PreparedStatement p = null;
                try {
                    p = c.prepareStatement(sql);
                    p.execute();
                } catch (SQLException e) {
                    // this is expected on every start but the first one, as the index exists then
                    this.logger.debug(String.format("Unable to execute '%s', the index probably exists already: %s",
                            sql, e.getMessage()), e);
                } finally {
                    if (p != null)
                        try {
                            p.close();
                        } catch (SQLException e) {
                            this.logger.debug("Unable to close statement.", e);
                        }
                }
            }
        } catch (Throwable e) {
            this.logger.warn(
                    String.format("Unexpected '%s' while optimizing the DB schema.", e.getClass().getName()), e);
        } finally {
            if (c != null)
                try {
                    c.close();
                } catch (SQLException e) {
                    this.logger.warn("Unable to close the connection used to optimize the DB schema.", e);
                }
        }
    }

    /**
     * Determining the free capacity is not supported by this data-sink,
     * see {@link #freeCapacitySupported()}.
     *
     * @return always <code>-1</code>
     */
    public int freeCapacity() throws Exception {
        return -1;
    }

    /**
     * @return always <code>false</code>, as {@link #freeCapacity()} does not
     *         return a usable value
     */
    public boolean freeCapacitySupported() {
        return false;
    }

    /**
     * Passes the given entry on to {@link #putData(URIQueueEntry)}.
     *
     * @param data the entry to store
     * @return always <code>true</code>
     * @throws Exception if {@link #putData(URIQueueEntry)} fails
     */
    public boolean offerData(final URIQueueEntry data) throws Exception {
        this.putData(data);
        return true;
    }

    /**
     * Stores the references of the given entry that are not known yet and writes
     * the value returned by the store operation back into the entry.
     * If the database was closed already, an error is logged and nothing is stored.
     * <p>
     * This function is called by the {@link UrlExtractorFilter} storage thread
     *
     * @param entry the entry whose references should be stored
     * @see IDataSink#putData(Object)
     */
    public void putData(final URIQueueEntry entry) throws Exception {
        if (isClosed()) {
            logger.error(String.format("Unable to write linkmap of location '%s' to db. Database already closed.",
                    entry.getRootURI().toASCIIString()));
            return;
        }

        // store unknown URI
        final int knownCount = storeUnknownLocations(entry.getProfileID(), entry.getDepth(),
                entry.getReferences());
        entry.setKnown(knownCount);
    }

    /**
     * Starts the writer thread that was created by the constructor.
     */
    public void start() {
        final Thread writer = this.writerThread;
        writer.start();
    }

    /**
     * Sets the {@link IDataSink data-sink} the loaded commands are written to and
     * notifies the writer thread about it. The sink can be set only once.
     *
     * @param dataSink the sink to use, must not be <code>null</code>
     * @throws NullPointerException if the given sink is <code>null</code>
     * @throws IllegalStateException if a sink was set already
     * @see IDataProvider#setDataSink(IDataSink)
     */
    public void setDataSink(IDataSink<ICommand> dataSink) {
        if (dataSink == null) {
            throw new NullPointerException("The data-sink is null-");
        }
        if (this.sink != null) {
            throw new IllegalStateException("The data-sink was already set.");
        }

        final Writer writer = this.writerThread;
        synchronized (writer) {
            this.sink = dataSink;
            writer.notify();
        }
    }

    /**
     * @return <code>true</code> if {@link #close()} was called already
     */
    public boolean isClosed() {
        return closed;
    }

    /**
     * Shuts down the command-DB: stops the worker threads, closes the hibernate
     * {@link SessionFactory}, shuts down an embedded Derby or H2 database, writes the
     * double URLs cache to disk and unregisters the URL cache.
     * <p>
     * The double URLs cache is only saved if it was populated completely. All errors are
     * logged but not propagated. If the calling thread is interrupted while waiting for
     * the worker threads, its interrupt status is restored. Afterwards the DB is marked
     * as closed in any case.
     *
     * @throws InterruptedException declared for compatibility, interruptions are logged
     */
    public void close() throws InterruptedException {
        try {
            this.logger.info("Closing command DB ...");

            // interrupt reader and writer
            this.writerThread.interrupt();

            boolean saveDoubleURLsCache = true;
            if (populateThread != null && populateThread.isAlive()) {
                populateThread.interrupt();
                // don't save the cache as it has not been populated completely
                saveDoubleURLsCache = false;
            }

            // wait for the threads to shutdown
            this.writerThread.join(2000);
            if (populateThread != null)
                populateThread.join(2000);

            // close the DB
            this.sessionFactory.close();

            // shutdown the database
            try {
                Properties props = this.config.getProperties();
                String dbDriver = props.getProperty("connection.driver_class");
                if (dbDriver != null) {
                    Connection c = null;
                    try {
                        if (dbDriver.equals("org.apache.derby.jdbc.EmbeddedDriver")) {
                            // derby signals the shutdown via a SQLException, see catch-block below
                            DriverManager.getConnection("jdbc:derby:;shutdown=true");
                        } else if (dbDriver.equals("org.h2.Driver")) {
                            c = DriverManager.getConnection(props.getProperty("connection.url"),
                                    props.getProperty("connection.username"),
                                    props.getProperty("connection.password"));
                            PreparedStatement p = c.prepareStatement("SHUTDOWN");
                            p.execute();
                            p.close();
                        }
                    } finally {
                        if (c != null)
                            try {
                                c.close();
                            } catch (SQLException e) {
                                // the database was asked to shut down, so this is of minor interest
                                this.logger.debug("Unable to close the connection used to shutdown the database.",
                                        e);
                            }
                    }
                }
            } catch (SQLException e) {
                String errMsg = e.getMessage();
                if (!(errMsg != null && errMsg.equals("Derby system shutdown."))) {
                    this.logger.error("Unable to shutdown database.", e);
                }
            }

            // flush cache
            if (saveDoubleURLsCache)
                closeDoubleURLSet();
            // the manager is null if the DB was closed before
            if (this.manager != null && this.manager.getStatus().equals(Status.STATUS_ALIVE)) {
                this.manager.removeCache(EHCACHE_NAME);
                this.manager = null;
            }
        } catch (Throwable e) {
            // don't hide the interruption from the code calling us
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            this.logger.error(String.format("Unexpected '%s' while tryping to shutdown %s: %s",
                    e.getClass().getName(), this.getClass().getSimpleName(), e.getMessage()), e);
        } finally {
            this.closed = true;
        }
    }

    /**
     * Tests whether the given location is known to this DB. The checks are done in the
     * order of their costs: bloom filter, in-memory cache, database.
     * <p>
     * The bloom filter is only consulted if it is not currently being populated.
     *
     * @param location the location to check, <code>null</code> is never known
     * @see ICommandDB#isKnown(URI)
     */
    public boolean isKnown(URI location) {
        if (location == null) {
            return false;
        }

        // a negative answer of the bloom filter is only reliable if the filter is complete
        final boolean populating = (populateThread != null) && populateThread.isAlive();
        if (!populating && !isKnownInDoubleURLs(location)) {
            return false;
        }

        return isKnownInCache(location) || this.isKnownInDB(location);
    }

    /**
     * @param location the location to look up
     * @return <code>true</code> if the in-memory URL cache contains an element for the location
     */
    boolean isKnownInCache(URI location) {
        final Element cached = this.urlExistsCache.get(location);
        return cached != null;
    }

    /**
     * Tests whether the database contains a command with the given location.
     * The enqueued commands are checked first, the crawled commands are only
     * checked if the location was not found there.
     *
     * @param location the location to look up
     * @return <code>true</code> if an enqueued or crawled command with this location exists
     */
    boolean isKnownInDB(URI location) {
        return this.isKnownInDB(location, "EnqueuedCommand") || this.isKnownInDB(location, "CrawledCommand");
    }

    /**
     * Tests whether the given command table contains a command with the given location.
     * Errors are logged, the location is reported as unknown in this case.
     *
     * @param location the location to look up
     * @param queueName the name of the entity to search in, e.g. <code>EnqueuedCommand</code>
     * @return <code>true</code> if at least one matching command was found
     */
    boolean isKnownInDB(URI location, String queueName) {
        Session dbSession = null;
        Transaction tx = null;
        boolean found = false;

        try {
            dbSession = this.sessionFactory.openSession();
            dbSession.setFlushMode(FlushMode.COMMIT);
            dbSession.setCacheMode(CacheMode.IGNORE);
            tx = dbSession.beginTransaction();

            // count the commands having the requested location
            final String hql = String.format("SELECT count(location) FROM %s as cmd WHERE location = ?", queueName);
            final Query countQuery = dbSession.createQuery(hql);
            countQuery.setParameter(0, location);
            countQuery.setReadOnly(true);

            final Long hits = (Long) countQuery.uniqueResult();
            found = (hits != null) && (hits.longValue() > 0);

            tx.commit();
        } catch (Exception e) {
            if (tx != null && tx.isActive()) {
                tx.rollback();
            }
            this.logger.error(String.format("Unexpected '%s' while testing if location '%s' is known.",
                    e.getClass().getName(), location.toASCIIString()), e);
        } finally {
            // closing session
            if (dbSession != null) {
                try {
                    dbSession.close();
                } catch (Exception e) {
                    this.logger.error(
                            String.format("Unexpected '%s' while closing session.", e.getClass().getName()), e);
                }
            }
        }

        return found;
    }

    /**
     * Fetches up to <code>limit</code> commands from the queue of enqueued commands.
     * <p>
     * Within a single transaction each fetched command is removed from the enqueued commands
     * and stored as crawled command. Its location is added to the bloom filter and to the
     * in-memory URL cache.
     * <p>
     * If the transaction fails, the error is logged and an empty list is returned: as the
     * commands were not moved, they are still enqueued and will be fetched again later.
     * Returning them would lead to commands being processed twice.
     *
     * @param limit the maximum number of commands to fetch
     * @return the fetched commands, an empty list if no command is enqueued or an error occurred
     */
    private List<ICommand> fetchNextCommands(int limit) {
        List<ICommand> result = new ArrayList<ICommand>();

        Session session = null;
        Transaction transaction = null;
        ScrollableResults sr = null;
        try {
            session = this.sessionFactory.openSession();
            session.setFlushMode(FlushMode.COMMIT);
            session.setCacheMode(CacheMode.IGNORE);
            transaction = session.beginTransaction();

            Query query = session.createQuery("FROM EnqueuedCommand as cmd");
            query.setFetchSize(limit); // this is important for derby because there is no limit support
            query.setMaxResults(limit); // restricting number of returned results
            query.setReadOnly(true); // read-only query
            sr = query.scroll(ScrollMode.FORWARD_ONLY);

            // the key object is re-used for all locations
            final Key key = new Key();
            final DynamicBloomFilter bloomFilter = this.bloomFilter;
            final Cache urlExistsCache = this.urlExistsCache;

            // loop through the available commands
            while (sr.next() && result.size() < limit) {
                ICommand cmd = (ICommand) sr.get()[0];

                /* move command from the enqueued to the crawled commands */
                session.delete("EnqueuedCommand", cmd);
                session.saveOrUpdate("CrawledCommand", cmd);

                // add command-location into caches
                key.set(cmd.getLocation().toString().getBytes(UTF8), 1.0);
                bloomFilter.add(key);
                Element element = new Element(cmd.getLocation(), null);
                urlExistsCache.put(element);

                result.add(cmd);
            }
            sr.close();
            sr = null;

            transaction.commit();
        } catch (Exception e) {
            // the commands were not moved, therefore they must not be handed out
            result.clear();
            if (transaction != null && transaction.isActive())
                transaction.rollback();
            this.logger.error("Error while fetching commands", e);
        } finally {
            // closing the cursor if this was not possible above
            if (sr != null)
                try {
                    sr.close();
                } catch (Exception e) {
                    this.logger.error(
                            String.format("Unexpected '%s' while closing result set.", e.getClass().getName()), e);
                }

            // closing session
            if (session != null)
                try {
                    session.close();
                } catch (Exception e) {
                    this.logger.error(
                            String.format("Unexpected '%s' while closing session.", e.getClass().getName()), e);
                }
        }

        return result;
    }

    /**
     * Stores a single command into the <code>EnqueuedCommand</code> queue, registers its
     * location in the caches and signals the writer-thread that new data is available.
     * <p>
     * The queue counters are only incremented after the transaction was committed
     * successfully, so that a rolled-back write does not inflate them.
     * A {@link HibernateException} is logged and not propagated to the caller.
     *
     * @param cmd the command to store
     */
    private synchronized void storeCommand(ICommand cmd) {
        Session session = null;
        Transaction transaction = null;
        try {
            // open session and transaction
            session = this.sessionFactory.openSession();
            transaction = session.beginTransaction();

            // store command
            session.saveOrUpdate("EnqueuedCommand", cmd);

            // add command-location into caches
            putInDoubleURLs(cmd.getLocation());
            Element element = new Element(cmd.getLocation(), null);
            this.urlExistsCache.put(element);

            // TODO: adding to bloom filter is missing here!!!

            transaction.commit();

            // the command is persisted now, so it is safe to count it
            cntCrawlerQueue++;
            cntTotal++;

            // signal writer that a new URL is available
            this.writerThread.signalNewDbData();
        } catch (HibernateException e) {
            if (transaction != null && transaction.isActive())
                transaction.rollback();
            this.logger.error(
                    String.format("Error while writing command with location '%s' to db.", cmd.getLocation()), e);
        } finally {
            // closing session
            if (session != null)
                try {
                    session.close();
                } catch (Exception e) {
                    this.logger.error(
                            String.format("Unexpected '%s' while closing session.", e.getClass().getName()), e);
                }
        }
    }

    /**
     * Enqueues a single location by delegating to
     * {@link #storeUnknownLocations(int, int, LinkedList)} with a one-element list.
     *
     * @param location the location to enqueue, may be <code>null</code>
     * @param profileID the ID of the profile the new command should belong to
     * @param depth the depth of the new command
     * @return <code>false</code> if the location is <code>null</code>, otherwise
     *         <code>true</code> if {@link #storeUnknownLocations(int, int, LinkedList)}
     *         reported zero known locations
     * @see ICommandDB
     */
    public boolean enqueue(URI location, int profileID, int depth) {
        if (location == null) {
            return false;
        }

        // the callee removes entries from the list, therefore a mutable list is required
        final LinkedList<URI> singleLocation = new LinkedList<URI>();
        singleLocation.add(location);

        final int knownCount = this.storeUnknownLocations(profileID, depth, singleLocation);
        return knownCount == 0;
    }

    /**
     * Checks the given locations against the in-memory caches (bloom-filter and ehcache).
     * <ul>
     * <li>Locations unknown to the bloom-filter are saved as new <code>EnqueuedCommand</code>
     *     via the given session, added to both caches and removed from the list.</li>
     * <li>Locations found in the ehcache are counted as known and removed from the list.</li>
     * <li>All other locations stay in the list.</li>
     * </ul>
     * The bloom-filter is only consulted if the populate-thread is not set or not alive.
     *
     * @param profileID the profile ID to set on newly created commands
     * @param depth the depth to set on newly created commands
     * @param locations the locations to check; entries are removed as described above
     * @param session the session used to save new commands
     * @return the number of locations that were found in the ehcache
     * @throws IOException declared by the signature; an {@link UnsupportedEncodingException}
     *         is rethrown wrapped in a {@link RuntimeException}
     */
    private int storeUnknownInDoubleCache(final int profileID, final int depth, final LinkedList<URI> locations,
            final Session session) throws IOException {

        final long startTime = System.currentTimeMillis();
        final int total = locations.size();
        final Key key = new Key();
        final StringBuilder traceBuf = new StringBuilder();

        final boolean checkBloom = (populateThread == null || !populateThread.isAlive());

        final DynamicBloomFilter bloomFilter = this.bloomFilter;
        final Cache urlExistsCache = this.urlExistsCache;

        int position = 0;
        int addedCount = 0;
        int knownCount = 0;

        try {
            for (final Iterator<URI> it = locations.iterator(); it.hasNext();) {
                position++;
                final URI loc = it.next();

                // ask the bloom-filter (if it may be used) whether the location is new
                boolean unknownToBloom = false;
                if (checkBloom) {
                    key.set(loc.toString().getBytes(UTF8), 1.0);
                    unknownToBloom = !bloomFilter.membershipTest(key);
                }

                if (unknownToBloom) {
                    // the bloom-filter does not return false negatives, therefore this
                    // location does not need to be checked against the DB again
                    final ICommand cmd = this.commandFactory.createDocument(ICommand.class);
                    cmd.setLocation(loc);
                    cmd.setProfileOID(profileID);
                    cmd.setDepth(depth);

                    bloomFilter.add(key);
                    session.saveOrUpdate("EnqueuedCommand", cmd);
                    cntCrawlerQueue++;
                    cntTotal++;

                    this.urlExistsCache.put(new Element(loc, null));
                    it.remove();
                    addedCount++;

                    if (this.logger.isTraceEnabled()) {
                        traceBuf.append(String.format("\n\t[%3d] %s", Integer.valueOf(position), loc.toString()));
                    }
                } else if (urlExistsCache.get(loc) != null) {
                    // already known by the ehcache
                    it.remove();
                    knownCount++;
                }
            }
        } catch (UnsupportedEncodingException e) {
            /* UTF-8 support should be implemented in the JVM */
            throw new RuntimeException(e);
        }

        if (this.logger.isDebugEnabled()) {
            this.logger.debug(String.format(
                    "Double-check of %d URI against caches with size %d (Bloom) + %d (ehcache) took %d ms."
                            + "\n\t%d unknown by bloom-filter" + "\n\t%d known by ehcache" + "\n\t%d left to check",
                    Integer.valueOf(total), Long.valueOf(cacheSize()),
                    Long.valueOf(urlExistsCache.getMemoryStoreSize()),
                    Long.valueOf(System.currentTimeMillis() - startTime), Integer.valueOf(addedCount),
                    Integer.valueOf(knownCount), Integer.valueOf(locations.size())));
        }
        if (this.logger.isTraceEnabled() && addedCount > 0) {
            logger.trace(
                    String.format("%d new URI added to DB: %s", Integer.valueOf(addedCount), traceBuf.toString()));
        }

        return knownCount;
    }

    /**
     * Checks the given locations against both DB queues (<code>EnqueuedCommand</code> and
     * <code>CrawledCommand</code>). Locations found in one of the queues are counted as
     * known and removed from the list. For every remaining location a new command is
     * saved as <code>EnqueuedCommand</code> and its location is added to the bloom-filter
     * and the URL-exists cache.
     *
     * @param profileID the profile ID to set on newly created commands
     * @param depth the depth to set on newly created commands
     * @param locations the locations to check; known locations are removed from this list
     * @param session the session used for the lookups and for saving new commands
     * @param chunkSize currently unused
     * @return the number of locations that were already known to the DB
     * @throws IOException if encoding a location for the bloom-filter fails
     */
    private int storeUnknownInDB(final int profileID, final int depth, final LinkedList<URI> locations,
            final Session session, final int chunkSize) throws IOException {

        int total = locations.size();
        int known = 0;
        final long start = System.currentTimeMillis();

        // one count-query per location and queue; known locations are dropped from the list
        String[] queues = new String[] { "EnqueuedCommand", "CrawledCommand" };
        for (String queue : queues) {
            Iterator<URI> locationsIter = locations.iterator();
            while (locationsIter.hasNext()) {
                URI nextLocation = locationsIter.next();
                Query query = session
                        .createQuery("SELECT count(id) FROM " + queue + " WHERE location = (:nextLocation)")
                        .setParameter("nextLocation", nextLocation).setReadOnly(true);
                Long count = (Long) query.uniqueResult();
                if (count != null && count.longValue() > 0) {
                    known++;
                    locationsIter.remove();
                }
            }
        }
        final long end = System.currentTimeMillis();

        if (this.logger.isDebugEnabled()) {
            this.logger.debug(String.format(
                    "Double-check of %d URI against DB with size %d took %s ms." + "\n\t%3d unknown by DB"
                            + "\n\t%3d known by DB",
                    Integer.valueOf(total), Long.valueOf(this.size()), Long.valueOf(end - start),
                    Integer.valueOf(total - known), Integer.valueOf(known)));
        }

        // add new commands into DB
        final StringBuilder buf = new StringBuilder();
        final Cache urlExistsCache = this.urlExistsCache;
        final Key key = new Key();

        int added = 0;
        Iterator<URI> locationsIter = locations.iterator();
        while (locationsIter.hasNext()) {
            final URI location = locationsIter.next();
            cntTotal++;
            cntCrawlerQueue++;
            added++;

            // create a new command
            final ICommand cmd = this.commandFactory.createDocument(ICommand.class);
            cmd.setLocation(location);
            cmd.setProfileOID(profileID);
            cmd.setDepth(depth);

            // store new command into DB
            session.saveOrUpdate("EnqueuedCommand", cmd);

            // add to bloom filter
            key.set(location.toString().getBytes(UTF8), 1.0);
            bloomFilter.add(key);

            // add to in-memory double cache
            Element element = new Element(location, null);
            urlExistsCache.put(element);

            // debugging output
            if (this.logger.isTraceEnabled()) {
                buf.append(String.format("\n\t[%3d] %s", Integer.valueOf(added), location.toString()));
            }
        }

        // the buffer lists the URIs that were added, so report their count (not the known-count)
        if (this.logger.isTraceEnabled() && added > 0) {
            logger.trace(
                    String.format("%d false-positive URI added to DB: %s", Integer.valueOf(added), buf.toString()));
        }

        return known;
    }

    /**
     * Stores all locations of the given list that are not known yet as new commands.
     * The list is first checked against the in-memory caches and the rest, if any,
     * against the DB. Both steps run in the same session and transaction and remove
     * entries from the given list. After a successful commit the writer-thread is signaled.
     * 
     * @param profileID the ID of the {@link ICommandProfile}, newly created 
     * commands should belong to
     * @param depth depth of the new {@link ICommand} 
     * @param locations the locations to add to the DB
     * @return the number of known locations in the given list, or <code>0</code> if the
     * list is <code>null</code> or empty or if an error occurred
     */
    int storeUnknownLocations(int profileID, int depth, LinkedList<URI> locations) {
        if (locations == null || locations.isEmpty()) {
            return 0;
        }

        int result = 0;
        Session session = null;
        Transaction transaction = null;
        try {
            // open session and transaction
            session = this.sessionFactory.openSession();
            session.setFlushMode(FlushMode.COMMIT);
            session.setCacheMode(CacheMode.IGNORE);
            transaction = session.beginTransaction();

            // step 1: let the caches sort out what they can decide on their own
            int knownCount = storeUnknownInDoubleCache(profileID, depth, locations, session);

            // step 2: whatever is left in the list needs to be checked against the DB
            if (!locations.isEmpty()) {
                knownCount += storeUnknownInDB(profileID, depth, locations, session, 10);
            }

            transaction.commit();

            // signal writer that a new URL is available
            this.writerThread.signalNewDbData();

            result = knownCount;
        } catch (Throwable e) {
            if (transaction != null && transaction.isActive()) {
                transaction.rollback();
            }
            this.logger.error(String.format("Unexpected '%s' while writing %d new commands to db.",
                    e.getClass().getName(), Integer.valueOf(locations.size())), e);
        } finally {
            // closing session
            if (session != null) {
                try {
                    session.close();
                } catch (Exception e) {
                    this.logger.error(
                            String.format("Unexpected '%s' while closing session.", e.getClass().getName()), e);
                }
            }
        }

        return result;
    }

    /**
     * Returns the value of the in-memory total counter; no DB query is executed.
     *
     * @return the total size of the command db
     * @see ICommandDB#size()
     */
    public long size() {
        final long total = cntTotal;
        return total;
    }

    /**
     * Returns the value of the in-memory crawler-queue counter; no DB query is executed.
     *
     * @return the number of enqueued commands
     * @see ICommandDB#enqueuedSize()
     */
    public long enqueuedSize() {
        final long enqueued = cntCrawlerQueue;
        return enqueued;
    }

    /**
     * Queries the DB for the size of the enqueued- and the crawled-queue.
     *
     * @return the sum of both queue sizes as reported by {@link #size(String)}
     */
    private long totalSize() {
        final long enqueued = this.size("enqueued");
        final long crawled = this.size("crawled");
        return enqueued + crawled;
    }

    /**
     * Queries the DB for the number of commands in one of the queues.
     *
     * @param type the queue to count, either <code>"enqueued"</code> or <code>"crawled"</code>
     *        (case-insensitive)
     * @return the number of commands in the queue, or <code>-1</code> if the query failed
     * @throws NullPointerException if <code>type</code> is <code>null</code>
     * @throws IllegalArgumentException if <code>type</code> is not one of the supported values
     */
    private long size(String type) {
        if (type == null)
            throw new NullPointerException();

        // resolve the query before any DB resource is acquired, so that an unsupported
        // type is rejected right away instead of passing a null query to the session
        final String hqlString;
        if (type.equalsIgnoreCase("enqueued")) {
            hqlString = "select count(*) from EnqueuedCommand as cmd";
        } else if (type.equalsIgnoreCase("crawled")) {
            hqlString = "select count(*) from CrawledCommand as cmd";
        } else {
            throw new IllegalArgumentException(String.format("Unknown queue type '%s'.", type));
        }

        long count = -1L;

        Session session = null;
        Transaction transaction = null;
        try {
            // open session and transaction
            session = this.sessionFactory.openSession();
            transaction = session.beginTransaction();

            // query size
            final Long result = (Long) session.createQuery(hqlString).setReadOnly(true).uniqueResult();
            if (result != null) {
                count = result.longValue();
            }

            transaction.commit();
        } catch (HibernateException e) {
            if (transaction != null && transaction.isActive())
                transaction.rollback();
            this.logger.error(
                    String.format("Unexpected '%s' while getting size of command-db.", e.getClass().getName()), e);
        } finally {
            // closing session
            if (session != null)
                try {
                    session.close();
                } catch (Exception e) {
                    this.logger.error(
                            String.format("Unexpected '%s' while closing session.", e.getClass().getName()), e);
                }
        }

        return count;
    }

    /**
     * Resets the command queue: deletes all entries of the <code>EnqueuedCommand</code>
     * queue and sets both in-memory counters to zero.
     * <p>
     * The counters are only reset after the delete was committed successfully, so that
     * they stay untouched if the transaction is rolled back. A {@link HibernateException}
     * is logged and not propagated to the caller.
     * <p>
     * NOTE(review): only <code>EnqueuedCommand</code> is deleted here, but the total counter
     * is zeroed as well; confirm whether <code>CrawledCommand</code> should be cleared too.
     */
    public void reset() {
        Session session = null;
        Transaction transaction = null;
        try {
            // open session and transaction
            session = this.sessionFactory.openSession();
            transaction = session.beginTransaction();

            // delete all commands
            session.createQuery("DELETE FROM EnqueuedCommand").executeUpdate();

            transaction.commit();

            // the queue is empty now, so the counters may be reset
            cntCrawlerQueue = cntTotal = 0L;
        } catch (HibernateException e) {
            if (transaction != null && transaction.isActive())
                transaction.rollback();
            this.logger.error("Error while reseting queue.", e);
        } finally {
            // closing session
            if (session != null)
                try {
                    session.close();
                } catch (Exception e) {
                    this.logger.error(
                            String.format("Unexpected '%s' while closing session.", e.getClass().getName()), e);
                }
        }
    }

    /**
     * Handles {@link CommandEvent#TOPIC_OID_REQUIRED} events: looks up the command that
     * belongs to the location carried by the event and stores it into the DB.
     * Events with another topic, without a location or without a matching command
     * are ignored.
     *
     * @param event the received event
     * @see EventHandler#handleEvent(Event)
     */
    public void handleEvent(Event event) {
        final String topic = event.getTopic();

        // we are only interested in commands created by other components
        if (topic == null || !topic.equals(CommandEvent.TOPIC_OID_REQUIRED)) {
            return;
        }

        // this is a synchronous event, so we have time to set a valid OID
        final String location = (String) event.getProperty(CommandEvent.PROP_COMMAND_LOCATION);
        if (location == null) {
            return;
        }

        final ICommand cmd = this.commandTracker.getCommandByLocation(URI.create(location));
        if (cmd == null) {
            return;
        }

        this.storeCommand(cmd);
    }

    /**
     * A {@link Thread} that repeatedly fetches chunks of {@link ICommand commands} from the
     * DB and puts them into the {@link CommandDB#sink data-sink}. If no commands are
     * available it waits until it is signaled or the idle-timeout elapsed.
     */
    class Writer extends Thread {
        public Writer() {
            super("CommandDB.Writer");
        }

        @Override
        public void run() {
            try {
                this.awaitDataSink();

                final int chunkSize = 10;
                while (!Thread.currentThread().isInterrupted()) {
                    try {
                        // fetching the next command(s) from DB
                        final long start = System.currentTimeMillis();
                        final List<ICommand> fetched = CommandDB.this.fetchNextCommands(chunkSize);
                        if (logger.isDebugEnabled()) {
                            logger.debug(String.format(
                                    "Fetched new chunk of %d (%d requested) new URLs to crawl in %d ms, %d queued / %d total",
                                    Integer.valueOf(fetched.size()), Integer.valueOf(chunkSize),
                                    Long.valueOf(System.currentTimeMillis() - start), Long.valueOf(cntCrawlerQueue),
                                    Long.valueOf(cntTotal)));
                        }

                        if (fetched == null || fetched.isEmpty()) {
                            // nothing to do: sleep until signaled or until the timeout elapsed
                            synchronized (this) {
                                this.wait(MAX_IDLE_SLEEP);
                            }
                            continue;
                        }

                        // enqueue fetched commands into crawler-queue
                        this.forwardToSink(fetched);
                    } catch (InterruptedException e) {
                        // leave the fetch loop
                        break;
                    } catch (Exception e) {
                        logger.error(String.format("Unexpected '%s' while waiting reading commands from db.",
                                e.getClass().getName()), e);
                    }
                }
            } catch (InterruptedException e) {
                logger.warn("CommandDB.writer interrupted while waiting for a data-sink");
            } finally {
                logger.info("CommandDB.Writer shutdown finished.");
            }
        }

        /**
         * Blocks on this thread's monitor until a data-sink is set.
         *
         * @throws InterruptedException if the thread is interrupted while waiting
         */
        private synchronized void awaitDataSink() throws InterruptedException {
            while (CommandDB.this.sink == null) {
                this.wait();
            }
        }

        /**
         * Reports each command to the command-tracker (if one is set), puts it into the
         * data-sink and decrements the crawler-queue counter.
         *
         * @param commands the commands to forward
         * @throws Exception whatever the command-tracker or the data-sink throws
         */
        private void forwardToSink(final List<ICommand> commands) throws Exception {
            final ICommandTracker tracker = CommandDB.this.commandTracker;
            final IDataSink<ICommand> dataSink = CommandDB.this.sink;

            for (final ICommand command : commands) {
                // notify the command-tracker about the creation of the command
                if (tracker != null) {
                    tracker.commandCreated(ICommandDB.class.getName(), command);
                }

                dataSink.putData(command);
                cntCrawlerQueue--;
            }
        }

        /**
         * Wakes up the writer if it is currently waiting for new data.
         */
        public synchronized void signalNewDbData() {
            this.notify();
        }
    }
}