org.apache.nutch.webapp.CacheManager.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.nutch.webapp.CacheManager.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.webapp;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.util.Properties;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.webapp.common.Search;
import org.apache.nutch.webapp.common.ServiceLocator;

import com.opensymphony.oscache.base.Cache;
import com.opensymphony.oscache.base.CacheEntry;
import com.opensymphony.oscache.base.EntryRefreshPolicy;
import com.opensymphony.oscache.base.NeedsRefreshException;
import com.opensymphony.oscache.general.GeneralCacheAdministrator;

/**
 * This class is responsible for configuring the used cache and
 * delivering cached Search objects.
 * 
 * Configuration parameters can be overrided with default nutch
 * configuration mechanism.
 * 
 * Search Objects are compressed for smaller space requirements.
 */
public class CacheManager {

    public static class ByteBufferWrapper implements Serializable {

        private static final long serialVersionUID = 1L;
        byte[] contents;

        public ByteBufferWrapper(final byte[] contents) {
            this.contents = contents;
        }

        public byte[] getContents() {
            return contents;
        }

    }

    static final Log LOG = LogFactory.getLog(CacheManager.class);

    static final String CACHE_KEY = CacheManager.class.getName();

    class NutchRefreshPolicy implements EntryRefreshPolicy {

        private static final long serialVersionUID = 1L;

        public boolean needsRefresh(CacheEntry arg0) {
            return false;
        }
    }

    EntryRefreshPolicy policy = new NutchRefreshPolicy();

    Cache cache;
    GeneralCacheAdministrator cacheadmin;

    protected CacheManager(Configuration conf) {

        Properties p = new Properties();

        //use memory for caching
        boolean cacheMemory = conf.getBoolean("cache.memory", false);
        p.setProperty("cache.memory", Boolean.toString(cacheMemory));

        //the persistence class used
        String cachePersistenceClass = conf.get("cache.persistence.class",
                "org.apache.nutch.cache.CustomDiskPersistenceListener");
        p.setProperty("cache.persistence.class", cachePersistenceClass);

        //where to store cache files (if file cache used)
        String cachePath = conf.get("cache.path", ".");
        p.setProperty("cache.path", cachePath);

        //cacacity of cache (how many entries)
        int cacheCapacity = conf.getInt("cache.capacity", 1000);
        p.setProperty("cache.capacity", Integer.toString(cacheCapacity));

        cacheadmin = new GeneralCacheAdministrator(p);
        cache = cacheadmin.getCache();
    }

    public synchronized static CacheManager getInstance(Configuration conf) {
        CacheManager cache = (CacheManager) conf.getObject(CACHE_KEY);

        if (cache == null) {
            cache = new CacheManager(conf);

            conf.setObject(CACHE_KEY, cache);
        }
        return cache;
    }

    /**
     * Get Search object from cache
     * @param id key
     * @return
     * @throws NeedsRefreshException
     */
    public Search getSearch(String id, ServiceLocator locator) throws NeedsRefreshException {
        Search search = null;

        ByteBufferWrapper w = (ByteBufferWrapper) cache.getFromCache(id);
        if (w != null) {

            try {
                long time = System.currentTimeMillis();
                ByteArrayInputStream is = new ByteArrayInputStream(w.getContents());
                GZIPInputStream gs = new GZIPInputStream(is);
                DataInputStream dis = new DataInputStream(gs);

                search = new Search(locator);
                search.readFields(dis);
                long delta = System.currentTimeMillis() - time;

                if (LOG.isDebugEnabled()) {
                    LOG.debug("Decompressing cache entry took: " + delta + "ms.");
                }

                search.init();
            } catch (IOException e) {
                LOG.info("Could not get cached object: " + e);
            }
        }
        return search;
    }

    /**
     * Put Search object in cache
     * 
     * @param id key
     * @param search the search to cache
     */
    public void putSearch(String id, Search search) {
        try {
            long time = System.currentTimeMillis();
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            GZIPOutputStream gzos = new GZIPOutputStream(bos);
            DataOutputStream oos = new DataOutputStream(gzos);
            search.write(oos);
            oos.flush();
            oos.close();
            gzos.close();
            long delta = System.currentTimeMillis() - time;
            ByteBufferWrapper wrap = new ByteBufferWrapper(bos.toByteArray());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Compressing cache entry took: " + delta + "ms.");
                LOG.debug("size: " + wrap.getContents().length + " bytes");
            }
            cache.putInCache(id, wrap);
        } catch (IOException e) {
            LOG.info("cannot store object in cache: " + e);
        }
    }

    public void cancelUpdate(String key) {
        cache.cancelUpdate(key);
    }
}