org.paxle.crawler.impl.CrawlerContextLocal.java Source code

Java tutorial

Introduction

Here is the source code for org.paxle.crawler.impl.CrawlerContextLocal.java

Source

/**
 * This file is part of the Paxle project.
 * Visit http://www.paxle.net for more information.
 * Copyright 2007-2010 the original author or authors.
 *
 * Licensed under the terms of the Common Public License 1.0 ("CPL 1.0").
 * Any use, reproduction or distribution of this program constitutes the recipient's acceptance of this agreement.
 * The full license text is available under http://www.opensource.org/licenses/cpl1.0.txt
 * or in the file LICENSE.txt in the root directory of the Paxle distribution.
 *
 * Unless required by applicable law or agreed to in writing, this software is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

package org.paxle.crawler.impl;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.ReferenceCardinality;
import org.apache.felix.scr.annotations.ReferencePolicy;
import org.apache.felix.scr.annotations.References;
import org.apache.felix.scr.annotations.Service;
import org.osgi.framework.Filter;
import org.osgi.framework.InvalidSyntaxException;
import org.osgi.framework.ServiceReference;
import org.osgi.service.component.ComponentContext;
import org.paxle.core.charset.ICharsetDetector;
import org.paxle.core.crypt.ICryptManager;
import org.paxle.core.doc.ICommand;
import org.paxle.core.doc.ICommandProfile;
import org.paxle.core.doc.ICommandProfileManager;
import org.paxle.core.doc.ICrawlerDocument;
import org.paxle.core.doc.IDocumentFactory;
import org.paxle.core.io.IIOTools;
import org.paxle.core.io.temp.ITempFileManager;
import org.paxle.core.mimetype.IMimeTypeDetector;
import org.paxle.crawler.CrawlerContext;
import org.paxle.crawler.ICrawlerContext;
import org.paxle.crawler.ICrawlerContextAware;
import org.paxle.crawler.ICrawlerContextLocal;
import org.paxle.crawler.ICrawlerTools;
import org.paxle.parser.ISubParser;

/**
 * A {@link ThreadLocal} holding the {@link ICrawlerContext} of the current thread.
 * <p>
 * The class is declared as a declarative-services component (see the annotations below). Each
 * thread that calls {@link #getCurrentContext()} gets its own {@link Context} instance (created
 * lazily via {@link #initialValue()}), which exposes the services referenced by this component
 * and a per-context property bag.
 * <p>
 * The constructor registers this instance via {@link CrawlerContext#setThreadLocal}.
 */
@Component(immediate = true)
@Service(ICrawlerContextLocal.class)
@References({
        @Reference(name = CrawlerContextLocal.REFERENCE_SUBPARSERS, referenceInterface = ISubParser.class, cardinality = ReferenceCardinality.OPTIONAL_MULTIPLE, policy = ReferencePolicy.DYNAMIC, bind = "addSubParser", unbind = "removeSubParser", target = "(MimeTypes=*)"),
        @Reference(name = CrawlerContextLocal.REFERENCE_CRAWLERCONTEXT_AWARE, referenceInterface = ICrawlerContextAware.class, cardinality = ReferenceCardinality.OPTIONAL_MULTIPLE, policy = ReferencePolicy.DYNAMIC, bind = "addContextAwareCrawler", unbind = "removeContextAwareCrawler"),
        @Reference(name = CrawlerContextLocal.REFERENCE_DOCFACTORY, referenceInterface = IDocumentFactory.class, cardinality = ReferenceCardinality.OPTIONAL_MULTIPLE, policy = ReferencePolicy.DYNAMIC, bind = "addDocFactory", unbind = "removeDocFactory", target = "(docType=*)") })
public class CrawlerContextLocal extends ThreadLocal<ICrawlerContext> implements ICrawlerContextLocal {
    /** Name of the reference through which {@link ISubParser sub-parsers} are bound. */
    static final String REFERENCE_SUBPARSERS = "subParser";
    /** Name of the reference through which {@link ICrawlerContextAware} services are bound. */
    static final String REFERENCE_CRAWLERCONTEXT_AWARE = "crawlerContextAware";
    /** Name of the reference through which {@link IDocumentFactory document-factories} are bound. */
    static final String REFERENCE_DOCFACTORY = "docFactory";

    /** Guards {@link #docFactoryRefs}: bind/unbind take the write lock, lookups the read lock. */
    private final ReentrantReadWriteLock rwl = new ReentrantReadWriteLock();
    private final Lock r = rwl.readLock();
    private final Lock w = rwl.writeLock();

    /**
     * The {@link ComponentContext} of this component
     * @see #activate(ComponentContext)
     */
    protected ComponentContext ctx;

    /** Optional reference; exposed via {@link Context#getMimeTypeDetector()}. */
    @Reference(cardinality = ReferenceCardinality.OPTIONAL_UNARY, policy = ReferencePolicy.DYNAMIC)
    protected IMimeTypeDetector mimeTypeDetector;

    /** Optional reference; exposed via {@link Context#getCharsetDetector()}. */
    @Reference(cardinality = ReferenceCardinality.OPTIONAL_UNARY, policy = ReferencePolicy.DYNAMIC)
    protected ICharsetDetector charsetDetector;

    /** Optional reference; exposed via {@link Context#getCryptManager()}. */
    @Reference(cardinality = ReferenceCardinality.OPTIONAL_UNARY, policy = ReferencePolicy.DYNAMIC)
    protected ICryptManager cryptManager;

    /** Mandatory reference; exposed via {@link Context#getTempFileManager()}. */
    @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY, policy = ReferencePolicy.DYNAMIC)
    protected ITempFileManager tempFileManager;

    /** Mandatory reference; exposed via {@link Context#getIoTools()}. */
    @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY, policy = ReferencePolicy.DYNAMIC)
    protected IIOTools ioTools;

    /** Mandatory reference; exposed via {@link Context#getCrawlerTools()}. */
    @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY, policy = ReferencePolicy.DYNAMIC)
    protected ICrawlerTools crawlerTools;

    /** Optional reference; used by {@link Context#getCommandProfile(int)}. */
    @Reference(cardinality = ReferenceCardinality.OPTIONAL_UNARY, policy = ReferencePolicy.DYNAMIC)
    protected ICommandProfileManager cmdProfileManager;

    /**
     * All {@link IDocumentFactory document-factories} registered to the system.
     * Access is guarded by {@link #rwl}.
     * @see #createDocumentForInterface(Class, String)
     */
    protected SortedSet<ServiceReference> docFactoryRefs = new TreeSet<ServiceReference>();

    /**
     * A list of mime-types supported by parsers installed to the system.
     * TODO: for a crawler-only installation we need to change this
     */
    protected Set<String> supportedMimeTypes = Collections.synchronizedSet(new HashSet<String>());

    /**
     * Number of currently bound sub-parsers per (trimmed) mime-type. Needed so that unbinding one
     * sub-parser does not drop a mime-type that another bound sub-parser still declares.
     * All access is synchronized on the map itself.
     */
    private final HashMap<String, Integer> mimeTypeUseCounts = new HashMap<String, Integer>();

    /**
     * For logging
     */
    protected Log logger = LogFactory.getLog(this.getClass());

    /**
     * Creates the thread-local and registers it via {@link CrawlerContext#setThreadLocal}.
     */
    public CrawlerContextLocal() {
        CrawlerContext.setThreadLocal(this);
    }

    /**
     * @return the {@link ICrawlerContext} of the calling thread; created on first access
     *         via {@link #initialValue()}
     */
    public ICrawlerContext getCurrentContext() {
        return this.get();
    }

    /**
     * Discards the {@link ICrawlerContext} of the calling thread.
     */
    public void removeCurrentContext() {
        this.remove();
    }

    /**
     * Remembers the {@link ComponentContext}; it is needed later to create filters and to
     * locate document-factory services.
     *
     * @param context the context of this component
     */
    @Activate
    protected void activate(ComponentContext context) {
        this.ctx = context;
    }

    /**
     * Bind-method for {@link #REFERENCE_DOCFACTORY}.
     *
     * @param docFactory the reference of the document-factory that became available
     */
    protected void addDocFactory(ServiceReference docFactory) {
        // acquire before entering try: if lock() fails, finally must not unlock a lock we do not hold
        w.lock();
        try {
            this.docFactoryRefs.add(docFactory);
        } finally {
            w.unlock();
        }
    }

    /**
     * Unbind-method for {@link #REFERENCE_DOCFACTORY}.
     *
     * @param docFactory the reference of the document-factory that is going away
     */
    protected void removeDocFactory(ServiceReference docFactory) {
        // acquire before entering try: if lock() fails, finally must not unlock a lock we do not hold
        w.lock();
        try {
            this.docFactoryRefs.remove(docFactory);
        } finally {
            w.unlock();
        }
    }

    /**
     * Bind-method for {@link #REFERENCE_CRAWLERCONTEXT_AWARE}: hands this thread-local to the service.
     *
     * @param contextAwareCrawler the service that wants access to the crawler-context
     */
    protected void addContextAwareCrawler(ICrawlerContextAware contextAwareCrawler) {
        contextAwareCrawler.setCrawlerContextLocal(this);
    }

    /**
     * Unbind-method for {@link #REFERENCE_CRAWLERCONTEXT_AWARE}: clears the thread-local on the service.
     *
     * @param contextAwareCrawler the service that is going away
     */
    protected void removeContextAwareCrawler(ICrawlerContextAware contextAwareCrawler) {
        contextAwareCrawler.setCrawlerContextLocal(null);
    }

    /**
     * Bind-method for {@link #REFERENCE_SUBPARSERS}: registers all mime-types declared by the
     * sub-parser in {@link #supportedMimeTypes}.
     *
     * @param subParser the reference of the sub-parser that became available
     */
    protected void addSubParser(ServiceReference subParser) {
        final String[] mimeTypes = this.getSubParserMimeTypes(subParser);
        synchronized (this.mimeTypeUseCounts) {
            for (String mimeType : mimeTypes) {
                final String key = mimeType.trim();
                final Integer count = this.mimeTypeUseCounts.get(key);
                this.mimeTypeUseCounts.put(key, Integer.valueOf(count == null ? 1 : count.intValue() + 1));
                this.supportedMimeTypes.add(key);
            }
        }
    }

    /**
     * Unbind-method for {@link #REFERENCE_SUBPARSERS}: a mime-type declared by the sub-parser is
     * removed from {@link #supportedMimeTypes} only if no other bound sub-parser declared it too.
     *
     * @param subParser the reference of the sub-parser that is going away
     */
    protected void removeSubParser(ServiceReference subParser) {
        final String[] mimeTypes = this.getSubParserMimeTypes(subParser);
        synchronized (this.mimeTypeUseCounts) {
            for (String mimeType : mimeTypes) {
                final String key = mimeType.trim();
                final Integer count = this.mimeTypeUseCounts.get(key);
                if (count == null || count.intValue() <= 1) {
                    // last (or untracked) provider of this mime-type: drop it
                    this.mimeTypeUseCounts.remove(key);
                    this.supportedMimeTypes.remove(key);
                } else {
                    // another sub-parser still provides this mime-type: keep it
                    this.mimeTypeUseCounts.put(key, Integer.valueOf(count.intValue() - 1));
                }
            }
        }
    }

    /**
     * Reads the <code>MimeTypes</code> service-property of a sub-parser.
     *
     * @param reference the service-reference of the sub-parser
     * @return the declared mime-types; an empty array if the property is neither a
     *         {@link String} nor a <code>String[]</code>
     */
    private String[] getSubParserMimeTypes(ServiceReference reference) {
        final Object mimeTypesProp = reference.getProperty("MimeTypes");
        if (mimeTypesProp instanceof String) {
            return new String[] { (String) mimeTypesProp };
        } else if (mimeTypesProp instanceof String[]) {
            return (String[]) mimeTypesProp;
        }
        return new String[0];
    }

    /**
     * @return a fresh {@link Context} for the calling thread
     */
    @Override
    protected ICrawlerContext initialValue() {
        return new Context();
    }

    /**
     * Creates a document via the first registered {@link IDocumentFactory} that matches.
     *
     * @param docInterface the document interface the factory must support; matched against the
     *        {@link IDocumentFactory#DOCUMENT_TYPE} service-property
     * @param filter an additional filter the factory's service-reference must match,
     *        or <code>null</code> for no additional restriction
     * @return the newly created document, or <code>null</code> if no matching factory was found
     *         or the factory service could not be located
     * @throws InvalidSyntaxException if <code>filter</code> (or the generated class-filter) is not a valid filter
     * @throws IOException declared for the factory's document creation
     */
    protected <DOC> DOC createDocumentForInterface(Class<DOC> docInterface, String filter)
            throws InvalidSyntaxException, IOException {
        final Filter classFilter = ctx.getBundleContext()
                .createFilter(String.format("(%s=%s)", IDocumentFactory.DOCUMENT_TYPE, docInterface.getName()));
        final Filter propsFilter = (filter == null) ? null : ctx.getBundleContext().createFilter(filter);

        ServiceReference factoryRef = null;
        // acquire before entering try: if lock() fails, finally must not unlock a lock we do not hold
        r.lock();
        try {
            // loop through all doc-factories and find one that matches
            for (ServiceReference ref : docFactoryRefs) {
                if (classFilter.match(ref) && (propsFilter == null || propsFilter.match(ref))) {
                    factoryRef = ref;
                    break;
                }
            }
        } finally {
            r.unlock();
        }

        // no factory found
        if (factoryRef == null) {
            return null;
        }

        // creating a document
        final IDocumentFactory factory = (IDocumentFactory) ctx.locateService(REFERENCE_DOCFACTORY, factoryRef);
        if (factory == null) {
            return null;
        }
        return factory.createDocument(docInterface);
    }

    /**
     * The {@link ICrawlerContext} implementation handed out per thread. It reads the service
     * fields of the enclosing {@link CrawlerContextLocal} on every call and keeps its own
     * property bag.
     */
    private class Context implements ICrawlerContext {
        /** Properties of this context; only cleared by {@link #reset()}. */
        private final HashMap<String, Object> bag = new HashMap<String, Object>();

        /**
         * @return a class that can be used to detect the charset of a resource
         *         This reference may be <code>null</code> if no 
         *         {@link ICharsetDetector charset-detector} is available.
         */
        public ICharsetDetector getCharsetDetector() {
            return charsetDetector;
        }

        /**
         * @return the {@link ICryptManager} currently held by the enclosing component
         *         (the reference is declared optional)
         */
        public ICryptManager getCryptManager() {
            return cryptManager;
        }

        /**
         * @return the {@link ITempFileManager} currently held by the enclosing component
         */
        public ITempFileManager getTempFileManager() {
            return tempFileManager;
        }

        /**
         * @return the {@link IIOTools} currently held by the enclosing component
         */
        public IIOTools getIoTools() {
            return ioTools;
        }

        /**
         * @return the {@link ICrawlerTools} currently held by the enclosing component
         */
        public ICrawlerTools getCrawlerTools() {
            return crawlerTools;
        }

        /**
         * @return a class that can be used to detect the mime-type of a resource
         *         This reference may be <code>null</code> if no 
         *         {@link IMimeTypeDetector mimetype-detector} is available.
         */
        public IMimeTypeDetector getMimeTypeDetector() {
            return mimeTypeDetector;
        }

        /**
         * @return a set of mime-types supported by the 
         *         {@link org.paxle.parser.ISubParser subparsers} that are 
         *         currently registered on the system. This is the live, synchronized
         *         set of the enclosing component, not a copy.
         */
        public Set<String> getSupportedMimeTypes() {
            return supportedMimeTypes;
        }

        /**
         * TODO: currently this is a read-only {@link ICommandProfile}. We should wrap it with a transparent proxy
         * and should flush it back to db if one of the command-profile-properties were changed.
         *
         * @param profileID the ID of the profile to load
         * @return the profile returned by the {@link ICommandProfileManager}, or <code>null</code>
         *         if no profile-manager is available
         */
        public ICommandProfile getCommandProfile(int profileID) {
            if (cmdProfileManager == null) {
                return null;
            }
            return cmdProfileManager.getProfileByID(profileID);
        }

        /**
         * @return the {@link ICommandProfile} that belongs to the {@link ICommand}
         * currently processed by the parser-worker thread, or <code>null</code> if the
         * property <code>cmd.profileOID</code> is not set on this context
         */
        public ICommandProfile getCommandProfile() {
            Integer profileID = (Integer) this.getProperty("cmd.profileOID");
            if (profileID == null) {
                return null;
            }
            return this.getCommandProfile(profileID.intValue());
        }

        /**
         * Creates a new {@link ICrawlerDocument} without an additional factory filter.
         *
         * @return the new document, or <code>null</code> if no suitable factory is available
         * @throws IOException declared for the factory's document creation
         */
        public ICrawlerDocument createDocument() throws IOException {
            try {
                return this.createDocument(ICrawlerDocument.class, null);
            } catch (InvalidSyntaxException e) {
                // this should not occur; keep the original exception as cause so the stack trace is not lost
                throw new RuntimeException(e.getMessage(), e);
            }
        }

        /**
         * Creates a new document of the given type.
         *
         * @param docInterface the document interface to create an instance for, must not be <code>null</code>
         * @param filter an additional filter for the factory lookup, or <code>null</code>
         * @return the new document, or <code>null</code> if no suitable factory is available
         * @throws NullPointerException if <code>docInterface</code> is <code>null</code>
         * @throws InvalidSyntaxException if <code>filter</code> is not a valid filter
         * @throws IOException declared for the factory's document creation
         */
        public <DocInterface> DocInterface createDocument(Class<DocInterface> docInterface, String filter)
                throws InvalidSyntaxException, IOException {
            if (docInterface == null) {
                throw new NullPointerException("The interface-class must not be null");
            }
            return createDocumentForInterface(docInterface, filter);
        }

        /* ========================================================================
         * Function operating on the property bag
         * ======================================================================== */

        /**
         * @param name the property name
         * @return the stored value, or <code>null</code> if no value is stored under that name
         */
        public Object getProperty(String name) {
            return this.bag.get(name);
        }

        /**
         * Stores a property, replacing any previous value with the same name.
         *
         * @param name the property name
         * @param value the value to store
         */
        public void setProperty(String name, Object value) {
            this.bag.put(name, value);
        }

        /**
         * Removes a property.
         *
         * @param name the property name
         */
        public void removeProperty(String name) {
            this.bag.remove(name);
        }

        /**
         * Removes all properties from this context.
         */
        public void reset() {
            this.bag.clear();
        }
    }
}