org.apache.roller.weblogger.business.jpa.JPARefererManagerImpl.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.roller.weblogger.business.jpa.JPARefererManagerImpl.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  The ASF licenses this file to You
 * under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.  For additional information regarding
 * copyright in this work, please see the NOTICE file in the top level
 * directory of this distribution.
 */
package org.apache.roller.weblogger.business.jpa;

import java.sql.Timestamp;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Collections;
import java.util.Comparator;
import javax.persistence.Query;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.lang.StringUtils;

import org.apache.roller.weblogger.WebloggerException;
import org.apache.roller.weblogger.business.Weblogger;
import org.apache.roller.weblogger.business.referrers.RefererManager;
import org.apache.roller.weblogger.config.WebloggerRuntimeConfig;
import org.apache.roller.weblogger.pojos.WeblogReferrer;
import org.apache.roller.weblogger.pojos.StatCount;
import org.apache.roller.weblogger.pojos.WeblogEntry;
import org.apache.roller.weblogger.pojos.Weblog;
import org.apache.roller.weblogger.pojos.StatCountCountComparator;
import org.apache.roller.weblogger.util.LinkbackExtractor;
import org.apache.roller.weblogger.util.Utilities;

/*
 * JPARefererManagerImpl.java
 */
@com.google.inject.Singleton
public class JPARefererManagerImpl implements RefererManager {

    private static Log log = LogFactory.getLog(JPARefererManagerImpl.class);

    protected static final String DAYHITS = "dayHits";
    protected static final String TOTALHITS = "totalHits";

    private static final Comparator statCountCountReverseComparator = Collections
            .reverseOrder(StatCountCountComparator.getInstance());

    /** The strategy for this manager. */
    private final Weblogger roller;
    private final JPAPersistenceStrategy strategy;

    /**
     * Creates a new instance of JPARefererManagerImpl
     */
    @com.google.inject.Inject
    protected JPARefererManagerImpl(Weblogger roller, JPAPersistenceStrategy strategy) {
        log.debug("Instantiating JPA Referer Manager");
        this.roller = roller;
        this.strategy = strategy;
    }

    public void saveReferer(WeblogReferrer referer) throws WebloggerException {
        strategy.store(referer);
    }

    public void removeReferer(WeblogReferrer referer) throws WebloggerException {
        strategy.remove(referer);
    }

    /**
     * Clear referrer dayhits and remove referrers without excerpts.
     */
    public void clearReferrers() throws WebloggerException {
        clearDayHits();
        Query q = strategy.getNamedUpdate("WeblogReferrer.removeByNullOrEmptyExcerpt");
        q.executeUpdate();
    }

    /**
     * Clear referrer dayhits and remove referrers without excerpts.
     */
    public void clearReferrers(Weblog website) throws WebloggerException {
        clearDayHitsByWebsite(website);
        Query q = strategy.getNamedUpdate("WeblogReferrer.removeByNullOrEmptyExcerpt&Website");
        q.setParameter(1, website);
        q.executeUpdate();
    }

    /**
     * Apply ignoreWord/spam filters to all referers in system.
     */
    public void applyRefererFilters() throws WebloggerException {
        String spamwords = WebloggerRuntimeConfig.getProperty("spam.blacklist");
        String[] blacklist = StringUtils.split(StringUtils.deleteWhitespace(spamwords), ",");
        if (blacklist.length == 0)
            return;
        List referers = getBlackListedReferer(blacklist);
        for (Iterator iterator = referers.iterator(); iterator.hasNext();) {
            WeblogReferrer referer = (WeblogReferrer) iterator.next();
            this.strategy.remove(referer);
        }
    }

    /**
     * Apply ignoreWord/spam filters to all referers in website.
     */
    public void applyRefererFilters(Weblog website) throws WebloggerException {
        if (null == website)
            throw new WebloggerException("website is null");
        if (null == website.getBlacklist())
            return;

        String[] blacklist = StringUtils.split(StringUtils.deleteWhitespace(website.getBlacklist()), ",");
        if (blacklist.length == 0)
            return;
        List referers = getBlackListedReferer(website, blacklist);
        for (Iterator iterator = referers.iterator(); iterator.hasNext();) {
            WeblogReferrer referer = (WeblogReferrer) iterator.next();
            this.strategy.remove(referer);
        }
    }

    protected List getExistingReferers(Weblog website, String dateString, String permalink)
            throws WebloggerException {

        Query q = strategy.getNamedQuery("WeblogReferrer.getByWebsite&DateString&RefererPermalink");
        q.setParameter(1, website);
        q.setParameter(2, dateString);
        q.setParameter(3, permalink);
        return q.getResultList();
    }

    protected List getMatchingReferers(Weblog website, String requestUrl, String refererUrl)
            throws WebloggerException {

        Query q = strategy.getNamedQuery("WeblogReferrer.getByWebsite&RequestUrl&RefererUrl");
        q.setParameter(1, website);
        q.setParameter(2, requestUrl);
        q.setParameter(3, refererUrl);
        return q.getResultList();
    }

    /**
     * Returns hot weblogs as StatCount objects, in descending order by today's
     * hits.
     * @param sinceDays Restrict to last X days (or -1 for all)
     * @param offset Offset into results (for paging)
     * @param length Maximum number of results to return (for paging)
     * @return List of StatCount objects.
     */
    public List getHotWeblogs(int sinceDays, int offset, int length) throws WebloggerException {

        String msg = "Getting hot weblogs";
        List results = new ArrayList();
        Calendar cal = Calendar.getInstance();
        cal.setTime(new Date());
        cal.add(Calendar.DATE, -1 * sinceDays);
        Date startDate = cal.getTime();

        if (length == -1) {
            length = Integer.MAX_VALUE - offset;
        }

        Query q = strategy.getNamedQuery(
                "WeblogReferrer.getHotWeblogsByWebsite.enabled&Website.active&Website.lastModifiedGreater");

        if (offset != 0 || length != -1) {
            q.setFirstResult(offset);
            q.setMaxResults(length);
        }
        Timestamp start = new Timestamp(startDate.getTime());
        q.setParameter(1, Boolean.TRUE);
        q.setParameter(2, Boolean.TRUE);
        q.setParameter(3, start);
        List queryResults = (List) q.getResultList();
        for (Iterator it = queryResults.iterator(); it.hasNext();) {
            Object[] row = (Object[]) it.next();
            long hits = ((Number) row[0]).longValue();
            String websiteId = (String) row[1];
            String websiteName = (String) row[2];
            String websiteHandle = (String) row[3];
            results.add(new StatCount(websiteId, websiteHandle, websiteName, "statCount.weblogDayHits", hits));
        }
        // Original query ordered by desc hits.
        // JPA QL doesn't allow queries to be ordered by agregates; do it in memory
        Collections.sort(results, statCountCountReverseComparator);

        return results;
    }

    protected int getHits(Weblog website, String type) throws WebloggerException {
        int hits = -1;
        if (log.isDebugEnabled()) {
            log.debug("getHits: " + website.getName());
        }
        //TODO: JPAPort. This query retrieves both SUM(r.dayHits), SUM(r.totalHits)
        //The method only comsumes one of them. We can optimize the logic to retrieve only the 
        //requied SUM
        Query query = strategy.getNamedQuery("WeblogReferrer.getHitsByWebsite.enabled&Website.id");
        query.setParameter(1, Boolean.TRUE);
        query.setParameter(2, website.getId());
        List results = query.getResultList();

        Object[] resultsArray = (Object[]) results.get(0);

        if (resultsArray.length > 0 && type.equals(DAYHITS)) {
            if (resultsArray[0] != null) {
                hits = ((Long) resultsArray[0]).intValue();
            }
        } else if (resultsArray.length > 0) {
            if (resultsArray[0] != null) {
                hits = ((Long) resultsArray[1]).intValue();
            }
        } else {
            hits = 0;
        }

        return hits;
    }

    /**
     * Get all referers for specified weblog.
     * @param weblog
     * @return List of type WeblogReferrer
     */
    public List getReferers(Weblog weblog) throws WebloggerException {
        Query q = strategy.getNamedQuery("WeblogReferrer.getByWebsiteOrderByTotalHitsDesc");
        q.setParameter(1, weblog);
        return q.getResultList();
    }

    /**
     * Get all referers for specified user that were made today.
     * @param website Web site.
     * @return List of type WeblogReferrer
     */
    public List getTodaysReferers(Weblog website) throws WebloggerException {
        Query q = strategy.getNamedQuery("WeblogReferrer.getByWebsite&DayHitsGreaterZeroOrderByDayHitsDesc");
        q.setParameter(1, website);
        return q.getResultList();
    }

    /**
     * Get referers for a specified date.
     * @param website Web site.
     * @param date YYYYMMDD format of day's date.
     * @return List of type WeblogReferrer.
     * @throws org.apache.roller.weblogger.WebloggerException
     */
    public List getReferersToDate(Weblog website, String date) throws WebloggerException {

        if (website == null)
            throw new WebloggerException("website is null");

        if (date == null)
            throw new WebloggerException("Date is null");

        Query q = strategy.getNamedQuery("WeblogReferrer.getByWebsite&DateString&DuplicateOrderByTotalHitsDesc");
        q.setParameter(1, website);
        q.setParameter(2, date);
        q.setParameter(3, Boolean.FALSE);
        return q.getResultList();
    }

    /**
     * Get referers that refer to a specific weblog entry.
     * @param entryid Weblog entry ID
     * @return List of WeblogReferrer objects.
     * @throws org.apache.roller.weblogger.WebloggerException
     */
    public List getReferersToEntry(String entryid) throws WebloggerException {
        if (null == entryid)
            throw new WebloggerException("entryid is null");
        //TODO: DataMapperPort: Change calling code to pass WeblogEntry instead of id
        // we should change calling code to pass instance of WeblogEntry instead
        // of extracting and passing id. Once that is done, change the code below to
        // skip the load (Please note that the load below will always find the enty in cache)
        Query q = strategy
                .getNamedQuery("WeblogReferrer.getByWeblogEntry&TitleNotNull&ExcerptNotNullOrderByTotalHitsDesc");
        q.setParameter(1, strategy.load(WeblogEntry.class, entryid));
        return q.getResultList();
    }

    /**
     * Query for collection of referers.
     */
    protected List getReferersToWebsite(Weblog website, String refererUrl) throws WebloggerException {
        Query q = strategy.getNamedQuery("WeblogReferrer.getByWebsite&RefererUrl");
        q.setParameter(1, website);
        q.setParameter(2, refererUrl);
        return q.getResultList();
    }

    /**
     * Query for collection of referers.
     */
    protected List getReferersWithSameTitle(Weblog website, String requestUrl, String title, String excerpt)
            throws WebloggerException {
        Query q = strategy.getNamedQuery("WeblogReferrer.getByWebsite&RequestURL&TitleOrExcerpt");
        q.setParameter(1, website);
        q.setParameter(2, requestUrl);
        q.setParameter(3, title);
        q.setParameter(4, excerpt);
        return q.getResultList();
    }

    /**
     * Get user's day hits
     */
    public int getDayHits(Weblog website) throws WebloggerException {
        return getHits(website, DAYHITS);
    }

    /**
     * Get user's all-time total hits
     */
    public int getTotalHits(Weblog website) throws WebloggerException {
        return getHits(website, TOTALHITS);
    }

    /**
     * Retrieve referer by id.
     */
    public WeblogReferrer getReferer(String id) throws WebloggerException {
        return (WeblogReferrer) strategy.load(WeblogReferrer.class, id);
    }

    /**
     * Process an incoming referer.
     */
    public void processReferrer(String requestUrl, String referrerUrl, String weblogHandle, String entryAnchor,
            String dateString) {
        log.debug("processing referrer [" + referrerUrl + "] accessing [" + requestUrl + "]");

        if (weblogHandle == null)
            return;

        String selfSiteFragment = "/" + weblogHandle;
        Weblog weblog = null;
        WeblogEntry entry = null;

        // lookup the weblog now
        try {
            weblog = roller.getWeblogManager().getWeblogByHandle(weblogHandle);
            if (weblog == null)
                return;

            // now lookup weblog entry if possible
            if (entryAnchor != null) {
                // sanitize the anchor to avoid "Illegal mix of collations"
                entryAnchor = Utilities.replaceNonAlphanumeric(entryAnchor, ' ').trim();
                entry = roller.getWeblogEntryManager().getWeblogEntryByAnchor(weblog, entryAnchor);
            }
        } catch (WebloggerException re) {
            // problem looking up website, gotta bail
            log.error("Error looking up website object", re);
            return;
        }

        try {
            List matchRef = null;

            // try to find existing WeblogReferrer for referrerUrl
            if (referrerUrl == null || referrerUrl.trim().length() < 8) {
                referrerUrl = "direct";

                // Get referer specified by referer URL of direct
                matchRef = getReferersToWebsite(weblog, referrerUrl);
            } else {
                referrerUrl = Utilities.stripJsessionId(referrerUrl);

                // Query for referer with same referer and request URLs
                matchRef = getMatchingReferers(weblog, requestUrl, referrerUrl);

                // If referer was not found, try adding or leaving off 'www'
                if (matchRef.size() == 0) {
                    String secondTryUrl = null;
                    if (referrerUrl.startsWith("http://www")) {
                        secondTryUrl = "http://" + referrerUrl.substring(11);
                    } else {
                        secondTryUrl = "http://www" + referrerUrl.substring(7);
                    }

                    matchRef = getMatchingReferers(weblog, requestUrl, secondTryUrl);
                    if (matchRef.size() == 1) {
                        referrerUrl = secondTryUrl;
                    }
                }
            }

            if (matchRef.size() == 1) {
                // Referer was found in database, so bump up hit count
                WeblogReferrer ref = (WeblogReferrer) matchRef.get(0);

                ref.setDayHits(new Integer(ref.getDayHits().intValue() + 1));
                ref.setTotalHits(new Integer(ref.getTotalHits().intValue() + 1));

                log.debug("Incrementing hit count on existing referer: " + referrerUrl);

                saveReferer(ref);

            } else if (matchRef.size() == 0) {

                // Referer was not found in database, so new Referer object
                Integer one = new Integer(1);
                WeblogReferrer ref = new WeblogReferrer(null, weblog, entry, dateString, referrerUrl, null,
                        requestUrl, null, "", // Read comment above regarding Derby bug
                        Boolean.FALSE, Boolean.FALSE, one, one);

                if (log.isDebugEnabled()) {
                    log.debug("newReferer=" + ref.getRefererUrl());
                }

                String refurl = ref.getRefererUrl();

                // If not a direct or search engine then search for linkback
                boolean doLinkbackExtraction = WebloggerRuntimeConfig.getBooleanProperty("site.linkbacks.enabled");
                if (doLinkbackExtraction && entry != null && !refurl.equals("direct")
                        && !refurl.startsWith("http://google") && !refurl.startsWith("http://www.google")
                        && !refurl.startsWith("http://search.netscape") && !refurl.startsWith("http://www.blinkpro")
                        && !refurl.startsWith("http://search.msn") && !refurl.startsWith("http://search.yahoo")
                        && !refurl.startsWith("http://uk.search.yahoo")
                        && !refurl.startsWith("http://www.javablogs.com")
                        && !refurl.startsWith("http://www.teoma")) {
                    // Launch thread to extract referer linkback

                    try {
                        Weblogger mRoller = roller;
                        mRoller.getThreadManager().executeInBackground(new LinkbackExtractorRunnable(ref));
                    } catch (InterruptedException e) {
                        log.warn("Interrupted during linkback extraction", e);
                    }
                } else {
                    saveReferer(ref);
                }
            }
        } catch (WebloggerException pe) {
            log.error(pe);
        } catch (NullPointerException npe) {
            log.error(npe);
        }
    }

    /**
     * Use LinkbackExtractor to parse title and excerpt from referer
     */
    class LinkbackExtractorRunnable implements Runnable {

        private WeblogReferrer mReferer = null;

        public LinkbackExtractorRunnable(WeblogReferrer referer) {
            mReferer = referer;
        }

        public void run() {

            try {
                LinkbackExtractor lb = new LinkbackExtractor(mReferer.getRefererUrl(), mReferer.getRequestUrl());

                if (lb.getTitle() != null && lb.getExcerpt() != null) {
                    mReferer.setTitle(lb.getTitle());
                    mReferer.setExcerpt(lb.getExcerpt());

                    if (lb.getPermalink() != null) {
                        // The presence of a permalink indicates that this
                        // linkback was parsed out of an RSS feed and is
                        // presumed to be a good linkback.

                        mReferer.setRefererPermalink(lb.getPermalink());

                        // See if this request/permalink is in the DB
                        List matchRef = getExistingReferers(mReferer.getWebsite(), mReferer.getDateString(),
                                mReferer.getRefererPermalink());

                        // If it is the first, then set it to be visible
                        if (matchRef.size() == 0) {
                            mReferer.setVisible(Boolean.TRUE);
                        } else {
                            // We can't throw away duplicates or we will
                            // end up reparsing them everytime a hit comes
                            // in from one of them, but we can mark them
                            // as duplicates.
                            mReferer.setDuplicate(Boolean.TRUE);
                        }

                        saveReferer(mReferer);

                    }

                    else {
                        // Store the new referer
                        saveReferer(mReferer);

                        // Hacky Referer URL weighting kludge:
                        //
                        // If there are multple referers to a request URL,
                        // then we want to pick the best one. The others
                        // are marked as duplicates. To do this we use a
                        // weight. The weight formula is:
                        //
                        // w = URL length + (100 if URL contains anchor)

                        // LOOP: find the referer with the highest weight
                        Boolean visible = Boolean.FALSE;
                        List refs = getReferersWithSameTitle(mReferer.getWebsite(), mReferer.getRequestUrl(),
                                lb.getTitle(), lb.getExcerpt());
                        WeblogReferrer chosen = null;
                        int maxweight = 0;
                        for (Iterator rdItr = refs.iterator(); rdItr.hasNext();) {
                            WeblogReferrer referer = (WeblogReferrer) rdItr.next();

                            int weight = referer.getRefererUrl().length();
                            if (referer.getRefererUrl().indexOf('#') != -1) {
                                weight += 100;
                            }

                            if (weight > maxweight) {
                                chosen = referer;
                                maxweight = weight;
                            }

                            if (referer.getVisible().booleanValue()) {
                                // If any are visible then chosen
                                // replacement must be visible as well.
                                visible = Boolean.TRUE;
                            }

                        }

                        // LOOP: to mark all of the lower weight ones
                        // as duplicates
                        for (Iterator rdItr = refs.iterator(); rdItr.hasNext();) {
                            WeblogReferrer referer = (WeblogReferrer) rdItr.next();

                            if (referer != chosen) {
                                referer.setDuplicate(Boolean.TRUE);
                            } else {
                                referer.setDuplicate(Boolean.FALSE);
                                referer.setVisible(visible);
                            }
                            saveReferer(referer);
                        }

                    }
                } else {
                    // It is not a linkback, but store it anyway
                    saveReferer(mReferer);

                    log.info("No excerpt found at refering URL " + mReferer.getRefererUrl());
                }
            } catch (Exception e) {
                log.error("Processing linkback", e);
            } finally {
                strategy.release();
            }

        }

    }

    /**
     * Release all resources held by manager.
     */
    public void release() {
    }

    protected void clearDayHits() throws WebloggerException {
        Query query = strategy.getNamedUpdate("WeblogReferrer.clearDayHits");
        query.executeUpdate();
    }

    protected void clearDayHitsByWebsite(Weblog website) throws WebloggerException {
        Query query = strategy.getNamedUpdate("WeblogReferrer.clearDayHitsByWebsite");
        query.setParameter(1, website);
        query.executeUpdate();
    }

    protected List getBlackListedReferer(String[] blacklist) throws WebloggerException {
        StringBuffer queryString = getQueryStringForBlackList(blacklist);
        Query query = strategy.getDynamicQuery(queryString.toString());
        return (List) query.getResultList();
    }

    protected List getBlackListedReferer(Weblog website, String[] blacklist) throws WebloggerException {
        StringBuffer queryString = getQueryStringForBlackList(blacklist);
        queryString.append(" AND r.website = ?1 ");
        Query query = strategy.getDynamicQuery(queryString.toString());
        query.setParameter(1, website);
        return query.getResultList();
    }

    /**
     * Generates a JPQL query of form
     * SELECT r FROM WeblogReferrer r WHERE
     *     ( refererUrl like %blacklist[1] ..... OR refererUrl like %blacklist[n])
     * @param blacklist
     * @return
     */
    private StringBuffer getQueryStringForBlackList(String[] blacklist) {
        assert blacklist.length > 0;
        StringBuffer queryString = new StringBuffer("SELECT r FROM WeblogReferrer r WHERE (");
        //Search for any matching entry from blacklist[]
        final String OR = " OR ";
        for (int i = 0; i < blacklist.length; i++) {
            String ignoreWord = blacklist[i];
            //TODO: DataMapper port: original code use "like ignore case" as follows
            // or.add(Expression.ilike("refererUrl","%"+ignoreWord+"%"));
            // There is no equivalent for it in JPA
            queryString.append("r.refererUrl like '%").append(ignoreWord.trim()).append("%'").append(OR);
        }
        // Get rid of last OR
        queryString.delete(queryString.length() - OR.length(), queryString.length());
        queryString.append(" ) ");
        return queryString;
    }
}