org.ambraproject.service.trackback.LinkbackServiceImpl.java Source code

Introduction

Here is the source code for org.ambraproject.service.trackback.LinkbackServiceImpl.java
Source

/*
 * $HeadURL$
 * $Id$
 *
 * Copyright (c) 2007-2012 by Public Library of Science
 * http://plos.org
 * http://ambraproject.org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ambraproject.service.trackback;

import org.ambraproject.models.Article;
import org.ambraproject.models.Journal;
import org.ambraproject.models.Linkback;
import org.ambraproject.service.hibernate.HibernateServiceImpl;
import org.ambraproject.views.LinkbackView;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.lang.StringUtils;
import org.hibernate.Criteria;
import org.hibernate.criterion.DetachedCriteria;
import org.hibernate.criterion.Order;
import org.hibernate.criterion.Projections;
import org.hibernate.criterion.Restrictions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.orm.hibernate3.HibernateTemplate;

import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

public abstract class LinkbackServiceImpl extends HibernateServiceImpl implements LinkbackService {

    private static final Logger log = LoggerFactory.getLogger(LinkbackServiceImpl.class);
    protected static final String DOI_RESOLVER_HOST = "dx.doi.org";
    private static final String DEFAULT_DOI_SCHEME = "info:doi/";

    protected abstract Configuration getConfiguration();

    /**
     * {@inheritDoc}
     */
    @Override
    public BlogLinkDigest examineBlogPage(URL blogUrl, LinkValidator linkValidator) throws IOException {
        log.debug("Validating blog at {}", blogUrl);

        // Trick gets Swing's HTML parser
        HTMLEditorKit.Parser parser = (new HTMLEditorKit() {
            public Parser getParser() {
                return super.getParser();
            }
        }).getParser();

        // Read HTML file into string
        StringBuilder html = new StringBuilder();
        BufferedReader bufferedReader = null;
        try {
            InputStream inputStream = blogUrl.openStream();
            bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                html.append(line);
            }

            //parse the html, looking for links
            LinkCallback callback = new LinkCallback(linkValidator);
            parser.parse(new StringReader(html.toString()), callback, true);
            return callback.makeDigest();
        } finally {
            //close our reader (closes all the encapsulated streams)
            if (bufferedReader != null) {
                try {
                    bufferedReader.close();
                } catch (IOException e) {
                    log.error("Error closing buffered input reader to " + blogUrl, e);
                }
            }
        }
    }

    protected static String fetchJournalName(HibernateTemplate hibernateTemplate, String eIssn) {
        return (String) hibernateTemplate.findByCriteria(DetachedCriteria.forClass(Journal.class)
                .add(Restrictions.eq("eIssn", eIssn)).setProjection(Projections.property("journalKey")), 0, 1)
                .get(0);
    }

    @Override
    @SuppressWarnings("unchecked")
    public List<LinkbackView> getLinkbacksForArticle(String articleDoi) {
        return getLinkbacksForArticle(Linkback.class, articleDoi);
    }

    protected List<LinkbackView> getLinkbacksForArticle(Class<? extends Linkback> type, String articleDoi) {
        if (StringUtils.isEmpty(articleDoi)) {
            throw new IllegalArgumentException("No Doi specified");
        }
        Long articleId;
        String articleTitle;
        try {
            Object[] articleRow = (Object[]) hibernateTemplate.findByCriteria(
                    DetachedCriteria.forClass(Article.class).add(Restrictions.eq("doi", articleDoi)).setProjection(
                            Projections.projectionList().add(Projections.id()).add(Projections.property("title"))),
                    0, 1).get(0);
            articleId = (Long) articleRow[0];
            articleTitle = (String) articleRow[1];
        } catch (IndexOutOfBoundsException e) {
            throw new IllegalArgumentException("Doi " + articleDoi + " didn't correspond to an article");
        }
        log.debug("loading up linkbacks for article {}", articleDoi);

        List<? extends Linkback> linkbacks = (List<? extends Linkback>) hibernateTemplate
                .findByCriteria(DetachedCriteria.forClass(type).add(Restrictions.eq("articleID", articleId))
                        .addOrder(Order.desc("created")).setResultTransformer(Criteria.DISTINCT_ROOT_ENTITY));
        List<LinkbackView> results = new ArrayList<LinkbackView>(linkbacks.size());
        for (Linkback linkback : linkbacks) {
            results.add(new LinkbackView(linkback, articleDoi, articleTitle));
        }

        log.info("Loaded {} linkbacks for {}", results.size(), articleDoi);
        return results;
    }

    @Override
    public int countLinkbacksForArticle(String articleDoi) {
        return countLinkbacksForArticle(Linkback.class, articleDoi);
    }

    protected int countLinkbacksForArticle(Class<? extends Linkback> type, String articleDoi) {
        if (StringUtils.isEmpty(articleDoi)) {
            throw new IllegalArgumentException("Didn't specify an article doi");
        }
        Long articleId;
        try {
            articleId = (Long) hibernateTemplate.findByCriteria(DetachedCriteria.forClass(Article.class)
                    .add(Restrictions.eq("doi", articleDoi)).setProjection(Projections.id()), 0, 1).get(0);
        } catch (IndexOutOfBoundsException e) {
            throw new IllegalArgumentException("Doi: " + articleDoi + " didn't correspond to an article");
        }

        // Get a list of row counts, one for each subtype. Return their sum.
        List<? extends Number> counts = (List<? extends Number>) hibernateTemplate.findByCriteria(DetachedCriteria
                .forClass(type).add(Restrictions.eq("articleID", articleId)).setProjection(Projections.rowCount()));
        int sum = 0;
        for (Number count : counts) {
            sum += count.intValue();
        }
        return sum;
    }

    /**
     * Parser callback that examines HTML (typically a blog post) to see if there is a link to the article URL in it. It
     * also picks up the page title, and can yield both pieces of data as a {@link BlogLinkDigest}.
     * <p/>
     * Once the parser using this callback has found enough data for a complete {@link BlogLinkDigest}, the callback will
     * throw a {@code ParserEarlyHaltException} to interrupt the parser. Any code calling the parser must catch (and will
     * generally ignore) the exception.
     */
    protected static final class LinkCallback extends HTMLEditorKit.ParserCallback {

        private final LinkValidator linkValidator;

        private boolean atTitle = false;
        private URL link = null;
        private String title = null;

        private LinkCallback(LinkValidator linkValidator) {
            this.linkValidator = linkValidator;
        }

        //Callback method
        @Override
        public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int pos) {
            if (HTML.Tag.A == tag) {
                String href = (String) attributes.getAttribute(HTML.Attribute.HREF);
                if (href == null) {
                    return;
                }

                URL blogLink;
                try {
                    blogLink = new URL(href);
                } catch (MalformedURLException e) {
                    return; // Ignore invalid or non-URL links
                }
                if (linkValidator.isValid(blogLink)) {
                    this.link = blogLink;
                }
            } else if (HTML.Tag.TITLE == tag) {
                // Valid HTML has no elements nested in <title>, so expect the next handleText call to have the title
                atTitle = true;
            }
        }

        @Override
        public void handleText(char[] data, int pos) {
            if (atTitle) {
                title = String.valueOf(data);
                atTitle = false;
            }
        }

        public BlogLinkDigest makeDigest() {
            return new BlogLinkDigest(link, title);
        }

    }

    /**
     * Signals that we have everything we need from an external HTML page. Throw it to interrupt the parser.
     */
    private static class ParserEarlyHaltException extends RuntimeException {
        private ParserEarlyHaltException() {
            super();
        }
    }

}