Java tutorial
/* * $HeadURL$ * $Id$ * * Copyright (c) 2007-2012 by Public Library of Science * http://plos.org * http://ambraproject.org * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ambraproject.service.trackback; import org.ambraproject.models.Article; import org.ambraproject.models.Journal; import org.ambraproject.models.Linkback; import org.ambraproject.service.hibernate.HibernateServiceImpl; import org.ambraproject.views.LinkbackView; import org.apache.commons.configuration.Configuration; import org.apache.commons.lang.StringUtils; import org.hibernate.Criteria; import org.hibernate.criterion.DetachedCriteria; import org.hibernate.criterion.Order; import org.hibernate.criterion.Projections; import org.hibernate.criterion.Restrictions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.orm.hibernate3.HibernateTemplate; import javax.swing.text.MutableAttributeSet; import javax.swing.text.html.HTML; import javax.swing.text.html.HTMLEditorKit; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.StringReader; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.List; public abstract class LinkbackServiceImpl extends HibernateServiceImpl implements LinkbackService { private static final Logger log = LoggerFactory.getLogger(LinkbackServiceImpl.class); protected static final String DOI_RESOLVER_HOST = "dx.doi.org"; private static final String DEFAULT_DOI_SCHEME = "info:doi/"; protected abstract Configuration getConfiguration(); /** * {@inheritDoc} */ @Override public BlogLinkDigest examineBlogPage(URL blogUrl, LinkValidator linkValidator) throws IOException { log.debug("Validating blog at {}", blogUrl); // Trick gets Swing's HTML parser HTMLEditorKit.Parser parser = (new HTMLEditorKit() { public Parser getParser() { return super.getParser(); } }).getParser(); // Read HTML file into string StringBuilder html = new StringBuilder(); BufferedReader bufferedReader = null; try { InputStream inputStream = blogUrl.openStream(); bufferedReader = new BufferedReader(new InputStreamReader(inputStream)); String line; while ((line = bufferedReader.readLine()) != null) { html.append(line); } //parse the html, looking for links LinkCallback callback = new LinkCallback(linkValidator); parser.parse(new StringReader(html.toString()), callback, true); return callback.makeDigest(); } finally { //close our reader (closes all the encapsulated streams) if (bufferedReader != null) { try { bufferedReader.close(); } catch (IOException e) { log.error("Error closing buffered input reader to " + blogUrl, e); } } } } protected static String fetchJournalName(HibernateTemplate hibernateTemplate, String eIssn) { return (String) hibernateTemplate.findByCriteria(DetachedCriteria.forClass(Journal.class) .add(Restrictions.eq("eIssn", eIssn)).setProjection(Projections.property("journalKey")), 0, 1) .get(0); } @Override @SuppressWarnings("unchecked") public List<LinkbackView> getLinkbacksForArticle(String articleDoi) { return getLinkbacksForArticle(Linkback.class, articleDoi); } protected List<LinkbackView> getLinkbacksForArticle(Class<? extends Linkback> type, String articleDoi) { if (StringUtils.isEmpty(articleDoi)) { throw new IllegalArgumentException("No Doi specified"); } Long articleId; String articleTitle; try { Object[] articleRow = (Object[]) hibernateTemplate.findByCriteria( DetachedCriteria.forClass(Article.class).add(Restrictions.eq("doi", articleDoi)).setProjection( Projections.projectionList().add(Projections.id()).add(Projections.property("title"))), 0, 1).get(0); articleId = (Long) articleRow[0]; articleTitle = (String) articleRow[1]; } catch (IndexOutOfBoundsException e) { throw new IllegalArgumentException("Doi " + articleDoi + " didn't correspond to an article"); } log.debug("loading up linkbacks for article {}", articleDoi); List<? extends Linkback> linkbacks = (List<? extends Linkback>) hibernateTemplate .findByCriteria(DetachedCriteria.forClass(type).add(Restrictions.eq("articleID", articleId)) .addOrder(Order.desc("created")).setResultTransformer(Criteria.DISTINCT_ROOT_ENTITY)); List<LinkbackView> results = new ArrayList<LinkbackView>(linkbacks.size()); for (Linkback linkback : linkbacks) { results.add(new LinkbackView(linkback, articleDoi, articleTitle)); } log.info("Loaded {} linkbacks for {}", results.size(), articleDoi); return results; } @Override public int countLinkbacksForArticle(String articleDoi) { return countLinkbacksForArticle(Linkback.class, articleDoi); } protected int countLinkbacksForArticle(Class<? extends Linkback> type, String articleDoi) { if (StringUtils.isEmpty(articleDoi)) { throw new IllegalArgumentException("Didn't specify an article doi"); } Long articleId; try { articleId = (Long) hibernateTemplate.findByCriteria(DetachedCriteria.forClass(Article.class) .add(Restrictions.eq("doi", articleDoi)).setProjection(Projections.id()), 0, 1).get(0); } catch (IndexOutOfBoundsException e) { throw new IllegalArgumentException("Doi: " + articleDoi + " didn't correspond to an article"); } // Get a list of row counts, one for each subtype. Return their sum. List<? extends Number> counts = (List<? extends Number>) hibernateTemplate.findByCriteria(DetachedCriteria .forClass(type).add(Restrictions.eq("articleID", articleId)).setProjection(Projections.rowCount())); int sum = 0; for (Number count : counts) { sum += count.intValue(); } return sum; } /** * Parser callback that examines HTML (typically a blog post) to see if there is a link to the article URL in it. It * also picks up the page title, and can yield both pieces of data as a {@link BlogLinkDigest}. * <p/> * Once the parser using this callback has found enough data for a complete {@link BlogLinkDigest}, the callback will * throw a {@code ParserEarlyHaltException} to interrupt the parser. Any code calling the parser must catch (and will * generally ignore) the exception. */ protected static final class LinkCallback extends HTMLEditorKit.ParserCallback { private final LinkValidator linkValidator; private boolean atTitle = false; private URL link = null; private String title = null; private LinkCallback(LinkValidator linkValidator) { this.linkValidator = linkValidator; } //Callback method @Override public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int pos) { if (HTML.Tag.A == tag) { String href = (String) attributes.getAttribute(HTML.Attribute.HREF); if (href == null) { return; } URL blogLink; try { blogLink = new URL(href); } catch (MalformedURLException e) { return; // Ignore invalid or non-URL links } if (linkValidator.isValid(blogLink)) { this.link = blogLink; } } else if (HTML.Tag.TITLE == tag) { // Valid HTML has no elements nested in <title>, so expect the next handleText call to have the title atTitle = true; } } @Override public void handleText(char[] data, int pos) { if (atTitle) { title = String.valueOf(data); atTitle = false; } } public BlogLinkDigest makeDigest() { return new BlogLinkDigest(link, title); } } /** * Signals that we have everything we need from an external HTML page. Throw it to interrupt the parser. */ private static class ParserEarlyHaltException extends RuntimeException { private ParserEarlyHaltException() { super(); } } }