org.glimpse.server.news.dom.DomServerNewsChannelBuilder.java Source code

Java tutorial

Introduction

Here is the source code for org.glimpse.server.news.dom.DomServerNewsChannelBuilder.java

Source

/*
 * Copyright (C) 2009 Nicolas Dutertry
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
package org.glimpse.server.news.dom;

import java.io.InputStream;
import java.text.DateFormatSymbols;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.glimpse.client.news.Enclosure;
import org.glimpse.server.news.ServerEntry;
import org.glimpse.server.news.ServerNewsChannel;
import org.glimpse.server.news.ServerNewsChannelBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

/**
 * {@link ServerNewsChannelBuilder} that parses a feed into a DOM tree and
 * extracts the channel link, title and entries with XPath.
 * <p>
 * A document whose root element is named <code>feed</code> is read as Atom;
 * every other document is read as RSS 2.0. Entries lacking an id, a title or
 * a URL are dropped.
 */
public class DomServerNewsChannelBuilder implements ServerNewsChannelBuilder {
    private static final Log logger = LogFactory.getLog(DomServerNewsChannelBuilder.class);

    /** RFC 822 style date used by the RSS <code>pubDate</code> element. */
    private static final String RSS_DATE_PATTERN = "E, dd MMM yyyy HH:mm:ss Z";
    /** Atom timestamp truncated to whole seconds, without zone information. */
    private static final String ATOM_UTC_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss";
    /** Atom timestamp truncated to whole seconds, followed by a "+hhmm" offset. */
    private static final String ATOM_OFFSET_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ssZ";
    /** Length of the "yyyy-MM-ddTHH:mm:ss" prefix of an Atom timestamp. */
    private static final int ATOM_DATE_TIME_LENGTH = 19;

    /**
     * Parses the given stream and builds the matching channel.
     *
     * @param is the raw feed document
     * @return the channel, or <code>null</code> if the detected type is
     *         neither RSS 2.0 nor Atom
     * @throws Exception if the document cannot be parsed or queried
     */
    public ServerNewsChannel buildChannel(InputStream is) throws Exception {
        Document doc = createDocument(is);
        switch (getType(doc)) {
        case RSS_2_0:
            return new ServerNewsChannel(getRSSLink(doc), getRSSTitle(doc), getRSSEntries(doc));
        case ATOM:
            return new ServerNewsChannel(getAtomLink(doc), getAtomTitle(doc), getAtomEntries(doc));
        }
        return null;
    }

    /**
     * Parses the stream into a DOM document.
     * <p>
     * Feeds are untrusted input, so resolution of external entities and
     * external DTDs is switched off. The factory is deliberately left
     * non-namespace-aware: the XPath expressions and {@link #getType(Document)}
     * in this class match on plain, unprefixed element names.
     */
    private Document createDocument(InputStream is) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        setFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        setFeature(factory, "http://xml.org/sax/features/external-general-entities", false);
        setFeature(factory, "http://xml.org/sax/features/external-parameter-entities", false);
        setFeature(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        DocumentBuilder builder = factory.newDocumentBuilder();
        return builder.parse(is);
    }

    /**
     * Applies a parser feature, logging instead of failing when the underlying
     * parser implementation does not know the feature.
     */
    private static void setFeature(DocumentBuilderFactory factory, String feature, boolean value) {
        try {
            factory.setFeature(feature, value);
        } catch (ParserConfigurationException e) {
            logger.warn("XML parser does not support feature <" + feature + ">", e);
        }
    }

    /**
     * Reads every <code>/rss/channel/item</code> element.
     * <p>
     * The item link doubles as the entry id; when it is blank the id falls
     * back to the publication date and the URL to the channel link.
     */
    private List<ServerEntry> getRSSEntries(Document doc) throws Exception {
        XPath xpath = XPathFactory.newInstance().newXPath();
        List<ServerEntry> rssEntries = new LinkedList<ServerEntry>();
        NodeList items = (NodeList) xpath.evaluate("/rss/channel/item", doc, XPathConstants.NODESET);
        // Looked up at most once, and only if some item has no link of its own.
        String channelLink = null;

        for (int i = 0; i < items.getLength(); i++) {
            Element elmItem = (Element) items.item(i);
            String link = xpath.evaluate("link", elmItem);
            String title = xpath.evaluate("title", elmItem);
            String pubDate = xpath.evaluate("pubDate", elmItem);

            List<Enclosure> enclosures = new LinkedList<Enclosure>();
            NodeList enclosureList = (NodeList) xpath.evaluate("enclosure", elmItem, XPathConstants.NODESET);
            for (int j = 0; j < enclosureList.getLength(); j++) {
                Element elmEnclosure = (Element) enclosureList.item(j);
                Enclosure enclosure = new Enclosure();
                enclosure.setUrl(elmEnclosure.getAttribute("url"));
                enclosure.setType(elmEnclosure.getAttribute("type"));
                enclosures.add(enclosure);
            }

            String id = StringUtils.isBlank(link) ? pubDate : link;
            String entryUrl = link;
            if (StringUtils.isBlank(entryUrl)) {
                if (channelLink == null) {
                    channelLink = getRSSLink(doc);
                }
                entryUrl = channelLink;
            }

            // Built after the fallback above so the "read more" anchor never
            // points at an empty href when the item itself has no link.
            String content = xpath.evaluate("description", elmItem);
            content += "<p><a href=\"" + entryUrl + "\" target=\"_blank\">Lire la suite</a></p>";

            ServerEntry rssEntry = new ServerEntry();
            rssEntry.setId(id);
            rssEntry.setTitle(title);
            rssEntry.setUrl(entryUrl);
            rssEntry.setEnclosures(enclosures);
            rssEntry.setContent(content);

            Date date = parseRssDate(pubDate);
            if (date != null) {
                rssEntry.setDate(date);
            }

            if (isComplete(id, title, entryUrl)) {
                rssEntries.add(rssEntry);
            }
        }

        return rssEntries;
    }

    /**
     * Reads every <code>/feed/entry</code> element.
     * <p>
     * A link with a blank or <code>alternate</code> rel gives the entry URL
     * (the last such link wins); links with rel <code>enclosure</code> become
     * enclosures. When <code>content</code> is blank the <code>summary</code>
     * is used instead, followed by a "read more" anchor.
     */
    private List<ServerEntry> getAtomEntries(Document doc) throws Exception {
        XPath xpath = XPathFactory.newInstance().newXPath();
        List<ServerEntry> atomEntries = new LinkedList<ServerEntry>();
        NodeList entries = (NodeList) xpath.evaluate("/feed/entry", doc, XPathConstants.NODESET);

        for (int i = 0; i < entries.getLength(); i++) {
            Element elmEntry = (Element) entries.item(i);
            String id = xpath.evaluate("id", elmEntry);
            String title = xpath.evaluate("title", elmEntry);

            String entryUrl = null;
            List<Enclosure> enclosures = new LinkedList<Enclosure>();
            NodeList linkList = (NodeList) xpath.evaluate("link", elmEntry, XPathConstants.NODESET);
            if (linkList != null) {
                for (int j = 0; j < linkList.getLength(); j++) {
                    Element elmLink = (Element) linkList.item(j);
                    String rel = elmLink.getAttribute("rel");
                    if (StringUtils.isBlank(rel) || rel.equals("alternate")) {
                        entryUrl = elmLink.getAttribute("href");
                    } else if (rel.equals("enclosure")) {
                        Enclosure enclosure = new Enclosure();
                        enclosure.setUrl(elmLink.getAttribute("href"));
                        enclosure.setType(elmLink.getAttribute("type"));
                        enclosures.add(enclosure);
                    }
                }
            }
            String updated = xpath.evaluate("updated", elmEntry);

            String content = xpath.evaluate("content", elmEntry);
            if (StringUtils.isBlank(content)) {
                content = xpath.evaluate("summary", elmEntry);
                content += "<p><a href=\"" + entryUrl + "\" target=\"_blank\">Lire la suite</a></p>";
            }

            ServerEntry atomEntry = new ServerEntry();
            atomEntry.setId(id);
            atomEntry.setTitle(title);
            atomEntry.setUrl(entryUrl);
            atomEntry.setEnclosures(enclosures);
            atomEntry.setContent(content);

            Date date = parseAtomDate(updated);
            if (date != null) {
                atomEntry.setDate(date);
            }

            if (isComplete(id, title, entryUrl)) {
                atomEntries.add(atomEntry);
            }
        }

        return atomEntries;
    }

    /** Tells whether an entry has the three fields required to keep it. */
    private static boolean isComplete(String id, String title, String url) {
        return StringUtils.isNotBlank(id) && StringUtils.isNotBlank(title) && StringUtils.isNotBlank(url);
    }

    /**
     * Parses an RSS <code>pubDate</code> value.
     *
     * @return the parsed date, or <code>null</code> if the value is blank or
     *         cannot be parsed (a warning is logged in the latter case)
     */
    private static Date parseRssDate(String pubDate) {
        if (StringUtils.isBlank(pubDate)) {
            return null;
        }
        // Locale.ENGLISH fixes the day/month names and also keeps a default
        // locale with a non-Gregorian calendar from reinterpreting the year.
        SimpleDateFormat formatter = new SimpleDateFormat(RSS_DATE_PATTERN, Locale.ENGLISH);
        formatter.setTimeZone(TimeZone.getTimeZone("GMT"));
        try {
            // Surrounding whitespace is common in pretty-printed feeds.
            return formatter.parse(pubDate.trim());
        } catch (ParseException e) {
            logger.warn("Unable to parse date <" + pubDate + ">", e);
            return null;
        }
    }

    /**
     * Parses an Atom <code>updated</code> timestamp. Fractional seconds are
     * discarded. Values ending in "Z" (or "z") are read as UTC; any other
     * value is expected to end in a "+hh:mm" / "-hh:mm" offset.
     *
     * @return the parsed date, or <code>null</code> if the value is too short
     *         or cannot be parsed (a warning is logged in the latter case)
     */
    private static Date parseAtomDate(String updated) {
        String value = StringUtils.trimToEmpty(updated);
        if (value.length() <= ATOM_DATE_TIME_LENGTH) {
            return null;
        }

        String dateTime = value.substring(0, ATOM_DATE_TIME_LENGTH);
        String pattern;
        String normalized;
        if (value.endsWith("Z") || value.endsWith("z")) {
            pattern = ATOM_UTC_DATE_PATTERN;
            normalized = dateTime;
        } else {
            // Turn the trailing "+hh:mm" into the "+hhmm" form that the 'Z'
            // pattern letter understands by removing the colon.
            String offset = value.substring(value.length() - 6);
            pattern = ATOM_OFFSET_DATE_PATTERN;
            normalized = dateTime + offset.substring(0, 3) + offset.substring(4);
        }

        SimpleDateFormat formatter = new SimpleDateFormat(pattern, Locale.ENGLISH);
        formatter.setTimeZone(TimeZone.getTimeZone("GMT"));
        try {
            return formatter.parse(normalized);
        } catch (ParseException e) {
            logger.warn("Unable to parse date <" + value + ">", e);
            return null;
        }
    }

    /** Returns the text of <code>/rss/channel/title</code> (empty if absent). */
    private String getRSSTitle(Document doc) throws XPathExpressionException {
        XPath xpath = XPathFactory.newInstance().newXPath();
        return xpath.evaluate("/rss/channel/title", doc);
    }

    /** Returns the text of <code>/feed/title</code> (empty if absent). */
    private String getAtomTitle(Document doc) throws XPathExpressionException {
        XPath xpath = XPathFactory.newInstance().newXPath();
        return xpath.evaluate("/feed/title", doc);
    }

    /** Returns the text of <code>/rss/channel/link</code> (empty if absent). */
    private String getRSSLink(Document doc) throws XPathExpressionException {
        XPath xpath = XPathFactory.newInstance().newXPath();
        return xpath.evaluate("/rss/channel/link", doc);
    }

    /**
     * Returns the href of the first feed-level link whose rel is missing,
     * blank or <code>alternate</code> (empty if there is none). A missing rel
     * is treated as <code>alternate</code>, the same rule the entry links
     * follow in {@link #getAtomEntries(Document)}.
     */
    private String getAtomLink(Document doc) throws XPathExpressionException {
        XPath xpath = XPathFactory.newInstance().newXPath();
        return xpath.evaluate(
                "/feed/link[not(@rel) or normalize-space(@rel)='' or @rel='alternate']/@href", doc);
    }

    /**
     * Detects the feed flavour from the root element name: <code>feed</code>
     * means Atom, anything else is treated as RSS 2.0.
     */
    private ServerNewsChannel.Type getType(Document doc) {
        return doc.getDocumentElement().getTagName().equals("feed") ? ServerNewsChannel.Type.ATOM
                : ServerNewsChannel.Type.RSS_2_0;
    }

}