at.newsagg.parser.FeedParser.java Source code

Java tutorial

Introduction

Here is the source code for at.newsagg.parser.FeedParser.java

Source

//
// Informa -- RSS Library for Java
// Copyright (c) 2002 by Niko Schmuck
//
// Niko Schmuck
// http://sourceforge.net/projects/informa
// mailto:niko_schmuck@users.sourceforge.net
//
// This library is free software.
//
// You may redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation.
//
// Version 2.1 of the license should be included with this distribution in
// the file LICENSE. If the license is not included with this distribution,
// you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
// or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge,
// MA 02139 USA.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//

// $Id: FeedParser.java,v 1.1 2005/04/21 19:41:09 vecego Exp $

package at.newsagg.parser;

import java.io.File;
import java.io.IOException;
import java.net.URL;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.xml.sax.InputSource;

import at.newsagg.model.parser.*;

//import de.nava.informa.utils.NoOpEntityResolver;

/**
 * Parser class which allows reading in of RSS and Atom news channels.
 *
 * <p>This class only reads the XML document and inspects its root element;
 * the concrete rules for how the XML elements map to the channel object
 * model are delegated to the version specific parser objects that are
 * injected through the setters below.</p>
 *
 * <p>Dispatching rules (see {@link #parse(ChannelIF, Document)}):</p>
 * <ul>
 *   <li>root name starts with <code>rss</code>: versions 0.91, 0.92 and 0.93
 *       go to the RSS 0.91 parser, versions 0.94 and 2.0 go to the RSS 2.0
 *       parser;</li>
 *   <li>root name contains <code>rdf</code>: RSS 1.0 parser;</li>
 *   <li>root name contains <code>feed</code>: versions 0.1, 0.2 and 0.3 go
 *       to the Atom 0.3 parser.</li>
 * </ul>
 *
 * Modified for use in Springbreak.
 *
 * @author Niko Schmuck
 * @author Roland Vecera
 */
public class FeedParser {

    private static final Log logger = LogFactory.getLog(FeedParser.class);

    /** Handles RSS 0.91 documents (also used as fallback for 0.92 and 0.93). */
    private RSS_0_91_ParserIF rss091;
    /** Handles RSS 1.0 (RDF) documents. */
    private RSS_1_0_ParserIF rss10;
    /** Handles RSS 2.0 documents (also used as fallback for 0.94). */
    private RSS_2_0_ParserIF rss20;
    /** Handles Atom 0.3 documents (also used as fallback for 0.1 and 0.2). */
    private Atom_0_3_ParserIF atom03;

    /**
     * @return Returns the atom03 parser.
     */
    public Atom_0_3_ParserIF getAtom03() {
        return atom03;
    }

    /**
     * @param atom03 The atom03 parser to set.
     */
    public void setAtom03(Atom_0_3_ParserIF atom03) {
        this.atom03 = atom03;
    }

    /**
     * @return Returns the rss091 parser.
     */
    public RSS_0_91_ParserIF getRss091() {
        return rss091;
    }

    /**
     * @param rss091 The rss091 parser to set.
     */
    public void setRss091(RSS_0_91_ParserIF rss091) {
        this.rss091 = rss091;
    }

    /**
     * @return Returns the rss10 parser.
     */
    public RSS_1_0_ParserIF getRss10() {
        return rss10;
    }

    /**
     * @param rss10 The rss10 parser to set.
     */
    public void setRss10(RSS_1_0_ParserIF rss10) {
        this.rss10 = rss10;
    }

    /**
     * @return Returns the rss20 parser.
     */
    public RSS_2_0_ParserIF getRss20() {
        return rss20;
    }

    /**
     * @param rss20 The rss20 parser to set.
     */
    public void setRss20(RSS_2_0_ParserIF rss20) {
        this.rss20 = rss20;
    }

    // End of getters/setters
    //----------------------------------------------------------------------
    // Parser helpers

    /**
     * Reads and parses the feed found at the given URL.
     *
     * @param cBuilder channel object that is handed to the version specific parser
     * @param aURL location of the feed; also stored as the channel location
     * @return the channel returned by the version specific parser
     * @throws IOException if the document cannot be read
     * @throws ParseException if the document is not well-formed or its format is unsupported
     */
    public ChannelIF parse(ChannelIF cBuilder, URL aURL) throws IOException, ParseException {
        return parse(cBuilder, new InputSource(aURL.toExternalForm()), aURL);
    }

    /**
     * Reads and parses the feed identified by the given string.
     *
     * @param cBuilder channel object that is handed to the version specific parser
     * @param url system identifier of the feed, normally a URL
     * @return the channel returned by the version specific parser
     * @throws IOException if the document cannot be read
     * @throws ParseException if the document is not well-formed or its format is unsupported
     */
    public ChannelIF parse(ChannelIF cBuilder, String url) throws IOException, ParseException {
        URL aURL = null;
        try {
            aURL = new URL(url);
        } catch (java.net.MalformedURLException e) {
            // Best effort: the raw string is still used as the system id below,
            // only the channel location stays null.
            logger.warn("Could not create URL for " + url);
        }
        return parse(cBuilder, new InputSource(url), aURL);
    }

    /**
     * Reads and parses the feed stored in the given file.
     *
     * @param cBuilder channel object that is handed to the version specific parser
     * @param aFile file containing the feed document
     * @return the channel returned by the version specific parser
     * @throws IOException if the file cannot be expressed as a URL or cannot be read
     * @throws ParseException if the document is not well-formed or its format is unsupported
     */
    public ChannelIF parse(ChannelIF cBuilder, File aFile) throws IOException, ParseException {
        URL aURL;
        try {
            // File.toURL() is deprecated because it does not escape characters
            // that are illegal in URLs; going through toURI() does.
            aURL = aFile.toURI().toURL();
        } catch (java.net.MalformedURLException e) {
            IOException ioe = new IOException("File " + aFile + " had invalid URL " + "representation.");
            ioe.initCause(e);
            throw ioe;
        }
        return parse(cBuilder, new InputSource(aURL.toExternalForm()), aURL);
    }

    /**
     * Builds a JDOM document from the given input source (without validation),
     * parses it and records <code>baseLocation</code> as the channel location.
     *
     * @param cBuilder channel object that is handed to the version specific parser
     * @param inpSource source the XML document is read from
     * @param baseLocation value passed to <code>setLocation</code> on the result; may be null
     * @return the channel returned by the version specific parser
     * @throws IOException if the document cannot be read
     * @throws ParseException if JDOM cannot build the document or its format is unsupported
     */
    public ChannelIF parse(ChannelIF cBuilder, InputSource inpSource, URL baseLocation)
            throws IOException, ParseException {
        // document reading without validation
        SAXBuilder saxBuilder = new SAXBuilder(false);
        // NOTE(review): the entity resolver that would turn off DTD loading is
        // disabled, so external DTDs/entities referenced by a feed are presumably
        // still fetched. Confirm whether this is acceptable for untrusted feeds.
        // saxBuilder.setEntityResolver(new NoOpEntityResolver());
        try {
            Document doc = saxBuilder.build(inpSource);
            ChannelIF channel = parse(cBuilder, doc);
            channel.setLocation(baseLocation);
            return channel;
        } catch (JDOMException e) {
            throw new ParseException(e);
        }
    }

    // ------------------------------------------------------------
    // Actual dispatching logic: decides which feed variant the document
    // contains and calls the matching parser.
    // ------------------------------------------------------------

    /**
     * Inspects the root element of <code>doc</code> and delegates to the
     * parser responsible for the detected format.
     *
     * @param cBuilder channel object that is handed to the version specific parser
     * @param doc the already built XML document
     * @return the channel returned by the version specific parser
     * @throws ParseException if the root element or its version is not supported,
     *         or if the delegate parser fails
     * @throws IllegalArgumentException if <code>cBuilder</code> is null
     */
    private synchronized ChannelIF parse(ChannelIF cBuilder, Document doc) throws ParseException {

        if (cBuilder == null) {
            throw new IllegalArgumentException("Without builder no channel can " + "be created.");
        }
        logger.debug("start parsing.");
        Element root = doc.getRootElement();
        String rootElement = root.getName().toLowerCase();
        // Decide which parser to use
        if (rootElement.startsWith("rss")) {
            return parseRss(cBuilder, root, rootElement);
        } else if (rootElement.indexOf("rdf") >= 0) {
            return rss10.parse(cBuilder, root);
        } else if (rootElement.indexOf("feed") >= 0) {
            return parseAtom(cBuilder, root, rootElement);
        }

        // did not match anything
        throw new UnsupportedFormatException("Unsupported root element [" + rootElement + "].");
    }

    /** Dispatches a document with an rss root element according to its version attribute. */
    private ChannelIF parseRss(ChannelIF cBuilder, Element root, String rootElement)
            throws ParseException {
        String rssVersion = requireVersion(root, rootElement);
        if (rssVersion.indexOf("0.91") >= 0) {
            logger.info("Channel uses RSS root element (Version 0.91).");
            return rss091.parse(cBuilder, root);
        } else if (rssVersion.indexOf("0.92") >= 0) {
            logger.info("Channel uses RSS root element (Version 0.92).");
            // TODO: support RSS 0.92 when aware of all subtle differences.
            return rss091.parse(cBuilder, root);
        } else if (rssVersion.indexOf("0.93") >= 0) {
            logger.info("Channel uses RSS root element (Version 0.93).");
            logger.warn("RSS 0.93 not fully supported yet, fall back to 0.91.");
            // TODO: support RSS 0.93 when aware of all subtle differences.
            return rss091.parse(cBuilder, root);
        } else if (rssVersion.indexOf("0.94") >= 0) {
            logger.info("Channel uses RSS root element (Version 0.94).");
            logger.warn("RSS 0.94 not fully supported yet, will use RSS 2.0");
            // TODO: support RSS 0.94 when aware of all subtle differences.
            return rss20.parse(cBuilder, root);
        } else if (rssVersion.indexOf("2.0") >= 0 || rssVersion.equals("2")) {
            logger.info("Channel uses RSS root element (Version 2.0).");
            return rss20.parse(cBuilder, root);
        }
        throw new UnsupportedFormatException("Unsupported RSS version [" + rssVersion + "].");
    }

    /** Dispatches a document with a feed (Atom) root element according to its version attribute. */
    private ChannelIF parseAtom(ChannelIF cBuilder, Element root, String rootElement)
            throws ParseException {
        String feedVersion = requireVersion(root, rootElement);
        if (feedVersion.indexOf("0.1") >= 0 || feedVersion.indexOf("0.2") >= 0) {
            logger.info("Channel uses feed root element (Version " + feedVersion + ").");
            logger.warn("This atom version is not really supported yet, assume Atom 0.3 format");
            return atom03.parse(cBuilder, root);
        } else if (feedVersion.indexOf("0.3") >= 0) {
            logger.info("Channel uses feed root element (Version 0.3).");
            return atom03.parse(cBuilder, root);
        }
        throw new UnsupportedFormatException("Unsupported feed version [" + feedVersion + "].");
    }

    /** Returns the root's version attribute, rejecting documents that do not declare one. */
    private static String requireVersion(Element root, String rootElement) throws ParseException {
        // getAttributeValue yields null for a missing attribute; without this
        // check such feeds would fail with a NullPointerException.
        String version = root.getAttributeValue("version");
        if (version == null) {
            throw new UnsupportedFormatException(
                    "Missing version attribute on root element [" + rootElement + "].");
        }
        return version;
    }

    // ==========================================================
}