net.sasasin.sreader.batch.ContentHeaderDriver.java Source code

Java tutorial

Introduction

Here is the source code for net.sasasin.sreader.batch.ContentHeaderDriver.java

Source

/*
 * SReader is RSS/Atom feed reader with full text.
 *
 * Copyright (C) 2011, Shinnosuke Suzuki <sasasin@sasasin.net>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as 
 * published by the Free Software Foundation, either version 3 of
 * the License, or any later version.
 *   
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program.
 * If not, see <http://www.gnu.org/licenses/>.
 */
package net.sasasin.sreader.batch;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import net.sasasin.sreader.commons.dao.ContentHeaderDao;
import net.sasasin.sreader.commons.dao.FeedUrlDao;
import net.sasasin.sreader.commons.dao.impl.ContentHeaderDaoHibernateImpl;
import net.sasasin.sreader.commons.dao.impl.FeedUrlDaoHibernateImpl;
import net.sasasin.sreader.commons.entity.ContentHeader;
import net.sasasin.sreader.commons.entity.FeedUrl;
import net.sasasin.sreader.commons.util.Md5Util;
import net.sasasin.sreader.commons.util.impl.WgetHttpComponentsImpl;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.io.FeedException;
import com.sun.syndication.io.SyndFeedInput;
import com.sun.syndication.io.XmlReader;

/**
 * Batch driver that reads every feed returned by
 * {@link FeedUrlDao#findIfExistsSubscriber()}, converts each feed entry into a
 * {@link ContentHeader}, and saves the headers that are not stored yet.
 *
 * @author sasasin
 */
public class ContentHeaderDriver {
    private static final Logger logger = LoggerFactory.getLogger("net.sasasin.sreader.batch");

    private final FeedUrlDao feedUrlDao = new FeedUrlDaoHibernateImpl();
    private final ContentHeaderDao contentHeaderDao = new ContentHeaderDaoHibernateImpl();

    /**
     * Downloads the RSS/Atom feed behind the given {@link FeedUrl} and builds
     * one {@link ContentHeader} per feed entry.
     *
     * @param f
     *            the feed to download; its {@code getUrl()} value is fetched
     * @return the headers built from the feed; empty when the feed could not
     *         be downloaded or parsed (the failure is logged, not thrown)
     */
    public Set<ContentHeader> fetch(FeedUrl f) {
        Set<ContentHeader> c = new HashSet<ContentHeader>();
        fetchByRome(f, c);
        return c;
    }

    /**
     * Parses the feed with Rome and adds one {@link ContentHeader} per entry
     * to {@code c}.
     * <p>
     * A failure to download or parse the feed itself is logged and ends the
     * method. A failure on a single entry is logged and only that entry is
     * skipped, so one broken link does not discard the rest of the feed.
     *
     * @param f
     *            the feed being read
     * @param c
     *            the set that receives the generated headers
     */
    private void fetchByRome(FeedUrl f, Set<ContentHeader> c) {
        InputStream is = null;
        XmlReader reader = null;
        try {
            // Download the feed document.
            // NOTE(review): toInputStream(String) encodes with the platform
            // default charset while XmlReader sniffs the encoding from the
            // document itself - confirm these agree for non-ASCII feeds.
            is = IOUtils.toInputStream(new WgetHttpComponentsImpl(new URL(f.getUrl())).read());
            // Parse it with Rome.
            reader = new XmlReader(is);
            SyndFeed feed = new SyndFeedInput().build(reader);

            // This Rome API returns a raw List; its elements are SyndEntry.
            @SuppressWarnings("unchecked")
            List<SyndEntry> entries = (List<SyndEntry>) feed.getEntries();

            for (SyndEntry entry : entries) {
                logger.info("{} processing {}", this.getClass().getSimpleName(), entry.getLink());
                try {
                    c.add(toContentHeader(f, entry));
                } catch (IOException e) {
                    // Also covers MalformedURLException (e.g. a missing link).
                    logger.error("Skipped entry " + entry.getLink() + " of feed " + f.getUrl(), e);
                } catch (IllegalArgumentException e) {
                    logger.error("Skipped entry " + entry.getLink() + " of feed " + f.getUrl(), e);
                }
            }
        } catch (IOException e) {
            // Also covers MalformedURLException for the feed URL itself.
            logger.error("Failed to read feed " + f.getUrl(), e);
        } catch (IllegalArgumentException e) {
            logger.error("Failed to read feed " + f.getUrl(), e);
        } catch (FeedException e) {
            logger.error("Failed to parse feed " + f.getUrl(), e);
        } finally {
            IOUtils.closeQuietly(reader);
            IOUtils.closeQuietly(is);
        }
    }

    /**
     * Builds the {@link ContentHeader} for a single feed entry.
     * <p>
     * The stored URL is whatever
     * {@code WgetHttpComponentsImpl#getOriginalUrl()} returns for the entry
     * link; the original author's note says this is the target of an HTTP 30x
     * redirect, or the link itself when there is no redirect (helper not
     * visible here - confirm). The header id is the MD5 of that URL.
     *
     * @param f
     *            the feed the entry belongs to
     * @param entry
     *            the feed entry to convert
     * @return the populated header
     * @throws IOException
     *             if the entry link is not a valid URL or resolving it fails
     */
    private ContentHeader toContentHeader(FeedUrl f, SyndEntry entry) throws IOException {
        URL entryUrl = new WgetHttpComponentsImpl(new URL(entry.getLink())).getOriginalUrl();

        ContentHeader ch = new ContentHeader();
        ch.setUrl(entryUrl.toString());
        ch.setId(Md5Util.crypt(ch.getUrl()));
        ch.setTitle(entry.getTitle());
        ch.setFeedUrl(f);
        return ch;
    }

    /**
     * Saves every header whose id is not already known to the
     * {@link ContentHeaderDao}; headers with an existing id are left alone.
     *
     * @param chs
     *            the headers to import
     */
    public void importContentHeader(Set<ContentHeader> chs) {
        for (ContentHeader ch : chs) {
            // Look the header up by id first ...
            ContentHeader existing = contentHeaderDao.get(ch.getId());
            if (existing == null) {
                // ... and save it only when it is not stored yet.
                contentHeaderDao.save(ch);
            }
        }
    }

    /**
     * Fetches and imports the headers of every feed returned by
     * {@link FeedUrlDao#findIfExistsSubscriber()}, logging start and end.
     */
    public void run() {
        logger.info("{} is started.", this.getClass().getSimpleName());

        for (FeedUrl fu : feedUrlDao.findIfExistsSubscriber()) {
            // RSS/Atom feed to Set<ContentHeader>, then into the DAO.
            Set<ContentHeader> s = this.fetch(fu);
            this.importContentHeader(s);
        }

        logger.info("{} is ended.", this.getClass().getSimpleName());
    }

    /**
     * Command-line entry point: imports RSS/Atom entries into the
     * content_header table.
     *
     * @param args
     *            unused
     */
    public static void main(String[] args) {
        new ContentHeaderDriver().run();
    }

}