org.infoscoop.web.MultiRssServlet.java Source code

Java tutorial

Introduction

Here is the source code for org.infoscoop.web.MultiRssServlet.java

Source

/* infoScoop OpenSource
 * Copyright (C) 2010 Beacon IT Inc.
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * as published by the Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-3.0-standalone.html>.
 */

package org.infoscoop.web;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.httpclient.ConnectTimeoutException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.infoscoop.dao.RssCacheDAO;
import org.infoscoop.dao.model.Rsscache;
import org.infoscoop.request.ProxyRequest;
import org.infoscoop.request.filter.RssFilter;
import org.infoscoop.request.filter.XMLFilter;
import org.infoscoop.request.filter.rss.AtomHandler;
import org.infoscoop.request.filter.rss.RssHandler;
import org.infoscoop.request.filter.rss.RssItem;
import org.infoscoop.request.filter.rss.RssJsonResultBuilder;
import org.infoscoop.request.filter.rss.RssRefineUtil;
import org.infoscoop.request.filter.rss.SortedRssJsonResultBuilder;
import org.infoscoop.util.NoOpEntityResolver;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

/**
 * Servlet that merges several RSS/Atom feeds into a single sorted, paged JSON
 * result for the MultiRssReader widget.
 * <p>
 * The POST body is an XML document of the following form (an optional
 * {@code clearCache="true"} attribute on the root and optional
 * {@code <header name="...">value</header>} children of {@code <rss>} are
 * also read):
 * <pre>
 * &lt;multiRss widgetId=""&gt;
 *   &lt;rss method="get" url="http://hoge/hoge.rss"/&gt;
 * &lt;/multiRss&gt;
 * </pre>
 *
 * @author hr-endoh
 */
public class MultiRssServlet extends HttpServlet {
    private static final long serialVersionUID = 1L;

    private static final Log log = LogFactory.getLog(MultiRssServlet.class);

    /** Timeout handed to {@link ProxyRequest} when the request supplies no usable one (presumably milliseconds). */
    private static final int DEFAULT_TIMEOUT = 15 * 1000;

    /** Max-count value handed to the feed handlers when X-IS-RSSMAXCOUNT is absent or invalid. */
    private static final int DEFAULT_MAX_COUNT = 1000;

    // The factories are configured once at class-initialization time instead of
    // being re-assigned from every constructor call.
    private static final SAXParserFactory factory = newSaxParserFactory();
    private static final DocumentBuilderFactory builderFactory = newDocumentBuilderFactory();

    /**
     * Creates the servlet. All shared parser factories are initialized statically.
     */
    public MultiRssServlet() {
    }

    /** Builds the namespace-aware, non-validating SAX factory used to parse feeds. */
    private static SAXParserFactory newSaxParserFactory() {
        SAXParserFactory saxFactory = SAXParserFactory.newInstance();
        saxFactory.setNamespaceAware(true);// We add it to pick up the element of the msd namespace.
        saxFactory.setValidating(false);
        return saxFactory;
    }

    /** Builds the non-validating DOM factory used to parse the request body. */
    private static DocumentBuilderFactory newDocumentBuilderFactory() {
        DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
        domFactory.setValidating(false);
        return domFactory;
    }

    /**
     * GET is not supported by this servlet; the response status is always set to 403.
     *
     * @param request
     *            the request sent by the client to the server
     * @param response
     *            the response sent by the server to the client
     * @throws ServletException
     *             if an error occurred
     * @throws IOException
     *             if an error occurred
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        response.setStatus(403);
    }

    /**
     * Handles a MultiRss request.
     * <p>
     * When the {@code X-IS-PAGE} header is present the cached page of that
     * number is returned; otherwise every feed listed in the request body is
     * fetched, merged and written back as JSON. Any failure is reported to the
     * client as status 500.
     *
     * @param request
     *            the request sent by the client to the server
     * @param response
     *            the response sent by the server to the client
     * @throws ServletException
     *             if an error occurred
     * @throws IOException
     *             if an error occurred
     */
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        String uid = (String) request.getSession().getAttribute("Uid");
        if (uid == null) {
            uid = request.getHeader("MSDPortal-SessionId");
        }

        String widgetId = null;
        NodeList urlList = null;
        boolean clearCache = false;
        try {
            DocumentBuilder builder = builderFactory.newDocumentBuilder();
            builder.setEntityResolver(NoOpEntityResolver.getInstance());

            Document requestDoc = builder.parse(request.getInputStream());
            Element root = requestDoc.getDocumentElement();

            widgetId = root.getAttribute("widgetId");
            urlList = root.getElementsByTagName("rss");
            clearCache = "true".equalsIgnoreCase(root.getAttribute("clearCache"));
        } catch (Exception e) {
            log.error(e.getMessage(), e);
            response.sendError(500, e.getMessage());
            return;
        }

        // Element.getAttribute returns "" (never null) for a missing attribute,
        // so the empty string has to be rejected as well.
        if (widgetId == null || widgetId.length() == 0) {
            log.error("Must specify widgetId in request body.");
            response.sendError(500, "Must specify widgetId in request body.");
            return;
        }

        int pageSize = -1;
        String pageSizeStr = request.getHeader("X-IS-PAGESIZE");
        if (pageSizeStr != null) {
            try {
                pageSize = Integer.parseInt(pageSizeStr);
            } catch (NumberFormatException ex) {
                log.warn("request header \"X-IS-PAGESIZE\" has invalid value \"" + pageSizeStr + "\"");
            }
        }

        String pageStr = request.getHeader("X-IS-PAGE");
        try {
            if (pageStr != null) {
                int pageNum = Integer.parseInt(pageStr);
                getPageJson(response, uid, widgetId, pageNum);
            } else {
                if (clearCache) {
                    RssCacheDAO.newInstance().deleteCacheByUrl(uid, widgetId);
                }

                mergeRssAnd2JSON(request, response, uid, widgetId, pageSize, urlList);//TODO: Should not be passed by NodeList.....
            }
        } catch (Exception e) {
            log.error("", e);
            response.sendError(500, e.getMessage());
        }
    }

    /**
     * Writes one cached result page to the response. Nothing is written when
     * the cache holds no entry for the requested page.
     *
     * @param response the response to write to
     * @param uid      the user id the cache entry belongs to
     * @param widgetId the widget id the cache entry belongs to
     * @param pageNum  the page number to return
     * @throws Exception if reading the cache or writing the response fails
     */
    private void getPageJson(HttpServletResponse response, String uid, String widgetId, int pageNum)
            throws Exception {
        InputStream jsonStream = RssCacheDAO.newInstance().getCache(uid, widgetId, pageNum);
        if (jsonStream == null) {
            return;
        }

        // Read the stream exactly once: measuring it via mark(1)/reset() is only
        // valid for streams that ignore the mark read limit.
        byte[] json;
        try {
            json = readFully(jsonStream);
        } finally {
            closeQuietly(jsonStream);
        }

        response.setHeader("Content-Type", "text/plain; charset=UTF-8");
        response.setContentLength(json.length);

        BufferedOutputStream bos = new BufferedOutputStream(response.getOutputStream());
        bos.write(json);
        bos.flush();
    }

    /**
     * Fetches every feed in {@code urlList}, merges the items (together with
     * the previously cached ones) into a sorted result, stores every result
     * page in the cache and writes the JSON result to the response.
     * <p>
     * Per-feed failures are collected and reported in an {@code errors} member
     * of the result; Last-Modified/ETag response headers of the feeds are
     * reported in a {@code cacheHeaders} member. If no feed was modified and no
     * error occurred the response status is set to 304 and no body is written.
     * Unexpected failures are logged and answered with status 500.
     *
     * @param request  the client request (headers carry formatting and filter options)
     * @param response the response to write to
     * @param uid      the portal user id
     * @param widgetId the widget id used as the cache key
     * @param pageSize the page size handed to the result builder
     * @param urlList  the {@code <rss>} elements of the request body
     * @throws Exception if reporting an unexpected failure to the client fails
     */
    private void mergeRssAnd2JSON(HttpServletRequest request, HttpServletResponse response, String uid,
            String widgetId, int pageSize, NodeList urlList) throws Exception {

        try {
            RssJsonResultBuilder resultBuilder = new SortedRssJsonResultBuilder(pageSize);

            String dateTimeFormat = decodeHeader(request, "X-IS-DATETIMEFORMAT");
            String freshTime = request.getHeader("X-IS-FRESHTIME");
            int maxCount = parseMaxCount(request.getHeader("X-IS-RSSMAXCOUNT"));

            // Narrowing filters
            String titleFilter = decodeHeader(request, "X-IS-TITLEFILTER");
            String creatorFilter = decodeHeader(request, "X-IS-CREATORFILTER");
            String categoryFilter = decodeHeader(request, "X-IS-CATEGORYFILTER");

            boolean modified = false;
            Map<String, Object[]> cacheHeaders = new HashMap<String, Object[]>();
            Map<String, Integer> errorMap = new HashMap<String, Integer>();
            for (int i = 0; i < urlList.getLength(); i++) {
                Element rssEl = (Element) urlList.item(i);
                String url = rssEl.getAttribute("url");

                ProxyRequest proxyRequest = new ProxyRequest(url, "NoOperation");
                proxyRequest.setLocales(request.getLocales());
                proxyRequest.setPortalUid(uid);

                // getIntHeader returns -1 when the header is absent, which falls back to the default.
                int timeout = request.getIntHeader("MSDPortal-Timeout") - 1000;
                proxyRequest.setTimeout((timeout > 0) ? timeout : DEFAULT_TIMEOUT);

                proxyRequest.addIgnoreHeader("user-agent");
                proxyRequest.addIgnoreHeader("X-IS-DATETIMEFORMAT");
                proxyRequest.addIgnoreHeader("X-IS-FRESHTIME");
                proxyRequest.addIgnoreHeader("X-IS-REFRESH");
                proxyRequest.addIgnoreHeader("X-IS-RSSMAXCOUNT");
                proxyRequest.addIgnoreHeader("X-IS-PAGESIZE");

                Enumeration headers = request.getHeaderNames();
                while (headers.hasMoreElements()) {
                    String headerName = (String) headers.nextElement();
                    proxyRequest.putRequestHeader(headerName, request.getHeader(headerName));
                }

                // Per-feed headers declared in the request body override/extend the client headers.
                NodeList rssChildNodes = rssEl.getElementsByTagName("header");
                for (int j = 0; j < rssChildNodes.getLength(); j++) {
                    Element header = (Element) rssChildNodes.item(j);
                    if (header.getFirstChild() == null) {
                        continue;
                    }
                    String name = header.getAttribute("name");
                    String value = header.getFirstChild().getNodeValue();
                    if (name == null || name.trim().length() == 0 || value == null
                            || value.trim().length() == 0) {
                        continue;
                    }
                    proxyRequest.putRequestHeader(name, value);
                }

                int statusCode = 0;
                String methodType = rssEl.getAttribute("method");
                try {
                    if ("post".equals(methodType)) {
                        statusCode = proxyRequest.executePost();
                    } else {
                        statusCode = proxyRequest.executeGet();
                    }
                } catch (SocketTimeoutException ex) {
                    log.error("url: [" + url + "] socket timeout.", ex);
                    errorMap.put(url, Integer.valueOf(HttpStatusCode.MSD_SC_TIMEOUT));
                } catch (ConnectTimeoutException ex) {
                    log.error("url: [" + url + "] connection timeout.", ex);
                    errorMap.put(url, Integer.valueOf(500));
                } catch (SocketException ex) {
                    log.error("url: [" + url + "] socket error.", ex);
                    errorMap.put(url, Integer.valueOf(HttpStatus.SC_NOT_FOUND));
                } catch (IOException ex) {
                    log.error("url: [" + url + "] I/O error.", ex);
                    errorMap.put(url, Integer.valueOf(HttpStatus.SC_NOT_FOUND));
                } catch (Exception ex) {
                    log.error("url: [" + url + "]" + ex.getMessage(), ex);
                    errorMap.put(url, Integer.valueOf(500));
                }

                BufferedInputStream bis = null;
                if (errorMap.containsKey(url)) {
                    // the request itself failed; nothing to read

                } else if (statusCode == 204) {
                    log.warn("url:[" + url + "] is no content #" + statusCode);
                    modified = true;
                } else if (statusCode == 304) {
                    log.warn("url:[" + url + "] is not modified #" + statusCode);
                } else if (statusCode != 200) {
                    log.error("url:[" + url + "] had error status code #" + statusCode);
                    errorMap.put(url, Integer.valueOf(statusCode));
                } else {
                    log.info("url:[" + url + "] is succed #" + statusCode);

                    BufferedInputStream rawBody = null;
                    try {
                        modified = true;

                        // Buffer the whole body so the remote stream can be released
                        // before the feed is sniffed and parsed.
                        rawBody = new BufferedInputStream(proxyRequest.getResponseBody());
                        byte[] data = readFully(rawBody);

                        Map<?, ?> responseHeaders = proxyRequest.getResponseHeaders();
                        List<Object> siteCacheHeaders = new ArrayList<Object>();

                        String lastModifiedName = "Last-Modified".toLowerCase();
                        if (responseHeaders.containsKey(lastModifiedName)) {
                            siteCacheHeaders.add(lastModifiedName);
                            siteCacheHeaders.add(responseHeaders.get(lastModifiedName));
                        }

                        String etagName = "ETag".toLowerCase();
                        if (responseHeaders.containsKey(etagName)) {
                            siteCacheHeaders.add(etagName);
                            siteCacheHeaders.add(responseHeaders.get(etagName));
                        }

                        if (siteCacheHeaders.size() > 0) {
                            cacheHeaders.put(url, siteCacheHeaders.toArray());
                        }

                        bis = new BufferedInputStream(new ByteArrayInputStream(data));
                    } catch (IOException ex) {
                        log.error("rss reading " + url + " is failed.", ex);
                        cacheHeaders.remove(url);
                        errorMap.put(url, Integer.valueOf(500));
                        bis = null;
                    } finally {
                        // Null-safe and non-throwing: a failure while closing one feed
                        // must not abort the whole merged response.
                        closeQuietly(rawBody);
                    }
                }

                if (bis == null) {
                    continue;
                }

                boolean isAtom = RssFilter.isAtom(bis);

                XMLFilter.skipEmptyLine(bis);
                RssHandler handler;
                if (isAtom) {
                    handler = new AtomHandler(resultBuilder, dateTimeFormat, freshTime, maxCount, titleFilter,
                            creatorFilter, categoryFilter, i);
                } else {
                    handler = new RssHandler(resultBuilder, dateTimeFormat, freshTime, maxCount, titleFilter,
                            creatorFilter, categoryFilter, i);
                }

                try {
                    XMLReader reader = factory.newSAXParser().getXMLReader();
                    reader.setEntityResolver(NoOpEntityResolver.getInstance());
                    reader.setContentHandler(handler);
                    reader.parse(new InputSource(bis));
                } catch (SAXException e) {
                    log.info("Parsing rss " + url + " is failed.", e);
                    cacheHeaders.remove(url);
                    errorMap.put(url, Integer.valueOf(HttpStatusCode.MSD_SC_CONTENT_PARSE_ERROR));
                }
            }

            if (!modified && errorMap.isEmpty()) {
                log.warn("multi rss is not modified.");
                response.setStatus(304);

                return;
            }

            try {
                long freshTimeLong = new Date().getTime();
                if (freshTime != null) {
                    freshTimeLong = Long.parseLong(freshTime.trim());
                }
                setOldData(resultBuilder, uid, widgetId, freshTimeLong, titleFilter, creatorFilter,
                        categoryFilter);
            } catch (NumberFormatException e) {
                log.error("", e);
            }

            // The result cache is always (re)created, even for a single page.
            int pageCount = resultBuilder.getPageCount();
            for (int pageNum = 0; pageNum < pageCount; pageNum++) {
                RssCacheDAO.newInstance().insertCache(uid, widgetId, pageNum, resultBuilder.getResult(pageNum));
            }

            response.addHeader("Content-Type", "text/plain; charset=UTF-8");

            String result = resultBuilder.getResult();

            // Extra members are spliced in right after the opening brace of the result object.
            if (!errorMap.isEmpty()) {
                JSONObject errors = new JSONObject(errorMap);
                result = "{errors:" + errors.toString() + "," + result.substring(result.indexOf("{") + 1);
            }
            if (!cacheHeaders.isEmpty()) {
                result = "{" + buildCacheHeadersJson(cacheHeaders) + ","
                        + result.substring(result.indexOf("{") + 1);
            }

            response.setContentLength(result.getBytes("UTF-8").length);

            OutputStreamWriter out = new OutputStreamWriter(response.getOutputStream(), "UTF-8");
            try {
                out.write(result);

                out.flush();
            } catch (SocketException ex) {
                // ignore client abort exception
            } finally {
                try {
                    out.close();
                } catch (IOException ex) {
                    // ignore
                }
            }
        } catch (Exception e) {
            log.error("unexpected error occurred.", e);
            response.sendError(500, e.getMessage());
        }

    }

    /**
     * Returns the URL-decoded (UTF-8) value of a request header.
     *
     * @param request the client request
     * @param name    the header name
     * @return the decoded value, or {@code null} if the header is absent
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    private static String decodeHeader(HttpServletRequest request, String name)
            throws UnsupportedEncodingException {
        String value = request.getHeader(name);
        return (value == null) ? null : URLDecoder.decode(value, "UTF-8");
    }

    /**
     * Parses the X-IS-RSSMAXCOUNT header value.
     *
     * @param maxCountString the raw header value, may be {@code null}
     * @return the parsed value if it is a non-negative integer, otherwise {@link #DEFAULT_MAX_COUNT}
     */
    private static int parseMaxCount(String maxCountString) {
        if (maxCountString == null) {
            return DEFAULT_MAX_COUNT;
        }
        try {
            int paramMaxCount = Integer.parseInt(maxCountString);
            if (paramMaxCount >= 0) {
                return paramMaxCount;
            }
        } catch (NumberFormatException e) {
            log.warn("rssmaxcount \"" + maxCountString + "\" isn't integer value.");
        }
        return DEFAULT_MAX_COUNT;
    }

    /**
     * Renders the collected per-feed cache headers as a {@code cacheHeaders : {...}} member.
     * URLs and header values come from the request body and from remote
     * servers, so both are escaped with {@link JSONObject#quote(String)}.
     *
     * @param cacheHeaders feed URL mapped to a flat array of alternating header names and values
     * @return the member text, without surrounding braces or trailing comma
     */
    private static String buildCacheHeadersJson(Map<String, Object[]> cacheHeaders) {
        StringBuffer buf = new StringBuffer();
        buf.append("cacheHeaders : {");
        for (Iterator<Map.Entry<String, Object[]>> entries = cacheHeaders.entrySet().iterator(); entries
                .hasNext();) {
            Map.Entry<String, Object[]> entry = entries.next();
            Object[] headers = entry.getValue();

            buf.append(JSONObject.quote(entry.getKey())).append(" : {");
            for (int i = 0; i < headers.length; i += 2) {
                buf.append("\"").append(headers[i]).append("\"");
                buf.append(" : ").append(JSONObject.quote(String.valueOf(headers[i + 1])));
                if (i + 2 < headers.length) {
                    buf.append(",");
                }
            }
            buf.append("}");
            if (entries.hasNext()) {
                buf.append(",");
            }
        }
        buf.append("}");
        return buf.toString();
    }

    /**
     * Developer-only entry point that queries the RSS cache for a fixed test
     * user/widget and prints an elapsed time and the builder's item count.
     * It is not used by the servlet itself.
     *
     * @param args ignored
     */
    public static void main(String args[]) {

        RssJsonResultBuilder resultBuilder = new SortedRssJsonResultBuilder(5);
        RssCacheDAO.newInstance().getCaches("test", "tab10001_p_nikkeiBP");
        long start = System.currentTimeMillis();
        //setOldData(resultBuilder, "test", "tab10001_p_nikkeiBP", 1);
        System.out.println(System.currentTimeMillis() - start);
        System.out.println(resultBuilder.getItemCount());
    }

    /**
     * Feeds the items of all cached result pages of a widget back into the
     * result builder, skipping items that do not match the creator or title
     * filter. A cache page that cannot be decoded or parsed is logged and
     * skipped (items added before the failure stay in the builder).
     *
     * @param resultBuilder  the builder receiving the cached items
     * @param uid            the user id the cache belongs to
     * @param widgetId       the widget id the cache belongs to
     * @param freshTime      the fresh-time value passed along with every item
     * @param titleFilter    title filter, handed to {@code RssRefineUtil.matchTitle}
     * @param creatorFilter  creator filter, handed to {@code RssRefineUtil.matchCreator}
     * @param categoryFilter currently not applied to cached items
     */
    private static void setOldData(RssJsonResultBuilder resultBuilder, String uid, String widgetId, long freshTime,
            String titleFilter, String creatorFilter, String categoryFilter) {
        List<?> cacheList = RssCacheDAO.newInstance().getCaches(uid, widgetId);
        if (cacheList == null) {
            return;
        }

        for (int i = 0; i < cacheList.size(); i++) {
            Rsscache cache = (Rsscache) cacheList.get(i);

            String rssText;
            try {
                rssText = new String(cache.getRss(), "UTF-8");
            } catch (UnsupportedEncodingException e) {
                log.error(e.getMessage(), e);
                continue;
            }

            try {
                JSONObject rssJson = new JSONObject(rssText);

                if (!rssJson.has("items")) {
                    continue;
                }

                JSONArray items = rssJson.getJSONArray("items");
                for (int j = 0; j < items.length(); j++) {
                    JSONObject itemJson = items.getJSONObject(j);

                    String creator = itemJson.has("creator") ? getJSONString(itemJson.getString("creator")) : null;
                    if (!RssRefineUtil.matchCreator(creator, creatorFilter)) {
                        continue;
                    }

                    String title = getJSONString(itemJson.getString("title"));
                    if (!RssRefineUtil.matchTitle(title, titleFilter)) {
                        continue;
                    }

                    Map propMap = new HashMap();
                    RssItem item = new RssItem(title, getJSONString(itemJson.getString("link")),
                            itemJson.has("description") ? getJSONString(itemJson.getString("description")) : null,
                            itemJson.has("dateLong") ? new Date(itemJson.getLong("dateLong")) : null,
                            itemJson.has("date") ? itemJson.getString("date") : null,
                            creator != null ? creator : "",
                            itemJson.has("creatorImg") ? getJSONString(itemJson.getString("creatorImg")) : null,
                            toList(itemJson.optJSONArray("category")), propMap);

                    // A missing "rssUrlIndex" must not discard the rest of the cached page.
                    JSONArray urlIndex = itemJson.optJSONArray("rssUrlIndex");
                    if (urlIndex != null) {
                        for (int k = 0; k < urlIndex.length(); k++) {
                            item.addRssUrlIndex(Integer.valueOf(urlIndex.getInt(k)));
                        }
                    }
                    resultBuilder.addItem(freshTime, item);
                }
            } catch (JSONException e) {
                log.error(e.getMessage(), e);
                // Keep the offending payload available for diagnosis without
                // writing to a hard-coded file on the server.
                if (log.isDebugEnabled()) {
                    log.debug("Unparsable cached rss json of widget [" + widgetId + "]: " + rssText);
                }
            }
        }
    }

    /**
     * Converts a JSON array into a list of strings.
     *
     * @param array the array to convert, may be {@code null}
     * @return the elements as strings, or {@code null} if the array is {@code null} or empty
     */
    private static List<String> toList(JSONArray array) {
        if (array == null || array.length() == 0) {
            return null;
        }
        List<String> list = new ArrayList<String>();
        for (int i = 0; i < array.length(); i++) {
            list.add(array.optString(i));
        }
        return list;
    }

    /**
     * Escapes a value for embedding in a JSON string literal.
     *
     * @param _value the raw value
     * @return the result of {@link JSONObject#quote(String)} without its surrounding quotes
     */
    private static String getJSONString(String _value) {
        String value = JSONObject.quote(_value);
        return value.substring(1, value.length() - 1);
    }

    /**
     * Reads a stream until end-of-stream. The stream is not closed.
     *
     * @param is the stream to read
     * @return all bytes read from the stream
     * @throws IOException if reading fails
     */
    private static byte[] readFully(InputStream is) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] buf = new byte[10240];
        int c;
        while ((c = is.read(buf)) != -1) {
            baos.write(buf, 0, c);
        }
        return baos.toByteArray();
    }

    /**
     * Closes a stream, ignoring {@code null} and any failure raised by the close itself.
     *
     * @param is the stream to close, may be {@code null}
     */
    private static void closeQuietly(InputStream is) {
        if (is == null) {
            return;
        }
        try {
            is.close();
        } catch (IOException ignored) {
            // best effort: the data has already been consumed or abandoned
        }
    }

}