at.newsagg.utils.ParserUtils.java Source code

Java tutorial

Introduction

Here is the source code for at.newsagg.utils.ParserUtils.java

Source

//
// // Informa -- RSS Library for Java
// Copyright (c) 2002 by Niko Schmuck
//
// Niko Schmuck
// http://sourceforge.net/projects/informa
// mailto:niko_schmuck@users.sourceforge.net
//
// This library is free software.
//
// You may redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation.
//
// Version 2.1 of the license should be included with this distribution in
// the file LICENSE. If the license is not included with this distribution,
// you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
// or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge,
// MA 02139 USA.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied waranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//

// $Id: ParserUtils.java,v 1.4 2005/09/16 13:05:51 vecego Exp $

package at.newsagg.utils;

import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jdom.Element;
import org.jdom.Namespace;

/**
 * Utility class providing convenience methods to (XML) parsing mechanisms.
 * 
 * @author Niko Schmuck (niko@nava.de)
 */
public final class ParserUtils {

    private static Log logger = LogFactory.getLog(ParserUtils.class);

    private ParserUtils() {
    }

    public static URL getURL(String toURL) {
        URL result = null;
        try {
            if ((toURL != null) && (toURL.trim().length() > 0))
                result = new URL(toURL);
        } catch (java.net.MalformedURLException e) {
            logger.warn("Invalid URL " + toURL + " given.");
        }
        return result;
    }

    public static Namespace getDefaultNS(Element element) {
        return getNamespace(element, "");
    }

    public static Namespace getNamespace(Element element, String prefix) {
        //    Namespace ns = null;
        //    Iterator it = element.getAdditionalNamespaces().iterator();
        //    while (it.hasNext()) {
        //      Namespace curNS = (Namespace) it.next();
        //      if (curNS.getPrefix().equals(prefix)) {
        //        ns = curNS;
        //        break;
        //      }
        //    }
        Namespace ns = (prefix == null) ? element.getNamespace("") : element.getNamespace(prefix);
        return ns;
    }

    private static SimpleDateFormat[] dateFormats = null;

    static {
        final String[] possibleDateFormats = { "EEE, dd MMM yyyy HH:mm:ss z", //RFC_822
                "yyyy-MM-dd'T'HH:mm:ssZ", "yyyy-MM-dd'T'HH:mm:sszzzz", "yyyy-MM-dd'T'HH:mm:ss z",
                "yyyy-MM-dd'T'HH:mm:ssz", //ISO_8601
                "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd'T'HHmmss.SSSz", "yyyy-MM-dd'T'HH:mm:ss'Z'",
                "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd" };

        dateFormats = new SimpleDateFormat[possibleDateFormats.length];
        TimeZone gmtTZ = TimeZone.getTimeZone("GMT");
        for (int i = 0; i < possibleDateFormats.length; i++) {
            dateFormats[i] = new SimpleDateFormat(possibleDateFormats[i], Locale.ENGLISH);
            dateFormats[i].setTimeZone(gmtTZ);
        }

    }

    // Mon, 07 Oct 2002 03:16:15 GMT
    private static SimpleDateFormat dfA = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.ENGLISH);

    // 2002-09-19T02:51:16+0200
    private static SimpleDateFormat dfB = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");

    // 2002-09-19T02:51:16
    private static SimpleDateFormat dfC = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");

    // 2002-09-19
    private static SimpleDateFormat dfD = new SimpleDateFormat("yyyy-MM-dd");

    public static Date getDate(String strdate) {
        Date result = new Date();
        strdate = strdate.trim();
        if (strdate.length() > 10) {

            // TODO deal with +4:00 (no zero before hour)
            if ((strdate.substring(strdate.length() - 5).indexOf("+") == 0
                    || strdate.substring(strdate.length() - 5).indexOf("-") == 0)
                    && strdate.substring(strdate.length() - 5).indexOf(":") == 2) {

                String sign = strdate.substring(strdate.length() - 5, strdate.length() - 4);

                strdate = strdate.substring(0, strdate.length() - 5) + sign + "0"
                        + strdate.substring(strdate.length() - 4);
                // logger.debug("CASE1 : new date " + strdate + " ? "
                //    + strdate.substring(0, strdate.length() - 5));

            }

            String dateEnd = strdate.substring(strdate.length() - 6);

            // try to deal with -05:00 or +02:00 at end of date
            // replace with -0500 or +0200
            if ((dateEnd.indexOf("-") == 0 || dateEnd.indexOf("+") == 0) && dateEnd.indexOf(":") == 3) {
                // TODO deal with GMT-00:03
                if ("GMT".equals(strdate.substring(strdate.length() - 9, strdate.length() - 6))) {
                    logger.debug("General time zone with offset, no change ");
                } else {
                    // continue treatment
                    String oldDate = strdate;
                    String newEnd = dateEnd.substring(0, 3) + dateEnd.substring(4);
                    strdate = oldDate.substring(0, oldDate.length() - 6) + newEnd;
                    // logger.debug("!!modifying string ->"+strdate);
                }
            }
        }
        int i = 0;
        while (i < dateFormats.length) {
            try {
                result = dateFormats[i].parse(strdate);
                // logger.debug("******Parsing Success "+strdate+"->"+result+" with
                // "+dateFormats[i].toPattern());
                break;
            } catch (java.text.ParseException eA) {
                logger.debug("parsing " + strdate + " [" + dateFormats[i].toPattern()
                        + "] without success, trying again.");
                i++;
            }
        }

        return result;
    }

    /**
     * Tries different date formats to parse against the given string
     * representation to retrieve a valid Date object.
     */
    public static Date getDateOLD(String strdate) {
        Date result = null;

        try {
            result = dfA.parse(strdate);
        } catch (java.text.ParseException eA) {
            logger.warn("Error parsing date (A): " + eA.getMessage());
            try {
                result = dfB.parse(strdate);
            } catch (java.text.ParseException eB) {
                logger.warn("Error parsing date (B): " + eB.getMessage());
                try {
                    result = dfC.parse(strdate);
                    // try to retrieve the timezone anyway
                    result = extractTimeZone(strdate, result);
                } catch (java.text.ParseException eC) {
                    logger.warn("Error parsing date (C): " + eC.getMessage());
                    try {
                        result = dfD.parse(strdate);
                    } catch (java.text.ParseException eD) {
                        logger.warn("Error parsing date (D): " + eD.getMessage());
                        eD.printStackTrace();
                    }
                }
            }
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Parsing date '" + strdate + "' resulted in: " + result);
        }
        if (result == null) {
            logger.warn("No appropiate date could be extracted from " + strdate);

        }
        return result;
    }

    private static Date extractTimeZone(String strdate, Date thedate) {
        // try to extract -06:00
        String tzSign = strdate.substring(strdate.length() - 6, strdate.length() - 5);
        String tzHour = strdate.substring(strdate.length() - 5, strdate.length() - 3);
        String tzMin = strdate.substring(strdate.length() - 2);
        if (tzSign.equals("-") || tzSign.equals("+")) {
            int h = Integer.parseInt(tzHour);
            int m = Integer.parseInt(tzMin);
            // NOTE: this is really plus, since perspective is from GMT
            if (tzSign.equals("+")) {
                h = -1 * h;
                m = -1 * m;
            }
            Calendar cal = Calendar.getInstance();
            cal.setTime(thedate);
            cal.add(Calendar.HOUR_OF_DAY, h);
            cal.add(Calendar.MINUTE, m);
            // calculate according the used timezone
            cal.add(Calendar.MILLISECOND, localTimeDiff(cal.getTimeZone(), thedate));
            thedate = cal.getTime();
        }
        return thedate;
    }

    private static int localTimeDiff(TimeZone tz, Date date) {
        if (tz.inDaylightTime(date)) {
            int dstSavings = 0;
            if (tz.useDaylightTime()) {
                dstSavings = 3600000; // shortcut, JDK 1.4 allows cleaner impl
            }
            return tz.getRawOffset() + dstSavings;
        }
        return tz.getRawOffset();
    }

    public static String formatDate(Date aDate) {
        return dfA.format(aDate);
    }

    public static String decodeBase64(String s) {
        //use private class
        return Base64Decoder.decode(s);
    }

    public static String escapeTags(String s) {
        String value = s;
        value = value.replaceAll("<", "&lt;");
        value = value.replaceAll(">", "&gt;");
        return value;
    }

    public static String unEscape(String s) {
        String value = s;
        value = value.replaceAll("&lt;", "<");
        value = value.replaceAll("&gt;", ">");
        value = value.replaceAll("&amp;", "&");
        value = value.replaceAll("&quot;", "\"");
        value = value.replaceAll("&apos;", "'");
        return value;
    }

}