com.none.tom.simplerssreader.opml.OPMLParser.java Source code

Java tutorial

Introduction

Here is the source code for com.none.tom.simplerssreader.opml.OPMLParser.java

Source

// Copyright (c) 2017-2018, Tom Geiselmann
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
// and associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY,WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

package com.none.tom.simplerssreader.opml;

import android.text.TextUtils;
import android.util.Xml;
import android.webkit.URLUtil;

import com.none.tom.simplerssreader.utils.XmlPullParserUtils;

import org.apache.commons.text.StringEscapeUtils;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import static com.none.tom.simplerssreader.opml.Constants.*;

@SuppressWarnings({ "StatementWithEmptyBody", "SameParameterValue", "unused", "WeakerAccess" })
public class OPMLParser {
    /** Parse mode in which no subscription-list specific outline checks are applied. */
    public static final int MODE_DEFAULT = 0;
    /**
     * Parse mode in which {@code validateSubscriptionListOutline} rejects any outline it checks
     * that lacks a {@code type} or an {@code xmlUrl}.
     */
    public static final int MODE_VALIDATE_SUBSCRIPTION_LIST = 1;

    /**
     * Parses an OPML document from {@code in} and closes the stream before returning.
     *
     * <p>The root element is accepted only when the detected input encoding is UTF-8, the element
     * carries exactly one attribute, and a non-empty supported {@code version} can be read from it.
     *
     * @param in   stream holding the document; closed in all cases when non-null
     * @param mode {@link #MODE_DEFAULT} or {@link #MODE_VALIDATE_SUBSCRIPTION_LIST}
     * @return the parsed document
     * @throws OPMLParserException    if the root element or any nested content fails validation
     * @throws XmlPullParserException if the underlying pull parser reports an error
     * @throws IOException            if reading or closing the stream fails
     */
    public static OPMLFile parse(final InputStream in, final int mode)
            throws OPMLParserException, XmlPullParserException, IOException {
        final XmlPullParser xpp = Xml.newPullParser();

        try {
            xpp.setInput(in, null);
            xpp.nextTag();

            final String detectedEncoding = xpp.getInputEncoding();
            final boolean isUtf8 = !TextUtils.isEmpty(detectedEncoding)
                    && detectedEncoding.equalsIgnoreCase(StandardCharsets.UTF_8.name());

            // <opml> must contain only one attribute; "version"
            final boolean rootAccepted = isUtf8
                    && xpp.getAttributeCount() == 1
                    && !TextUtils.isEmpty(validateVersion(xpp, false));

            if (!rootAccepted) {
                throw new OPMLParserException(xpp);
            }

            return parseOPML(xpp, mode);
        } finally {
            if (in != null) {
                in.close();
            }
        }
    }

    /**
     * Parses the {@code <opml>} root element the parser is currently positioned on.
     *
     * <p>A {@code <head>} child is handed to {@code parseHead}; every other child start tag is
     * handed to {@code parseBody}, which itself insists on the tag being {@code <body>}.
     *
     * @param parser pull parser positioned on the {@code <opml>} start tag
     * @param mode   parse mode forwarded to the body parser
     * @return the populated file
     * @throws OPMLParserException    if the resulting file reports itself as empty, or a child
     *                                fails validation
     * @throws XmlPullParserException if the parser is not on the expected start/end tag
     * @throws IOException            if reading from the underlying input fails
     */
    private static OPMLFile parseOPML(final XmlPullParser parser, final int mode)
            throws OPMLParserException, XmlPullParserException, IOException {
        parser.require(XmlPullParser.START_TAG, null, OPML);

        final OPMLFile opml = new OPMLFile();
        final String opmlVersion = parser.getAttributeValue(null, VERSION);

        for (int event = parser.next(); event != XmlPullParser.END_TAG; event = parser.next()) {
            if (event == XmlPullParser.START_TAG) {
                if (parser.getName().equals(HEAD)) {
                    opml.setHead(parseHead(parser, opmlVersion));
                } else {
                    opml.setBody(parseBody(parser, opml.getHead(), opmlVersion, mode));
                }
            }
        }

        // Both <head> and <body> are required
        if (opml.isEmpty()) {
            throw new OPMLParserException(parser);
        }

        parser.require(XmlPullParser.END_TAG, null, OPML);

        return opml;
    }

    /**
     * Parses the {@code <head>} element the parser is currently positioned on.
     *
     * <p>Known child elements are validated and stored in the head builder; unknown children are
     * skipped. The {@code <docs>} and {@code <ownerId>} elements carry their URL as element text,
     * so they are validated with {@code validateUrlText} rather than the attribute-based
     * {@code validateUrl}.
     *
     * @param parser  pull parser positioned on the {@code <head>} start tag
     * @param version OPML version of the document, forwarded to date parsing
     * @return the built head
     * @throws OPMLParserException    if a child element holds an invalid value
     * @throws XmlPullParserException if the parser is not on the expected start/end tag
     * @throws IOException            if reading from the underlying input fails
     */
    private static OPMLFile.Head parseHead(final XmlPullParser parser, final String version)
            throws OPMLParserException, XmlPullParserException, IOException {
        parser.require(XmlPullParser.START_TAG, null, HEAD);

        final OPMLFile.Head.Builder head = new OPMLFile.Head.Builder();

        while (parser.next() != XmlPullParser.END_TAG) {
            if (parser.getEventType() != XmlPullParser.START_TAG) {
                continue;
            }

            // The event type and tag name are already established here, so no additional
            // START_TAG check is needed per case.
            final String name = parser.getName();

            switch (name) {
            case DATE_CREATED:
                head.setDateCreated(validateDateTime(parser, version));
                break;
            case DATE_MODIFIED:
                head.setDateModified(validateDateTime(parser, version));
                break;
            case DOCS:
                head.setDocs(validateUrlText(parser));
                break;
            case EXPANSION_STATE:
                head.setExpansionState(validateExpansionState(parser));
                break;
            case OWNER_EMAIL:
                head.setOwnerEmail(XmlPullParserUtils.getText(parser));
                break;
            case OWNER_ID:
                head.setOwnerId(validateUrlText(parser));
                break;
            case OWNER_NAME:
                head.setOwnerName(XmlPullParserUtils.getText(parser));
                break;
            case TITLE:
                head.setTitle(XmlPullParserUtils.getText(parser));
                break;
            case VERT_SCROLL_STATE:
                head.setVertScrollState(validateInteger(parser));
                break;
            case WINDOW_BOTTOM:
                head.setWindowBottom(validateInteger(parser));
                break;
            case WINDOW_LEFT:
                head.setWindowLeft(validateInteger(parser));
                break;
            case WINDOW_RIGHT:
                head.setWindowRight(validateInteger(parser));
                break;
            case WINDOW_TOP:
                head.setWindowTop(validateInteger(parser));
                break;
            default:
                // Unknown element: skip it; no end-tag check is applied to skipped elements.
                XmlPullParserUtils.skip(parser);
                continue;
            }

            // Every recognised element must have been consumed up to its own end tag.
            parser.require(XmlPullParser.END_TAG, null, name);
        }

        parser.require(XmlPullParser.END_TAG, null, HEAD);

        return head.build();
    }

    /**
     * Reads the text content of the current element and checks that it is a valid URL.
     *
     * @param parser pull parser positioned on the start tag of a text-only element
     * @return the trimmed URL, or {@code null} if the element has no text
     * @throws OPMLParserException    if the text is non-empty but not a valid URL
     * @throws XmlPullParserException if the text cannot be read
     * @throws IOException            if reading from the underlying input fails
     */
    private static String validateUrlText(final XmlPullParser parser)
            throws OPMLParserException, XmlPullParserException, IOException {
        final String text = XmlPullParserUtils.getText(parser);
        // Surrounding whitespace (e.g. from pretty-printed XML) is not part of the URL.
        final String url = text == null ? null : text.trim();

        if (TextUtils.isEmpty(url)) {
            return null;
        }

        if (!URLUtil.isValidUrl(url)) {
            throw new OPMLParserException(parser);
        }

        return url;
    }

    /**
     * Parses the {@code <body>} element the parser is currently positioned on.
     *
     * <p>Each child start tag is parsed as an outline; results that are an {@code OutlineGroup}
     * are added as groups, everything else is added as a plain outline.
     *
     * @param parser  pull parser positioned on the {@code <body>} start tag
     * @param head    head parsed so far (may be {@code null}); forwarded to outline parsing
     * @param version OPML version of the document
     * @param mode    parse mode forwarded to outline parsing
     * @return the built body
     * @throws OPMLParserException    if the built body reports itself as empty, or an outline
     *                                fails validation
     * @throws XmlPullParserException if the parser is not on the expected start/end tag
     * @throws IOException            if reading from the underlying input fails
     */
    private static OPMLFile.Body parseBody(final XmlPullParser parser, final OPMLFile.Head head,
            final String version, final int mode) throws OPMLParserException, XmlPullParserException, IOException {
        parser.require(XmlPullParser.START_TAG, null, BODY);

        final OPMLFile.Body.Builder builder = new OPMLFile.Body.Builder();

        for (int event = parser.next(); event != XmlPullParser.END_TAG; event = parser.next()) {
            if (event == XmlPullParser.START_TAG) {
                final Object parsed = parseOutlineOrOutlineGroup(parser, head, version, mode);

                if (!(parsed instanceof OutlineGroup)) {
                    builder.addOutline((Outline) parsed);
                } else {
                    builder.addOutlineGroup((OutlineGroup) parsed);
                }
            }
        }

        parser.require(XmlPullParser.END_TAG, null, BODY);

        final OPMLFile.Body built = builder.build();

        // <body> must contain at least one <outline>
        if (!built.isEmpty()) {
            return built;
        }

        throw new OPMLParserException(parser);
    }

    /**
     * Parses the {@code <outline>} element the parser is currently positioned on.
     *
     * <p>An outline with a non-empty {@code xmlUrl} is returned as a single {@code Outline}.
     * An outline without one is treated as a group: its {@code text} and {@code title} label the
     * group and each direct {@code <outline>} child becomes a sub-outline.
     *
     * @param parser  pull parser positioned on an {@code <outline>} start tag
     * @param head    head of the document; not used by this method
     * @param version OPML version of the document
     * @param mode    parse mode used for subscription-list validation
     * @return an {@code Outline} or an {@code OutlineGroup}
     * @throws OPMLParserException    if an outline fails validation
     * @throws XmlPullParserException if the parser is not on the expected start/end tag
     * @throws IOException            if reading from the underlying input fails
     */
    private static Object parseOutlineOrOutlineGroup(final XmlPullParser parser, final OPMLFile.Head head,
            final String version, final int mode) throws OPMLParserException, XmlPullParserException, IOException {
        parser.require(XmlPullParser.START_TAG, null, OUTLINE);

        final Outline first = getOutline(parser, version, mode);

        // Leaf outline: it points at a feed itself.
        if (!TextUtils.isEmpty(first.getXmlUrl())) {
            validateSubscriptionListOutline(parser, first, mode);
            advanceToEndTag(parser);
            parser.require(XmlPullParser.END_TAG, null, OUTLINE);

            return first;
        }

        // No xmlUrl: the outline only labels a group of sub-outlines.
        final OutlineGroup.Builder group = new OutlineGroup.Builder().setText(first.getText())
                .setTitle(first.getTitle());

        for (int event = parser.next(); event != XmlPullParser.END_TAG; event = parser.next()) {
            if (event != XmlPullParser.START_TAG || !parser.getName().equals(OUTLINE)) {
                continue;
            }

            parser.require(XmlPullParser.START_TAG, null, OUTLINE);

            final Outline child = getOutline(parser, version, mode);
            validateSubscriptionListOutline(parser, child, mode);
            advanceToEndTag(parser);
            parser.require(XmlPullParser.END_TAG, null, OUTLINE);

            group.addSubOutline(child);
        }

        parser.require(XmlPullParser.END_TAG, null, OUTLINE);

        return group.build();
    }

    /**
     * Pulls events from the parser until an {@code END_TAG} event is reported, regardless of which
     * element that end tag belongs to.
     */
    private static void advanceToEndTag(final XmlPullParser parser)
            throws XmlPullParserException, IOException {
        int event;

        do {
            event = parser.next();
        } while (event != XmlPullParser.END_TAG);
    }

    /**
     * Builds an {@code Outline} from the attributes of the start tag the parser is positioned on.
     *
     * <p>Attributes are validated in the same order in which they are later handed to the builder.
     *
     * @param parser  pull parser positioned on an {@code <outline>} start tag
     * @param version OPML version of the document, used when parsing the {@code created} date
     * @param mode    parse mode forwarded to the URL and type validators
     * @return the built outline
     * @throws OPMLParserException    if any attribute fails validation
     * @throws XmlPullParserException if the pull parser reports an error
     * @throws IOException            if reading from the underlying input fails
     */
    private static Outline getOutline(final XmlPullParser parser, final String version, final int mode)
            throws OPMLParserException, XmlPullParserException, IOException {
        final List<String> category = validateCategory(parser);
        final DateTime created = validateDateTime(parser, version, CREATED);
        final String description = validateDescription(parser);
        final String htmlUrl = validateUrl(parser, HTML_URL, mode);
        final Boolean isBreakPoint = validateBoolean(parser, IS_BREAKPOINT);
        final Boolean isComment = validateBoolean(parser, IS_COMMENT);
        final String language = parser.getAttributeValue(null, LANGUAGE);
        final String text = validateText(parser);
        final String title = validateTitle(parser);
        final String type = validateType(parser, mode);
        final String url = validateUrl(parser, URL, mode);
        final String outlineVersion = validateVersion(parser, true);
        final String xmlUrl = validateUrl(parser, XML_URL, mode);

        return new Outline.Builder()
                .setCategory(category)
                .setCreated(created)
                .setDescription(description)
                .setHtmlUrl(htmlUrl)
                .setIsBreakPoint(isBreakPoint)
                .setIsComment(isComment)
                .setLanguage(language)
                .setText(text)
                .setTitle(title)
                .setType(type)
                .setUrl(url)
                .setVersion(outlineVersion)
                .setXmlUrl(xmlUrl)
                .build();
    }

    /**
     * Reads a boolean attribute from the current start tag.
     *
     * @param parser  pull parser positioned on a start tag
     * @param element name of the attribute to read
     * @return {@code Boolean.valueOf} of the attribute value; a missing or empty attribute
     *         therefore yields {@code false}
     * @throws OPMLParserException if the attribute is non-empty and neither {@code "true"} nor
     *                             {@code "false"}
     */
    private static Boolean validateBoolean(final XmlPullParser parser, final String element)
            throws OPMLParserException {
        final String raw = parser.getAttributeValue(null, element);
        final boolean present = !TextUtils.isEmpty(raw);

        if (present && !raw.equals("false") && !raw.equals("true")) {
            throw new OPMLParserException(parser);
        }

        return Boolean.valueOf(raw);
    }

    /**
     * Reads the comma-separated {@code category} attribute from the current start tag.
     *
     * <p>Each entry is trimmed and a single leading {@code '/'} is removed when present. Entries
     * without a leading slash are kept unchanged, and empty entries (for example from
     * {@code "a,,b"}) are skipped instead of causing an index error.
     *
     * @param parser pull parser positioned on a start tag
     * @return the list of categories, or {@code null} if the attribute is missing or empty
     * @throws OPMLParserException declared for consistency with the other validators; not thrown
     *                             by this method
     */
    private static List<String> validateCategory(final XmlPullParser parser) throws OPMLParserException {
        final String category = StringEscapeUtils.unescapeXml(parser.getAttributeValue(null, CATEGORY));

        if (TextUtils.isEmpty(category)) {
            return null;
        }

        final String[] categories = category.split(",");
        final List<String> result = new ArrayList<>(categories.length);

        for (final String rawCategory : categories) {
            final String subCategory = rawCategory.trim();

            if (subCategory.isEmpty()) {
                continue;
            }

            // Only strip the slash that marks a category path; never chop a real character.
            result.add(subCategory.startsWith("/") ? subCategory.substring(1) : subCategory);
        }

        return result;
    }

    /**
     * Reads a date either from an attribute of the current start tag or from the text of the
     * current element and parses it.
     *
     * <p>For OPML versions {@code 1.0} and {@code 1.1} only {@code RFC_822_EXTENDED[1]} is tried;
     * for any other version every pattern in {@code RFC_822_EXTENDED} is tried in order.
     * Formatters are pinned to {@link java.util.Locale#US} so that textual day and month names
     * are matched independently of the device's default locale (assumes the patterns contain
     * English names, as the RFC 822 naming suggests).
     *
     * @param parser  pull parser positioned on the relevant start tag
     * @param version OPML version of the document
     * @param element optional; when given, its first entry is the attribute name to read,
     *                otherwise the element text is read
     * @return the parsed date, or {@code null} if the value is missing or empty
     * @throws OPMLParserException    if the value matches none of the applicable patterns
     * @throws XmlPullParserException if the element text cannot be read
     * @throws IOException            if reading from the underlying input fails
     */
    private static DateTime validateDateTime(final XmlPullParser parser, final String version,
            final String... element) throws OPMLParserException, XmlPullParserException, IOException {
        final String value;

        if (element.length > 0) {
            value = parser.getAttributeValue(null, element[0]);
        } else {
            value = XmlPullParserUtils.getText(parser);
        }

        if (TextUtils.isEmpty(value)) {
            return null;
        }

        // Constant-first comparison keeps a missing version from causing a NullPointerException.
        if ("1.0".equals(version) || "1.1".equals(version)) {
            try {
                return DateTimeFormat.forPattern(RFC_822_EXTENDED[1]).withLocale(java.util.Locale.US)
                        .parseDateTime(value);
            } catch (final IllegalArgumentException e) {
                throw new OPMLParserException(parser);
            }
        }

        for (final String pattern : RFC_822_EXTENDED) {
            try {
                return DateTimeFormat.forPattern(pattern).withLocale(java.util.Locale.US).parseDateTime(value);
            } catch (final IllegalArgumentException ignored) {
                // This pattern did not match; fall through to the next candidate.
            }
        }

        throw new OPMLParserException(parser);
    }

    /**
     * Reads the {@code description} attribute from the current start tag and XML-unescapes it.
     *
     * @param parser pull parser positioned on a start tag
     * @return the result of unescaping the attribute value
     */
    private static String validateDescription(final XmlPullParser parser) {
        final String rawDescription = parser.getAttributeValue(null, DESCRIPTION);

        return StringEscapeUtils.unescapeXml(rawDescription);
    }

    /**
     * Reads the text of the current element as a comma-separated list of integers.
     *
     * <p>Each entry is trimmed and parsed directly as a number; blank entries are skipped.
     *
     * @param parser pull parser positioned on the start tag of the element
     * @return the parsed integers in document order, or {@code null} if the element has no text
     * @throws OPMLParserException    if an entry is not a valid integer
     * @throws XmlPullParserException if the element text cannot be read
     * @throws IOException            if reading from the underlying input fails
     */
    private static List<Integer> validateExpansionState(final XmlPullParser parser)
            throws OPMLParserException, XmlPullParserException, IOException {
        final String expansionState = XmlPullParserUtils.getText(parser);

        if (TextUtils.isEmpty(expansionState)) {
            return null;
        }

        final String[] expansionStates = expansionState.split(",");
        final List<Integer> expansionStateInts = new ArrayList<>(expansionStates.length);

        for (final String rawState : expansionStates) {
            final String state = rawState.trim();

            if (state.isEmpty()) {
                continue;
            }

            // Parse the entry itself; it is a value, not the name of an attribute to look up.
            try {
                expansionStateInts.add(Integer.valueOf(state));
            } catch (final NumberFormatException e) {
                throw new OPMLParserException(parser);
            }
        }

        return expansionStateInts;
    }

    /**
     * Reads an integer either from an attribute of the current start tag or from the text of the
     * current element.
     *
     * @param parser  pull parser positioned on the relevant start tag
     * @param element optional; when given, its first entry is the attribute name to read,
     *                otherwise the element text is read
     * @return the parsed integer, or {@code 0} if the value is missing or empty
     * @throws OPMLParserException    if the value is non-empty but not a valid integer
     * @throws XmlPullParserException if the element text cannot be read
     * @throws IOException            if reading from the underlying input fails
     */
    private static Integer validateInteger(final XmlPullParser parser, final String... element)
            throws OPMLParserException, XmlPullParserException, IOException {
        final String raw = element.length > 0
                ? parser.getAttributeValue(null, element[0])
                : XmlPullParserUtils.getText(parser);

        if (TextUtils.isEmpty(raw)) {
            return 0;
        }

        try {
            return Integer.parseInt(raw);
        } catch (final NumberFormatException e) {
            throw new OPMLParserException(parser);
        }
    }

    /**
     * In {@link #MODE_VALIDATE_SUBSCRIPTION_LIST}, rejects outlines that lack a {@code type} or an
     * {@code xmlUrl}; in any other mode this is a no-op.
     *
     * @param parser  pull parser, used only to construct the exception
     * @param outline outline to check
     * @param mode    current parse mode
     * @throws OPMLParserException if the mode requires it and either value is empty
     */
    private static void validateSubscriptionListOutline(final XmlPullParser parser, final Outline outline,
            final int mode) throws OPMLParserException {
        if (mode != MODE_VALIDATE_SUBSCRIPTION_LIST) {
            return;
        }

        if (TextUtils.isEmpty(outline.getType()) || TextUtils.isEmpty(outline.getXmlUrl())) {
            throw new OPMLParserException(parser);
        }
    }

    /**
     * Reads the mandatory {@code text} attribute from the current start tag.
     *
     * <p>The value is XML-unescaped once for the emptiness check and a second time for the
     * returned result.
     *
     * @param parser pull parser positioned on a start tag
     * @return the unescaped attribute value
     * @throws OPMLParserException if the attribute is missing or empty
     */
    private static String validateText(final XmlPullParser parser) throws OPMLParserException {
        final String unescapedOnce = StringEscapeUtils.unescapeXml(parser.getAttributeValue(null, TEXT));

        if (TextUtils.isEmpty(unescapedOnce)) {
            throw new OPMLParserException(parser);
        }

        return StringEscapeUtils.unescapeXml(unescapedOnce);
    }

    /**
     * Reads the {@code title} attribute from the current start tag; this parser treats it as
     * mandatory.
     *
     * <p>The value is XML-unescaped once for the emptiness check and a second time for the
     * returned result.
     *
     * @param parser pull parser positioned on a start tag
     * @return the unescaped attribute value
     * @throws OPMLParserException if the attribute is missing or empty
     */
    private static String validateTitle(final XmlPullParser parser) throws OPMLParserException {
        final String unescapedOnce = StringEscapeUtils.unescapeXml(parser.getAttributeValue(null, TITLE));

        if (TextUtils.isEmpty(unescapedOnce)) {
            throw new OPMLParserException(parser);
        }

        return StringEscapeUtils.unescapeXml(unescapedOnce);
    }

    /**
     * Reads and checks the {@code type} attribute of the current start tag.
     *
     * <p>Accepted values are {@code "rss"}, {@code "link"} and {@code "include"}. For
     * {@code "link"} and {@code "include"} the outline must additionally carry a non-empty, valid
     * {@code url} attribute. A missing or empty type is returned unchanged without any check.
     *
     * @param parser pull parser positioned on an {@code <outline>} start tag
     * @param mode   parse mode forwarded to {@code validateUrl}
     * @return the attribute value, possibly {@code null}
     * @throws OPMLParserException if the type is not one of the accepted values, or a
     *                             link/include outline has no valid {@code url}
     */
    private static String validateType(final XmlPullParser parser, final int mode) throws OPMLParserException {
        final String type = parser.getAttributeValue(null, TYPE);

        if (TextUtils.isEmpty(type)) {
            return type;
        }

        switch (type) {
        case "rss":
            break;
        case "link":
        case "include":
            // validateUrl expects the attribute NAME and looks the value up itself.
            if (TextUtils.isEmpty(validateUrl(parser, URL, mode))) {
                throw new OPMLParserException(parser);
            }
            break;
        default:
            throw new OPMLParserException(parser);
        }

        return type;
    }

    /**
     * Reads a URL attribute from the current start tag, XML-unescapes it and checks it.
     *
     * @param parser  pull parser positioned on a start tag
     * @param element name of the attribute holding the URL
     * @param mode    parse mode; not used by this method
     * @return the unescaped attribute value, which may be {@code null} or empty
     * @throws OPMLParserException if the value is non-empty but not a valid URL
     */
    private static String validateUrl(final XmlPullParser parser, final String element, final int mode)
            throws OPMLParserException {
        final String candidate = StringEscapeUtils.unescapeXml(parser.getAttributeValue(null, element));
        final boolean acceptable = TextUtils.isEmpty(candidate) || URLUtil.isValidUrl(candidate);

        if (!acceptable) {
            throw new OPMLParserException(parser);
        }

        return candidate;
    }

    /**
     * Reads and checks the {@code version} attribute of the current start tag.
     *
     * <p>Inside an outline the accepted values are {@code "RSS1"} and {@code "RSS"}; otherwise
     * they are {@code "1.0"}, {@code "1.1"} and {@code "2.0"}. A missing or empty attribute is
     * returned unchanged without any check.
     *
     * @param parser    pull parser positioned on a start tag
     * @param inOutline {@code true} when validating an outline's version rather than the
     *                  document's
     * @return the attribute value, possibly {@code null}
     * @throws OPMLParserException if the value is non-empty and not accepted
     */
    private static String validateVersion(final XmlPullParser parser, final boolean inOutline)
            throws OPMLParserException {
        final String version = parser.getAttributeValue(null, VERSION);

        if (TextUtils.isEmpty(version)) {
            return version;
        }

        final boolean supported;

        if (inOutline) {
            supported = version.equals("RSS1") || version.equals("RSS");
        } else {
            supported = version.equals("1.0") || version.equals("1.1") || version.equals("2.0");
        }

        if (!supported) {
            throw new OPMLParserException(parser);
        }

        return version;
    }
}