com.celamanzi.liferay.portlets.rails286.HeadProcessor.java Source code

Introduction

Here is the source code for com.celamanzi.liferay.portlets.rails286.HeadProcessor.java
Source

/**
 * Copyright (c) 2008 Mikael Lammentausta
 *               2010 Boris Kirillov
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package com.celamanzi.liferay.portlets.rails286;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.Node;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.MetaTag;
import org.htmlparser.tags.ScriptTag;
import org.htmlparser.tags.StyleTag;
import org.htmlparser.tags.TitleTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.SimpleNodeIterator;

public class HeadProcessor {

    // logger
    private final Log log = LogFactory.getLog(getClass().getName());

    // class variables
    private java.net.URL baseUrl = null;
    private String servlet = null;
    private String location = null; // baseURl + servlet
    private String namespace = "";

    // the page header information is stored into these variables.
    protected String title = "";
    protected String content_type = null;
    protected String encoding = null;
    private String[] javascript_blacklist = { "jquery" }; // regexps

    /** Constructors */
    protected HeadProcessor(String servlet, String namespace) {
        this.servlet = servlet;
        this.namespace = namespace;
        this.location = this.servlet;
        log.debug("Initializing HeadProcessor @: " + this.location);
    }

    protected HeadProcessor(String servlet, java.net.URL baseUrl, String namespace) {
        this.servlet = servlet;
        this.baseUrl = baseUrl;
        this.namespace = namespace;
        this.location = baseUrl.getProtocol() + "://" + baseUrl.getHost() + ":" + baseUrl.getPort() + servlet;
        log.debug("Initializing HeadProcessor @: " + this.location);
    }

    /** Parses <head>.
     *
     * Replaces <html><head> with <div id="%namespace%_head">.
     * Extracts title, some of the metadata, JavaScript and CSS tags.
     *
     */
    protected NodeList process(NodeList head) throws ParserException, Exception {
        String headString = "\n<div id=\"" + namespace + "_head\">\n";

        /** Extract <title> */
        NodeList titletags = head.extractAllNodesThatMatch(new NodeClassFilter(TitleTag.class), true);
        log.debug(titletags.size() + " title tags found");
        for (SimpleNodeIterator i = titletags.elements(); i.hasMoreNodes();) {
            TitleTag n = (TitleTag) i.nextNode();
            title += n.getTitle();
            log.debug(title);
        }

        /** Parse <meta> */
        NodeList meta = head.extractAllNodesThatMatch(new NodeClassFilter(MetaTag.class), true);
        log.debug(meta.size() + " meta tags found");
        for (SimpleNodeIterator i = meta.elements(); i.hasMoreNodes();) {
            MetaTag n = (MetaTag) i.nextNode();
            String nCheck = n.getHttpEquiv();
            if (nCheck != null && nCheck.equals("content-type")) {
                log.debug("Parsing meta content-type");
                String c = n.getMetaContent();
                //<meta http-equiv="content-type" content="text/html; charset=UTF-8"></meta>
                content_type = c.split(";")[0];
                encoding = c.split("=")[1];

                log.debug("content_type: " + content_type);
                log.debug("encoding: " + encoding);
            }
        }

        /** CSS */
        NodeList css_links = head.extractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("link"), new HasAttributeFilter("rel", "stylesheet")), true);
        log.debug(css_links.size() + " css link tags found");
        //if (css_link.size() > 0) {
        for (Node node : css_links.toNodeArray()) {
            log.debug("StyleTag (href) encountered, including it to body");
            TagNode tag = (TagNode) node;
            String src = tag.getAttribute("href");
            Pattern pattern = Pattern.compile("^http");
            Matcher matcher = pattern.matcher(src);
            if (!matcher.find()) {
                // insert javascript host location
                tag.setAttribute("href", location + src);
            }
            headString += tag.toHtml();
        }

        NodeList css_style = head.extractAllNodesThatMatch(new NodeClassFilter(StyleTag.class), true);
        log.debug(css_style.size() + " style tags found");
        if (css_style.size() > 0) {
            // headString += makeInlineCSS(css_link,css_style);
            /** Just include the original CSS.
             * @since 0.3.0
             */
            log.debug("StyleTag (inline) encountered, including it to body");
            // NOTE: breaks body {} tag
            headString += css_style.toHtml();
        }

        /** JavaScript */
        NodeList scriptTags = head.extractAllNodesThatMatch(new NodeClassFilter(ScriptTag.class), true);
        log.debug(scriptTags.size() + " script tags found");
        if (scriptTags.size() > 0) {

            log.debug("Parsing <script> tags");
            for (SimpleNodeIterator i = scriptTags.elements(); i.hasMoreNodes();) {
                ScriptTag tag = (ScriptTag) i.nextNode();

                /** Does the tag contain inline code, or a link to src? */
                if (tag.getScriptCode().length() > 0) {
                    log.warn("Script tag contains inline JavaScript -- note that IT IS NOT PARSED CORRECTLY.");
                    /**
                    FIXME: HTMLparser 1.6 cannot handle inline JavaScript that contains HTML tags.
                     */
                    headString += tag.toHtml();
                } else {
                    //log.debug("Link to src - checking JavaScript blacklist");
                    //if (JavaScriptBlacklistFilter(tag)) {
                    /** @since 0.11.1 - do not check JavaScript blacklist */
                    // blacklist check passed, include the original JS.
                    String src = tag.getAttribute("src");
                    Pattern pattern = Pattern.compile("^http");
                    Matcher matcher = pattern.matcher(src);
                    if (!matcher.find()) {
                        // insert javascript host location
                        tag.setAttribute("src", location + src);
                    }
                    log.debug(tag.toHtml());
                    headString += tag.toHtml();
                    //}
                }
            } // iterator
        } // JS tags

        /** Close the <namespace>_head Div. */
        headString += "\n</div>\n";

        // Parse the String form to NodeList
        NodeList ret = new NodeList();

        /* @since 0.7.0: does not handle ParserException
         */
        // try {
        Parser parser = new Parser(headString);
        ret = parser.parse(null);

        log.debug("Done");
        return ret;
    }

    /** Compare the JavaScript src to the blacklist.
     * @since 0.3.0
     */
    private boolean JavaScriptBlacklistFilter(ScriptTag tag) throws Exception {
        String src = tag.getAttribute("src");
        if (src == null) {
            // inline script tag has already been catched, so this may mean that the tag is invalid?
            return false;
        } else {
            //log.debug("Checking if "+src+" is in blacklist");
            for (int j = 0; j < javascript_blacklist.length; j++) {
                /** Blacklist match */
                if (src.matches(".*" + javascript_blacklist[j] + ".*")) {
                    log.debug(src + " is blacklisted!");
                    return false;
                }
            }
            /** Accept JS */
            log.debug(src + " is ok");
            return true;
        }
    }

}