org.radeox.filter.XHTMLFilter.java Source code

Java tutorial

Introduction

Here is the source code for org.radeox.filter.XHTMLFilter.java

Source

/*
 * This file is part of "SnipSnap Radeox Rendering Engine".
 *
 * Copyright (c) 2002 Stephan J. Schmidt, Matthias L. Jugel
 * All Rights Reserved.
 *
 * Please visit http://radeox.org/ for updates and contact.
 *
 * --LICENSE NOTICE--
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * --LICENSE NOTICE--
 */

package org.radeox.filter;

import java.io.ByteArrayOutputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Stack;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xml.serializer.ToXMLStream;
import org.radeox.api.engine.context.InitialRenderContext;
import org.radeox.filter.context.FilterContext;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;

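/*
 * The XHTML filter parses the rendered text as XML and writes it back out,
 * closing any open <p> around block-level elements and dropping empty <p/>
 * elements. (This header was carried over from ParagraphFilter, hence the
 * version tag below.)
 *
 * @author stephan @team sonicteam
 *
 * @version $Id: ParagraphFilter.java 4158 2005-11-25 23:25:19Z
 *          ian@caret.cam.ac.uk $
 */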

/**
 * Hand-coded XHTML clean-up filter.
 *
 * The input is wrapped in a synthetic {@code <sr>} root element, parsed with a
 * namespace-aware SAX parser and pushed through a small handler chain:
 *
 * <pre>
 * XMLReader -&gt; DeblockFilter -&gt; EmptyFilter -&gt; SpecialXHTMLSerializer
 * </pre>
 *
 * The serialized result, minus the {@code <sr>} wrapper, is returned. If
 * anything goes wrong (for example the input is not well-formed XML) the
 * problem is logged and the input is returned unchanged.
 */
public class XHTMLFilter implements Filter, CacheFilter {
    private static final Log log = LogFactory.getLog(XHTMLFilter.class);

    /** Shared, namespace-aware parser factory (configured in the static block below). */
    private static final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();

    /**
     * Maps an element name to the list of element names it must not be nested
     * inside. Every entry currently shares the same list instance, which
     * contains only "p".
     */
    private static final Map blockElements = new HashMap();
    static {
        saxParserFactory.setNamespaceAware(true);
        List l = new ArrayList();
        l.add("p");
        // none of these may be nested inside p
        String[] blocks = { "hr", "h1", "h2", "h3", "h4", "h5", "h6", "h7", "ul", "ol", "div", "blockquote" };
        for (int i = 0; i < blocks.length; i++) {
            blockElements.put(blocks[i], l);
        }
    }

    /**
     * Elements the serializer is allowed to write in empty form; used as a
     * set (key == value).
     */
    private static final HashMap emptyTag = new HashMap();

    static {
        String[] empties = {
                // inclusion els
                "img", "area", "frame",
                // non-standard inclusion els
                "layer", "embed",
                // form el
                "input",
                // default els
                "base",
                // styling els
                "col", "basefont",
                // hidden els
                "link", "meta",
                // separator els
                "br", "hr" };
        // "p" is deliberately not listed here; empty p elements are handled by
        // EmptyFilter via ignoreEmpty instead.
        for (int i = 0; i < empties.length; i++) {
            emptyTag.put(empties[i], empties[i]);
        }
    }

    /**
     * Elements that are dropped entirely when they have no content; used as a
     * set (key == value).
     */
    private static final HashMap ignoreEmpty = new HashMap();

    static {
        ignoreEmpty.put("p", "p");
    }

    private InitialRenderContext initialContext;

    /**
     * Runs the input through the XHTML clean-up chain.
     *
     * @param input
     *        the fragment to clean; it is wrapped in {@code <sr>...</sr>}
     *        before parsing
     * @param context
     *        the filter context (not used by this filter)
     * @return the cleaned fragment, or {@code input} unchanged when parsing or
     *         serializing fails
     */
    public String filter(String input, FilterContext context) {
        try {
            DeblockFilter dbf = new DeblockFilter();
            EmptyFilter epf = new EmptyFilter();
            dbf.setBlockElements(blockElements);

            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            SpecialXHTMLSerializer xser = new SpecialXHTMLSerializer();
            xser.setOutputStream(baos);
            xser.setIndent(false);
            xser.setEncoding("UTF-8");
            xser.setIndentAmount(4);

            // chain: parser -> dbf -> epf -> serializer
            dbf.setContentHandler(epf);
            epf.setContentHander(xser.asContentHandler());

            SAXParser parser = saxParserFactory.newSAXParser();
            XMLReader xmlr = parser.getXMLReader();
            xmlr.setContentHandler(dbf);
            xmlr.parse(new InputSource(new StringReader("<sr>" + input + "</sr>")));

            String output = new String(baos.toByteArray(), "UTF-8");
            int startBlock = output.indexOf("<sr>");
            // Use the LAST closing tag: the input itself may contain nested
            // <sr> elements, and stopping at the first </sr> would truncate
            // the output.
            int endBlock = output.lastIndexOf("</sr>");
            if (startBlock < 0 || endBlock < startBlock + 4) {
                log.error("Failed to XHTML check: wrapper element not found in output\n Input======\n" + input
                        + "\n=======");
                return input;
            }
            return output.substring(startBlock + 4, endBlock);
        } catch (Throwable t) {
            // Best effort: any failure leaves the input untouched. The
            // throwable is passed along so the stack trace is not lost.
            log.error("Failed to XHTML check " + t.getMessage() + "\n Input======\n" + input + "\n=======", t);
            return input;
        }
    }

    public String[] replaces() {
        return FilterPipe.NO_REPLACES;
    }

    public String[] before() {
        return FilterPipe.EMPTY_BEFORE;
    }

    public void setInitialContext(InitialRenderContext context) {
        initialContext = context;
    }

    public String getDescription() {
        return "Hand Coded XHTML filter";
    }

    /**
     * SAX handler that stops configured elements from being nested inside
     * other elements (by default: block elements inside {@code p}).
     *
     * When such an element starts, every open element from the offending
     * ancestor downwards is closed; when the element ends, the closed elements
     * are re-opened in their original order.
     *
     * NOTE: the SAX callbacks pass (uri, localName, qName). The parameters and
     * EStack fields called {@code qname} and {@code lname} here are in the
     * localName and qName positions respectively, so lookups by {@code lname}
     * use the qualified name. Values are always forwarded in their original
     * positions, so downstream handlers see correct events.
     */
    public class DeblockFilter implements ContentHandler {

        /** Currently open elements, outermost first. */
        private Stack s = new Stack();

        /** Downstream handler that receives the rewritten events. */
        private ContentHandler ch;

        private Map blockElements = new HashMap();

        public void setContentHandler(ContentHandler ch) {
            this.ch = ch;
        }

        /**
         * Replaces the nesting rules. The map is used as given (not copied).
         *
         * @param blockElements
         *        element name -&gt; List of element names it may not be inside
         */
        public void setBlockElements(Map blockElements) {
            this.blockElements = blockElements;
        }

        /**
         * Adds a rule: {@code blockElement} may not be nested inside
         * {@code unnested}. This mutates the current rule map and, when a list
         * already exists for {@code blockElement}, that list.
         */
        public void addElement(String blockElement, String unnested) {
            List l = (List) blockElements.get(blockElement);
            if (l == null) {
                l = new ArrayList();
                blockElements.put(blockElement, l);
            }
            l.add(unnested);
        }

        /**
         * Unwinds the open-element stack back to the first (outermost)
         * instance of any of the requested elements, emitting an end event for
         * every element that is closed.
         *
         * @param deblockElements
         *        names of the elements to unwind to
         * @return the closed elements, with the outermost one on top so they
         *         can be re-opened by popping; empty if nothing was closed
         */
        private Stack closeTo(List deblockElements) throws SAXException {
            int firstIndex = s.size();
            for (int i = 0; i < s.size(); i++) {
                EStack es = (EStack) s.get(i);
                if (deblockElements.contains(es.lname)) {
                    firstIndex = i;
                    // stop at the outermost match so that no matching
                    // ancestor is left open
                    break;
                }
            }
            Stack sb = new Stack();
            while (s.size() > firstIndex) {
                EStack es = (EStack) s.pop();
                ch.endElement(es.ns, es.qname, es.lname);
                sb.push(es);
            }
            return sb;
        }

        /**
         * Checks whether the starting element has a list of elements it must
         * not be inside. If so, the stack is unwound past those elements and
         * the closed path is remembered on the new stack entry so that it can
         * be restored when this element ends.
         *
         * {@inheritDoc}
         */
        public void startElement(String ns, String qname, String lname, Attributes atts) throws SAXException {
            List unnested = (List) blockElements.get(lname);
            Stack restore = (unnested != null) ? closeTo(unnested) : null;
            s.push(new EStack(ns, qname, lname, atts, restore));
            ch.startElement(ns, qname, lname, atts);
        }

        /**
         * Forwards the end event and pops the element off the stack. If the
         * element has a restore path, the path is put back in place by
         * emitting start events for it.
         *
         * {@inheritDoc}
         */
        public void endElement(String arg0, String arg1, String arg2) throws SAXException {
            ch.endElement(arg0, arg1, arg2);
            EStack es = (EStack) s.pop();
            if (es.restore != null) {
                while (es.restore.size() > 0) {
                    EStack esr = (EStack) es.restore.pop();
                    ch.startElement(esr.ns, esr.qname, esr.lname, esr.atts);
                    s.push(esr);
                }
            }
        }

        // Everything below is forwarded unchanged to the downstream handler.

        public void characters(char[] arg0, int arg1, int arg2) throws SAXException {
            ch.characters(arg0, arg1, arg2);
        }

        public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException {
            ch.ignorableWhitespace(arg0, arg1, arg2);
        }

        public void processingInstruction(String arg0, String arg1) throws SAXException {
            ch.processingInstruction(arg0, arg1);
        }

        public void skippedEntity(String arg0) throws SAXException {
            ch.skippedEntity(arg0);
        }

        public void setDocumentLocator(Locator arg0) {
            ch.setDocumentLocator(arg0);
        }

        public void startDocument() throws SAXException {
            ch.startDocument();
        }

        public void endDocument() throws SAXException {
            ch.endDocument();
        }

        public void startPrefixMapping(String arg0, String arg1) throws SAXException {
            ch.startPrefixMapping(arg0, arg1);
        }

        public void endPrefixMapping(String arg0) throws SAXException {
            ch.endPrefixMapping(arg0);
        }

    }

    /**
     * SAX handler that drops elements listed in {@code ignoreEmpty} when they
     * are completely empty.
     *
     * The start event of such an element is held back. If the very next event
     * is an end event, both are discarded; any other event first flushes the
     * held start event and is then forwarded as usual.
     */
    public class EmptyFilter implements ContentHandler {

        /** Downstream handler. */
        private ContentHandler next = null;

        /** Start event that is being held back, or null if none. */
        private EStack lastElement = null;

        public EmptyFilter() {
        }

        /**
         * Sets the downstream handler. (The method name is misspelt; it is
         * kept as is for existing callers.)
         */
        public void setContentHander(ContentHandler handler) {
            next = handler;
        }

        public void setDocumentLocator(Locator arg0) {
            next.setDocumentLocator(arg0);
        }

        public void startDocument() throws SAXException {
            emitLast();
            next.startDocument();
        }

        public void endDocument() throws SAXException {
            emitLast();
            next.endDocument();
        }

        public void startPrefixMapping(String arg0, String arg1) throws SAXException {
            emitLast();
            next.startPrefixMapping(arg0, arg1);
        }

        public void endPrefixMapping(String arg0) throws SAXException {
            emitLast();
            next.endPrefixMapping(arg0);
        }

        /**
         * Flushes the held-back start event, if there is one. Called before
         * every event other than an end event, because such an event means the
         * held element is not empty and must be emitted.
         */
        public void emitLast() throws SAXException {
            if (lastElement != null) {
                next.startElement(lastElement.ns, lastElement.qname, lastElement.lname, lastElement.atts);
                lastElement = null;
            }
        }

        /**
         * Holds back the start event of elements listed in
         * {@code ignoreEmpty}; forwards all others immediately.
         */
        public void startElement(String ns, String qname, String lname, Attributes atts) throws SAXException {
            emitLast();
            // Locale.ENGLISH keeps the lookup independent of the default locale
            if (ignoreEmpty.get(lname.toLowerCase(Locale.ENGLISH)) != null) {
                lastElement = new EStack(ns, qname, lname, atts, null);
            } else {
                next.startElement(ns, qname, lname, atts);
            }
        }

        /**
         * Discards the end event when its start event is still being held
         * back (start immediately followed by end); otherwise forwards it.
         */
        public void endElement(String arg0, String arg1, String arg2) throws SAXException {
            if (lastElement != null) {
                // there was a start, then an end with nothing in between,
                // so ignore the element altogether
                lastElement = null;
            } else {
                next.endElement(arg0, arg1, arg2);
            }
        }

        public void characters(char[] arg0, int arg1, int arg2) throws SAXException {
            emitLast();
            next.characters(arg0, arg1, arg2);
        }

        public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException {
            emitLast();
            next.ignorableWhitespace(arg0, arg1, arg2);
        }

        public void processingInstruction(String arg0, String arg1) throws SAXException {
            emitLast();
            next.processingInstruction(arg0, arg1);
        }

        public void skippedEntity(String arg0) throws SAXException {
            emitLast();
            next.skippedEntity(arg0);
        }

    }

    /**
     * Snapshot of a start-element event, plus an optional stack of elements
     * to re-open once this element has ended.
     *
     * The three strings are stored in the order they are received from the
     * SAX callback, see the note on DeblockFilter about their names.
     */
    public class EStack {
        public EStack(String ns, String qname, String lname, Attributes atts, Stack restore) {
            this.ns = ns;
            this.qname = qname;
            this.lname = lname;
            // copy the attributes so the snapshot does not depend on the
            // caller's Attributes object
            this.atts = new AttributesImpl(atts);
            this.restore = restore;
        }

        /**
         * Copy constructor. The attributes are copied; the restore stack is
         * shared with the source.
         */
        public EStack(EStack es) {
            this.ns = es.ns;
            this.qname = es.qname;
            this.lname = es.lname;
            this.atts = new AttributesImpl(es.atts);
            this.restore = es.restore;
        }

        /** Elements to re-open after this one ends; may be null. */
        Stack restore = null;

        String ns;

        String qname;

        String lname;

        Attributes atts;
    }

    /**
     * Serializer that calls {@code characters("")} before closing any element
     * that is in no namespace or in the XHTML namespace and is not listed in
     * {@code emptyTag}. Presumably this makes the serializer write such
     * elements as a start/end tag pair rather than in empty form; confirm
     * against the ToXMLStream implementation.
     *
     * @author andrew
     */
    public class SpecialXHTMLSerializer extends ToXMLStream {

        private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";

        public void endElement(String namespaceURI, String localName, String name) throws SAXException {
            boolean foreignNamespace = namespaceURI != null && !"".equals(namespaceURI)
                    && !namespaceURI.equals(XHTML_NAMESPACE);

            // Locale.ENGLISH keeps the lookup independent of the default locale
            if (!foreignNamespace && !emptyTag.containsKey(localName.toLowerCase(Locale.ENGLISH))) {
                this.characters("");
            }

            super.endElement(namespaceURI, localName, name);
        }

    }

}