com.gargoylesoftware.htmlunit.javascript.host.xml.XMLSerializer.java Source code

Java tutorial

Introduction

Here is the source code for com.gargoylesoftware.htmlunit.javascript.host.xml.XMLSerializer.java

Source

/*
 * Copyright (c) 2002-2016 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.gargoylesoftware.htmlunit.javascript.host.xml;

import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_XML_SERIALIZER_BLANK_BEFORE_SELF_CLOSING;
import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_XML_SERIALIZER_HTML_DOCUMENT_FRAGMENT_ALWAYS_EMPTY;
import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_XML_SERIALIZER_NON_EMPTY_TAGS;
import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_XML_SERIALIZER_ROOT_CDATA_AS_ESCAPED_TEXT;
import static com.gargoylesoftware.htmlunit.javascript.configuration.BrowserName.CHROME;
import static com.gargoylesoftware.htmlunit.javascript.configuration.BrowserName.EDGE;
import static com.gargoylesoftware.htmlunit.javascript.configuration.BrowserName.FF;
import static com.gargoylesoftware.htmlunit.javascript.configuration.BrowserName.IE;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

import org.w3c.dom.NamedNodeMap;

import com.gargoylesoftware.htmlunit.SgmlPage;
import com.gargoylesoftware.htmlunit.html.*;
import com.gargoylesoftware.htmlunit.javascript.SimpleScriptable;
import com.gargoylesoftware.htmlunit.javascript.configuration.JsxClass;
import com.gargoylesoftware.htmlunit.javascript.configuration.JsxConstructor;
import com.gargoylesoftware.htmlunit.javascript.configuration.JsxFunction;
import com.gargoylesoftware.htmlunit.javascript.configuration.WebBrowser;
import com.gargoylesoftware.htmlunit.javascript.host.Element;
import com.gargoylesoftware.htmlunit.javascript.host.dom.CDATASection;
import com.gargoylesoftware.htmlunit.javascript.host.dom.Document;
import com.gargoylesoftware.htmlunit.javascript.host.dom.DocumentFragment;
import com.gargoylesoftware.htmlunit.javascript.host.dom.Node;
import com.gargoylesoftware.htmlunit.javascript.host.html.HTMLDocument;
import com.gargoylesoftware.htmlunit.util.StringUtils;

/**
 * A JavaScript object for {@code XMLSerializer}.
 *
 * @author Ahmed Ashour
 * @author Darrell DeBoer
 * @author Ronald Brill
 * @author Frank Danek
 */
@JsxClass(browsers = { @WebBrowser(CHROME), @WebBrowser(FF), @WebBrowser(IE), @WebBrowser(EDGE) })
public class XMLSerializer extends SimpleScriptable {

    // this is a bit strange but it is the way FF works
    // output of empty tags are not allowed for these HTML tags
    private static final Set<String> NON_EMPTY_TAGS = new HashSet<>(Arrays.asList(HtmlAbbreviated.TAG_NAME,
            HtmlAcronym.TAG_NAME, HtmlAnchor.TAG_NAME, HtmlApplet.TAG_NAME, HtmlAddress.TAG_NAME,
            HtmlAudio.TAG_NAME, HtmlBackgroundSound.TAG_NAME, HtmlBidirectionalOverride.TAG_NAME, HtmlBig.TAG_NAME,
            HtmlBlink.TAG_NAME, HtmlBlockQuote.TAG_NAME, HtmlBody.TAG_NAME, HtmlBold.TAG_NAME, HtmlButton.TAG_NAME,
            HtmlCanvas.TAG_NAME, HtmlCaption.TAG_NAME, HtmlCenter.TAG_NAME, HtmlCitation.TAG_NAME,
            HtmlCode.TAG_NAME, HtmlDefinition.TAG_NAME, HtmlDefinitionDescription.TAG_NAME,
            HtmlDeletedText.TAG_NAME, HtmlDirectory.TAG_NAME, HtmlDivision.TAG_NAME, HtmlDefinitionList.TAG_NAME,
            HtmlDefinitionTerm.TAG_NAME, HtmlEmbed.TAG_NAME, HtmlEmphasis.TAG_NAME, HtmlFieldSet.TAG_NAME,
            HtmlFont.TAG_NAME, HtmlForm.TAG_NAME, HtmlFrame.TAG_NAME, HtmlFrameSet.TAG_NAME, HtmlHeading1.TAG_NAME,
            HtmlHeading2.TAG_NAME, HtmlHeading3.TAG_NAME, HtmlHeading4.TAG_NAME, HtmlHeading5.TAG_NAME,
            HtmlHeading6.TAG_NAME, HtmlHead.TAG_NAME, HtmlHtml.TAG_NAME, HtmlInlineFrame.TAG_NAME,
            HtmlInsertedText.TAG_NAME, HtmlIsIndex.TAG_NAME, HtmlItalic.TAG_NAME, HtmlKeyboard.TAG_NAME,
            HtmlLabel.TAG_NAME, HtmlLegend.TAG_NAME, HtmlListing.TAG_NAME, HtmlListItem.TAG_NAME, HtmlMap.TAG_NAME,
            HtmlMarquee.TAG_NAME, HtmlMenu.TAG_NAME, HtmlMultiColumn.TAG_NAME, HtmlNoBreak.TAG_NAME,
            HtmlNoEmbed.TAG_NAME, HtmlNoFrames.TAG_NAME, HtmlNoScript.TAG_NAME, HtmlObject.TAG_NAME,
            HtmlOrderedList.TAG_NAME, HtmlOptionGroup.TAG_NAME, HtmlOption.TAG_NAME, HtmlParagraph.TAG_NAME,
            HtmlPlainText.TAG_NAME, HtmlPreformattedText.TAG_NAME, HtmlInlineQuotation.TAG_NAME, HtmlS.TAG_NAME,
            HtmlSample.TAG_NAME, HtmlScript.TAG_NAME, HtmlSelect.TAG_NAME, HtmlSmall.TAG_NAME, HtmlSource.TAG_NAME,
            HtmlSpan.TAG_NAME, HtmlStrike.TAG_NAME, HtmlStrong.TAG_NAME, HtmlStyle.TAG_NAME, HtmlSubscript.TAG_NAME,
            HtmlSuperscript.TAG_NAME, HtmlTitle.TAG_NAME, HtmlTable.TAG_NAME, HtmlTableColumn.TAG_NAME,
            HtmlTableColumnGroup.TAG_NAME, HtmlTableBody.TAG_NAME, HtmlTableDataCell.TAG_NAME,
            HtmlTableHeaderCell.TAG_NAME, HtmlTableRow.TAG_NAME, HtmlTextArea.TAG_NAME, HtmlTableFooter.TAG_NAME,
            HtmlTableHeader.TAG_NAME, HtmlTeletype.TAG_NAME, HtmlUnderlined.TAG_NAME, HtmlUnorderedList.TAG_NAME,
            HtmlVariable.TAG_NAME, HtmlVideo.TAG_NAME, HtmlWordBreak.TAG_NAME, HtmlExample.TAG_NAME));

    /**
     * Default constructor.
     */
    @JsxConstructor
    public XMLSerializer() {
        // Empty.
    }

    /**
     * The subtree rooted by the specified element is serialized to a string.
     * @param root the root of the subtree to be serialized (this may be any node, even a document)
     * @return the serialized string
     */
    @JsxFunction
    public String serializeToString(Node root) {
        if (root == null) {
            return "";
        }
        if (root instanceof Document) {
            root = ((Document) root).getDocumentElement();
        } else if (root instanceof DocumentFragment) {
            if (root.getOwnerDocument() instanceof HTMLDocument
                    && getBrowserVersion().hasFeature(JS_XML_SERIALIZER_HTML_DOCUMENT_FRAGMENT_ALWAYS_EMPTY)) {
                return "";
            }
            root = root.getFirstChild();
        }
        if (root instanceof Element) {
            final StringBuilder buffer = new StringBuilder();
            final DomNode node = root.getDomNodeOrDie();
            final SgmlPage page = node.getPage();
            final boolean isHtmlPage = page != null && page.isHtmlPage();

            String forcedNamespace = null;
            if (isHtmlPage) {
                forcedNamespace = "http://www.w3.org/1999/xhtml";
            }
            toXml(1, node, buffer, forcedNamespace);

            return buffer.toString();
        }
        if (root instanceof CDATASection
                && getBrowserVersion().hasFeature(JS_XML_SERIALIZER_ROOT_CDATA_AS_ESCAPED_TEXT)) {
            final DomCDataSection domCData = (DomCDataSection) root.getDomNodeOrDie();
            final String data = domCData.getData();
            if (org.apache.commons.lang3.StringUtils.isNotBlank(data)) {
                return StringUtils.escapeXmlChars(data);
            }
        }
        return root.getDomNodeOrDie().asXml();
    }

    private void toXml(final int indent, final DomNode node, final StringBuilder buffer,
            final String foredNamespace) {
        final String nodeName = node.getNodeName();
        buffer.append('<').append(nodeName);

        String optionalPrefix = "";
        final String namespaceURI = node.getNamespaceURI();
        final String prefix = node.getPrefix();
        if (namespaceURI != null && prefix != null) {
            boolean sameNamespace = false;
            for (DomNode parentNode = node
                    .getParentNode(); parentNode instanceof DomElement; parentNode = parentNode.getParentNode()) {
                if (namespaceURI.equals(parentNode.getNamespaceURI())) {
                    sameNamespace = true;
                }
            }
            if (node.getParentNode() == null || !sameNamespace) {
                ((DomElement) node).setAttribute("xmlns:" + prefix, namespaceURI);
            }
        } else if (foredNamespace != null) {
            buffer.append(" xmlns=\"").append(foredNamespace).append('"');
            optionalPrefix = " ";
        }

        final NamedNodeMap attributesMap = node.getAttributes();
        for (int i = 0; i < attributesMap.getLength(); i++) {
            final DomAttr attrib = (DomAttr) attributesMap.item(i);
            buffer.append(' ').append(attrib.getQualifiedName()).append('=').append('"').append(attrib.getValue())
                    .append('"');
        }
        boolean startTagClosed = false;
        for (final DomNode child : node.getChildren()) {
            if (!startTagClosed) {
                buffer.append(optionalPrefix).append('>');
                startTagClosed = true;
            }
            switch (child.getNodeType()) {
            case Node.ELEMENT_NODE:
                toXml(indent + 1, child, buffer, null);
                break;

            case Node.TEXT_NODE:
                String value = child.getNodeValue();
                value = StringUtils.escapeXmlChars(value);
                buffer.append(value);
                break;

            case Node.CDATA_SECTION_NODE:
            case Node.COMMENT_NODE:
                buffer.append(child.asXml());
                break;

            default:

            }
        }
        if (!startTagClosed) {
            final String tagName = nodeName.toLowerCase(Locale.ROOT);
            final boolean nonEmptyTagsSupported = getBrowserVersion().hasFeature(JS_XML_SERIALIZER_NON_EMPTY_TAGS);
            if (nonEmptyTagsSupported && NON_EMPTY_TAGS.contains(tagName)) {
                buffer.append('>');
                buffer.append("</").append(nodeName).append('>');
            } else {
                buffer.append(optionalPrefix);
                if (buffer.charAt(buffer.length() - 1) != ' '
                        && getBrowserVersion().hasFeature(JS_XML_SERIALIZER_BLANK_BEFORE_SELF_CLOSING)) {
                    buffer.append(" ");
                }
                buffer.append("/>");
            }
        } else {
            buffer.append('<').append('/').append(nodeName).append('>');
        }
    }

}