org.apache.shindig.gadgets.rewrite.MutableContent.java Source code

Introduction

Here is the source code for org.apache.shindig.gadgets.rewrite.MutableContent.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package org.apache.shindig.gadgets.rewrite;

import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;

import org.apache.commons.io.IOUtils;
import org.apache.shindig.common.util.CharsetUtil;
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.http.HttpResponse;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
import org.apache.shindig.gadgets.parse.HtmlSerialization;
import org.w3c.dom.Document;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Object that maintains a String representation of arbitrary contents
 * and a consistent view of those contents as an HTML parse tree.
 *
 * <p>The same content can be held in several forms: a decoded String, raw bytes
 * (with an optional charset), a not-yet-consumed {@link HttpResponse}, and a parsed
 * {@link Document}. Forms are computed lazily from one another on demand, and every
 * mutation clears the forms that would otherwise become stale.
 */
public class MutableContent {
    private static final Map<String, Object> EMPTY_MAP = ImmutableMap.of();

    // String representation of contentBytes taking into account the correct
    // encoding of the content. Null until computed or explicitly set.
    private String content;
    private byte[] contentBytes;

    // Encoding of the content bytes. May be null (unspecified), in which case
    // getContent() decodes the bytes as UTF-8.
    private Charset contentEncoding;

    // Deferred source of the content; cleared once it has been consumed or superseded.
    private HttpResponse contentSource;

    private Document document;
    private int numChanges;
    private final GadgetHtmlParser contentParser;
    // Lazily created by addPipelinedData().
    private Map<String, Object> pipelinedData;

    // User-data key under which the owning MutableContent is attached to its Document.
    private static final String MUTABLE_CONTENT_LISTENER = "MutableContentListener";
    private static final Logger logger = Logger.getLogger(MutableContent.class.getName());

    /**
     * Notifies the {@code MutableContent} attached to the given document (if any) that
     * the document has been edited. Documents that were not produced by
     * {@link #getDocument()}, and a null document, are ignored.
     * @param doc Document that was edited.
     */
    public static void notifyEdit(Document doc) {
        if (doc == null) {
            return;
        }
        // instanceof covers both "no listener attached" (null) and foreign user data
        // stored under the same key, avoiding a ClassCastException.
        Object listener = doc.getUserData(MUTABLE_CONTENT_LISTENER);
        if (listener instanceof MutableContent) {
            ((MutableContent) listener).documentChanged();
        }
    }

    /**
     * Construct with decoded string content
     * @param contentParser Parser used by {@link #getDocument()}.
     * @param content Initial decoded content.
     */
    public MutableContent(GadgetHtmlParser contentParser, String content) {
        this.contentParser = contentParser;
        this.content = content;
        this.numChanges = 0;
        this.contentEncoding = Charsets.UTF_8;
    }

    /**
     * Construct with HttpResponse so we can defer string decoding until we actually need
     * the content. Given that we don't rewrite many mime types this is a performance advantage
     * @param contentParser Parser used by {@link #getDocument()}.
     * @param contentSource Response to lazily read content from; may be null, in which
     *     case the object starts without content and with an unspecified encoding.
     */
    public MutableContent(GadgetHtmlParser contentParser, HttpResponse contentSource) {
        this.contentParser = contentParser;
        this.contentSource = contentSource;
        this.contentEncoding = contentSource != null ? contentSource.getEncodingCharset() : null;
    }

    /**
     * Retrieves the current content for this object in String form.
     * If content has been retrieved in parse tree form and has
     * been edited, the String form is computed from the parse tree by
     * rendering it. It is <b>strongly</b> encouraged to avoid switching
     * between retrieval of parse tree (through {@code getDocument}),
     * with subsequent edits and retrieval of String contents to avoid
     * repeated serialization and deserialization.
     * As a final fallback, if content has been set as bytes, decodes them using
     * the encoding they were set with, or UTF-8 if no encoding was specified.
     * @return Renderable/active content, or null if no content is available in any form.
     */
    public String getContent() {
        if (content == null) {
            if (contentSource != null) {
                content = contentSource.getResponseAsString();
                // Clear on first use
                contentSource = null;
            } else if (document != null) {
                content = HtmlSerialization.serialize(document);
            } else if (contentBytes != null) {
                Charset useEncoding = contentEncoding != null ? contentEncoding : Charsets.UTF_8;
                content = useEncoding.decode(ByteBuffer.wrap(contentBytes)).toString();
            }
        }
        return content;
    }

    /**
     * Sets the object's content as a raw String. Note, this operation
     * may clear the document if the content has changed
     * @param newContent New content.
     */
    public void setContent(String newContent) {
        // TODO - Equality check may be unnecessary overhead
        // A null cached string is always treated as a change, since the current content
        // may still be held undecoded in another form.
        if (content == null || !content.equals(newContent)) {
            content = newContent;
            document = null;
            contentSource = null;
            contentBytes = null;
            incrementNumChanges();
        }
    }

    /**
     * Retrieves the current content for this object as an InputStream.
     * @return Active content as InputStream; an empty stream if no content is available.
     */
    public InputStream getContentBytes() {
        byte[] rawBytes = getRawContentBytes();
        // ByteArrayInputStream rejects a null buffer, so expose "no content" as an empty stream.
        return new ByteArrayInputStream(rawBytes != null ? rawBytes : new byte[0]);
    }

    /**
     * Retrieves the current content as a byte array, computing it from the pending
     * HttpResponse, the String content or the parse tree (in that order) if needed.
     * Bytes derived from the String content or the parse tree are always UTF-8.
     * @return The internal byte array (not a copy), or null if no content is available.
     */
    protected byte[] getRawContentBytes() {
        if (contentBytes == null) {
            if (contentSource != null) {
                try {
                    setContentBytesState(IOUtils.toByteArray(contentSource.getResponse()),
                            contentSource.getEncodingCharset());
                    contentSource = null;
                } catch (IOException e) {
                    // Not expected (the original author notes the response bytes are wrapped
                    // as a ByteArrayInputStream), but don't lose the failure if it happens.
                    logger.log(Level.WARNING, "Unable to read content bytes from HttpResponse", e);
                }
            } else if (content != null) {
                // If retrieving a String here, we've already converted to UTF8.
                // Be sure to reflect this when setting bytes.
                // In the case of HttpResponseBuilder, this re-sets charset in Content-Type
                // to UTF-8 rather than whatever it was before. We do this to standardize
                // on UTF-8 for all String handling.
                setContentBytesState(CharsetUtil.getUtf8Bytes(content), Charsets.UTF_8);
            } else if (document != null) {
                setContentBytesState(CharsetUtil.getUtf8Bytes(HtmlSerialization.serialize(document)),
                        Charsets.UTF_8);
            }
        }
        return contentBytes;
    }

    /**
     * Sets the object's contentBytes as the given raw input. If ever interpreted
     * as a String, the data will be decoded as the encoding specified.
     * Note, this operation may clear the document if the content has changed.
     * Also note, it's mandated that the new bytes array will NOT be modified
     * by the caller of this API. The array is not copied, for performance reasons.
     * If the caller may modify a byte array, it MUST pass in a new copy.
     * @param newBytes New content.
     * @param newEncoding Encoding for the bytes, or null for unspecified.
     */
    public void setContentBytes(byte[] newBytes, Charset newEncoding) {
        // NOTE(review): only the bytes are compared; passing identical bytes with a
        // different encoding is treated as "no change" and the new encoding is dropped.
        // Confirm whether that is intended before relying on it.
        if (contentBytes == null || !Arrays.equals(contentBytes, newBytes)) {
            setContentBytesState(newBytes, newEncoding);
            document = null;
            contentSource = null;
            content = null;
            incrementNumChanges();
        }
    }

    /**
     * Sets content to new byte array, with unspecified charset. It is
     * recommended to use the {@code setContentBytes(byte[], Charset)} API instead,
     * where possible.
     * @param newBytes New content.
     */
    public final void setContentBytes(byte[] newBytes) {
        setContentBytes(newBytes, null);
    }

    /**
     * Sets internal state having to do with content bytes, from the provided
     * byte array and charset.
     * This MUST be the only place in which MutableContent's notion of encoding is mutated.
     * @param newBytes New content.
     * @param newEncoding Encoding for the bytes, or null for unspecified.
     */
    protected void setContentBytesState(byte[] newBytes, Charset newEncoding) {
        contentBytes = newBytes;
        contentEncoding = newEncoding;
    }

    /**
     * Notification that the content of the document has changed. Causes the content
     * string and bytes to be cleared. Has no effect if there is no parsed document.
     */
    public void documentChanged() {
        if (document != null) {
            content = null;
            contentSource = null;
            contentBytes = null;
            incrementNumChanges();
        }
    }

    /**
     * Retrieves the object contents in parsed form, if a
     * {@code GadgetHtmlParser} is configured and is able to parse the string
     * contents appropriately. To modify the object's
     * contents by parse tree after setting new String contents,
     * this method must be called again. However, this practice is highly
     * discouraged, as parsing a tree from String is a costly operation and should
     * be done at most once per rewrite.
     * @return The parsed document, or null if no parser is configured or the
     *     content could not be parsed.
     */
    public Document getDocument() {
        // TODO - Consider actually imposing one parse limit on rewriter pipeline
        if (document != null) {
            return document;
        }
        if (contentParser == null) {
            // No parser configured: a parse tree cannot be produced.
            return null;
        }
        try {
            Document parsed = contentParser.parseDom(getContent());
            if (parsed == null) {
                return null;
            }
            // Attach ourselves so that notifyEdit(Document) can find this instance.
            parsed.setUserData(MUTABLE_CONTENT_LISTENER, this, null);
            document = parsed;
        } catch (GadgetException e) {
            logger.log(Level.WARNING, "Got GadgetException when parsing content", e);
            return null;
        }
        return document;
    }

    /**
     * @return Number of content changes recorded so far via {@code setContent},
     *     {@code setContentBytes} and {@code documentChanged}.
     */
    public int getNumChanges() {
        return numChanges;
    }

    /**
     * Records that the content has changed once more.
     */
    protected void incrementNumChanges() {
        ++numChanges;
    }

    /**
     * True if current state has a parsed document. Allows rewriters to switch mode based on
     * which content is most readily available
     */
    public boolean hasDocument() {
        return (document != null);
    }

    /**
     * Associates a value with the given key in this object's pipelined data,
     * replacing any value previously stored under that key.
     * @param key Key to store the value under.
     * @param value Value to store.
     */
    public void addPipelinedData(String key, Object value) {
        if (null == pipelinedData) {
            pipelinedData = Maps.newHashMap();
        }
        pipelinedData.put(key, value);
    }

    /**
     * Retrieves the pipelined data added so far.
     * @return An immutable empty map if nothing has been added; otherwise the internal
     *     map itself (not a copy), so callers should treat the result as read-only.
     */
    public Map<String, Object> getPipelinedData() {
        return (null == pipelinedData) ? EMPTY_MAP : pipelinedData;
    }
}