org.apache.hadoop.http.HtmlQuoting.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.http.HtmlQuoting.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.http;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

/**
 * Utility methods for quoting (escaping) and unquoting the characters that
 * are active in HTML: {@code &}, {@code <}, {@code >}, {@code '} and
 * {@code "}.
 *
 * <p>Quoting works on bytes. Each active character is a single ASCII byte,
 * and bytes with those values never occur inside a multi-byte UTF-8 sequence,
 * so UTF-8 encoded text can be scanned byte by byte. The {@code String}
 * overloads encode and decode using UTF-8.
 */
public class HtmlQuoting {
    // The entity written for each active character. These must stay in sync
    // with the entities recognised by unquoteHtmlChars(String).
    private static final byte[] AMP_BYTES = "&amp;".getBytes(StandardCharsets.UTF_8);
    private static final byte[] APOS_BYTES = "&apos;".getBytes(StandardCharsets.UTF_8);
    private static final byte[] GT_BYTES = "&gt;".getBytes(StandardCharsets.UTF_8);
    private static final byte[] LT_BYTES = "&lt;".getBytes(StandardCharsets.UTF_8);
    private static final byte[] QUOT_BYTES = "&quot;".getBytes(StandardCharsets.UTF_8);

    /**
     * Does the given byte range need to be quoted?
     * @param data the bytes to check
     * @param off the starting position
     * @param len the number of bytes to check
     * @return does the range contain any of the active html characters?
     */
    public static boolean needsQuoting(byte[] data, int off, int len) {
        for (int i = off; i < off + len; ++i) {
            switch (data[i]) {
            case '&':
            case '<':
            case '>':
            case '\'':
            case '"':
                return true;
            default:
                break;
            }
        }
        return false;
    }

    /**
     * Does the given string need to be quoted?
     * @param str the string to check; {@code null} never needs quoting
     * @return does the string contain any of the active html characters?
     */
    public static boolean needsQuoting(String str) {
        if (str == null) {
            return false;
        }
        byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
        return needsQuoting(bytes, 0, bytes.length);
    }

    /**
     * Quote all of the active HTML characters in the given byte range as they
     * are written to the output. Active characters are replaced by their
     * entity; every other byte is copied through unchanged.
     * @param output the stream to write the output to
     * @param buffer the byte array to take the characters from
     * @param off the index of the first byte to quote
     * @param len the number of bytes to quote
     * @throws IOException if writing to {@code output} fails
     */
    public static void quoteHtmlChars(OutputStream output, byte[] buffer, int off, int len) throws IOException {
        for (int i = off; i < off + len; i++) {
            switch (buffer[i]) {
            case '&':
                output.write(AMP_BYTES);
                break;
            case '<':
                output.write(LT_BYTES);
                break;
            case '>':
                output.write(GT_BYTES);
                break;
            case '\'':
                output.write(APOS_BYTES);
                break;
            case '"':
                output.write(QUOT_BYTES);
                break;
            default:
                output.write(buffer, i, 1);
                break;
            }
        }
    }

    /**
     * Quote the given item to make it html-safe.
     * @param item the string to quote
     * @return the quoted string, the same instance if nothing needed
     *         quoting, or {@code null} if {@code item} is {@code null}
     */
    public static String quoteHtmlChars(String item) {
        if (item == null) {
            return null;
        }
        byte[] bytes = item.getBytes(StandardCharsets.UTF_8);
        if (!needsQuoting(bytes, 0, bytes.length)) {
            return item;
        }
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try {
            quoteHtmlChars(buffer, bytes, 0, bytes.length);
        } catch (IOException ioe) {
            // Won't happen, since it is a ByteArrayOutputStream
            return null;
        }
        return new String(buffer.toByteArray(), StandardCharsets.UTF_8);
    }

    /**
     * Return an output stream that quotes all of the output.
     * Flushing or closing the returned stream flushes or closes {@code out}.
     * @param out the stream to write the quoted output to
     * @return a new stream that the application should write to
     * @throws IOException if the underlying output fails
     */
    public static OutputStream quoteOutputStream(final OutputStream out) throws IOException {
        return new OutputStream() {
            // Scratch buffer so single-byte writes can reuse the array-based quoting.
            private final byte[] singleByte = new byte[1];

            @Override
            public void write(byte[] data, int off, int len) throws IOException {
                quoteHtmlChars(out, data, off, len);
            }

            @Override
            public void write(int b) throws IOException {
                singleByte[0] = (byte) b;
                quoteHtmlChars(out, singleByte, 0, 1);
            }

            @Override
            public void flush() throws IOException {
                out.flush();
            }

            @Override
            public void close() throws IOException {
                out.close();
            }
        };
    }

    /**
     * Remove HTML quoting from a string. Only the five entities produced by
     * {@link #quoteHtmlChars(String)} are recognised.
     * @param item the string to unquote
     * @return the unquoted string, or {@code null} if {@code item} is
     *         {@code null}
     * @throws IllegalArgumentException if an {@code &} does not start one of
     *         the recognised entities
     */
    public static String unquoteHtmlChars(String item) {
        if (item == null) {
            return null;
        }
        int next = item.indexOf('&');
        // nothing was quoted
        if (next == -1) {
            return item;
        }
        int len = item.length();
        int posn = 0;
        StringBuilder buffer = new StringBuilder(len);
        while (next != -1) {
            // copy the literal text between the previous entity and this one
            buffer.append(item, posn, next);
            if (item.startsWith("&amp;", next)) {
                buffer.append('&');
                next += 5;
            } else if (item.startsWith("&apos;", next)) {
                buffer.append('\'');
                next += 6;
            } else if (item.startsWith("&gt;", next)) {
                buffer.append('>');
                next += 4;
            } else if (item.startsWith("&lt;", next)) {
                buffer.append('<');
                next += 4;
            } else if (item.startsWith("&quot;", next)) {
                buffer.append('"');
                next += 6;
            } else {
                // report the offending entity up to its ';', or the rest of
                // the string when there is no terminating ';'
                int end = item.indexOf(';', next) + 1;
                if (end == 0) {
                    end = len;
                }
                throw new IllegalArgumentException("Bad HTML quoting for " + item.substring(next, end));
            }
            posn = next;
            next = item.indexOf('&', posn);
        }
        buffer.append(item, posn, len);
        return buffer.toString();
    }

    /**
     * Command-line demo: prints each argument, its quoted form, and the
     * result of unquoting that quoted form.
     * @param args the strings to quote and unquote
     * @throws Exception if quoting or unquoting fails
     */
    public static void main(String[] args) throws Exception {
        for (String arg : args) {
            System.out.println("Original: " + arg);
            String quoted = quoteHtmlChars(arg);
            System.out.println("Quoted: " + quoted);
            String unquoted = unquoteHtmlChars(quoted);
            System.out.println("Unquoted: " + unquoted);
            System.out.println();
        }
    }
}