org.eclipse.jgit.util.QuotedString.java Source code

Java tutorial

Introduction

Here is the source code for org.eclipse.jgit.util.QuotedString.java

Source

/*
 * Copyright (C) 2008, 2019 Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.util;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.util.Arrays;
import java.util.regex.Pattern;

import org.eclipse.jgit.lib.Constants;

/**
 * Utility functions related to quoted string handling.
 */
public abstract class QuotedString {
    /**
     * Quoting style that obeys the rules Git applies to file names.
     * <p>
     * Bytes outside the ASCII range are written as octal escapes; see
     * {@link #GIT_PATH_MINIMAL} for the variant that copies them unchanged.
     */
    public static final GitPathStyle GIT_PATH = new GitPathStyle(true);

    /**
     * Quoting style that obeys the rules Git applies to file names when
     * {@code core.quotePath = false}.
     *
     * @since 5.6
     */
    public static final QuotedString GIT_PATH_MINIMAL = new GitPathStyle(false);

    /**
     * Quoting style used by the Bourne shell.
     * <p>
     * Quotes are unconditionally inserted during {@link #quote(String)}. This
     * protects shell meta-characters like <code>$</code> or <code>~</code> from
     * being recognized as special.
     */
    public static final BourneStyle BOURNE = new BourneStyle();

    /** Bourne style, but permits <code>~user</code> at the start of the string. */
    public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();

    /**
     * Quote an input string by the quoting rules.
     * <p>
     * If the input string does not require any quoting, the same String
     * reference is returned to the caller. Styles that quote unconditionally
     * (such as {@link #BOURNE}) never take this shortcut.
     * <p>
     * Otherwise a quoted string is returned, including the opening and closing
     * quotation marks at the start and end of the string. If the style does not
     * permit raw Unicode characters then the string will first be encoded in
     * UTF-8, with unprintable sequences possibly escaped by the rules.
     *
     * @param in
     *            any non-null Unicode string.
     * @return a quoted string. See above for details.
     */
    public abstract String quote(String in);

    /**
     * Remove the quoting from a previously quoted string.
     * <p>
     * The input is encoded to bytes with {@link Constants#encode(String)} and
     * the whole buffer is handed to {@link #dequote(byte[], int, int)}.
     * Implementations must keep this the inverse of {@link #quote(String)}, so
     * that
     *
     * <pre>
     * a.equals(dequote(quote(a)));
     * </pre>
     *
     * holds for any <code>a</code>.
     *
     * @param in
     *            a Unicode string to remove quoting from.
     * @return the cleaned string.
     * @see #dequote(byte[], int, int)
     */
    public String dequote(String in) {
        final byte[] encoded = Constants.encode(in);
        final int length = encoded.length;
        return dequote(encoded, 0, length);
    }

    /**
     * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
     * <p>
     * This method must match quote such that:
     *
     * <pre>
     * a.equals(dequote(Constants.encode(quote(a))));
     * </pre>
     *
     * is true for any <code>a</code>.
     * <p>
     * This method removes any opening/closing quotation marks added by
     * {@link #quote(String)}.
     *
     * @param in
     *            the input buffer to parse.
     * @param offset
     *            first position within <code>in</code> to scan.
     * @param end
     *            one position past the last byte within <code>in</code> to
     *            scan (exclusive end of the region).
     * @return the cleaned string.
     */
    public abstract String dequote(byte[] in, int offset, int end);

    /**
     * Quoting style used by the Bourne shell.
     * <p>
     * {@link #quote(String)} always wraps its input in single quotes, which
     * keeps shell meta-characters like <code>$</code> or <code>~</code> from
     * being recognized as special.
     */
    public static class BourneStyle extends QuotedString {
        /**
         * Wrap the input in single quotes.
         * <p>
         * A single quote or <code>!</code> cannot be written inside the quoted
         * run, so each one closes the run, is emitted as a backslash escape,
         * and a new quoted run is opened after it.
         */
        @Override
        public String quote(String in) {
            final int length = in.length();
            final StringBuilder buf = new StringBuilder(length + 2);
            buf.append('\'');
            int pending = 0; // first character not yet copied to buf
            for (int pos = 0; pos < length; pos++) {
                final char ch = in.charAt(pos);
                if (ch != '\'' && ch != '!') {
                    continue;
                }
                buf.append(in, pending, pos);
                buf.append('\'').append('\\').append(ch).append('\'');
                pending = pos + 1;
            }
            buf.append(in, pending, length);
            buf.append('\'');
            return buf.toString();
        }

        /**
         * Strip Bourne shell quoting from the region <code>[ip, ie)</code>.
         * <p>
         * Single quotes toggle the quoted state and are dropped. Outside of
         * quotes a backslash is dropped and the byte after it is copied as-is;
         * inside quotes, or as the very last byte, a backslash is kept
         * literally.
         */
        @Override
        public String dequote(byte[] in, int ip, int ie) {
            final byte[] out = new byte[ie - ip];
            int used = 0;
            boolean quoted = false;
            int pos = ip;
            while (pos < ie) {
                final byte cur = in[pos++];
                if (cur == '\'') {
                    quoted = !quoted;
                } else if (cur == '\\' && !quoted && pos < ie) {
                    // Unquoted escape: keep only the escaped byte.
                    out[used++] = in[pos++];
                } else {
                    out[used++] = cur;
                }
            }
            return RawParseUtils.decode(UTF_8, out, 0, used);
        }
    }

    /**
     * Bourne style, but permits <code>~user</code> at the start of the string.
     * <p>
     * A leading <code>~/</code> or <code>~user/</code> is emitted unquoted so
     * the shell can still expand it; the remainder of the path is quoted by
     * {@link BourneStyle#quote(String)}.
     */
    public static class BourneUserPathStyle extends BourneStyle {
        /** Matches a bare <code>~user</code> with no path component. */
        private static final Pattern USER_ONLY = Pattern
                .compile("~[A-Za-z0-9_-]+"); //$NON-NLS-1$

        /**
         * Matches <code>~/path</code> or <code>~user/path</code>. DOTALL is
         * required so that a path containing a line terminator is still
         * recognized; without it such a path would be quoted as a whole and
         * the shell would no longer expand the leading <code>~</code>.
         */
        private static final Pattern USER_PATH = Pattern
                .compile("~[A-Za-z0-9_-]*/.*", Pattern.DOTALL); //$NON-NLS-1$

        /**
         * Quote a path, leaving a leading <code>~/</code> or
         * <code>~user/</code> outside the quotes.
         *
         * @param in
         *            any non-null Unicode string.
         * @return <code>in</code> with a trailing slash appended if it is just
         *         <code>~user</code>; <code>in</code> unchanged if it is just
         *         <code>~/</code> or <code>~user/</code>; the unquoted prefix
         *         followed by the Bourne-quoted remainder if a path follows
         *         the prefix; otherwise the fully Bourne-quoted string.
         */
        @Override
        public String quote(String in) {
            if (USER_ONLY.matcher(in).matches()) {
                // If the string is just "~user" we can assume they
                // mean "~user/".
                //
                return in + "/"; //$NON-NLS-1$
            }

            if (USER_PATH.matcher(in).matches()) {
                // If the string is of "~/path" or "~user/path"
                // we must not escape ~/ or ~user/ from the shell.
                // The pattern guarantees the first '/' ends the prefix.
                //
                final int i = in.indexOf('/') + 1;
                if (i == in.length())
                    return in;
                return in.substring(0, i) + super.quote(in.substring(i));
            }

            return super.quote(in);
        }
    }

    /**
     * Quoting style that obeys the rules Git applies to file names.
     * <p>
     * A name is wrapped in double quotes only when at least one of its bytes
     * had to be escaped; otherwise the input string is returned as-is.
     */
    public static final class GitPathStyle extends QuotedString {
        /**
         * Escape rule for each ASCII byte: <code>0</code> copies the byte
         * verbatim, a positive value is the letter to write after a backslash,
         * and <code>-1</code> requests a three digit octal escape.
         */
        private static final byte[] escapeTable;
        static {
            final byte[] table = new byte[128];
            Arrays.fill(table, (byte) -1);

            for (char c = '0'; c <= '9'; c++) {
                table[c] = 0;
            }
            for (char c = 'a'; c <= 'z'; c++) {
                table[c] = 0;
            }
            for (char c = 'A'; c <= 'Z'; c++) {
                table[c] = 0;
            }
            final char[] verbatim = { ' ', '$', '%', '&', '*', '+', ',', '-',
                    '.', '/', ':', ';', '=', '?', '@', '_', '^', '|', '~' };
            for (final char c : verbatim) {
                table[c] = 0;
            }

            table[0x07] = 'a'; // BEL
            table['\b'] = 'b';
            table['\f'] = 'f';
            table['\n'] = 'n';
            table['\r'] = 'r';
            table['\t'] = 't';
            table[0x0B] = 'v'; // VT
            table['\\'] = '\\';
            table['"'] = '"';

            escapeTable = table;
        }

        /** Whether bytes outside the ASCII range are octal-escaped. */
        private final boolean quoteHigh;

        private GitPathStyle(boolean doQuote) {
            quoteHigh = doQuote;
        }

        /**
         * Quote a file name.
         * <p>
         * The empty string becomes an empty pair of double quotes. Any other
         * input is returned unchanged unless some byte of its encoded form
         * required an escape, in which case the escaped form is returned
         * inside double quotes.
         */
        @Override
        public String quote(String instr) {
            if (instr.isEmpty()) {
                return "\"\""; //$NON-NLS-1$
            }
            final byte[] raw = Constants.encode(instr);
            // Worst case is a 4 byte octal escape per input byte, plus the
            // two surrounding quotation marks.
            final byte[] buf = new byte[4 * raw.length + 2];
            boolean escaped = false;
            int w = 0;
            buf[w++] = '"';
            for (final byte element : raw) {
                final int c = element & 0xff;
                final byte style;
                if (c < escapeTable.length) {
                    style = escapeTable[c];
                } else {
                    // High bytes: octal escape or verbatim copy.
                    style = quoteHigh ? (byte) -1 : 0;
                }

                if (style == 0) {
                    buf[w++] = element;
                } else if (style > 0) {
                    escaped = true;
                    buf[w++] = '\\';
                    buf[w++] = style;
                } else {
                    escaped = true;
                    buf[w++] = '\\';
                    buf[w++] = (byte) ('0' + ((c >> 6) & 3));
                    buf[w++] = (byte) ('0' + ((c >> 3) & 7));
                    buf[w++] = (byte) ('0' + (c & 7));
                }
            }
            if (!escaped) {
                return instr;
            }
            buf[w++] = '"';
            return new String(buf, 0, w, UTF_8);
        }

        /**
         * Decode the region <code>[inPtr, inEnd)</code>.
         * <p>
         * Escapes are only interpreted when the region both starts and ends
         * with a double quote; any other region is decoded as plain UTF-8.
         */
        @Override
        public String dequote(byte[] in, int inPtr, int inEnd) {
            final boolean wrapped = inEnd - inPtr >= 2 && in[inPtr] == '"'
                    && in[inEnd - 1] == '"';
            if (!wrapped) {
                return RawParseUtils.decode(UTF_8, in, inPtr, inEnd);
            }
            return dq(in, inPtr + 1, inEnd - 1);
        }

        /**
         * Expand backslash escapes found between the surrounding quotes.
         *
         * @param in
         *            buffer holding the quoted text.
         * @param inPtr
         *            first position after the opening quote.
         * @param inEnd
         *            position of the closing quote (exclusive end).
         * @return the unescaped bytes decoded as UTF-8.
         */
        private static String dq(byte[] in, int inPtr, int inEnd) {
            final byte[] out = new byte[inEnd - inPtr];
            int w = 0;
            int p = inPtr;
            while (p < inEnd) {
                final byte b = in[p++];
                if (b != '\\') {
                    out[w++] = b;
                    continue;
                }
                if (p == inEnd) {
                    // Lone trailing backslash. Treat it as a literal.
                    out[w++] = '\\';
                    break;
                }

                final byte code = in[p++];
                switch (code) {
                case 'a':
                    out[w++] = 0x07; // BEL
                    break;
                case 'b':
                    out[w++] = '\b';
                    break;
                case 'f':
                    out[w++] = '\f';
                    break;
                case 'n':
                    out[w++] = '\n';
                    break;
                case 'r':
                    out[w++] = '\r';
                    break;
                case 't':
                    out[w++] = '\t';
                    break;
                case 'v':
                    out[w++] = 0x0B; // VT
                    break;

                case '\\':
                case '"':
                    out[w++] = code;
                    break;

                case '0':
                case '1':
                case '2':
                case '3': {
                    // Octal escape: up to three digits, the first of which
                    // has already been consumed.
                    int value = code - '0';
                    int digits = 1;
                    while (digits < 3 && p < inEnd && '0' <= in[p]
                            && in[p] <= '7') {
                        value = (value << 3) | (in[p] - '0');
                        p++;
                        digits++;
                    }
                    out[w++] = (byte) value;
                    break;
                }

                default:
                    // Any other code is taken literally.
                    out[w++] = '\\';
                    out[w++] = code;
                    break;
                }
            }

            return RawParseUtils.decode(UTF_8, out, 0, w);
        }
    }
}