Java tutorial
/* * Copyright 2012-present Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may obtain * a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. */ package com.facebook.buck.util; import com.facebook.buck.util.environment.Platform; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.CharMatcher; import com.google.common.base.Function; import com.google.common.base.Preconditions; import java.io.File; import java.nio.file.Path; import java.util.Iterator; public final class Escaper { private static final char MAKEFILE_ESCAPE_CHAR = '\\'; /** Utility class: do not instantiate. */ private Escaper() { } /** * The quoting style to use when escaping. */ public static enum Quoter { SINGLE { @Override public String quote(String str) { return '\'' + str.replace("\'", "'\\''") + '\''; } }, DOUBLE { @Override public String quote(String str) { return '"' + str.replace("\"", "\\\"") + '"'; } }, DOUBLE_WINDOWS_JAVAC { @Override public String quote(String str) { return '"' + str.replace("\\", "\\\\") + '"'; } }; /** * @return the string with this quoting style applied. */ public abstract String quote(String str); } /** * Escapes the special characters identified the {@link CharMatcher}, using single quotes. * @param matcher identifies characters to be escaped * @param str string to quote * @return possibly quoted string */ public static String escape(Quoter quoter, CharMatcher matcher, String str) { if (matcher.matchesAnyOf(str) || str.isEmpty()) { return quoter.quote(str); } else { return str; } } /** * @return a escaper function using the given quote style and escaping characters determined * by the given matcher. */ public static Function<String, String> escaper(final Quoter quoter, final CharMatcher matcher) { return input -> escape(quoter, matcher, input); } public static Function<String, String> javacEscaper() { if (Platform.detect() == Platform.WINDOWS) { return Escaper.escaper(Quoter.DOUBLE_WINDOWS_JAVAC, CharMatcher.anyOf("#'").or(CharMatcher.whitespace())); } else { return Escaper.escaper(Escaper.Quoter.DOUBLE, CharMatcher.anyOf("#\"'").or(CharMatcher.whitespace())); } } private static final CharMatcher BASH_SPECIAL_CHARS = CharMatcher.anyOf("<>|!?*[]$\\(){}\"'`&;=") .or(CharMatcher.whitespace()); /** * Bash quoting {@link com.google.common.base.Function Function} which can be passed to * {@link com.google.common.collect.Iterables#transform Iterables.transform()}. */ public static final Function<String, String> BASH_ESCAPER = escaper(Quoter.SINGLE, BASH_SPECIAL_CHARS); /** * CreateProcess (Windows) quoting {@link com.google.common.base.Function Function} which can be * passed to {@link com.google.common.collect.Iterables#transform Iterables.transform()}. */ public static final Function<String, String> CREATE_PROCESS_ESCAPER = WindowsCreateProcessEscape::quote; /** * Platform-aware shell quoting {@link com.google.common.base.Function Function} which can be * passed to {@link com.google.common.collect.Iterables#transform Iterables.transform()} * TODO(sdwilsh): get proper cmd.EXE escaping implemented on Windows */ public static final Function<String, String> SHELL_ESCAPER = Platform.detect() == Platform.WINDOWS ? CREATE_PROCESS_ESCAPER : BASH_ESCAPER; /** * Escaper for argfiles for clang and gcc. * * Based on the following docs in the gcc manual: * * {@literal @file} * Read command-line options from file. The options read are inserted * in place of the original {@literal @file} option. If file does not exist, or * cannot be read, then the option will be treated literally, and not * removed. * * Options in file are separated by whitespace. A whitespace * character may be included in an option by surrounding the entire * option in either single or double quotes. Any character (including * a backslash) may be included by prefixing the character to be * included with a backslash. The file may itself contain additional * {@literal @file} options; any such options will be processed recursively. */ public static final Function<String, String> ARGFILE_ESCAPER = escaper(Quoter.DOUBLE, CharMatcher.anyOf("\"\\").or(CharMatcher.whitespace())); /** * Quotes a string to be passed to the shell, if necessary. This works for the appropriate shell * regardless of the platform it is run on. * @param str string to escape * @return possibly escaped string */ public static String escapeAsShellString(String str) { return SHELL_ESCAPER.apply(str); } /** * Quotes a string to be passed to Bash, if necessary. Uses single quotes to prevent variable * expansion, `...` evaluation etc. * @param str string to quote * @return possibly quoted string */ public static String escapeAsBashString(String str) { if (Platform.detect() == Platform.WINDOWS) { return CREATE_PROCESS_ESCAPER.apply(str); } else { return escape(Quoter.SINGLE, BASH_SPECIAL_CHARS, str); } } public static String escapeAsBashString(Path path) { return escapeAsBashString(path.toString()); } // Adapted from org.apache.commons.lang.StringEscapeUtils /** * @return a double-quoted string with metacharacters and quotes escaped with * a backslash; non-ASCII characters escaped as \u */ public static String escapeAsPythonString(String str) { StringBuilder builder = new StringBuilder(); builder.append('"'); for (Character ch : str.toCharArray()) { // Handle Unicode. if (ch > 0xfff) { builder.append("\\u" + hex(ch)); } else if (ch > 0xff) { builder.append("\\u0" + hex(ch)); } else if (ch > 0x7f) { builder.append("\\u00" + hex(ch)); } else if (ch < 32) { switch (ch) { case '\b': builder.append('\\'); builder.append('b'); break; case '\n': builder.append('\\'); builder.append('n'); break; case '\t': builder.append('\\'); builder.append('t'); break; case '\f': builder.append('\\'); builder.append('f'); break; case '\r': builder.append('\\'); builder.append('r'); break; default: if (ch > 0xf) { builder.append("\\u00" + hex(ch)); } else { builder.append("\\u000" + hex(ch)); } break; } } else { switch (ch) { case '\'': builder.append('\\'); builder.append('\''); break; case '"': builder.append('\\'); builder.append('"'); break; case '\\': builder.append('\\'); builder.append('\\'); break; default: builder.append(ch); break; } } } builder.append('"'); return builder.toString(); } private static boolean shouldEscapeMakefileString(String escapees, String blob, int index) { Preconditions.checkArgument(blob.length() > index); for (int i = index; i < blob.length(); i++) { if (escapees.indexOf(blob.charAt(i)) != -1) { return true; } if (blob.charAt(i) != MAKEFILE_ESCAPE_CHAR) { return false; } } return true; } private static String escapeAsMakefileString(String escapees, String str) { StringBuilder builder = new StringBuilder(); for (int i = 0; i < str.length(); i++) { if (shouldEscapeMakefileString(escapees, str, i)) { builder.append(MAKEFILE_ESCAPE_CHAR); } builder.append(str.charAt(i)); } return builder.toString().replace("$", "$$"); } /** * @return an escaped string suitable for use in a GNU makefile on the right side of a variable * assignment. */ public static String escapeAsMakefileValueString(String str) { return escapeAsMakefileString("#", str); } /** * Escapes forward slashes in a Path as a String that is safe to consume with other tools (such * as gcc). On Unix systems, this is equivalent to {@link java.nio.file.Path Path.toString()}. * @param path the Path to escape * @return the escaped Path */ public static String escapePathForCIncludeString(Path path) { if (File.separatorChar != '\\') { return path.toString(); } StringBuilder result = new StringBuilder(); if (path.startsWith(File.separator)) { result.append("\\\\"); } for (Iterator<Path> iterator = path.iterator(); iterator.hasNext();) { result.append(iterator.next()); if (iterator.hasNext()) { result.append("\\\\"); } } if (path.getNameCount() > 0 && path.endsWith(File.separator)) { result.append("\\\\"); } return result.toString(); } @VisibleForTesting static String hex(char ch) { return Integer.toHexString(ch).toUpperCase(); } /** * Unescape a path string obtained from preprocessor output, as in: * <a href="https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html"> * https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html * </a>. * @see #decodeNumericEscape(StringBuilder, String, int, int, int, int) */ public static String unescapeLineMarkerPath(String escaped) { StringBuilder ret = new StringBuilder(); for (int i = 0; i < escaped.length(); /* i incremented below */) { // consume character, advance index char c = escaped.charAt(i); i++; if (c != '\\') { ret.append(c); } else { // So sayeth the GCC docs: // "filename will never contain any non-printing characters; they are replaced with octal // escape sequences." -- https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html if (i >= escaped.length()) { throw new IllegalArgumentException("malformed string: ends with single backslash"); } c = escaped.charAt(i); // peek (don't consume) next char switch (c) { // standard escapes: http://en.cppreference.com/w/cpp/language/escape case '\\': case '"': case '\'': case '?': ret.append(c); i++; // consume it break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': i = decodeNumericEscape(ret, escaped, i, /*maxCodeLength=*/3, /*base*/8); break; case 'x': i = decodeNumericEscape(ret, escaped, i, /*maxCodeLength=*/2, /*base*/16); break; case 'u': i = decodeNumericEscape(ret, escaped, i, /*maxCodeLength=*/4, /*base*/16, /*maxCodes*/2); break; default: throw new IllegalArgumentException("malformed string: bad char in escape seq: " + c); } } } return ret.toString(); } /** * Decode a numeric escape as explained in this page: * <a href="http://en.cppreference.com/w/cpp/language/escape"> * http://en.cppreference.com/w/cpp/language/escape * </a>. The pointed-to substring shouldn't contain the leading backslash + optional 'x' or 'u'. * @param out receives decoded characters * @param escaped the string containing the escape sequence * @param pos starting index of escape (but after the backslash) * @param base number base, e.g. 8 for octal, or 16 for hex or unicode. * @param maxCodes maximum number of sequences of escape numbers to decode. This is mainly * to support unicode escape sequences which might represent one or two characters. * @return position to first character just after the consumed numeric code. Is number of * consumed code bytes + {@code pos} argument. */ public static int decodeNumericEscape(StringBuilder out, String escaped, int pos, int maxCodeLength, int base, int maxCodes) { final String table = "0123456789abcdef"; for (int code = 0; code < maxCodes; code++) { char c = 0; boolean valid = false; for (int i = 0; i < maxCodeLength && pos < escaped.length(); i++) { final int digit = table.indexOf(Character.toLowerCase(escaped.charAt(pos))); if (digit == -1 || digit >= base) { break; } // "digit" is a valid value for the digit read, in the range [0, base). // Now we can increment the position, consuming that digit. pos++; c = (char) ((c * base) + digit); valid = true; } if (valid) { out.append(c); } } return pos; } /** * Call {@link #decodeNumericEscape(StringBuilder, String, int, int, int, int)} to * parse at most one escaped character; i.e. calls that method with {@code maxCodes = 1}. */ public static int decodeNumericEscape(StringBuilder out, String escaped, int pos, int maxCodeLength, int base) { return decodeNumericEscape(out, escaped, pos, maxCodeLength, base, 1); } }