/*
* JBoss DNA (http://www.jboss.org/dna)
* See the COPYRIGHT.txt file distributed with this work for information
* regarding copyright ownership. Some portions may be licensed
* to Red Hat, Inc. under one or more contributor license agreements.
* See the AUTHORS.txt file in the distribution for a full listing of
* individual contributors.
*
* JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
* is licensed to you under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* JBoss DNA is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.BitSet;
/**
 * An encoder useful for converting text to be used within a URL, as defined by Section 2.3 of <a
 * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>. Note that this class does not encode a complete URL
 * ({@link java.net.URLEncoder} and {@link java.net.URLDecoder} should be used for such purposes).
 * <p>
 * Characters outside the unreserved set are written as the percent-escaped octets of their UTF-8 representation, using
 * lowercase hexadecimal digits. Whether the forward slash is escaped is controlled by {@link #setSlashEncoded(boolean)}.
 * </p>
 *
 * @author Randall Hauch
 */
public class UrlEncoder {

    /**
     * Data characters that are allowed in a URI but do not have a reserved purpose are called unreserved. These include upper
     * and lower case letters, decimal digits, and a limited set of punctuation marks and symbols.
     *
     * <pre>
     * unreserved = alphanum | mark
     * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
     * </pre>
     *
     * Unreserved characters can be escaped without changing the semantics of the URI, but this should not be done unless the
     * URI is being used in a context that does not allow the unescaped character to appear.
     */
    private static final BitSet RFC2396_UNRESERVED_CHARACTERS = new BitSet(256);

    /** The unreserved characters plus the forward slash; used when slashes are to be passed through unescaped. */
    private static final BitSet RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS;

    /** The character that introduces a two-digit hexadecimal escape sequence. */
    public static final char ESCAPE_CHARACTER = '%';

    static {
        RFC2396_UNRESERVED_CHARACTERS.set('a', 'z' + 1);
        RFC2396_UNRESERVED_CHARACTERS.set('A', 'Z' + 1);
        RFC2396_UNRESERVED_CHARACTERS.set('0', '9' + 1);
        RFC2396_UNRESERVED_CHARACTERS.set('-');
        RFC2396_UNRESERVED_CHARACTERS.set('_');
        RFC2396_UNRESERVED_CHARACTERS.set('.');
        RFC2396_UNRESERVED_CHARACTERS.set('!');
        RFC2396_UNRESERVED_CHARACTERS.set('~');
        RFC2396_UNRESERVED_CHARACTERS.set('*');
        RFC2396_UNRESERVED_CHARACTERS.set('\'');
        RFC2396_UNRESERVED_CHARACTERS.set('(');
        RFC2396_UNRESERVED_CHARACTERS.set(')');

        RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS = (BitSet)RFC2396_UNRESERVED_CHARACTERS.clone();
        RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS.set('/');
    }

    private boolean slashEncoded = true;

    /**
     * Escape every character of the supplied text that is not an RFC 2396 unreserved character. When
     * {@link #isSlashEncoded()} is false the forward slash is also passed through unescaped.
     * <p>
     * Each escaped character is written as one <code>%xx</code> sequence per octet of its UTF-8 representation, with
     * lowercase hexadecimal digits. A surrogate pair is encoded as the single code point it represents; an unpaired
     * surrogate has no UTF-8 representation and is encoded as the charset's replacement octet (<code>%3f</code>).
     * </p>
     *
     * @param text the text to encode; may be null
     * @return the encoded text, or null if the supplied text was null
     */
    public String encode( String text ) {
        if (text == null) {
            return null;
        }
        if (text.length() == 0) {
            return text;
        }
        final BitSet safeChars = isSlashEncoded() ? RFC2396_UNRESERVED_CHARACTERS : RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS;
        final int length = text.length();
        final StringBuilder result = new StringBuilder(length + 16);
        // Walk by index and code point (rather than with a CharacterIterator) so that U+FFFF in the input is not mistaken
        // for the iterator's end-of-text sentinel and so that surrogate pairs are kept together.
        int index = 0;
        while (index < length) {
            final int codePoint = text.codePointAt(index);
            final int charCount = Character.charCount(codePoint);
            if (safeChars.get(codePoint)) {
                // Safe character (always ASCII), so just pass through ...
                result.append((char)codePoint);
            } else {
                // Not a safe character, so escape each octet of its UTF-8 form ...
                final byte[] octets = text.substring(index, index + charCount).getBytes(StandardCharsets.UTF_8);
                for (byte octet : octets) {
                    result.append(ESCAPE_CHARACTER);
                    // Character.forDigit already produces lowercase digits for radix 16
                    result.append(Character.forDigit((octet >> 4) & 0xF, 16));
                    result.append(Character.forDigit(octet & 0xF, 16));
                }
            }
            index += charCount;
        }
        return result.toString();
    }

    /**
     * Replace every <code>%xx</code> escape sequence in the supplied text with the character(s) it represents.
     * <p>
     * A run of consecutive escape sequences is interpreted as UTF-8 octets. If the run is not well-formed UTF-8, each octet
     * is instead mapped to the character with the same numeric value, so that single-octet escapes such as
     * <code>%e9</code> are still understood. An escape character that is not followed by two ASCII hexadecimal digits is
     * copied to the result unchanged, and decoding resumes with the character immediately after it.
     * </p>
     *
     * @param encodedText the text to decode; may be null
     * @return the decoded text, or null if the supplied text was null
     */
    public String decode( String encodedText ) {
        if (encodedText == null) {
            return null;
        }
        if (encodedText.length() == 0) {
            return encodedText;
        }
        final int length = encodedText.length();
        final StringBuilder result = new StringBuilder(length);
        // Every escaped octet consumes three input characters, so this buffer can never overflow
        final byte[] pending = new byte[length / 3];
        int pendingCount = 0;
        int index = 0;
        while (index < length) {
            final char c = encodedText.charAt(index);
            if (c == ESCAPE_CHARACTER && index + 2 < length) {
                final int high = hexValue(encodedText.charAt(index + 1));
                final int low = hexValue(encodedText.charAt(index + 2));
                if (high >= 0 && low >= 0) {
                    // A valid escape sequence; keep collecting octets until the run ends ...
                    pending[pendingCount++] = (byte)(high * 16 + low);
                    index += 3;
                    continue;
                }
            }
            // A literal character (including a stray escape character), so first emit any collected octets ...
            if (pendingCount > 0) {
                appendOctets(result, pending, pendingCount);
                pendingCount = 0;
            }
            result.append(c);
            ++index;
        }
        if (pendingCount > 0) {
            appendOctets(result, pending, pendingCount);
        }
        return result.toString();
    }

    /**
     * Return the numeric value of an ASCII hexadecimal digit. Only '0'-'9', 'a'-'f' and 'A'-'F' are accepted.
     *
     * @param c the character to interpret
     * @return the value between 0 and 15, or -1 if the character is not an ASCII hexadecimal digit
     */
    private static int hexValue( char c ) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Append the characters represented by a run of unescaped octets, preferring a strict UTF-8 interpretation and falling
     * back to one character per octet when the run is not well-formed UTF-8.
     *
     * @param target the builder to append to
     * @param octets the buffer holding the octets
     * @param count the number of octets at the start of the buffer to use
     */
    private static void appendOctets( StringBuilder target,
                                      byte[] octets,
                                      int count ) {
        final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
                                                             .onMalformedInput(CodingErrorAction.REPORT)
                                                             .onUnmappableCharacter(CodingErrorAction.REPORT);
        try {
            target.append(decoder.decode(ByteBuffer.wrap(octets, 0, count)));
        } catch (CharacterCodingException notUtf8) {
            // Not UTF-8, so treat each octet as the character with that value (how single-octet escapes were
            // historically decoded by this class) ...
            for (int i = 0; i < count; ++i) {
                target.append((char)(octets[i] & 0xFF));
            }
        }
    }

    /**
     * Get whether the forward slash is escaped by {@link #encode(String)}.
     *
     * @return true if '/' is escaped when encoding, or false if it is passed through unchanged
     */
    public boolean isSlashEncoded() {
        return this.slashEncoded;
    }

    /**
     * Set whether the forward slash is escaped by {@link #encode(String)}.
     *
     * @param slashEncoded true if '/' should be escaped when encoding, or false if it should be passed through unchanged
     * @return this object, for method chaining
     */
    public UrlEncoder setSlashEncoded( boolean slashEncoded ) {
        this.slashEncoded = slashEncoded;
        return this;
    }
}