Java tutorial
/* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Ristretto Mail API. * * The Initial Developers of the Original Code are * Timo Stich and Frederik Dietz. * Portions created by the Initial Developers are Copyright (C) 2004 * All Rights Reserved. * * Contributor(s): * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ package org.minig.imap; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.util.Iterator; import java.util.LinkedList; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.codec.binary.Base64; import org.minig.imap.impl.QuotedPrintable; /** * Implementation of EncodedWord en- and decoding methods. <br> * <b>RFC(s):</b> 2047 * * @author Timo Stich <tstich@users.sourceforge.net> */ public class EncodedWord { /** * QuotedPritntable Encoding. Default. */ public static final int QUOTED_PRINTABLE = 0; /** *Base64 Encoding. Should be used to encode 16bit charsets */ public static final int BASE64 = 1; // finds a encoded word wich if of the form // =?charset?encoding(b/g)?encoded text part?= private static final Pattern encodedWordPattern = Pattern.compile("=\\?([^?]+)\\?([bBqQ])\\?([^?]+)\\?="); // filters whitespaces private static final Pattern spacePattern = Pattern.compile("\\s*"); // tokenizes a string into words private static final Pattern wordTokenizerPattern = Pattern.compile("\\b([^\\s]+[\\s]*)"); private static final Pattern whitespacePattern = Pattern.compile(" "); /** * Decodes a string that contains EncodedWords. * * @param input * a string containing EncodedWords * @return the decoded string */ public static StringBuilder decode(CharSequence input) { StringBuilder result = new StringBuilder(input.length()); int lastMatchEnd = 0; Matcher matcher = encodedWordPattern.matcher(input); Charset charset; char type; String encodedPart; while (matcher.find()) { CharSequence inbetween = input.subSequence(lastMatchEnd, matcher.start()); if (!spacePattern.matcher(inbetween).matches()) { result.append(inbetween); } try { charset = Charset.forName(matcher.group(1)); } catch (Throwable e) { charset = Charset.forName("utf-8"); } type = matcher.group(2).toLowerCase().charAt(0); encodedPart = matcher.group(3); if (type == 'q') { encodedPart = encodedPart.replace('_', ' '); // _ are WS and must be converted before normal decoding result.append(QuotedPrintable.decode(encodedPart, charset)); } else { result.append(charset.decode(ByteBuffer.wrap(Base64.decodeBase64(encodedPart)))); } lastMatchEnd = matcher.end(); } result.append(input.subSequence(lastMatchEnd, input.length())); return result; } /** * Takes a text in form of a CharSequence encoded in the given charset (e.g. * ISO-8859-1) and makes it US-ASCII compatible and RFC822 compatible for * the use as e.g. subject with special characters. <br> * This algorithm tries to achieve several goals when decoding: <li>never * encode a single character but try to encode whole words</li> <li>if two * words must be encoded and there a no more than 3 characters inbetween, * encode everything in one single encoded word</li> <li>an encoded word * must never be longer than 76 characters in total</li> <li>ensure that no * encodedWord is in a line-wrap (RFC822 advices to no have more than 78 * characters in a headerline)</li> * * @param input * the headerline * @param charset * the used charset (e.g. ISO-8859-1) * @param type * the encoding to be used * @return input encoded in EncodedWords */ public static StringBuilder encode(CharSequence input, Charset charset, int type) { StringBuilder result = new StringBuilder(input.length()); LinkedList<int[]> words = new LinkedList<int[]>(); String encodedWordPrototype; int maxLength; if (type == QUOTED_PRINTABLE) { encodedWordPrototype = "=?" + charset.displayName() + "?q?"; maxLength = 75 - encodedWordPrototype.length() - 2; } else { encodedWordPrototype = "=?" + charset.displayName() + "?b?"; maxLength = 75 - encodedWordPrototype.length() - 6; } // First find words which need to be encoded Matcher matcher = wordTokenizerPattern.matcher(input); float encodedChar = type == QUOTED_PRINTABLE ? 3.0f : 4.0f / 3.0f; float normalChar = type == QUOTED_PRINTABLE ? 1.0f : 4.0f / 3.0f; while (matcher.find()) { String word = matcher.group(1); float encodedLength = 0.0f; int start = matcher.start(); int end = matcher.end(); boolean mustEncode = false; for (int i = 0; i < word.length(); i++) { if (word.charAt(i) > 127) { encodedLength += encodedChar; mustEncode = true; } else { encodedLength += normalChar; } // Split if too long if (Math.ceil(encodedLength) > maxLength) { words.add(new int[] { start, start + i, maxLength }); word = word.substring(i); start += i; i = 0; encodedLength = 0.0f; mustEncode = false; } } if (mustEncode) words.add(new int[] { start, end, (int) Math.ceil(encodedLength) }); } // No need to create encodedWords if (words.isEmpty()) { return result.append(input); } // Second group them together if possible (see goals above) int[] last = null; for (int i = 0; i < words.size(); i++) { int[] act = words.get(i); if (last != null && (last[2] + act[2] + (act[0] - last[1]) * normalChar < maxLength) && (act[0] - last[1]) < 10) { words.remove(i--); last[1] = act[1]; last[2] += act[2] + (act[0] - last[1]) * normalChar; } else { last = act; } } // Create encodedWords Iterator<int[]> it = words.iterator(); int lastWordEnd = 0; while (it.hasNext()) { int[] act = it.next(); // create encoded part CharSequence rawWord = input.subSequence(act[0], act[1]); CharSequence encodedPart; if (type == QUOTED_PRINTABLE) { // Replace <space> with _ Matcher wsMatcher = whitespacePattern.matcher(rawWord); rawWord = wsMatcher.replaceAll("_"); encodedPart = QuotedPrintable.encode(rawWord, charset); } else { encodedPart = Base64.encodeBase64String(charset.encode(CharBuffer.wrap(rawWord)).array()); } result.append(input.subSequence(lastWordEnd, act[0])); result.append(encodedWordPrototype); result.append(encodedPart); result.append("?="); lastWordEnd = act[1]; } result.append(input.subSequence(lastWordEnd, input.length())); return result; } /** * Remove any comments as defined in RFC2822 from the String. * * @param value * @return the comment-free String */ public static final String removeComments(String value) { final int PLAIN = 0; final int QUOTED = 1; final int COMMENT = 2; StringBuilder result = new StringBuilder(value.length()); int state = PLAIN; int depth = 0; char current; for (int i = 0; i < value.length(); i++) { current = value.charAt(i); switch (current) { case ('\"'): { if (state == COMMENT) break; if (state == QUOTED) state = PLAIN; else state = QUOTED; result.append(current); break; } case ('('): { if (state == QUOTED) { result.append(current); break; } if (state == COMMENT) { depth++; } else { state = COMMENT; depth = 1; } break; } case (')'): { if (state == QUOTED) { result.append(current); break; } if (state == COMMENT) { depth--; if (depth == 0) state = PLAIN; break; } result.append(current); break; } default: { if (state != COMMENT) result.append(current); } } } return result.toString(); } }