Java InputStream Read by Charset fromUnicode(String charset, String input)

Here you can find the source of fromUnicode(String charset, String input)

Description

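Replaces \uXXXX escape sequences (2 to 4 hex digits) in the input with the characters they denote, optionally checking that the given charset can encode each decoded character.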

License

Open Source License

Parameter

Parameter Description
charset may be null. If null, no checks for the supported encoding would be performed
input non null

Exception

Parameter Description
UnsupportedOperationException if given charset does not support characters from given text

Declaration

public static String fromUnicode(String charset, String input) throws UnsupportedOperationException 

Method Source Code

//package com.java2s;
/**
 * *****************************************************************************
 * Copyright (c) 2009 Andrey Loskutov. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse
 * Public License v1.0 which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html Contributor: Andrey Loskutov -
 * initial API and implementation
 * *****************************************************************************
 */

import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility for turning textual "backslash-u" escape sequences into the
 * characters they denote, with an optional check that a target charset is able
 * to encode the decoded characters.
 */
public class Main {
    /** A literal backslash, the letter 'u' and two to four hexadecimal digits. */
    private static final Pattern UNICODE_PATTERN = Pattern.compile("\\\\u[0-9a-fA-F]{2,4}");

    /**
     * Replaces every backslash-u escape sequence (two to four hex digits) in
     * {@code input} with the UTF-16 code unit it denotes. All other text is
     * copied unchanged.
     * <p>
     * When a charset is given, every decoded character is checked against it.
     * An escaped high surrogate that is immediately followed by an escaped low
     * surrogate is checked together with it as one supplementary character,
     * because a lone surrogate can never be encoded by any charset.
     *
     * @param charset may be null. If null, no checks for the supported encoding
     * would be performed
     * @param input non null
     * @return the input with all escape sequences replaced by their characters
     * @throws UnsupportedOperationException if given charset does not support
     * characters from given text, or if the charset name is illegal or unknown
     * (only detected when the input contains at least one escape sequence)
     */
    public static String fromUnicode(String charset, String input) throws UnsupportedOperationException {
        // StringBuffer (not StringBuilder): the StringBuilder overload of
        // Matcher.appendReplacement exists only since Java 9.
        StringBuffer ret = new StringBuffer();
        Matcher matcher = UNICODE_PATTERN.matcher(input);
        // Start offset of a low-surrogate escape that was already validated as
        // the second half of a pair; -1 while there is none.
        int validatedLowStart = -1;
        while (matcher.find()) {
            // The pattern guarantees 2-4 hex digits, so parseInt cannot fail
            // and the value always fits into a char.
            String uniValue = matcher.group().substring(2);
            char decoded = (char) Integer.parseInt(uniValue, 16);
            if (charset != null && matcher.start() != validatedLowStart) {
                String toCheck = String.valueOf(decoded);
                String reported = uniValue;
                if (Character.isHighSurrogate(decoded)) {
                    String lowValue = adjacentLowSurrogate(input, matcher.end());
                    if (lowValue != null) {
                        // Validate the pair as a whole and name both escapes
                        // in a possible error message.
                        char low = (char) Integer.parseInt(lowValue, 16);
                        toCheck = new String(new char[] { decoded, low });
                        reported = uniValue + "\\u" + lowValue;
                        validatedLowStart = matcher.end();
                    }
                }
                String error = canEncode(charset, toCheck, reported);
                if (error != null) {
                    throw new UnsupportedOperationException(error);
                }
            }
            // Quote the replacement: the decoded character may itself be a
            // backslash or a dollar sign, which appendReplacement would
            // otherwise interpret.
            matcher.appendReplacement(ret, Matcher.quoteReplacement(String.valueOf(decoded)));
        }
        matcher.appendTail(ret);
        return ret.toString();
    }

    /**
     * Checks whether the named charset can encode the given text.
     *
     * @param charset non null
     * @param text non null
     * @param unicodeValue hex digits of the escape the text was decoded from;
     * only used to build the error message
     * @return null if text could be encoded, error message otherwise (also
     * when the charset name is illegal or the charset is not supported)
     */
    public static String canEncode(String charset, CharSequence text, String unicodeValue) {
        Charset cs;
        try {
            cs = Charset.forName(charset);
        } catch (IllegalCharsetNameException e) {
            return "Charset name '" + charset + "' is illegal.";
        } catch (UnsupportedCharsetException e) {
            return "Charset '" + charset + "' is not supported.";
        }
        // Charset.canEncode() guards newEncoder(), which throws for
        // decode-only charsets.
        if (cs.canEncode() && cs.newEncoder().canEncode(text)) {
            return null;
        }
        return "Charset '" + charset + "' does not support encoding for \\u" + unicodeValue + ".";
    }

    /**
     * Looks for an escape sequence that starts exactly at {@code offset} and
     * denotes a low surrogate.
     *
     * @return the hex digits of that escape, or null if there is none
     */
    private static String adjacentLowSurrogate(String input, int offset) {
        Matcher lookahead = UNICODE_PATTERN.matcher(input).region(offset, input.length());
        if (!lookahead.lookingAt()) {
            return null;
        }
        String hex = lookahead.group().substring(2);
        return Character.isLowSurrogate((char) Integer.parseInt(hex, 16)) ? hex : null;
    }
}

Related

  1. convertToCharacterSet(byte[] input, Charset fromCharset, Charset toCharSet)
  2. copyToString(InputStream in, Charset charset)
  3. createInput(String s, String charsetName)
  4. createZipInputStream(InputStream inStream, Charset charset)
  5. forceEncoding(String inputString, String targetCharset)
  6. getContentFromInputStream(InputStream in, String charset)
  7. getHeaderLen(FileInputStream in, Charset encoding)
  8. getInputCharset()
  9. getStreamAsString(InputStream source, Charset charset)