List of usage examples for java.nio.charset.StandardCharsets.UTF_16LE
Charset UTF_16LE
From source file:Main.java
public static void main(String[] args) throws Exception {
    // Prints the canonical name of the charset: "UTF-16LE"
    System.out.println(StandardCharsets.UTF_16LE.name());
}
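Beyond printing the name, a quick way to see what the UTF_16LE constant actually does is to compare its raw output with the other UTF-16 variants. A minimal sketch (the class name is illustrative and not from any of the listed sources):

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class Utf16LeBytesDemo {
    public static void main(String[] args) {
        String text = "A";

        // Little-endian: low-order byte first, no byte order mark -> [65, 0]
        System.out.println(Arrays.toString(text.getBytes(StandardCharsets.UTF_16LE)));

        // Big-endian: high-order byte first, no byte order mark -> [0, 65]
        System.out.println(Arrays.toString(text.getBytes(StandardCharsets.UTF_16BE)));

        // Plain UTF-16: a big-endian BOM is written first -> [-2, -1, 0, 65]
        System.out.println(Arrays.toString(text.getBytes(StandardCharsets.UTF_16)));
    }
}

Because UTF_16LE never emits a byte order mark, the SonarQube and Tika examples below pair it with explicit BOM or heuristic charset detection.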
From source file:com.microsoft.azure.management.datalake.store.uploader.StringExtensions.java
/**
 * Finds the index in the given buffer of a newline character, either the first or the last (based on the parameters).
 * If a combined newline (\r\n), the index returned is that of the last character in the sequence.
 *
 * @param buffer      The buffer to search in.
 * @param startOffset The index of the first byte to start searching at.
 * @param length      The number of bytes to search, starting from the given startOffset.
 * @param reverse     If true, searches from the startOffset down to the beginning of the buffer. If false, searches upwards.
 * @param encoding    Indicates the type of encoding to use for the buffered bytes.
 * @param delimiter   Optionally indicates the delimiter to consider as the "new line", which MUST BE a single character.
 *                    If null, the default is '\\r', '\\n' and '\\r\\n'.
 * @return The index of the closest newline character in the sequence (based on direction) that was found. Returns -1 if not found.
 */
public static int findNewline(byte[] buffer, int startOffset, int length, boolean reverse, Charset encoding,
        String delimiter) {
    if (buffer.length == 0 || length == 0) {
        return -1;
    }

    // define the bytes per character to use
    int bytesPerChar;
    if (encoding.equals(StandardCharsets.UTF_16) || encoding.equals(StandardCharsets.UTF_16BE)
            || encoding.equals(StandardCharsets.UTF_16LE)) {
        bytesPerChar = 2;
    } else if (encoding.equals(StandardCharsets.US_ASCII) || encoding.equals(StandardCharsets.UTF_8)) {
        bytesPerChar = 1;
    } else {
        throw new IllegalArgumentException(
                "Only the following encodings are allowed: UTF-8, UTF-16, UTF-16BE, UTF-16LE and ASCII");
    }

    if (delimiter != null && !StringUtils.isEmpty(delimiter) && delimiter.length() > 1) {
        throw new IllegalArgumentException(
                "The delimiter must only be a single character or unspecified to represent the CRLF delimiter");
    }

    if (delimiter != null && !StringUtils.isEmpty(delimiter)) {
        // convert the byte array back to a String
        int startOfSegment = reverse ? startOffset - length + 1 : startOffset;
        String bytesToString = new String(buffer, startOfSegment, length, encoding);
        if (!bytesToString.contains(delimiter)) {
            // didn't find the delimiter.
            return -1;
        }

        // the index is returned, which is 0 based, so our loop must include the zero case.
        int numCharsToDelim = reverse ? bytesToString.lastIndexOf(delimiter) : bytesToString.indexOf(delimiter);
        int toReturn = 0;
        for (int i = 0; i <= numCharsToDelim; i++) {
            toReturn += Character.toString(bytesToString.charAt(startOfSegment + i)).getBytes(encoding).length;
        }

        // we get the total number of bytes, but we want to return the index (which starts at 0)
        // so we subtract 1 from the total number of bytes to get the final byte index.
        return toReturn - 1;
    }

    // endOffset is a 'sentinel' value; we use that to figure out when to stop searching
    int endOffset = reverse ? startOffset - length : startOffset + length;

    // if we are starting at the end, we need to move toward the front enough to grab the right number of bytes
    startOffset = reverse ? startOffset - (bytesPerChar - 1) : startOffset;

    if (startOffset < 0 || startOffset >= buffer.length) {
        throw new IndexOutOfBoundsException("Given start offset is outside the bounds of the given buffer. "
                + "In reverse cases, the start offset is modified to ensure we check the full size of the last character");
    }

    // make sure that the length we are traversing is at least as long as a single character
    if (length < bytesPerChar) {
        throw new IllegalArgumentException(
                "length must be at least as long as the length, in bytes, of a single character");
    }

    if (endOffset < -1 || endOffset > buffer.length) {
        throw new IndexOutOfBoundsException(
                "Given combination of startOffset and length would execute the search outside the bounds of the given buffer.");
    }

    int bufferEndOffset = reverse ? startOffset : startOffset + length;
    int result = -1;
    for (int charPos = startOffset; reverse ? charPos != endOffset : charPos + bytesPerChar - 1 < endOffset;
            charPos = reverse ? charPos - 1 : charPos + 1) {
        char c;
        if (bytesPerChar == 1) {
            c = (char) buffer[charPos];
        } else {
            String temp = new String(buffer, charPos, bytesPerChar, encoding);
            if (StringUtils.isEmpty(temp)) {
                continue;
            } else {
                c = temp.toCharArray()[0];
            }
        }

        if (isNewline(c, delimiter)) {
            result = charPos + bytesPerChar - 1;
            break;
        }
    }

    if ((delimiter == null || StringUtils.isEmpty(delimiter)) && !reverse
            && result < bufferEndOffset - bytesPerChar) {
        char c;
        if (bytesPerChar == 1) {
            c = (char) buffer[result + bytesPerChar];
        } else {
            String temp = new String(buffer, result + 1, bytesPerChar, encoding);
            if (StringUtils.isEmpty(temp)) {
                // this can occur if the number of bytes for characters in the string result in an empty string
                // (an invalid code for the given encoding)
                // in this case, that means that we are done for the default delimiter.
                return result;
            } else {
                c = temp.toCharArray()[0];
            }
        }

        if (isNewline(c, delimiter)) {
            // we originally landed on a \r character; if we have a \r\n sequence, advance one position to include that
            result += bytesPerChar;
        }
    }

    return result;
}
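A minimal sketch of calling the method above on a UTF-16LE buffer; it assumes StringExtensions is on the classpath, and the demo class name and sample text are illustrative:

import java.nio.charset.StandardCharsets;

import com.microsoft.azure.management.datalake.store.uploader.StringExtensions;

public class FindNewlineDemo {
    public static void main(String[] args) {
        // Two lines of text encoded as UTF-16LE: two bytes per character, no BOM.
        byte[] buffer = "first line\nsecond line".getBytes(StandardCharsets.UTF_16LE);

        // Search the whole buffer forward, using the default '\r'/'\n'/"\r\n" delimiters.
        int index = StringExtensions.findNewline(buffer, 0, buffer.length, false,
                StandardCharsets.UTF_16LE, null);

        // With this buffer the result is 21: ten 2-byte characters precede the '\n',
        // and the method returns the index of the last byte of the newline character.
        System.out.println(index);
    }
}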
From source file:org.sonar.scanner.scan.filesystem.CharsetValidationTest.java
@Test
public void testWithSourceCode() throws IOException, URISyntaxException {
    Path path = Paths.get(this.getClass().getClassLoader()
            .getResource("mediumtest/xoo/sample/xources/hello/HelloJava.xoo").toURI());
    List<String> lines = Files.readAllLines(path, StandardCharsets.UTF_8);
    String text = lines.stream().collect(StringBuffer::new, StringBuffer::append, StringBuffer::append)
            .toString();

    byte[] utf8 = encode(text, StandardCharsets.UTF_8);
    byte[] utf16be = encode(text, StandardCharsets.UTF_16BE);
    byte[] utf16le = encode(text, StandardCharsets.UTF_16LE);

    assertThat(charsets.isUTF8(utf8, true).charset()).isEqualTo(StandardCharsets.UTF_8);
    assertThat(charsets.isUTF16(utf16be, true).charset()).isEqualTo(StandardCharsets.UTF_16BE);
    assertThat(charsets.isUTF16(utf16le, true).charset()).isEqualTo(StandardCharsets.UTF_16LE);

    assertThat(charsets.isValidUTF16(utf16be, false)).isTrue();
    assertThat(charsets.isValidUTF16(utf16le, true)).isTrue();
}
From source file:com.buildria.mocking.serializer.JacksonJsonSerializer.java
private JsonEncoding mappingFrom(Charset charset) {
    if (StandardCharsets.UTF_8.equals(charset)) {
        return JsonEncoding.UTF8;
    }
    if (StandardCharsets.UTF_16BE.equals(charset)) {
        return JsonEncoding.UTF16_BE;
    }
    if (StandardCharsets.UTF_16LE.equals(charset)) {
        return JsonEncoding.UTF16_LE;
    }
    if (Charset.forName("UTF-32BE").equals(charset)) {
        return JsonEncoding.UTF32_BE;
    }
    if (Charset.forName("UTF-32LE").equals(charset)) {
        return JsonEncoding.UTF32_LE;
    }
    throw new MockingException("No charset found. " + charset.toString());
}
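For context, the resolved JsonEncoding is the kind of argument Jackson's JsonFactory expects when writing JSON to a byte stream. A sketch independent of the serializer above (class and field names are illustrative):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import com.fasterxml.jackson.core.JsonEncoding;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;

public class Utf16LeJsonDemo {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();

        // Ask Jackson to emit the JSON document as UTF-16LE bytes.
        try (JsonGenerator gen = new JsonFactory().createGenerator(out, JsonEncoding.UTF16_LE)) {
            gen.writeStartObject();
            gen.writeStringField("charset", StandardCharsets.UTF_16LE.name());
            gen.writeEndObject();
        }

        // Decoding with the matching charset recovers the textual form.
        System.out.println(new String(out.toByteArray(), StandardCharsets.UTF_16LE));
    }
}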
From source file:org.apache.tika.parser.microsoft.MSOwnerFileParser.java
/**
 * Extracts owner from MS temp file
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();

    byte[] asciiNameBytes = new byte[ASCII_CHUNK_LENGTH];
    IOUtils.readFully(stream, asciiNameBytes);
    int asciiNameLength = (int) asciiNameBytes[0]; // don't need to convert to unsigned int because it can't be that long
    String asciiName = new String(asciiNameBytes, 1, asciiNameLength, StandardCharsets.US_ASCII);
    metadata.set(TikaCoreProperties.MODIFIER, asciiName);

    int unicodeCharLength = stream.read();
    if (unicodeCharLength > 0) {
        stream.read(); // zero after the char length
        byte[] unicodeBytes = new byte[unicodeCharLength * 2];
        IOUtils.readFully(stream, unicodeBytes);
        String unicodeName = new String(unicodeBytes, StandardCharsets.UTF_16LE);
        metadata.set(TikaCoreProperties.MODIFIER, unicodeName);
    }
    xhtml.endDocument();
}
From source file:org.sonar.scanner.scan.filesystem.InputFileBuilderTest.java
@Test
public void should_detect_charset_from_BOM() {
    File basedir = new File("src/test/resources/org/sonar/scanner/scan/filesystem/");
    when(fs.baseDir()).thenReturn(basedir);
    when(fs.encoding()).thenReturn(StandardCharsets.US_ASCII);
    when(langDetection.language(any(InputFile.class))).thenReturn("java");
    InputFileBuilder builder = new InputFileBuilder("moduleKey", new PathResolver(), langDetection,
            statusDetection, fs, new MapSettings(), new FileMetadata());

    assertThat(createAndComplete(builder, new File(basedir, "without_BOM.txt")).charset())
            .isEqualTo(StandardCharsets.US_ASCII);
    assertThat(createAndComplete(builder, new File(basedir, "UTF-8.txt")).charset())
            .isEqualTo(StandardCharsets.UTF_8);
    assertThat(createAndComplete(builder, new File(basedir, "UTF-16BE.txt")).charset())
            .isEqualTo(StandardCharsets.UTF_16BE);
    assertThat(createAndComplete(builder, new File(basedir, "UTF-16LE.txt")).charset())
            .isEqualTo(StandardCharsets.UTF_16LE);
    assertThat(createAndComplete(builder, new File(basedir, "UTF-32BE.txt")).charset())
            .isEqualTo(InputFileBuilder.UTF_32BE);
    assertThat(createAndComplete(builder, new File(basedir, "UTF-32LE.txt")).charset())
            .isEqualTo(InputFileBuilder.UTF_32LE);

    try {
        createAndComplete(builder, new File(basedir, "non_existing"));
        Assert.fail();
    } catch (IllegalStateException e) {
        assertThat(e.getMessage())
                .isEqualTo("Unable to read file " + new File(basedir, "non_existing").getAbsolutePath());
        assertThat(e.getCause()).isInstanceOf(FileNotFoundException.class);
    }
}
From source file:org.sonar.scanner.scan.filesystem.CharsetValidationTest.java
@Test
public void detectUTF16NewLine() throws CharacterCodingException {
    // the first char will be encoded with a null on the second byte, but we should still detect it due to the new line
    String text = "\uA100" + "\uA212" + "\n";
    byte[] utf16be = encode(text, StandardCharsets.UTF_16BE);
    byte[] utf16le = encode(text, StandardCharsets.UTF_16LE);
    byte[] utf8 = encode(text, StandardCharsets.UTF_8);
    byte[] utf32 = encode(text, Charset.forName("UTF-32LE"));
    System.out.println(Arrays.toString(utf32));

    assertThat(charsets.isUTF16(utf16le, true).charset()).isEqualTo(StandardCharsets.UTF_16LE);
    assertThat(charsets.isUTF16(utf16be, true).charset()).isEqualTo(StandardCharsets.UTF_16BE);
    assertThat(charsets.isUTF16(utf8, true).valid()).isEqualTo(Validation.MAYBE);

    // this will have a double null, so it will be yes or no based on failOnNull
    assertThat(charsets.isUTF16(utf32, true).valid()).isEqualTo(Validation.NO);
    assertThat(charsets.isUTF16(utf32, false).valid()).isEqualTo(Validation.YES);
}
From source file:org.apache.nifi.security.util.crypto.HashService.java
/**
 * Returns a {@link List} of supported {@link Charset}s on this platform. This is not a complete
 * list, as only the charsets in {@link StandardCharsets} are returned to be consistent across
 * JVM instances.
 *
 * @return the list of charsets
 */
public static List<Charset> getSupportedCharsets() {
    return Arrays.asList(StandardCharsets.US_ASCII, StandardCharsets.ISO_8859_1, StandardCharsets.UTF_8,
            StandardCharsets.UTF_16BE, StandardCharsets.UTF_16LE, StandardCharsets.UTF_16);
}
From source file:org.sonar.scanner.scan.filesystem.MetadataGeneratorTest.java
@Test
public void should_detect_charset_from_BOM() {
    Path basedir = Paths.get("src/test/resources/org/sonar/scanner/scan/filesystem/");

    assertThat(createInputFileWithMetadata(basedir.resolve("without_BOM.txt")).charset())
            .isEqualTo(StandardCharsets.US_ASCII);
    assertThat(createInputFileWithMetadata(basedir.resolve("UTF-8.txt")).charset())
            .isEqualTo(StandardCharsets.UTF_8);
    assertThat(createInputFileWithMetadata(basedir.resolve("UTF-16BE.txt")).charset())
            .isEqualTo(StandardCharsets.UTF_16BE);
    assertThat(createInputFileWithMetadata(basedir.resolve("UTF-16LE.txt")).charset())
            .isEqualTo(StandardCharsets.UTF_16LE);
    assertThat(createInputFileWithMetadata(basedir.resolve("UTF-32BE.txt")).charset())
            .isEqualTo(MetadataGenerator.UTF_32BE);
    assertThat(createInputFileWithMetadata(basedir.resolve("UTF-32LE.txt")).charset())
            .isEqualTo(MetadataGenerator.UTF_32LE);
}
From source file:org.kitodo.production.ldap.LdapUser.java
/**
 * Configure LdapUser with User data.
 *
 * @param user
 *            User object
 * @param inPassword
 *            String
 * @param inUidNumber
 *            String
 */
public void configure(User user, String inPassword, String inUidNumber)
        throws NamingException, NoSuchAlgorithmException {
    MD4 digester = new MD4();
    if (!user.getLdapGroup().getLdapServer().isReadOnly()) {
        if (Objects.nonNull(user.getLdapLogin())) {
            this.ldapLogin = user.getLdapLogin();
        } else {
            this.ldapLogin = user.getLogin();
        }

        LdapGroup ldapGroup = user.getLdapGroup();
        if (Objects.isNull(ldapGroup.getObjectClasses())) {
            throw new NamingException("no objectclass defined");
        }

        prepareAttributes(ldapGroup, user, inUidNumber);

        /*
         * Samba passwords
         */
        /* LanMgr */
        try {
            this.attributes.put("sambaLMPassword", toHexString(lmHash(inPassword)));
        } catch (InvalidKeyException | NoSuchPaddingException | BadPaddingException | IllegalBlockSizeException
                | RuntimeException e) {
            logger.error(e.getMessage(), e);
        }

        /* NTLM */
        byte[] hmm = digester.digest(inPassword.getBytes(StandardCharsets.UTF_16LE));
        this.attributes.put("sambaNTPassword", toHexString(hmm));

        /*
         * Encryption of password and Base64 encoding
         */
        String passwordEncrytion = ldapGroup.getLdapServer().getPasswordEncryption().getTitle();
        MessageDigest md = MessageDigest.getInstance(passwordEncrytion);
        md.update(inPassword.getBytes(StandardCharsets.UTF_8));
        String encodedDigest = new String(Base64.encodeBase64(md.digest()), StandardCharsets.UTF_8);
        this.attributes.put("userPassword", "{" + passwordEncrytion + "}" + encodedDigest);
    }
}
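The sambaNTPassword step above is a classic use of UTF_16LE: the NT hash is an MD4 digest of the password's little-endian UTF-16 bytes. A standalone sketch of that step, assuming Bouncy Castle's MD4Digest is on the classpath since the JDK ships no MD4 implementation (the demo class name and sample password are illustrative):

import java.nio.charset.StandardCharsets;

import org.bouncycastle.crypto.digests.MD4Digest;

public class NtHashDemo {
    public static void main(String[] args) {
        // NT hash input: the password encoded as UTF-16LE, with no BOM.
        byte[] passwordBytes = "secret".getBytes(StandardCharsets.UTF_16LE);

        // MD4 over those bytes yields the 16-byte NT hash.
        MD4Digest md4 = new MD4Digest();
        md4.update(passwordBytes, 0, passwordBytes.length);
        byte[] hash = new byte[md4.getDigestSize()];
        md4.doFinal(hash, 0);

        // Render the digest as lowercase hex, as LDAP/Samba attributes expect a hex string.
        StringBuilder hex = new StringBuilder();
        for (byte b : hash) {
            hex.append(String.format("%02x", b));
        }
        System.out.println(hex);
    }
}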