// Count the number of bytes included in the given char[].
import java.io.File;
import java.io.FileFilter;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
/**
* Various string manipulation methods that are more efficient than chaining
* string operations: all is done in the same buffer without creating a bunch of
* string objects.
*
* @author <a href="mailto:dev@labs.apache.org">Dungeon Project</a>
*/
public class Main
{
    /** Bits that must all be clear for a char to be encoded on a single byte (value below 0x80). */
    private static final int CHAR_ONE_BYTE_MASK = 0xFFFFFF80;

    /** Bits that must all be clear for a char to be encoded on at most two bytes (value below 0x800). */
    private static final int CHAR_TWO_BYTES_MASK = 0xFFFFF800;


    /**
     * Count the number of bytes needed to encode the given char[] in UTF-8.
     * <p>
     * A well-formed surrogate pair (a high surrogate immediately followed by
     * a low surrogate) represents a single supplementary code point and is
     * counted as 4 bytes. Every other char, including an unpaired surrogate,
     * is counted on its own through {@link #countNbBytesPerChar(char)}.
     *
     * @param chars
     *            The char array to measure, may be <code>null</code>
     * @return The number of bytes needed for the char array, 0 if the array
     *         is <code>null</code> or empty
     */
    public static final int countBytes( char[] chars )
    {
        if ( chars == null )
        {
            return 0;
        }

        int nbBytes = 0;
        int currentPos = 0;

        while ( currentPos < chars.length )
        {
            char car = chars[currentPos];

            // A single char can never need more than 3 bytes, so the 4 bytes
            // case has to be detected by looking at the pair of surrogates.
            boolean isPair = Character.isHighSurrogate( car )
                && ( currentPos + 1 < chars.length )
                && Character.isLowSurrogate( chars[currentPos + 1] );

            if ( isPair )
            {
                // Both UTF-16 chars are consumed at once
                nbBytes += 4;
                currentPos += 2;
            }
            else
            {
                nbBytes += countNbBytesPerChar( car );
                currentPos++;
            }
        }

        return nbBytes;
    }


    /**
     * Return the number of bytes that hold a single UTF-16 char.
     * <p>
     * As a char is only 16 bits wide, the result is always 1, 2 or 3.
     *
     * @param car
     *            The character to be measured
     * @return 1 if the char is below 0x80, 2 if it is below 0x800, 3 otherwise
     */
    public static final int countNbBytesPerChar( char car )
    {
        if ( ( car & CHAR_ONE_BYTE_MASK ) == 0 )
        {
            return 1;
        }

        if ( ( car & CHAR_TWO_BYTES_MASK ) == 0 )
        {
            return 2;
        }

        // Everything else fits in the 16 bits of a char : three bytes
        return 3;
    }
}
// Related examples in the same category