Java tutorial
/* * Copyright (C) 2011 Laurent Caillette * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.novelang.build.unicode; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.nio.charset.Charset; import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Set; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.commons.lang.ClassUtils; import org.apache.commons.lang.SystemUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.novelang.build.CodeGenerationTools; /** * Generates a file containing every Unicode name. * <ul> * <li>First 4 bytes: n, the number of character in the offset table. * <li>n * 4 bytes: the offsets of the names (from the start of the file). * Offsets are 32-bit, unsigned ints. * <li>8-bit characters for names, zero-terminated. * </ul> * * @author Laurent Caillette */ public class UnicodeNamesGenerator { private static final Logger LOGGER = LoggerFactory.getLogger(UnicodeNamesGenerator.class); private final File targetFile; private static final int UNSIGNED_MAX_16BIT = 256 * 256; public UnicodeNamesGenerator(final String packageName, final String namesFile, final File targetDirectory) throws IOException { this.targetFile = CodeGenerationTools.resolveTargetFile(targetDirectory, packageName, namesFile); if (targetFile.getParentFile().mkdirs()) { LOGGER.info("Created '" + targetDirectory.getAbsolutePath() + "'"); } } public void generate() throws IOException { LOGGER.info("About to generate into '" + targetFile.getAbsolutePath() + "'..."); if (targetFile.exists()) { if (targetFile.delete()) { LOGGER.info("Deleted '" + targetFile.getAbsolutePath() + "'"); } } if (!targetFile.createNewFile()) { throw new IOException("Could not create '" + targetFile.getAbsolutePath() + "'"); } LOGGER.info("Loading names..."); final Map<Character, String> characters = new UnicodeNamesTextReader().loadNames(); final OutputStream outputStream = new FileOutputStream(targetFile); LOGGER.info("Generating indexed file..."); generate(new BufferedOutputStream(outputStream, 640 * 1024), characters); outputStream.close(); } /** * Generates the offset table and the names. * * @param outputStream not flushed. * @param characterNames a Map with characters having contiguous codes that start by 0. */ public static void generate(final OutputStream outputStream, final Map<Character, String> characterNames) throws IOException { final Set<Character> characters = characterNames.keySet(); final List<Character> characterList = Lists.newArrayList(characters); Collections.sort(characterList, CHARACTER_COMPARATOR /* Needed? */ ); final int lastCharacterIndex = characterList.get(characterList.size() - 1); generate(outputStream, characterNames, lastCharacterIndex + 1); } /** * Generates the offset table and the names. * * @param outputStream not flushed. * @param characterNames a Map with characters having contiguous codes that start by 0. */ public static void generate(final OutputStream outputStream, final Map<Character, String> characterNames, final int totalCharacterCount) throws IOException { Preconditions.checkArgument(totalCharacterCount <= UNSIGNED_MAX_16BIT); final Map<Integer, Integer> offsetsFromFirstName = Maps.newHashMapWithExpectedSize(totalCharacterCount); final Map<Character, byte[]> characterNamesAsBytes = calculateCharacterNamesAsBytes(characterNames); // Find the offset of the name of each character. int writePositionFromFirstName = 0; int characterCount = 0; for (int characterIndex = 0; characterIndex < totalCharacterCount; characterIndex++) { final Character character = (char) characterIndex; if (characterNames.containsKey(character)) { offsetsFromFirstName.put(characterIndex, writePositionFromFirstName); writePositionFromFirstName += characterNamesAsBytes.get(character).length + // Real length. 1 // Terminal zero. ; characterCount++; } else { offsetsFromFirstName.put(characterIndex, null); } } LOGGER.debug("Found " + characterCount + " characters."); // Write character count. outputStream.write(asBytes(totalCharacterCount)); // Write offsets. final int offsetTableSize = totalCharacterCount * 4; for (int characterIndex = 0; characterIndex < totalCharacterCount; characterIndex++) { final byte[] bytes; final Integer value = offsetsFromFirstName.get(characterIndex); if (value == null) { bytes = ZERO_OFFSET; } else { bytes = asBytes(4 + offsetTableSize + value); } outputStream.write(bytes); } // Write names. for (int characterIndex = 0; characterIndex < totalCharacterCount; characterIndex++) { final byte[] nameBytes = characterNamesAsBytes.get((char) characterIndex); if (nameBytes != null) { outputStream.write(nameBytes); outputStream.write(TERMINAL_ZERO); } } outputStream.flush(); LOGGER.debug("Generation complete."); } /** * Getting bytes only once speeds generation up a lot. */ private static Map<Character, byte[]> calculateCharacterNamesAsBytes( final Map<Character, String> characterNames) { final Map<Character, byte[]> map = Maps.newHashMapWithExpectedSize(characterNames.size()); for (final Map.Entry<Character, String> entry : characterNames.entrySet()) { map.put(entry.getKey(), entry.getValue().replace(' ', '_').getBytes(CHARSET)); } return map; } private static final Charset CHARSET = Charset.forName("UTF-8"); private static final byte[] TERMINAL_ZERO = { 0 }; private static final byte[] ZERO_OFFSET = { 0, 0, 0, 0 }; private static final Comparator<Character> CHARACTER_COMPARATOR = new Comparator<Character>() { @Override public int compare(final Character c1, final Character c2) { return ((int) c1.charValue()) - ((int) c2.charValue()); } }; /*package*/ static byte[] asBytes(final int i) { final byte[] bytes = new byte[4]; bytes[0] = (byte) (i >>> 24); bytes[1] = (byte) (i >>> 16); bytes[2] = (byte) (i >>> 8); bytes[3] = (byte) (i & 0x000000FF); return bytes; } // ============================================== // Main, supports no arg for interactive testing. // ============================================== public static void main(final String[] args) throws IOException { final File targetDirectory; if (args.length == 0) { final File projectDirectory = SystemUtils.USER_DIR.endsWith("idea") ? new File(SystemUtils.USER_DIR).getParentFile() : new File(SystemUtils.USER_DIR); targetDirectory = new File(projectDirectory, "idea/generated/antlr"); } else if (args.length == 1) { targetDirectory = new File(args[0]); } else { throw new IllegalArgumentException( "Usage: " + ClassUtils.getShortClassName(UnicodeNamesGenerator.class) + "[target-directory]"); } new UnicodeNamesGenerator("org.novelang.parser.unicode", "names.bin", targetDirectory).generate(); } }