// Java tutorial
//package com.java2s;
//* Licensed Materials - Property of IBM, Miracle A/S, and *

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Map;
import java.util.zip.GZIPOutputStream;

/**
 * Static helpers that append strings and byte arrays to a
 * {@link ByteArrayOutputStream} using a compact, tagged encoding.
 *
 * <p>{@link #writeStringSmart} writes one tag byte followed by a tag-specific
 * payload:
 * <ul>
 *   <li>{@code 0} - null string, no payload</li>
 *   <li>{@code 1} - index of the string in the known-strings map</li>
 *   <li>{@code 2} - index of a known prefix, then the remainder as length-prefixed UTF-8</li>
 *   <li>{@code 3} - the whole string as length-prefixed UTF-8</li>
 *   <li>{@code 4} - the whole string as a compressed payload</li>
 *   <li>{@code 5} - index of a known prefix, then the remainder as a compressed payload</li>
 * </ul>
 * A compressed payload is the uncompressed byte count, followed by the
 * length-prefixed GZIP bytes. All lengths and indexes use the variable-width
 * format produced by {@link #writeLength}.
 */
public class Main {
    // Tag bytes written first by writeStringSmart to identify the encoding that follows.
    private static final int STRING_NULL = 0;
    private static final int STRING_FROM_LIST = 1;
    private static final int STRING_PREFIX_FROM_LIST = 2;
    private static final int STRING_FROM_BYTES = 3;
    private static final int STRING_COMPRESSED = 4;
    private static final int STRING_PREFIX_FROM_LIST_COMPRESSED = 5;

    // Largest values representable by the 1-, 2- and 3-byte length encodings.
    private static final int MAX_ONE_BYTE_LENGTH = 127;       // 2^7  - 1
    private static final int MAX_TWO_BYTE_LENGTH = 16383;     // 2^14 - 1
    private static final int MAX_THREE_BYTE_LENGTH = 2097151; // 2^21 - 1

    /**
     * Writes {@code s} using the smallest encoding this class knows, trying in
     * order: null marker, exact match in {@code knownStrings}, longest known
     * prefix plus remainder, whole string. The remainder / whole string is
     * GZIP-compressed only when that makes the output smaller.
     *
     * <p>When the string has to be written in full and uncompressed, a
     * diagnostic line is printed to {@code System.err}.
     *
     * @param baos         stream to append to
     * @param s            string to write; may be {@code null}
     * @param knownStrings strings (and usable prefixes) mapped to the index that
     *                     is written in their place; entries with a null key or
     *                     null value are ignored
     * @throws RuntimeException if an index or length is negative or exceeds
     *                          2^21-1, or if compression fails
     */
    public static void writeStringSmart(ByteArrayOutputStream baos, String s, Map<String, Integer> knownStrings) {
        if (s == null) {
            baos.write(STRING_NULL);
            return;
        }

        // String from list
        Integer exactIndex = knownStrings.get(s);
        if (exactIndex != null) {
            baos.write(STRING_FROM_LIST);
            writeLength(baos, exactIndex.intValue());
            return;
        }

        // String prefix from list: pick the longest key that s starts with.
        int index = -1;
        String bestPrefix = "";
        for (Map.Entry<String, Integer> entry : knownStrings.entrySet()) {
            String candidate = entry.getKey();
            Integer candidateIndex = entry.getValue();
            if (candidate == null || candidateIndex == null) {
                // A null key or value cannot be referenced in the output; skip it
                // instead of failing with a NullPointerException.
                continue;
            }
            // ">=" (not ">") so that an empty-string key still counts as a prefix.
            if (candidate.length() >= bestPrefix.length() && s.startsWith(candidate)) {
                bestPrefix = candidate;
                index = candidateIndex.intValue();
            }
        }

        if (index != -1) {
            byte[] rest = encodeUtf8(s.substring(bestPrefix.length()), "Problem with encoding");
            // Compress once and reuse the result for both the decision and the output.
            byte[] compressedRest = getCompressedData(rest);
            if (compressionPaysOff(rest.length, compressedRest.length)) {
                baos.write(STRING_PREFIX_FROM_LIST_COMPRESSED);
                writeLength(baos, index);
                writeCompressedPayload(baos, rest.length, compressedRest);
            } else {
                baos.write(STRING_PREFIX_FROM_LIST);
                writeLength(baos, index);
                writeData(baos, rest);
            }
            return;
        }

        byte[] raw = encodeUtf8(s, "Problem with encoding");
        byte[] compressed = getCompressedData(raw);
        if (compressionPaysOff(raw.length, compressed.length)) {
            baos.write(STRING_COMPRESSED);
            writeCompressedPayload(baos, raw.length, compressed);
            return;
        }

        // Write out whole string
        baos.write(STRING_FROM_BYTES);
        writeData(baos, raw);
        System.err.println("Unlisted string: " + s);
    }

    /**
     * Writes a non-negative integer in 1 to 3 bytes. The top bits of the first
     * byte select the width:
     * <ul>
     *   <li>{@code 0xxxxxxx} - values up to 127, the value itself</li>
     *   <li>{@code 10xxxxxx} - values up to 16383: low 6 bits, then one byte
     *       holding the remaining high bits</li>
     *   <li>{@code 110xxxxx} - values up to 2097151: low 5 bits, then the next
     *       8 bits, then the remaining high bits</li>
     * </ul>
     *
     * @param baos stream to append to
     * @param len  value to write, in the range 0..2^21-1
     * @throws RuntimeException if {@code len} is outside that range
     */
    public static void writeLength(ByteArrayOutputStream baos, int len) {
        if (len < 0) {
            throw new RuntimeException("Invalid length < 0");
        }
        if (len <= MAX_ONE_BYTE_LENGTH) {
            // MSB = 0
            baos.write(len);
        } else if (len <= MAX_TWO_BYTE_LENGTH) {
            // MSB = 10
            int lowbyte = len % 64;
            int highbyte = len / 64;
            baos.write(lowbyte + 128);
            baos.write(highbyte);
        } else if (len <= MAX_THREE_BYTE_LENGTH) {
            // MSB = 110
            int lowbyte = len % 32;
            int midbyte = (len / 32) % 256;
            int highbyte = len / 32 / 256;
            baos.write(lowbyte + 128 + 64);
            baos.write(midbyte);
            baos.write(highbyte);
        } else {
            throw new RuntimeException("Invalid length > 2^21-1");
        }
    }

    /**
     * Tells whether writing {@code s} compressed takes fewer bytes than writing
     * it as plain length-prefixed UTF-8. The comparison uses the UTF-8 byte
     * count of {@code s}, which is what is actually written, not its char count.
     *
     * @param s string to evaluate; must not be {@code null}
     * @return {@code true} if the compressed form is strictly smaller
     */
    static boolean worthToCompress(String s) {
        byte[] raw = encodeUtf8(s, "Problem with encoding");
        return compressionPaysOff(raw.length, getCompressedData(raw).length);
    }

    /**
     * Writes {@code s} as a compressed payload: UTF-8 byte count, then the
     * length-prefixed GZIP bytes.
     *
     * @param baos stream to append to
     * @param s    string to write; must not be {@code null}
     */
    public static void writeStringCompressed(ByteArrayOutputStream baos, String s) {
        writeCompressedData(baos, encodeUtf8(s, "Cannot write string"));
    }

    /**
     * Writes {@code s} as length-prefixed UTF-8 bytes.
     *
     * @param baos stream to append to
     * @param s    string to write; must not be {@code null}
     */
    public static void writeString(ByteArrayOutputStream baos, String s) {
        writeData(baos, encodeUtf8(s, "Cannot write string"));
    }

    /**
     * GZIP-compresses {@code data} in memory.
     *
     * @param data bytes to compress
     * @return the complete GZIP stream (header, deflated data, trailer)
     * @throws RuntimeException wrapping the {@link IOException} if compression fails
     */
    static byte[] getCompressedData(byte[] data) {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try {
            GZIPOutputStream gzip = new GZIPOutputStream(sink);
            try {
                gzip.write(data);
            } finally {
                // close() finishes the GZIP stream; running it in finally also
                // closes the stream when write() fails.
                gzip.close();
            }
        } catch (IOException e) {
            throw new RuntimeException("Cannot compress data", e);
        }
        return sink.toByteArray();
    }

    /**
     * Returns how many bytes {@link #writeLength} emits for {@code len}.
     *
     * @param len value in the range 0..2^21-1
     * @return 1, 2 or 3
     * @throws RuntimeException if {@code len} is outside that range
     */
    static int sizeOfWriteLength(int len) {
        if (len < 0) {
            throw new RuntimeException("Invalid length < 0");
        }
        if (len <= MAX_ONE_BYTE_LENGTH) {
            return 1;
        } else if (len <= MAX_TWO_BYTE_LENGTH) {
            return 2;
        } else if (len <= MAX_THREE_BYTE_LENGTH) {
            return 3;
        } else {
            throw new RuntimeException("Invalid length > 2^21-1");
        }
    }

    /**
     * Compresses {@code data} and writes it as a compressed payload: the
     * uncompressed length, then the length-prefixed GZIP bytes.
     *
     * @param baos stream to append to
     * @param data uncompressed bytes
     */
    public static void writeCompressedData(ByteArrayOutputStream baos, byte[] data) {
        writeCompressedPayload(baos, data.length, getCompressedData(data));
    }

    /**
     * Writes {@code data} preceded by its length in {@link #writeLength} format.
     *
     * @param baos stream to append to
     * @param data bytes to write
     */
    public static void writeData(ByteArrayOutputStream baos, byte[] data) {
        writeLength(baos, data.length);
        baos.write(data, 0, data.length);
    }

    /** Writes an already-compressed payload: uncompressed length, then length-prefixed GZIP bytes. */
    private static void writeCompressedPayload(ByteArrayOutputStream baos, int rawLength, byte[] compressed) {
        writeLength(baos, rawLength);
        writeData(baos, compressed);
    }

    /**
     * Decides whether the compressed form is strictly smaller than the plain
     * form. Both forms start with the length prefix of the raw bytes, so only
     * the extra prefix of the compressed bytes has to be added to the
     * compressed side.
     */
    private static boolean compressionPaysOff(int rawLength, int compressedLength) {
        return compressedLength + sizeOfWriteLength(compressedLength) < rawLength;
    }

    /** Encodes {@code s} as UTF-8, rethrowing the checked exception unchecked with the given message. */
    private static byte[] encodeUtf8(String s, String failureMessage) {
        try {
            return s.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(failureMessage, e);
        }
    }
}