// Java tutorial
/* This file is part of Aard Dictionary for Android <http://aarddict.org>.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License <http://www.gnu.org/licenses/gpl-3.0.txt>
 * for more details.
 *
 * Copyright (C) 2010 Igor Tkach
 */

package aarddict;

import static java.lang.String.format;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.WeakHashMap;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;

import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.codehaus.jackson.map.ObjectMapper;

import android.net.Uri;
import android.util.Log;

/**
 * A single dictionary volume file, exposed as a read-only list of index
 * entries.
 *
 * <p>Entries are read lazily from the underlying file on {@link #get(int)}.
 * Two volumes are considered equal when their {@code sha1sum} values are
 * equal.
 */
public final class Volume extends AbstractList<Entry> {

    /** Base class for all problems detected while validating a volume file. */
    static class FormatException extends Exception {
        public FormatException(String detailMessage) {
            super(detailMessage);
        }
    }

    /** Thrown when the header signature is not {@code "aard"}. */
    public final static class InvalidSignatureException extends FormatException {
        public InvalidSignatureException() {
            super("Not a dictionary file");
        }
    }

    /** Thrown when the header version is not {@code 1}. */
    public final static class InvalidFormatVersionException extends FormatException {
        public InvalidFormatVersionException() {
            super("Invalid file format version");
        }
    }

    /** Thrown when the declared metadata length exceeds {@code 1 << 23} bytes. */
    public final static class MetadataTooBigException extends FormatException {
        public MetadataTooBigException() {
            super("Metadata is too big");
        }
    }

    /** Thrown when a header offset points past the end of the file. */
    public final static class CorruptedFileException extends FormatException {
        public CorruptedFileException() {
            super("Corrupted file");
        }
    }

    private final static String TAG = Volume.class.getName();

    final static Charset UTF8 = Charset.forName("utf8");

    /**
     * Number of leading bytes of the file that {@link #verify} skips before
     * computing the SHA-1 digest.
     */
    private static final long VERIFY_SKIP_BYTES = 44;

    public Metadata metadata;
    public Header header;
    RandomAccessFile file;
    String sha1sum;
    private File origFile;
    private String articleURLTemplate;

    static ObjectMapper mapper = new ObjectMapper();

    static {
        // Tolerate metadata written by newer/older versions with extra fields.
        mapper.getDeserializationConfig()
              .set(org.codehaus.jackson.map.DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES,
                   false);
    }

    /**
     * Opens a volume file read-only, validates its header and loads its
     * metadata.
     *
     * @param file      the volume file to open
     * @param cacheDir  directory holding per-dictionary metadata cache files
     *                  (named after the dictionary UUID)
     * @param knownMeta in-memory metadata cache keyed by dictionary UUID;
     *                  updated when metadata is loaded
     * @throws IOException     if the file cannot be read
     * @throws FormatException if the header fails validation
     */
    public Volume(File file, File cacheDir, Map<UUID, Metadata> knownMeta)
            throws IOException, FormatException {
        this.origFile = file;
        RandomAccessFile opened = new RandomAccessFile(file, "r");
        boolean initialized = false;
        try {
            init(opened, cacheDir, knownMeta);
            initialized = true;
        } finally {
            // Do not leak the file handle when validation or metadata loading fails.
            if (!initialized) {
                try {
                    opened.close();
                } catch (IOException e) {
                    Log.e(TAG, format("Failed to close %s after failed init", file.getName()), e);
                }
            }
        }
    }

    /**
     * Reads and validates the header, then resolves metadata from (in order)
     * the in-memory map, the on-disk cache file, or the volume itself.
     */
    private void init(RandomAccessFile file, File cacheDir, Map<UUID, Metadata> knownMeta)
            throws IOException, FormatException {
        this.file = file;
        this.header = new Header(file);
        this.assertFormat();
        this.sha1sum = header.sha1sum;

        if (knownMeta.containsKey(header.uuid)) {
            this.metadata = knownMeta.get(header.uuid);
        } else {
            String uuidStr = header.uuid.toString();
            File metadataCacheFile = new File(cacheDir, uuidStr);

            if (metadataCacheFile.exists()) {
                try {
                    long t0 = System.currentTimeMillis();
                    this.metadata = mapper.readValue(metadataCacheFile, Metadata.class);
                    knownMeta.put(header.uuid, this.metadata);
                    Log.d(TAG, format("Loaded meta for %s from cache in %s",
                            metadataCacheFile.getName(), (System.currentTimeMillis() - t0)));
                } catch (Exception e) {
                    // Best effort: a broken cache file just means we re-read from the volume.
                    Log.e(TAG, format("Failed to restore meta from cache file %s ",
                            metadataCacheFile.getName()), e);
                }
            }

            if (this.metadata == null) {
                long t0 = System.currentTimeMillis();
                byte[] rawMeta = new byte[(int) header.metaLength];
                // NOTE(review): the return value of read() is not checked, so a short
                // read is not detected here -- confirm RandomAccessFile semantics.
                file.read(rawMeta);
                String metadataStr = decompress(rawMeta);
                this.metadata = mapper.readValue(metadataStr, Metadata.class);
                Log.d(TAG, format("Read meta for %s in %s", header.uuid,
                        (System.currentTimeMillis() - t0)));
                knownMeta.put(header.uuid, this.metadata);
                try {
                    mapper.writeValue(metadataCacheFile, this.metadata);
                    Log.d(TAG, format("Wrote metadata to cache file %s",
                            metadataCacheFile.getName()));
                } catch (IOException e) {
                    Log.e(TAG, format("Failed to write metadata to cache file %s",
                            metadataCacheFile.getName()), e);
                }
            }
        }
        initArticleURLTemplate();
    }

    /**
     * Checks signature, format version, offsets and metadata length of the
     * already-read header.
     *
     * @throws FormatException the specific subclass describing which check failed
     * @throws IOException     if the file length cannot be determined
     */
    private void assertFormat() throws FormatException, IOException {
        Log.d(TAG, "Checking signature...");
        if (!this.header.signature.equals("aard")) {
            throw new InvalidSignatureException();
        }
        Log.d(TAG, "Checking format version...");
        if (this.header.version != 1) {
            throw new InvalidFormatVersionException();
        }
        Log.d(TAG, "Checking offsets sanity...");
        long fileSize = file.length();
        if (header.articleOffset > fileSize
                || header.index1Offset > fileSize
                || header.index2Offset > fileSize) {
            throw new CorruptedFileException();
        }
        Log.d(TAG, "Checking metadata length sanity...");
        if (header.metaLength > (1 << 23)) {
            throw new MetadataTooBigException();
        }
        Log.d(TAG, "Sanity check ok");
    }

    /** @return this volume's {@code sha1sum}, used as its identifier */
    public String getId() {
        return sha1sum;
    }

    /** @return the UUID from the volume header */
    public UUID getDictionaryId() {
        return header.uuid;
    }

    /** Hash based solely on {@code sha1sum}; consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return sha1sum == null ? 0 : sha1sum.hashCode();
    }

    /**
     * Two volumes are equal when their {@code sha1sum} values are equal.
     *
     * <p>Deliberately does not delegate to {@link AbstractList#equals(Object)}:
     * that would iterate over every entry of both volumes (reading the whole
     * index from disk) and would disagree with {@link #hashCode()}.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        Volume other = (Volume) obj;
        return sha1sum == null ? other.sha1sum == null : sha1sum.equals(other.sha1sum);
    }

    @Override
    public String toString() {
        return String.format("%s %s/%s(%s)", this.metadata.title, this.header.volume,
                this.header.of, this.sha1sum);
    }

    /**
     * Reads the i-th fixed-size record of the first index.
     *
     * @param i zero-based record number
     * @return the key pointer / article pointer pair stored in that record
     */
    IndexItem readIndexItem(long i) throws IOException {
        Header h = this.header;
        long pos = h.index1Offset + i * h.index1ItemSize;
        RandomAccessFile f = this.file;
        f.seek(pos);
        IndexItem indexItem = new IndexItem();
        indexItem.keyPointer = f.readSpec(h.keyPointerSpec);
        indexItem.articlePointer = f.readSpec(h.articlePointerSpec);
        return indexItem;
    }

    /**
     * Reads a length-prefixed key from the second index.
     *
     * @param pointer offset of the key relative to {@code header.index2Offset}
     */
    String readKey(long pointer) throws IOException {
        Header h = this.header;
        long pos = h.index2Offset + pointer;
        RandomAccessFile f = this.file;
        f.seek(pos);
        int keyLength = (int) f.readSpec(h.keyLengthSpec);
        return f.readUTF8(keyLength);
    }

    Map<Long, Article> articleCache = new WeakHashMap<Long, Article>(20);

    /**
     * Reads, decompresses and deserializes an article, consulting
     * {@link #articleCache} first.
     *
     * @param pointer offset of the article relative to {@code header.articleOffset}
     */
    Article readArticle(long pointer) throws IOException {
        Article a = articleCache.get(pointer);
        if (a != null) {
            return a;
        }
        Header h = this.header;
        long pos = h.articleOffset + pointer;
        RandomAccessFile f = this.file;
        f.seek(pos);
        long articleLength = f.readSpec(h.articleLengthSpec);

        byte[] articleBytes = new byte[(int) articleLength];
        // NOTE(review): short reads are not detected here either -- see init().
        f.read(articleBytes);
        String serializedArticle = decompress(articleBytes);
        a = Article.fromJsonStr(serializedArticle);
        a.dictionaryUUID = h.uuid;
        a.volumeId = h.sha1sum;
        a.pointer = pointer;
        articleCache.put(pointer, a);
        return a;
    }

    static Iterator<Entry> EMPTY_ITERATOR = new ArrayList<Entry>().iterator();

    /**
     * Returns an iterator over consecutive entries that compare equal to the
     * lookup word, starting at the position found by binary search.
     *
     * @param lookupWord word (and section) to look up; an empty lookup word
     *                   yields an empty iterator
     * @param comparator ordering used both for the search and for deciding
     *                   whether an entry still matches
     */
    Iterator<Entry> lookup(final LookupWord lookupWord, final Comparator<Entry> comparator) {
        if (lookupWord.isEmpty()) {
            return EMPTY_ITERATOR;
        }

        final String section = lookupWord.section;
        final Entry lookupEntry = new Entry(this.getId(), lookupWord.word);
        final int initialIndex = binarySearch(this, lookupEntry, comparator);

        Iterator<Entry> iterator = new Iterator<Entry>() {

            int index = initialIndex;
            Entry nextEntry;

            {
                prepareNext();
            }

            // Pre-fetches the next matching entry, or null once the run of matches ends.
            private void prepareNext() {
                if (index < header.indexCount) {
                    Entry matchedEntry = get(index);
                    nextEntry = (0 == comparator.compare(matchedEntry, lookupEntry))
                            ? matchedEntry : null;
                    index++;
                } else {
                    nextEntry = null;
                }
            }

            public boolean hasNext() {
                return nextEntry != null;
            }

            public Entry next() {
                Entry current = nextEntry;
                if (current == null) {
                    // Honour the Iterator contract instead of failing with an NPE below.
                    throw new java.util.NoSuchElementException();
                }
                current.section = section;
                prepareNext();
                return current;
            }

            public void remove() {
                throw new UnsupportedOperationException();
            }
        };

        return iterator;
    }

    /**
     * Builds the article URL for a title by substituting the URI-encoded
     * title for {@code $1} in the article URL template.
     *
     * @return the URL, or {@code null} if no template is available
     */
    public String getArticleURL(String title) {
        String template = getArticleURLTemplate();
        if (template != null) {
            return template.replace("$1", Uri.encode(title));
        }
        return null;
    }

    /** @return the article URL template, or {@code null} if metadata was insufficient */
    public String getArticleURLTemplate() {
        return articleURLTemplate;
    }

    /** Derives the article URL template from the server and article path in metadata. */
    private void initArticleURLTemplate() {
        String[] serverAndArticlePath = getServerAndArticlePath();
        String server = serverAndArticlePath[0];
        String articlePath = serverAndArticlePath[1];
        if (server != null && articlePath != null) {
            if (server.startsWith("//")) {
                // broken server url in metadata, missing schema, assume http
                server = "http:" + server;
            }
            articleURLTemplate = server + articlePath;
        } else {
            Log.d(TAG, "Not enough metadata to generate article url template");
        }
    }

    /**
     * Extracts {@code server} and {@code articlepath} from the
     * {@code general} section of {@code metadata.siteinfo}.
     *
     * @return a two-element array {@code {server, articlePath}}; either
     *         element may be {@code null}
     */
    @SuppressWarnings("unchecked")
    private String[] getServerAndArticlePath() {
        String[] result = new String[] { null, null };
        if (metadata.siteinfo != null) {
            Map<String, Object> general =
                    (Map<String, Object>) this.metadata.siteinfo.get("general");
            if (general != null) {
                Object server = general.get("server");
                Object articlePath = general.get("articlepath");
                if (server != null) {
                    result[0] = server.toString();
                }
                if (articlePath != null) {
                    result[1] = articlePath.toString();
                }
            }
        }
        return result;
    }

    Map<Integer, Entry> entryCache = new WeakHashMap<Integer, Entry>(100);

    /**
     * Returns the entry at the given index position, reading it from the
     * file unless it is present in {@link #entryCache}.
     *
     * @throws RuntimeException wrapping any {@link IOException} from the read
     */
    @Override
    public Entry get(int index) {
        Entry entry = entryCache.get(index);
        if (entry != null) {
            return entry;
        }
        try {
            IndexItem indexItem = readIndexItem(index);
            String title = readKey(indexItem.keyPointer);
            entry = new Entry(this.getId(), title, indexItem.articlePointer);
            entryCache.put(index, entry);
            return entry;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** @return {@code header.indexCount} narrowed to {@code int} */
    @Override
    public int size() {
        return (int) header.indexCount;
    }

    /** Closes the underlying file. */
    public void close() throws IOException {
        file.close();
    }

    /**
     * Decodes bytes as UTF-8.
     *
     * @return the decoded string, or an empty string if the platform reports
     *         UTF-8 as unsupported
     */
    static String utf8(byte[] signature) {
        try {
            return new String(signature, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            Log.e(TAG, "UTF-8 is not supported", e);
            return "";
        }
    }

    /**
     * Decodes a blob by trying zlib first, then bzip2, and finally treating
     * the bytes as plain UTF-8 text.
     */
    static String decompress(byte[] bytes) {
        String type = null;
        long t0 = System.currentTimeMillis();
        try {
            String result = decompressZlib(bytes);
            type = "zlib";
            return result;
        } catch (Exception e1) {
            try {
                String result = decompressBz2(bytes);
                type = "bz2";
                return result;
            } catch (IOException e2) {
                String result = utf8(bytes);
                type = "uncompressed";
                return result;
            }
        } finally {
            Log.d(TAG, "Decompressed " + type + " in " + (System.currentTimeMillis() - t0));
        }
    }

    /**
     * Inflates a zlib stream and decodes the result as UTF-8.
     *
     * @throws DataFormatException if the data is not valid zlib, or ends
     *                             before the stream is complete
     */
    static String decompressZlib(byte[] bytes) throws IOException, DataFormatException {
        Inflater decompressor = new Inflater();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            decompressor.setInput(bytes);
            byte[] buf = new byte[1024];
            while (!decompressor.finished()) {
                int count = decompressor.inflate(buf);
                // inflate() returning 0 without finishing means it can make no further
                // progress (truncated input or preset dictionary required); without
                // this check the loop would never terminate.
                if (count == 0 && !decompressor.finished()
                        && (decompressor.needsInput() || decompressor.needsDictionary())) {
                    throw new DataFormatException("Truncated or unsupported zlib stream");
                }
                out.write(buf, 0, count);
            }
        } finally {
            // Release the Inflater's native resources promptly.
            decompressor.end();
            out.close();
        }
        return utf8(out.toByteArray());
    }

    /**
     * Decompresses a bzip2 stream and decodes the result as UTF-8.
     *
     * @throws IOException if the data is not a valid bzip2 stream
     */
    static String decompressBz2(byte[] bytes) throws IOException {
        BZip2CompressorInputStream in =
                new BZip2CompressorInputStream(new ByteArrayInputStream(bytes));

        int n = 0;
        ByteArrayOutputStream out = new ByteArrayOutputStream(bytes.length * 5);
        byte[] buf = new byte[1024];
        try {
            while (-1 != (n = in.read(buf))) {
                out.write(buf, 0, n);
            }
        } finally {
            in.close();
            out.close();
        }
        return utf8(out.toByteArray());
    }

    /**
     * Builds a {@link UUID} from 16 bytes: the first eight form the most
     * significant bits, the last eight the least significant bits, each in
     * big-endian order.
     */
    static UUID uuid(byte[] data) {
        long msb = 0;
        long lsb = 0;
        assert data.length == 16;
        for (int i = 0; i < 8; i++) {
            msb = (msb << 8) | (data[i] & 0xff);
        }
        for (int i = 8; i < 16; i++) {
            lsb = (lsb << 8) | (data[i] & 0xff);
        }
        return new UUID(msb, lsb);
    }

    /**
     * Finds the lowest index whose element does not compare less than
     * {@code key} (a lower-bound search).
     *
     * @param l   list sorted consistently with {@code c}
     * @param key value to search for
     * @param c   comparator defining the order
     * @return an index in {@code [0, l.size()]}; {@code l.size()} when every
     *         element compares less than {@code key}
     */
    static <T> int binarySearch(List<? extends T> l, T key, Comparator<? super T> c) {
        int lo = 0;
        int hi = l.size();
        while (lo < hi) {
            // Unsigned shift keeps the midpoint correct even if lo + hi overflows int.
            int mid = (lo + hi) >>> 1;
            T midVal = l.get(mid);
            int cmp = c.compare(midVal, key);
            if (cmp < 0) {
                lo = mid + 1;
            } else {
                hi = mid;
            }
        }
        return lo;
    }

    /** Equivalent to {@code getDisplayTitle(true)}. */
    public CharSequence getDisplayTitle() {
        return getDisplayTitle(true);
    }

    /**
     * Builds a human-readable title: the metadata title (or the file name if
     * absent), followed by a language suffix when one is available and,
     * optionally, the volume number for multi-volume dictionaries.
     *
     * @param withVolumeNumber whether to append {@code " Vol. N"} when the
     *                         dictionary has more than one volume
     */
    public CharSequence getDisplayTitle(boolean withVolumeNumber) {
        String title;
        if (this.metadata.title == null) {
            title = this.origFile.getName();
        } else {
            title = this.metadata.title;
        }
        StringBuilder s = new StringBuilder(title);
        // Language suffix preference: lang, then sitelang, then index/article pair.
        if (this.metadata.lang != null) {
            s.append(String.format(" (%s)", this.metadata.lang));
        } else if (this.metadata.sitelang != null) {
            s.append(String.format(" (%s)", this.metadata.sitelang));
        } else if (this.metadata.index_language != null
                && this.metadata.article_language != null) {
            s.append(String.format(" (%s-%s)", this.metadata.index_language,
                    this.metadata.article_language));
        }
        if (this.header.of > 1 && withVolumeNumber) {
            s.append(String.format(" Vol. %s", this.header.volume));
        }
        return s.toString();
    }

    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /** Encodes bytes as a lowercase hexadecimal string, two characters per byte. */
    public static String toHex(byte[] data) {
        char[] chars = new char[data.length * 2];
        for (int i = 0; i < data.length; i++) {
            chars[i * 2] = HEX_DIGITS[(data[i] >> 4) & 0xf];
            chars[i * 2 + 1] = HEX_DIGITS[data[i] & 0xf];
        }
        return new String(chars);
    }

    /**
     * Recomputes the SHA-1 digest of the file contents after the first
     * {@value #VERIFY_SKIP_BYTES} bytes and reports whether it matches
     * {@code sha1sum}.
     *
     * <p>Progress is reported after each chunk as a fraction of the bytes to
     * hash. Hashing stops as soon as {@code updateProgress} returns
     * {@code false}, and {@code verified} is then not called.
     * NOTE(review): this treats a {@code false} return as "cancel" -- confirm
     * against the {@code VerifyProgressListener} contract.
     *
     * @param listener receives progress updates and the final result
     * @throws IOException              if the file cannot be read
     * @throws NoSuchAlgorithmException if SHA-1 is unavailable
     */
    public void verify(VerifyProgressListener listener)
            throws IOException, NoSuchAlgorithmException {
        MessageDigest m = MessageDigest.getInstance("SHA-1");
        boolean proceed = true;
        FileInputStream fis = new FileInputStream(origFile);
        try {
            // skip() may skip fewer bytes than requested; loop until done or stalled.
            long toSkip = VERIFY_SKIP_BYTES;
            while (toSkip > 0) {
                long skipped = fis.skip(toSkip);
                if (skipped <= 0) {
                    break;
                }
                toSkip -= skipped;
            }

            byte[] buff = new byte[1 << 16];
            int readCount;
            long totalReadCount = 0;
            double totalBytes = origFile.length() - VERIFY_SKIP_BYTES;
            while (proceed && (readCount = fis.read(buff)) != -1) {
                m.update(buff, 0, readCount);
                totalReadCount += readCount;
                proceed = listener.updateProgress(this, totalReadCount / totalBytes);
            }
        } finally {
            fis.close();
        }
        if (proceed) {
            String calculated = Volume.toHex(m.digest());
            Log.d(TAG, "calculated: " + calculated + " actual: " + sha1sum);
            listener.verified(this, calculated.equals(this.sha1sum));
        }
    }
}