Java tutorial
/*
 * Copyright 2011, Bernhard J. Berger
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.berber.kindle.annotator.lib;

import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;

import org.apache.commons.configuration.CompositeConfiguration;
import org.apache.log4j.Logger;

/**
 * Reader for Kindle annotation files (PDR extension).
 *
 * <p>
 * The reader keeps its input and debug streams in instance fields while
 * {@link #read()} is running, so a single instance must not be used from
 * several threads at once.
 *
 * @author Bernhard J. Berger
 */
public class KindleAnnotationReader {
    /**
     * Magic file header value
     */
    private static final int MAGIC_VALUE = 0xDEADCABB;

    /**
     * The log instance
     */
    private final static Logger LOG = Logger.getLogger(KindleAnnotationReader.class);

    /**
     * Source of annotations
     */
    private final File pdrFile;

    /**
     * Target of the debug dump (the pdf path with ".log" appended). Only
     * written if debugging is enabled.
     */
    private final File debugFile;

    /**
     * PDR stream
     */
    private FileInputStream fileStream;
    private DataInputStream pdrStream;

    /**
     * Output stream if we are in debug mode. Only non-null while a
     * {@link #read()} call is in progress.
     */
    private OutputStream debugStream = null;

    /**
     * configuration object
     */
    private final CompositeConfiguration cc;

    /**
     * Creates a new annotation reader for PDR files generated by the kindle
     * device.
     *
     * <p>
     * The PDR file name is derived from the pdf file name by replacing its
     * last character with "r" (e.g. "book.pdf" becomes "book.pdr"). A missing
     * PDR file is only logged here; {@link #read()} then returns an empty
     * list.
     *
     * @param cc
     *            The configuration to use; it is also handed to every created
     *            annotation.
     * @param pdfFile
     *            The pdf file you want to read annotations for.
     */
    public KindleAnnotationReader(final @Nonnull CompositeConfiguration cc, final @Nonnull File pdfFile) {
        final String pdfPath = pdfFile.toString();
        assert pdfPath.endsWith(".pdf");

        pdrFile = new File(pdfPath.substring(0, pdfPath.length() - 1) + "r");
        debugFile = new File(pdfPath + ".log");
        this.cc = cc;

        if (!pdrFile.exists()) {
            LOG.error("Cannot find PDR-file for " + pdfFile);
        }
    }

    /**
     * Checks if debugging is enabled.
     *
     * @return The value of the "dumpDebugFile" configuration entry,
     *         <code>false</code> if it is not set.
     */
    private boolean isDebuggingEnabled() {
        return cc.getBoolean("dumpDebugFile", false);
    }

    /**
     * Reads the pdr file and extracts all annotation information.
     *
     * <p>
     * If the file is missing, unreadable or starts with a wrong magic value an
     * empty list is returned. If an IO error occurs while parsing, the
     * annotations read up to that point are returned. Comments located at the
     * position of a marking are merged into that marking.
     *
     * @return A list of annotations.
     */
    public @Nonnull List<Annotation> read() {
        final List<Annotation> result = new LinkedList<Annotation>();

        if (!pdrFile.exists()) {
            return result;
        }

        if (!pdrFile.canRead()) {
            LOG.error("Cannnot read PDR-file " + pdrFile);
            return result;
        }

        try {
            // The debug stream is opened per read() call and closed in the
            // finally block, so it is never leaked and repeated calls work.
            openDebugStream();

            fileStream = new FileInputStream(pdrFile);
            pdrStream = new DataInputStream(fileStream);

            final int magic = readUnsigned32();
            if (magic != MAGIC_VALUE) {
                LOG.error("Magic file header is wrong " + Integer.toHexString(magic));
                return result;
            }
            writeDebug("[Magic String]\n");

            skipBytes(1); // skipping unknown data

            readUnsigned32(); // last opened page, currently unused
            writeDebug("\n[Last opened page]\n");

            readBookmarks(result);

            skipBytes(20); // skipping unknown data

            readMarkings(result);
            readComments(result);

            final int finalEntry = readUnsigned32();
            writeDebug("\n[Final entry " + finalEntry + "]");

            LOG.info("Number of available bytes " + pdrStream.available());
        } catch (FileNotFoundException e) {
            LOG.error("Cannot find pdr-file " + pdrFile, e);
        } catch (IOException e) {
            LOG.error("IO error occured while reading " + pdrFile, e);
        } finally {
            closePdrStream();
            closeDebugStream();
        }

        mergeAnnotations(result);

        return result;
    }

    /**
     * Reads the bookmark section (entry count followed by the entries) and
     * appends a {@link Bookmark} for every entry.
     *
     * @param result
     *            List the bookmarks are appended to.
     *
     * @throws IOException
     *             If there is an IO error.
     */
    private void readBookmarks(final @Nonnull List<Annotation> result) throws IOException {
        final int numberOfBookmarks = readUnsigned32();
        LOG.info("Number of bookmarks " + numberOfBookmarks);

        for (int i = 0; i < numberOfBookmarks; ++i) {
            skipBytes(1); // skipping unknown data

            final int page = pdrStream.readInt(); // reading page number
            writeDebug(" [page]");

            readPascalString(); // page name
            writeDebug(" [page name]\n");

            result.add(new Bookmark(cc, page));
        }
    }

    /**
     * Reads the marking section (entry count followed by the entries) and
     * appends a {@link Marking} for every entry. Each entry consists of a
     * start and an end position.
     *
     * @param result
     *            List the markings are appended to.
     *
     * @throws IOException
     *             If there is an IO error.
     */
    private void readMarkings(final @Nonnull List<Annotation> result) throws IOException {
        final int numberOfMarkings = pdrStream.readInt();
        LOG.info("Number of markings " + numberOfMarkings);
        writeDebug("\n[Number of markings " + numberOfMarkings + "]\n");

        for (int i = 0; i < numberOfMarkings; ++i) {
            // read start
            skipBytes(1); // skipping unknown data

            final int page1 = pdrStream.readInt(); // reading page number
            writeDebug(" [page]");

            readPascalString(); // page name
            writeDebug(" [page name]");

            readPascalString(); // skipping pdfloc entry
            writeDebug(" [pdfloc] ");

            // unknown 4 byte value, only dumped to the debug log
            final float startUnknown = pdrStream.readFloat();
            writeDebug("[" + startUnknown + "]");

            final double x1 = pdrStream.readDouble(); // start x
            final double y1 = pdrStream.readDouble(); // start y
            writeDebug(" [x1]");
            writeDebug(" [y1]");

            // read end
            final int page2 = pdrStream.readInt(); // reading page number
            writeDebug(" [page]");

            readPascalString(); // page name
            writeDebug(" [page name]");

            readPascalString(); // skipping pdfloc entry
            writeDebug(" [pdfloc] ");

            // unknown 4 byte value, only dumped to the debug log
            final float endUnknown = pdrStream.readFloat();
            writeDebug("[" + endUnknown + "]");

            final double x2 = pdrStream.readDouble(); // end x
            final double y2 = pdrStream.readDouble(); // end y
            writeDebug(" [x2]");
            writeDebug(" [y2] ");

            skipBytes(2); // skipping unknown data
            writeDebug("\n");

            result.add(new Marking(cc, page1, x1, y1, page2, x2, y2));
        }
    }

    /**
     * Reads the comment section (entry count followed by the entries) and
     * appends a {@link Comment} for every entry.
     *
     * @param result
     *            List the comments are appended to.
     *
     * @throws IOException
     *             If there is an IO error.
     */
    private void readComments(final @Nonnull List<Annotation> result) throws IOException {
        final int numberOfComments = pdrStream.readInt();
        LOG.info("Number of comments " + numberOfComments);
        writeDebug("\n[Number of comments " + numberOfComments + "]\n");

        for (int i = 0; i < numberOfComments; ++i) {
            skipBytes(1); // skipping unknown data

            final int page = pdrStream.readInt(); // reading page number
            writeDebug(" [page]");

            readPascalString(); // page name
            writeDebug(" [page name]");

            final double x = pdrStream.readDouble(); // reading x
            final double y = pdrStream.readDouble(); // reading y
            writeDebug(" [x]");
            writeDebug(" [y]");

            readPascalString(); // skipping pdfloc entry
            writeDebug(" [pdfloc]");

            final String content = readPascalString(); // reading comment
            writeDebug(" [content]\n");

            result.add(new Comment(cc, page, x, y, content));
        }
    }

    /**
     * Merge comments and markings at the same position.
     *
     * <p>
     * A comment is attached to a marking if it is on the marking's page and
     * its x/y position factors equal the marking's right x and upper y
     * position factors. Every comment is merged into at most one marking and
     * is removed from the list afterwards.
     *
     * @param annotations
     *            List of annotations
     */
    private void mergeAnnotations(final @Nonnull List<Annotation> annotations) {
        // Map<Page, Map<xPosition, Map<yPosition, List<Comment>>>>
        final Map<Integer, Map<Double, Map<Double, List<Comment>>>> comments =
                new HashMap<Integer, Map<Double, Map<Double, List<Comment>>>>();

        // fill the map
        for (final Annotation annotation : annotations) {
            if (!(annotation instanceof Comment)) {
                continue;
            }

            final Comment comment = (Comment) annotation;

            // get or create page map for comment
            Map<Double, Map<Double, List<Comment>>> pageMap = comments.get(comment.getPage());
            if (pageMap == null) {
                pageMap = new HashMap<Double, Map<Double, List<Comment>>>();
                comments.put(comment.getPage(), pageMap);
            }

            // get or create x position map for comment within page
            Map<Double, List<Comment>> xFactorMap = pageMap.get(comment.getXPositionFactor());
            if (xFactorMap == null) {
                xFactorMap = new HashMap<Double, List<Comment>>();
                pageMap.put(comment.getXPositionFactor(), xFactorMap);
            }

            // get or create y position list for comment within x map
            List<Comment> yFactorList = xFactorMap.get(comment.getYPositionFactor());
            if (yFactorList == null) {
                yFactorList = new LinkedList<Comment>();
                xFactorMap.put(comment.getYPositionFactor(), yFactorList);
            }

            // add the comment
            yFactorList.add(comment);
        }

        // if we merge two annotations we have to kill one. These will be stored
        // temporarily in the killSet.
        final List<Annotation> killSet = new LinkedList<Annotation>();

        // match markings with already collected comments
        for (final Annotation annotation : annotations) {
            if (!(annotation instanceof Marking)) {
                continue;
            }

            final Marking marking = (Marking) annotation;
            final List<Comment> candidates = findComments(comments, marking);

            // The list may be empty if an earlier marking at the same position
            // already consumed the last comment.
            if (candidates == null || candidates.isEmpty()) {
                continue; // no matching element found
            }

            final Comment comment = candidates.remove(0);
            marking.addComment(comment); // merge them
            killSet.add(comment); // remove it later
        }

        // remove all matched comments
        annotations.removeAll(killSet);
    }

    /**
     * Looks up the comments collected for the position of a marking.
     *
     * @param comments
     *            Comments indexed by page, x position factor and y position
     *            factor.
     * @param marking
     *            The marking to find comments for.
     *
     * @return The (possibly empty) list of comments at the marking's position
     *         or <code>null</code> if there is no entry.
     */
    private List<Comment> findComments(
            final @Nonnull Map<Integer, Map<Double, Map<Double, List<Comment>>>> comments,
            final @Nonnull Marking marking) {
        final Map<Double, Map<Double, List<Comment>>> pageMap = comments.get(marking.getPage());
        if (pageMap == null) {
            return null;
        }

        final Map<Double, List<Comment>> xFactorMap = pageMap.get(marking.getRightXPositionFactor());
        if (xFactorMap == null) {
            return null;
        }

        return xFactorMap.get(marking.getUpperYPositionFactor());
    }

    /**
     * Close all PDR related file streams
     */
    private void closePdrStream() {
        try {
            if (pdrStream != null) {
                pdrStream.close();
            }
        } catch (IOException e) {
            LOG.warn("Cannot close pdr stream.", e);
        } finally {
            pdrStream = null;
        }

        try {
            if (fileStream != null) {
                fileStream.close();
            }
        } catch (IOException e) {
            LOG.warn("Cannot close pdr file stream.", e);
        } finally {
            fileStream = null;
        }
    }

    /**
     * Opens the debug stream if debugging is enabled. Failing to open it is
     * not fatal, reading continues without a debug dump.
     */
    private void openDebugStream() {
        if (!isDebuggingEnabled()) {
            return;
        }

        try {
            debugStream = new FileOutputStream(debugFile);
        } catch (final FileNotFoundException e) {
            LOG.warn("Cannot open debug log " + debugFile, e);
            debugStream = null;
        }
    }

    /**
     * Close debug stream if necessary.
     */
    private void closeDebugStream() {
        if (debugStream != null) {
            try {
                debugStream.close();
            } catch (IOException e) {
                LOG.warn("Error while closing debug stream", e);
            } finally {
                // never keep a reference to a closed stream
                debugStream = null;
            }
        }
    }

    /**
     * Writes a message to the debug stream if it exists. Write errors are
     * logged and otherwise ignored.
     *
     * @param message The message to write.
     */
    private void writeDebug(final String message) {
        if (debugStream != null) {
            try {
                debugStream.write(message.getBytes());
            } catch (IOException e) {
                LOG.warn("Error while writing debug log", e);
            }
        }
    }

    /**
     * Read a pascal string from input stream. A pascal stream is a string
     * length marker followed by the content.
     *
     * <p>
     * The length marker is two bytes wide and is interpreted as an unsigned
     * value, so contents of up to 65535 bytes are supported.
     *
     * @return A string read from input
     *
     * @throws IOException If there is an IO error.
     */
    private @Nonnull String readPascalString() throws IOException {
        final int length = pdrStream.readUnsignedShort();

        final byte[] rawString = new byte[length];
        pdrStream.readFully(rawString);

        // NOTE(review): decodes with the platform default charset; the real
        // encoding of PDR strings is not visible here - confirm and pin it.
        return new String(rawString);
    }

    /**
     * Skips n bytes from input and logs them to the debug stream.
     *
     * <p>
     * Errors while writing the debug dump are logged but do not abort reading.
     *
     * @param byteCount Number of bytes to skip.
     *
     * @throws IOException If there is an IO error while reading the input.
     */
    private void skipBytes(final @Nonnegative int byteCount) throws IOException {
        // read data to a temporary buffer we will discard
        final byte[] skippedData = new byte[byteCount];
        pdrStream.readFully(skippedData);

        if (debugStream == null) {
            return;
        }

        // dump each byte as two lower case hex digits, separated by blanks
        final StringBuilder hexDump = new StringBuilder(skippedData.length * 3);
        for (int index = 0; index < skippedData.length; ++index) {
            if (index > 0) {
                hexDump.append(' ');
            }

            final int unsignedByte = skippedData[index] & 0xFF;
            if (unsignedByte < 0x10) {
                hexDump.append('0');
            }
            hexDump.append(Integer.toHexString(unsignedByte));
        }

        writeDebug(hexDump.toString());
    }

    /**
     * Read a 32 Bit int.
     *
     * <p>
     * Despite its name the value is returned as a plain (signed)
     * <code>int</code>, so values above {@link Integer#MAX_VALUE} show up as
     * negative numbers.
     *
     * @return The next four bytes of the input as int.
     *
     * @throws IOException If there is an IO error.
     */
    private int readUnsigned32() throws IOException {
        return pdrStream.readInt();
    }
}