de.berber.kindle.annotator.lib.KindleAnnotationReader.java Source code

Java tutorial

Introduction

Here is the source code for de.berber.kindle.annotator.lib.KindleAnnotationReader.java

Source

/*
 * Copyright 2011, Bernhard J. Berger
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.berber.kindle.annotator.lib;

import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;

import org.apache.commons.configuration.CompositeConfiguration;
import org.apache.log4j.Logger;

/**
 * Reader for Kindle annotation files (PDR extension).
 * <p>
 * The PDR file is located next to the PDF file it belongs to and carries the
 * same name with the extension {@code .pdr}. Calling {@link #read()} parses
 * the bookmarks, markings and comments stored in that file.
 * <p>
 * If the configuration key {@code dumpDebugFile} is set, a debug dump is
 * written to {@code <pdfFile>.log} during the first call of {@link #read()}.
 * 
 * @author Bernhard J. Berger
 */
public class KindleAnnotationReader {
    /**
     * Magic file header value
     */
    private static final int MAGIC_VALUE = 0xDEADCABB;

    /**
     * The log instance
     */
    private final static Logger LOG = Logger.getLogger(KindleAnnotationReader.class);

    /**
     * Source of annotations
     */
    private final File pdrFile;

    /**
     * Raw file stream of the PDR file, only set while {@link #read()} runs.
     */
    private FileInputStream fileStream;

    /**
     * Data view on {@link #fileStream}, only set while {@link #read()} runs.
     */
    private DataInputStream pdrStream;

    /**
     * Output stream if we are in debug mode, {@code null} otherwise (or after
     * it has been closed).
     */
    private OutputStream debugStream = null;

    /**
     * configuration object
     */
    private final CompositeConfiguration cc;

    /**
     * Creates a new annotation reader for PDR files generated by the kindle
     * device.
     * 
     * @param cc
     *            The configuration. It is queried for the boolean key
     *            {@code dumpDebugFile} and handed to every created annotation.
     * @param pdfFile
     *            The pdf file you want to read annotations for.
     */
    public KindleAnnotationReader(final @Nonnull CompositeConfiguration cc, final @Nonnull File pdfFile) {
        assert pdfFile.toString().endsWith(".pdf");

        final String pdfPath = pdfFile.toString();

        // "name.pdf" -> "name.pdr": only the last character differs
        pdrFile = new File(pdfPath.substring(0, pdfPath.length() - 1) + "r");
        this.cc = cc;

        if (!pdrFile.exists()) {
            LOG.error("Cannot find PDR-file for " + pdfFile);
        }

        if (isDebuggingEnabled()) {
            try {
                debugStream = new FileOutputStream(pdfPath + ".log");
            } catch (final FileNotFoundException e) {
                // debugging is best effort, continue without a debug dump
                LOG.warn("Cannot create debug file " + pdfPath + ".log", e);
                debugStream = null;
            }
        }
    }

    /**
     * Checks if debugging is enabled.
     * 
     * @return The value of the configuration key {@code dumpDebugFile},
     *         {@code false} if the key is not set.
     */
    private boolean isDebuggingEnabled() {
        return cc.getBoolean("dumpDebugFile", false);
    }

    /**
     * Reads the pdr file and extracts all annotation information.
     * <p>
     * A missing or unreadable file, as well as a wrong magic header, results
     * in an empty list. If an IO error occurs while parsing, the annotations
     * read up to that point are returned. Comments that are located at the
     * position of a marking are attached to that marking and removed from the
     * list.
     * <p>
     * All streams, including the debug stream, are closed before this method
     * returns.
     * 
     * @return A list of annotations.
     */
    public @Nonnull List<Annotation> read() {
        final List<Annotation> result = new LinkedList<Annotation>();

        try {
            // the existence checks live inside the try block so that the
            // debug stream gets closed on these early exits as well
            if (!pdrFile.exists()) {
                return result;
            }

            if (!pdrFile.canRead()) {
                LOG.error("Cannnot read PDR-file " + pdrFile);
                return result;
            }

            fileStream = new FileInputStream(pdrFile);
            pdrStream = new DataInputStream(fileStream);

            final int magic = readUnsigned32();
            if (magic != MAGIC_VALUE) {
                LOG.error("Magic file header is wrong " + Integer.toHexString(magic));
                return result;
            }

            writeDebug("[Magic String]\n");

            skipBytes(1);
            readUnsigned32(); // last opened page, not used at the moment
            writeDebug("\n[Last opened page]\n");

            readBookmarks(result);

            skipBytes(20); // skipping unknown data

            readMarkings(result);
            readComments(result);

            final int finalEntry = readUnsigned32();

            writeDebug("\n[Final entry " + finalEntry + "]");

            LOG.info("Number of available bytes " + pdrStream.available());
        } catch (FileNotFoundException e) {
            LOG.error("Cannot find pdr-file " + pdrFile, e);
        } catch (IOException e) {
            LOG.error("IO error occured while reading " + pdrFile, e);
        } finally {
            closePdrStream();
            closeDebugStream();
        }

        mergeAnnotations(result);

        return result;
    }

    /**
     * Reads the bookmark section (entry count followed by the entries) and
     * appends a {@link Bookmark} for each entry.
     * 
     * @param result
     *            List the bookmarks are appended to.
     * 
     * @throws IOException If there is an IO error.
     */
    private void readBookmarks(final @Nonnull List<Annotation> result) throws IOException {
        final int numberOfBookmarks = readUnsigned32();
        LOG.info("Number of bookmarks " + numberOfBookmarks);

        for (int i = 0; i < numberOfBookmarks; ++i) {
            skipBytes(1); // skipping unknown data
            final int page = pdrStream.readInt(); // reading page number
            writeDebug(" [page]");
            readPascalString(); // page name
            writeDebug(" [page name]\n");

            result.add(new Bookmark(cc, page));
        }
    }

    /**
     * Reads the marking section (entry count followed by the entries) and
     * appends a {@link Marking} for each entry. Each entry consists of a
     * start and an end position.
     * 
     * @param result
     *            List the markings are appended to.
     * 
     * @throws IOException If there is an IO error.
     */
    private void readMarkings(final @Nonnull List<Annotation> result) throws IOException {
        final int numberOfMarkings = pdrStream.readInt();
        LOG.info("Number of markings " + numberOfMarkings);
        writeDebug("\n[Number of markings " + numberOfMarkings + "]\n");

        for (int i = 0; i < numberOfMarkings; ++i) {
            // read start
            skipBytes(1); // skipping unknown data
            final int page1 = pdrStream.readInt(); // reading page number
            writeDebug(" [page]");
            readPascalString(); // page name
            writeDebug(" [page name]");
            readPascalString(); // skipping pdfloc entry
            writeDebug(" [pdfloc] ");
            // four bytes of unknown meaning, dumped as float
            writeDebug("[" + pdrStream.readFloat() + "]");
            final double x1 = pdrStream.readDouble(); // start x
            final double y1 = pdrStream.readDouble(); // start y
            writeDebug(" [x1]");
            writeDebug(" [y1]");

            // read end
            final int page2 = pdrStream.readInt(); // reading page number
            writeDebug(" [page]");
            readPascalString(); // page name
            writeDebug(" [page name]");
            readPascalString(); // skipping pdfloc entry
            writeDebug(" [pdfloc] ");
            // four bytes of unknown meaning, dumped as float
            writeDebug("[" + pdrStream.readFloat() + "]");
            final double x2 = pdrStream.readDouble(); // end x
            final double y2 = pdrStream.readDouble(); // end y
            writeDebug(" [x2]");
            writeDebug(" [y2] ");
            skipBytes(2); // skipping unknown data
            writeDebug("\n");

            result.add(new Marking(cc, page1, x1, y1, page2, x2, y2));
        }
    }

    /**
     * Reads the comment section (entry count followed by the entries) and
     * appends a {@link Comment} for each entry.
     * 
     * @param result
     *            List the comments are appended to.
     * 
     * @throws IOException If there is an IO error.
     */
    private void readComments(final @Nonnull List<Annotation> result) throws IOException {
        final int numberOfComments = pdrStream.readInt();
        LOG.info("Number of comments " + numberOfComments);
        writeDebug("\n[Number of comments " + numberOfComments + "]\n");

        for (int i = 0; i < numberOfComments; ++i) {
            skipBytes(1); // skipping unknown data
            final int page = pdrStream.readInt(); // reading page number
            writeDebug(" [page]");
            readPascalString(); // page name
            writeDebug(" [page name]");
            final double x = pdrStream.readDouble(); // reading x
            final double y = pdrStream.readDouble(); // reading y
            writeDebug(" [x]");
            writeDebug(" [y]");

            readPascalString(); // skipping pdfloc entry
            writeDebug(" [pdfloc]");
            final String content = readPascalString(); // reading comment
            writeDebug(" [content]\n");

            result.add(new Comment(cc, page, x, y, content));
        }
    }

    /**
     * Merge comments and markings at the same position.
     * <p>
     * A comment matches a marking if it is on the marking's page and its x/y
     * position factors are equal to the marking's right x and upper y
     * position factors. Each comment is attached to at most one marking;
     * attached comments are removed from the list.
     * 
     * @param annotations
     *            List of annotations
     */
    private void mergeAnnotations(final @Nonnull List<Annotation> annotations) {
        final Map<Integer, Map<Double, Map<Double, List<Comment>>>> comments = new HashMap<Integer, Map<Double, Map<Double, List<Comment>>>>();

        // fill the map Map<Page, Map<xPosition, Map<yPosition, List<Comment>>>>
        for (final Annotation annotation : annotations) {
            if (!(annotation instanceof Comment)) {
                continue;
            }

            final Comment comment = (Comment) annotation;

            // get or create page map for comment
            Map<Double, Map<Double, List<Comment>>> pageMap = comments.get(comment.getPage());
            if (pageMap == null) {
                pageMap = new HashMap<Double, Map<Double, List<Comment>>>();
                comments.put(comment.getPage(), pageMap);
            }

            // get or create x position map for comment within page
            Map<Double, List<Comment>> xFactorMap = pageMap.get(comment.getXPositionFactor());
            if (xFactorMap == null) {
                xFactorMap = new HashMap<Double, List<Comment>>();
                pageMap.put(comment.getXPositionFactor(), xFactorMap);
            }

            // get or create y position list for comment within x map
            List<Comment> yFactorList = xFactorMap.get(comment.getYPositionFactor());
            if (yFactorList == null) {
                yFactorList = new LinkedList<Comment>();
                xFactorMap.put(comment.getYPositionFactor(), yFactorList);
            }

            // add the comment
            yFactorList.add(comment);
        }

        // if we merge two annotations we have to kill one. These will be stored
        // temporarily in the killSet.
        final List<Annotation> killSet = new LinkedList<Annotation>();

        // match markings with already collected comments
        for (final Annotation annotation : annotations) {
            if (!(annotation instanceof Marking)) {
                continue;
            }

            final Marking marking = (Marking) annotation;

            final Map<Double, Map<Double, List<Comment>>> pageMap = comments.get(marking.getPage());
            if (pageMap == null) {
                continue; // no comments on this page
            }

            final Map<Double, List<Comment>> xFactorMap = pageMap.get(marking.getRightXPositionFactor());
            if (xFactorMap == null) {
                continue; // no comment at this x position
            }

            final List<Comment> commentList = xFactorMap.get(marking.getUpperYPositionFactor());
            if (commentList == null || commentList.isEmpty()) {
                // no comment at this y position, or all comments at this
                // position have already been attached to other markings
                continue;
            }

            final Comment comment = commentList.remove(0);

            marking.addComment(comment); // merge them
            killSet.add(comment); // remove it later
        }

        // remove all matched comments
        annotations.removeAll(killSet);
    }

    /**
     * Close all PDR related file streams and forget them afterwards.
     */
    private void closePdrStream() {
        try {
            if (pdrStream != null) {
                pdrStream.close();
            }
        } catch (IOException e) {
            LOG.warn("Cannot close pdr stream.", e);
        } finally {
            pdrStream = null;
        }

        try {
            if (fileStream != null) {
                fileStream.close();
            }
        } catch (IOException e) {
            LOG.warn("Cannot close pdr file stream.", e);
        } finally {
            fileStream = null;
        }
    }

    /**
     * Close debug stream if necessary. The stream is forgotten afterwards,
     * so later debug output is silently dropped instead of being written to
     * a closed stream.
     */
    private void closeDebugStream() {
        if (debugStream != null) {
            try {
                debugStream.close();
            } catch (IOException e) {
                LOG.warn("Error while closing debug stream", e);
            } finally {
                debugStream = null;
            }
        }
    }

    /**
     * Writes a message to the debug stream if it exists. Write errors are
     * logged and do not interrupt the caller.
     * 
     * @param message The message to write.
     */
    private void writeDebug(final String message) {
        if (debugStream != null) {
            try {
                debugStream.write(message.getBytes());
            } catch (IOException e) {
                LOG.warn("Error while writing debug log", e);
            }
        }
    }

    /**
     * Read a pascal string from input stream. A pascal stream is a string
     *   length marker (two bytes, read as unsigned value) followed by the
     *   content.
     *   
     * @return A string read from input
     * 
     * @throws IOException If there is an IO error.
     */
    private @Nonnull String readPascalString() throws IOException {
        // a length cannot be negative, therefore read it unsigned. A signed
        // read would fail with a NegativeArraySizeException for long strings.
        final int length = pdrStream.readUnsignedShort();
        final byte[] rawString = new byte[length];

        pdrStream.readFully(rawString);

        // NOTE(review): decodes with the platform default charset. The
        // encoding used by the device is not visible here - confirm and pass
        // an explicit charset.
        return new String(rawString);
    }

    /**
     * Skips n bytes from input and logs them to the debug stream as
     * two-digit hex values separated by blanks.
     * 
     * @param byteCount Number of bytes to skip.
     * 
     * @throws IOException If there is an IO error while reading the input.
     *   Errors while writing the debug dump are only logged.
     */
    private void skipBytes(final @Nonnegative int byteCount) throws IOException {
        // read data to a temporary buffer we will discard
        final byte[] skippedData = new byte[byteCount];
        pdrStream.readFully(skippedData);

        if (debugStream == null) {
            return;
        }

        // if there is a debug stream we will print each bytes hex value
        final StringBuilder hexDump = new StringBuilder(skippedData.length * 3);
        for (int index = 0; index < skippedData.length; ++index) {
            if (index > 0) {
                hexDump.append(' ');
            }

            // mask to avoid the sign extension of negative byte values
            final int value = skippedData[index] & 0xFF;
            if (value < 0x10) {
                hexDump.append('0');
            }
            hexDump.append(Integer.toHexString(value));
        }

        writeDebug(hexDump.toString());
    }

    /**
     * Read a unsigned 32 Bit int.
     * 
     * @return The four bytes read as {@code int}. Values above
     *   {@link Integer#MAX_VALUE} therefore show up as negative numbers.
     * 
     * @throws IOException If there is an IO error.
     */
    private int readUnsigned32() throws IOException {
        return pdrStream.readInt();
    }
}