Android Open Source - PdfParse-android PDFDocument






From Project

Back to project page PdfParse-android.

License

The source code is released under:

GNU Lesser General Public License

If you think the Android project PdfParse-android listed in this page is inappropriate, such as containing malicious code/tools or violating the copyright, please email info at java2s dot com, thanks.

Java Source Code

/*
 * Copyright (c) 2013 Anton Golinko
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 */

package org.pdfparse;

import org.pdfparse.cos.*;
import org.pdfparse.exception.EParseError;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;


/**
 * Entry point for reading a PDF (or FDF) document held entirely in memory.
 *
 * <p>A document can be created from a file name, a {@link File} or a byte
 * array. On opening, the header and version are validated, the end-of-file
 * and {@code startxref} markers are located, and the cross-reference data is
 * parsed. While the cross-reference data is being parsed this class receives
 * {@link ParsingEvent} callbacks and records the /Root and /Info references,
 * the encryption flag and the file identifier from the first trailer.
 */
public class PDFDocument implements ParsingEvent {
    private String filename;
    private String filepath;
    private boolean loaded;

    private ParsingContext context;
    private XRef xref;
    private PDFRawData data;


    private COSReference rootID = null;
    private COSReference infoID = null;

    private COSDictionary encryption = null;

    private PDFDocInfo documentInfo = null;
    private PDFDocCatalog documentCatalog = null;
    private byte[][] documentId = {null,null};
    private boolean documentIsEncrypted = false;
    private float documentVersion = 0.0f;

    /**
     * Creates an empty document: allocates the raw data holder, the parsing
     * context and the cross-reference table, and registers the
     * cross-reference table as the context's object cache.
     */
    public PDFDocument() {
        data = new PDFRawData();
        context = new ParsingContext();
        xref = new XRef(data, context);

        context.objectCache = xref;
    }

    /**
     * Releases the document: calls {@code done()} on the cross-reference
     * table and drops the reference to the raw byte buffer.
     */
    public void close() {
        xref.done();
        data.src = null;
        loaded = false;
    }

    /**
     * Opens and parses the PDF file with the given path.
     *
     * @param filename path of the file to load
     * @throws EParseError if the content is not a parseable PDF
     * @throws IOException if the file cannot be read
     */
    public PDFDocument(String filename) throws EParseError, IOException {
        this();
        File file = new File(filename);
        open(file);
    }

    /**
     * Opens and parses the given PDF file.
     *
     * @param file the file to load
     * @throws EParseError if the content is not a parseable PDF
     * @throws IOException if the file cannot be read
     */
    public PDFDocument(File file) throws EParseError, IOException {
        this();
        open(file);
    }

    /**
     * Parses a PDF held in a byte array. The array is used directly (it is
     * not copied), and the file name and path are reported as "internal".
     *
     * @param buffer the complete PDF content
     * @throws EParseError if the content is not a parseable PDF
     */
    public PDFDocument(byte[] buffer) throws EParseError {
        this();

        this.filename = "internal";
        this.filepath = "internal";

        data.src = buffer;
        data.pos = 0;
        data.length = buffer.length;

        open(data);
    }

    /**
     * Loads the whole file into memory and parses it.
     *
     * @param file the file to load
     * @throws EParseError if the content is not a parseable PDF
     * @throws IOException if the file cannot be read completely or is too
     *                     large to fit into a single byte array
     */
    private void open(File file) throws EParseError, IOException {
        this.filename = file.getName();
        this.filepath = file.getParent();

        // A byte[] cannot hold more than Integer.MAX_VALUE bytes; fail loudly
        // instead of letting the (int) cast wrap around.
        long size = file.length();
        if (size > Integer.MAX_VALUE)
            throw new IOException("File is too large to be loaded into memory: " + size + " bytes");

        byte[] buffer = new byte[(int) size];

        FileInputStream fin = new FileInputStream(file);
        try {
            FileChannel channel = fin.getChannel();
            ByteBuffer bb = ByteBuffer.wrap(buffer);

            // A single read() may deliver fewer bytes than requested, so keep
            // reading until the buffer is full.
            while (bb.hasRemaining()) {
                if (channel.read(bb) < 0)
                    throw new IOException("Unexpected end of file while reading " + file);
            }
        } finally {
            // Closing the stream also closes its channel.
            fin.close();
        }

        data.src = buffer;
        data.pos = 0;
        data.length = buffer.length;

        open(data);
    }

    /**
     * Tells whether a PDF or FDF header signature starts at the current
     * position of the given data.
     */
    private static boolean hasHeaderSignature(PDFRawData data) {
        return data.checkSignature(PDFKeywords.PDF_HEADER) || data.checkSignature(PDFKeywords.FDF_HEADER);
    }

    /**
     * Validates the header and version, locates the end-of-file and
     * {@code startxref} markers, then hands the cross-reference offset to
     * the cross-reference parser.
     *
     * @param data the raw document bytes
     * @throws EParseError if the header, version, end-of-file marker,
     *                     {@code startxref} marker or xref offset is missing
     *                     or invalid
     */
    private void open(PDFRawData data) throws EParseError {
        if (data.length < 10) {
            throw new EParseError("This is not a valid PDF file");
        }

        // Check the PDF header & version -----------------------
        data.pos = 0;
        if (!hasHeaderSignature(data)) {
            if (!context.allowScan)
                throw new EParseError("This is not a PDF file");

            // Header is not at offset 0: scan forward, bounded by the
            // configured lookup range and the end of the data.
            while (!hasHeaderSignature(data)
                    && (data.pos < context.headerLookupRange) && (data.pos < data.length)) {
                data.pos++;
            }

            if (!hasHeaderSignature(data))
                throw new EParseError("This is not a PDF file (PDF header not found)");
        }

        if (data.length - data.pos < 10)
            throw new EParseError("This is not a valid PDF file");

        // The byte at offset 5 from the header start is read as the major
        // version digit and the byte at offset 7 as the minor version digit.
        // Only versions 1.1 to 1.8 are accepted.
        byte major = data.src[data.pos + 5];
        byte minor = data.src[data.pos + 7];
        if ((major != '1') || (minor < '1') || (minor > '8')) {
            throw new EParseError("PDF version is not supported");
        }

        documentVersion = (major - '0') + (minor - '0')*0.1f;

        // Scan for EOF -----------------------------------------
        if (data.reverseScan(data.length, PDFKeywords.EOF, context.eofLookupRange) < 0)
            throw new EParseError("Missing end of file marker");

        // Scan for 'startxref' marker --------------------------
        // NOTE(review): relies on reverseScan leaving data.pos at the match - confirm.
        if (data.reverseScan(data.pos, PDFKeywords.STARTXREF, 100) < 0)
            throw new EParseError("Missing 'startxref' marker");

        // Fetch XREF offset ------------------------------------
        // Presumably skips the 9-character keyword plus one delimiter byte.
        data.pos += 10;
        data.skipWS();

        int xref_offset = COSNumber.readInteger(data);

        // The offset must point inside the data; zero and negative values
        // cannot be a valid cross-reference position.
        if ((xref_offset <= 0) || (xref_offset >= data.length)) {
            throw new EParseError("Invalid xref offset");
        }

        data.pos = xref_offset;
        xref.parse(data, this);

        loaded = true;
    }

    /**
     * Sets how parsing errors are handled.
     *
     * @param policy a value between 0 and 3 (inclusive)
     * @throws IllegalArgumentException if the value is outside that range
     */
    public void setErrorHandlingPolicy(int policy) {
        if ((policy < 0) || (policy > 3))
            throw new IllegalArgumentException("Policy should be between 0 and 3");

        context.errorHandlingPolicy = policy;
    }

    /**
     * Returns the current error handling policy.
     *
     * @return the policy stored in the parsing context
     */
    public int getErrorHandlingPolicy() {
        return context.errorHandlingPolicy;
    }

    /**
     * Tell if this document is encrypted or not.
     *
     * @return true If this document is encrypted.
     */
    public boolean isEncrypted() {
        return documentIsEncrypted;
    }

    /**
     * Returns the two-part file identifier read from the trailer.
     *
     * @return a two-element array; both elements are null when no valid
     *         identifier was found. The internal array is returned, not a copy.
     */
    public byte[][] getDocumentId() {
        return documentId;
    }

    /**
     * Get the document info dictionary.  This is guaranteed to not return null.
     *
     * <p>When no /Info reference is known, or when the dictionary cannot be
     * read and the error policy is not
     * {@code ParsingContext.EP_THROW_EXCEPTION}, the result wraps a null
     * dictionary. The result is cached after the first call.
     *
     * @return The documents /Info dictionary
     * @throws EParseError if the dictionary cannot be read and the error
     *                     policy is {@code ParsingContext.EP_THROW_EXCEPTION}
     */
    public PDFDocInfo getDocumentInfo() throws EParseError {
        if (documentInfo != null)
            return documentInfo;

        COSDictionary dictInfo = null;
        // infoID stays null until a trailer supplies an /Info reference;
        // avoid dereferencing it in that case.
        if (infoID != null) {
            try {
                dictInfo = xref.getDictionary(infoID.id, infoID.gen, false);
            } catch (EParseError e) {
                if (context.errorHandlingPolicy == ParsingContext.EP_THROW_EXCEPTION)
                    throw e;
                dictInfo = null;
            }
        }

        documentInfo = new PDFDocInfo(dictInfo, xref);
        return documentInfo;
    }

    /**
     * This will get the document catalog. This is guaranteed to not return null.
     * The result is cached after the first call.
     *
     * @return The documents /Root dictionary
     * @throws EParseError if the /Root dictionary cannot be read
     */
    public PDFDocCatalog getDocumentCatalog() throws EParseError {
        if (documentCatalog == null) {
            COSDictionary dictRoot = xref.getDictionary(rootID, true);
            documentCatalog = new PDFDocCatalog(context, dictRoot);
        }
        return documentCatalog;
    }

    /**
     * Records the /Root and /Info references, the encryption flag and the
     * file identifier from the trailer reported with ordering 0. Trailers
     * reported with any other ordering are ignored.
     *
     * @param trailer  the trailer dictionary that was found
     * @param ordering position of this trailer as reported by the parser
     * @return {@code ParsingEvent.CONTINUE}
     */
    @Override
    public int onTrailerFound(COSDictionary trailer, int ordering) {
        if (ordering == 0) {
            rootID = trailer.getReference(COSName.ROOT);
            infoID = trailer.getReference(COSName.INFO);

            documentIsEncrypted = trailer.containsKey(COSName.ENCRYPT);

            COSArray Ids = trailer.getArray(COSName.ID, null);

            // An encrypted document must carry a two-element identifier.
            if (((Ids == null) || (Ids.size()!=2)) && documentIsEncrypted)
                throw new EParseError("Missing (required) file identifier for encrypted document");

            if (Ids != null) {
                if (Ids.size() != 2) {
                    // Wrong size: fatal only under the strict policy,
                    // otherwise the identifier is simply left unset.
                    if ((context.errorHandlingPolicy == ParsingContext.EP_THROW_EXCEPTION) || documentIsEncrypted)
                        throw new EParseError("Invalid document ID array size (should be 2)");
                } else if ((Ids.get(0) instanceof COSString) && (Ids.get(1) instanceof COSString)) {
                    documentId[0] = ((COSString)Ids.get(0)).getBinaryValue();
                    documentId[1] = ((COSString)Ids.get(1)).getBinaryValue();
                } else if (context.errorHandlingPolicy == ParsingContext.EP_THROW_EXCEPTION) {
                    throw new EParseError("Invalid document ID");
                }
            }
        }
        return ParsingEvent.CONTINUE;
    }

    /**
     * Remembers the encryption dictionary reported with ordering 0.
     *
     * @param enc      the encryption dictionary that was found
     * @param ordering position of this dictionary as reported by the parser
     * @return {@code ParsingEvent.CONTINUE}
     */
    @Override
    public int onEncryptionDictFound(COSDictionary enc, int ordering) {
        if (ordering == 0)
            encryption = enc;
        return ParsingEvent.CONTINUE;
    }

    /**
     * Called when the parser reports an unsupported feature. The message is
     * ignored and parsing is allowed to continue.
     *
     * @param msg description of the unsupported feature
     * @return {@code ParsingEvent.CONTINUE}
     */
    @Override
    public int onNotSupported(String msg) {
        return ParsingEvent.CONTINUE;
    }

    /**
     * Debugging aid: asks the cross-reference table to parse and cache all
     * objects.
     */
    public void dbgDump() {
        xref.parseAndCacheAll();
    }
}




Java Source Code List

org.pdfparse.PDFDefines.java
org.pdfparse.PDFDefines.java
org.pdfparse.PDFDocCatalog.java
org.pdfparse.PDFDocCatalog.java
org.pdfparse.PDFDocInfo.java
org.pdfparse.PDFDocInfo.java
org.pdfparse.PDFDocument.java
org.pdfparse.PDFDocument.java
org.pdfparse.PDFKeywords.java
org.pdfparse.PDFKeywords.java
org.pdfparse.PDFPageNode.java
org.pdfparse.PDFPageNode.java
org.pdfparse.PDFPage.java
org.pdfparse.PDFPage.java
org.pdfparse.PDFRawData.java
org.pdfparse.PDFRawData.java
org.pdfparse.PDFRectangle.java
org.pdfparse.PDFRectangle.java
org.pdfparse.ParsingContext.java
org.pdfparse.ParsingContext.java
org.pdfparse.ParsingEvent.java
org.pdfparse.ParsingEvent.java
org.pdfparse.ParsingGetObject.java
org.pdfparse.ParsingGetObject.java
org.pdfparse.XRefEntry.java
org.pdfparse.XRefEntry.java
org.pdfparse.XRef.java
org.pdfparse.XRef.java
org.pdfparse.cos.COSArray.java
org.pdfparse.cos.COSArray.java
org.pdfparse.cos.COSBool.java
org.pdfparse.cos.COSBool.java
org.pdfparse.cos.COSDictionary.java
org.pdfparse.cos.COSDictionary.java
org.pdfparse.cos.COSName.java
org.pdfparse.cos.COSName.java
org.pdfparse.cos.COSNull.java
org.pdfparse.cos.COSNull.java
org.pdfparse.cos.COSNumber.java
org.pdfparse.cos.COSNumber.java
org.pdfparse.cos.COSObject.java
org.pdfparse.cos.COSObject.java
org.pdfparse.cos.COSReference.java
org.pdfparse.cos.COSReference.java
org.pdfparse.cos.COSStream.java
org.pdfparse.cos.COSStream.java
org.pdfparse.cos.COSString.java
org.pdfparse.cos.COSString.java
org.pdfparse.exception.EDateConvertError.java
org.pdfparse.exception.EDateConvertError.java
org.pdfparse.exception.ENotSupported.java
org.pdfparse.exception.ENotSupported.java
org.pdfparse.exception.EParseError.java
org.pdfparse.exception.EParseError.java
org.pdfparse.filter.LZWDecoder.java
org.pdfparse.filter.LZWDecoder.java
org.pdfparse.filter.StreamDecoder.java
org.pdfparse.filter.StreamDecoder.java
org.pdfparse.filter.TIFFLZWDecoder.java
org.pdfparse.filter.TIFFLZWDecoder.java
org.pdfparse.utils.ByteBuffer.java
org.pdfparse.utils.ByteBuffer.java
org.pdfparse.utils.DateConverter.java
org.pdfparse.utils.DateConverter.java
org.pdfparse.utils.IntHashtable.java
org.pdfparse.utils.IntHashtable.java