org.apache.pdfbox.pdmodel.PDDocument.java Source code

Introduction

Here is the source code for org.apache.pdfbox.pdmodel.PDDocument.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel;

import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.io.RandomAccessInputStream;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.ScratchFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdfwriter.COSWriter;
import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandlerFactory;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;

/**
 * This is the in-memory representation of the PDF document.
 * The {@link #close()} method must be called once the document is no longer needed.
 * 
 * @author Ben Litchfield
 */
public class PDDocument implements Closeable {
    private static final Log LOG = LogFactory.getLog(PDDocument.class);

    // the low-level COS document this object sits on top of
    private final COSDocument document;

    // cached wrappers, created lazily by getDocumentInformation() / getDocumentCatalog()
    private PDDocumentInformation documentInformation;
    private PDDocumentCatalog documentCatalog;

    // The encryption is cached here. Once the document is decrypted the COSDocument
    // no longer has an "Encrypt" dictionary, so this object must be used instead.
    private PDEncryption encryption;

    // flag which tells us whether all security should be removed from this document
    private boolean allSecurityToBeRemoved;

    // Keeps track of a customized documentId for the trailer. If null, a new id will be generated.
    // This ID doesn't represent the actual documentId from the trailer.
    private Long documentId;

    // the pdf to be read; null for documents created from scratch
    private final RandomAccessRead pdfSource;

    // the access permissions of the document
    private AccessPermission accessPermission;

    // fonts to subset before saving
    private final Set<PDFont> fontsToSubset = new HashSet<PDFont>();

    // signature interface handed in by the most recent addSignature call
    private SignatureInterface signInterface;

    // document-wide cached resources
    private ResourceCache resourceCache = new DefaultResourceCache();

    /**
     * Creates an empty PDF document with scratch file usage disabled.
     * You need to add at least one page for the document to be valid.
     */
    public PDDocument() {
        // delegate to the boolean variant with scratch files switched off
        this(false);
    }

    /**
     * Creates an empty PDF document.
     * You need to add at least one page for the document to be valid.
     *
     * @param useScratchFiles enables the usage of a scratch file if set to true
     */
    public PDDocument(boolean useScratchFiles) {
        // no explicit memory usage setting; the flag alone decides how the COSDocument buffers
        this(useScratchFiles, null);
    }

    /**
     * Creates an empty PDF document.
     * You need to add at least one page for the document to be valid.
     *
     * @param memUsageSetting defines how memory is used for buffering PDF streams; if it is
     *        {@code null}, the document is created with the boolean scratch file flag set to true
     */
    public PDDocument(MemoryUsageSetting memUsageSetting) {
        // the boolean is only consulted when no scratch file could be built from memUsageSetting
        this(true, memUsageSetting);
    }

    /**
     * Internal constructor which supports setting scratch file usage
     * via boolean parameter or directly via a {@link MemoryUsageSetting}. This will only be
     * needed as long as the new ScratchFile handling is tested.
     *
     * <p>If {@code memUsageSetting} is given, a {@link ScratchFile} is created from it. Should
     * that fail, a main-memory-only scratch file is tried; should that fail as well, the
     * document is created from the {@code useScratchFiles} flag alone. Both failures are
     * logged and never propagated.</p>
     *
     * <p>The new document gets a trailer, a catalog (version 1.4) and an empty page tree.
     * You need to add at least one page for the document to be valid.</p>
     *
     * @param useScratchFiles enables the usage of a scratch file if set to true; only used
     *        when no scratch file could be created from {@code memUsageSetting}
     * @param memUsageSetting defines how memory is used for buffering PDF streams, may be null
     */
    private PDDocument(boolean useScratchFiles, MemoryUsageSetting memUsageSetting) {
        ScratchFile scratchFile = null;
        if (memUsageSetting != null) {
            try {
                scratchFile = new ScratchFile(memUsageSetting);
            } catch (IOException ioe) {
                LOG.warn("Error initializing scratch file: " + ioe.getMessage()
                        + ". Fall back to main memory usage only.");
                try {
                    scratchFile = new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly());
                } catch (IOException ioe2) {
                    // Still best effort: scratchFile stays null and the boolean flag decides
                    // below. Log the failure instead of silently dropping it.
                    LOG.warn("Error initializing main memory scratch file: " + ioe2.getMessage()
                            + ". Fall back to default document buffering.", ioe2);
                }
            }
        }

        document = scratchFile != null ? new COSDocument(scratchFile) : new COSDocument(useScratchFiles);
        // a freshly created document has no source to read from
        pdfSource = null;

        // First we need a trailer
        COSDictionary trailer = new COSDictionary();
        document.setTrailer(trailer);

        // Next we need the root dictionary.
        COSDictionary rootDictionary = new COSDictionary();
        trailer.setItem(COSName.ROOT, rootDictionary);
        rootDictionary.setItem(COSName.TYPE, COSName.CATALOG);
        rootDictionary.setItem(COSName.VERSION, COSName.getPDFName("1.4"));

        // next we need the pages tree structure
        COSDictionary pages = new COSDictionary();
        rootDictionary.setItem(COSName.PAGES, pages);
        pages.setItem(COSName.TYPE, COSName.PAGES);
        COSArray kidsArray = new COSArray();
        pages.setItem(COSName.KIDS, kidsArray);
        pages.setItem(COSName.COUNT, COSInteger.ZERO);
    }

    /**
     * This will add a page to the document. This is a convenience method which hands the page
     * to the page tree returned by {@code getPages()}.
     * 
     * @param page The page to add to the document.
     */
    public void addPage(PDPage page) {
        getPages().add(page);
    }

    /**
     * Add a signature using default {@link SignatureOptions}.
     * 
     * @param sigObject is the PDSignature model
     * @param signatureInterface is an interface which provides signing capabilities
     * @throws IOException if there is an error creating required fields
     */
    public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface) throws IOException {
        // a fresh, unconfigured options object stands in for the missing third argument
        addSignature(sigObject, signatureInterface, new SignatureOptions());
    }

    /**
     * This will add a signature to the document.
     *
     * <p>Space for the signature contents and a placeholder byte range are reserved on
     * {@code sigObject}, a signature field is looked up (or created) in the AcroForm, prepared as
     * visible or non-visible signature depending on {@code options.getVisualSignature()}, and
     * its widget is attached to the page selected by {@code options.getPage()} (clamped to the
     * valid page range).</p>
     * 
     * @param sigObject is the PDSignature model
     * @param signatureInterface is an interface which provides signing capabilities
     * @param options signature options; must not be null, it is dereferenced unconditionally
     * @throws IOException if there is an error creating required fields
     * @throws IllegalStateException if the document has no pages
     * @throws IllegalArgumentException if a visual signature template is given but lacks the
     *         annotation or the signature field
     */
    public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface, SignatureOptions options)
            throws IOException {
        // Reserve content
        // We need to reserve some space for the signature. Some signatures include a
        // big certificate chain and we need enough space to store it.
        int preferedSignatureSize = options.getPreferedSignatureSize();
        if (preferedSignatureSize > 0) {
            sigObject.setContents(new byte[preferedSignatureSize]);
        } else {
            // no preference given: reserve 0x2500 (9472) bytes
            sigObject.setContents(new byte[0x2500]);
        }

        // Reserve ByteRange with placeholder values
        sigObject.setByteRange(new int[] { 0, 1000000000, 1000000000, 1000000000 });

        // remember the signing implementation on the document
        signInterface = signatureInterface;

        //
        // Create SignatureForm for signature
        // and appending it to the document
        //

        // Get the page the signature widget goes on
        PDDocumentCatalog catalog = getDocumentCatalog();
        int pageCount = catalog.getPages().getCount();
        if (pageCount == 0) {
            throw new IllegalStateException("Cannot sign an empty document");
        }

        // clamp the requested page index into [0, pageCount - 1]
        int startIndex = Math.min(Math.max(options.getPage(), 0), pageCount - 1);
        PDPage page = catalog.getPages().get(startIndex);

        // Get the AcroForm from the Root-Dictionary and append the annotation
        PDAcroForm acroForm = catalog.getAcroForm();
        catalog.getCOSObject().setNeedToBeUpdated(true);

        if (acroForm == null) {
            acroForm = new PDAcroForm(this);
            catalog.setAcroForm(acroForm);
        } else {
            acroForm.getCOSObject().setNeedToBeUpdated(true);
        }

        // For invisible signatures, the annotation has a rectangle array with values [ 0 0 0 0 ]. This annotation is
        // usually attached to the viewed page when the signature is created. Despite not having an appearance, the
        // annotation AP and N dictionaries may be present in some versions of Acrobat. If present, N references the
        // DSBlankXObj (blank) XObject.

        // Create Annotation / Field for signature
        List<PDAnnotation> annotations = page.getAnnotations();

        List<PDField> fields = acroForm.getFields();
        if (fields == null) {
            fields = new ArrayList<PDField>();
            acroForm.setFields(fields);
        }
        // reuse an existing field that already carries this signature dictionary, if any
        PDSignatureField signatureField = findSignatureField(fields, sigObject);
        if (signatureField == null) {
            signatureField = new PDSignatureField(acroForm);
            // append the signature object
            signatureField.setValue(sigObject);
            // backward linking
            signatureField.getWidgets().get(0).setPage(page);
        }
        // to conform PDF/A-1 requirement:
        // The /F key's Print flag bit shall be set to 1 and its Hidden, Invisible and NoView flag bits shall be set to 0
        signatureField.getWidgets().get(0).setPrinted(true);

        // Set the AcroForm Fields (fetched again after the possible setFields call above)
        List<PDField> acroFormFields = acroForm.getFields();
        acroForm.getCOSObject().setDirect(true);
        acroForm.setSignaturesExist(true);
        acroForm.setAppendOnly(true);

        // true if the field was already listed; otherwise it has just been appended to the list
        boolean checkFields = checkSignatureField(acroFormFields, signatureField);

        // Get the object from the visual signature
        COSDocument visualSignature = options.getVisualSignature();

        // Distinction of case for visual and non-visual signature
        if (visualSignature == null) {
            prepareNonVisibleSignature(signatureField, acroForm);
        } else {
            prepareVisibleSignature(signatureField, acroForm, visualSignature);
        }

        // Get the annotations of the page and append the signature-annotation to it
        // take care that page and acroforms do not share the same array (if so, we don't need to add it twice)
        if (!(annotations instanceof COSArrayList && acroFormFields instanceof COSArrayList
                && ((COSArrayList) annotations).toList().equals(((COSArrayList) acroFormFields).toList())
                && checkFields)) {
            annotations.add(signatureField.getWidgets().get(0));
        }
        page.getCOSObject().setNeedToBeUpdated(true);
    }

    // Scans the given field list for a signature field whose signature dictionary equals the
    // one of sigObject. The whole list is always scanned; if several fields match, the last
    // one wins. Returns null when nothing matches.
    private PDSignatureField findSignatureField(List<PDField> fields, PDSignature sigObject) {
        PDSignatureField match = null;
        for (PDField candidate : fields) {
            if (!(candidate instanceof PDSignatureField)) {
                continue;
            }
            PDSignature existing = ((PDSignatureField) candidate).getSignature();
            if (existing == null) {
                continue;
            }
            if (existing.getCOSObject().equals(sigObject.getCOSObject())) {
                match = (PDSignatureField) candidate;
            }
        }
        return match;
    }

    // Makes sure signatureField is part of acroFormFields.
    // Returns true if it was already listed (it is then marked as needing an update);
    // returns false if it was missing and has therefore been appended to the list.
    private boolean checkSignatureField(List<PDField> acroFormFields, PDSignatureField signatureField) {
        for (PDField existing : acroFormFields) {
            // fixme: this code does not check non-terminal fields, there could be a descendant signature
            boolean sameField = existing instanceof PDSignatureField
                    && existing.getCOSObject().equals(signatureField.getCOSObject());
            if (sameField) {
                signatureField.getCOSObject().setNeedToBeUpdated(true);
                return true;
            }
        }
        acroFormFields.add(signatureField);
        return false;
    }

    // Copies rectangle, appearance and default resources from the visual signature template
    // onto the signature field / AcroForm. The first dictionary typed as annotation supplies
    // the rectangle; the first signature field dictionary that has an /AP entry supplies the
    // appearance and the default resources. Throws IllegalArgumentException if either is absent.
    private void prepareVisibleSignature(PDSignatureField signatureField, PDAcroForm acroForm,
            COSDocument visualSignature) {
        boolean annotFound = false;
        boolean sigFieldFound = false;

        for (COSObject templateObject : visualSignature.getObjects()) {
            if (annotFound && sigFieldFound) {
                // both pieces collected, no need to look at the remaining objects
                break;
            }

            COSBase resolved = templateObject.getObject();
            if (!(resolved instanceof COSDictionary)) {
                continue;
            }
            COSDictionary templateDict = (COSDictionary) resolved;

            // signature annotation: source of the widget rectangle
            COSBase type = templateDict.getDictionaryObject(COSName.TYPE);
            if (!annotFound && COSName.ANNOT.equals(type)) {
                assignSignatureRectangle(signatureField, templateDict);
                annotFound = true;
            }

            // signature field: source of appearance and default resources
            COSBase ft = templateDict.getDictionaryObject(COSName.FT);
            COSBase apDict = templateDict.getDictionaryObject(COSName.AP);
            if (!sigFieldFound && COSName.SIG.equals(ft) && apDict != null) {
                assignAppearanceDictionary(signatureField, templateDict);
                assignAcroFormDefaultResource(acroForm, templateDict);
                sigFieldFound = true;
            }
        }

        if (!(annotFound && sigFieldFound)) {
            throw new IllegalArgumentException("Template is missing required objects");
        }
    }

    // Takes the /Rect array of the given template dictionary and applies it as rectangle
    // of the signature field's first widget.
    private void assignSignatureRectangle(PDSignatureField signatureField, COSDictionary cosBaseDict) {
        COSBase rectEntry = cosBaseDict.getDictionaryObject(COSName.RECT);
        PDRectangle bounds = new PDRectangle((COSArray) rectEntry);
        signatureField.getWidgets().get(0).setRectangle(bounds);
    }

    // Wraps the /AP entry of the given template dictionary, marks it as a direct object
    // and installs it as appearance of the signature field's first widget.
    private void assignAppearanceDictionary(PDSignatureField signatureField, COSDictionary dict) {
        COSDictionary apEntry = (COSDictionary) dict.getDictionaryObject(COSName.AP);
        PDAppearanceDictionary appearance = new PDAppearanceDictionary(apEntry);
        appearance.getCOSObject().setDirect(true);
        signatureField.getWidgets().get(0).setAppearance(appearance);
    }

    // Copies the /DR (default resources) entry of the given template dictionary into the
    // AcroForm dictionary. Does nothing when the template has no /DR entry.
    private void assignAcroFormDefaultResource(PDAcroForm acroForm, COSDictionary dict) {
        COSDictionary defaultResources = (COSDictionary) dict.getDictionaryObject(COSName.DR);
        if (defaultResources == null) {
            return;
        }
        defaultResources.setDirect(true);
        defaultResources.setNeedToBeUpdated(true);
        acroForm.getCOSObject().setItem(COSName.DR, defaultResources);
    }

    // Sets up a signature field without visual representation: default-constructed rectangle
    // on the widget, no AcroForm default resources, and an appearance dictionary whose normal
    // appearance is an empty form XObject.
    private void prepareNonVisibleSignature(PDSignatureField signatureField, PDAcroForm acroForm)
            throws IOException {
        // rectangle array [ 0 0 0 0 ] for the non-visual signature
        signatureField.getWidgets().get(0).setRectangle(new PDRectangle());
        // clear the AcroForm default resources
        acroForm.setDefaultResources(null);

        PDAppearanceDictionary appearance = new PDAppearanceDictionary();

        // build an appearance stream without any content
        COSStream emptyStream = getDocument().createCOSStream();
        emptyStream.createOutputStream().close();
        PDAppearanceStream normalAppearance = new PDAppearanceStream(emptyStream);
        COSDictionary formDict = (COSDictionary) normalAppearance.getCOSObject();
        formDict.setItem(COSName.SUBTYPE, COSName.FORM);
        formDict.setItem(COSName.BBOX, new PDRectangle());

        appearance.setNormalAppearance(normalAppearance);
        appearance.getCOSObject().setDirect(true);
        signatureField.getWidgets().get(0).setAppearance(appearance);
    }

    /**
     * This will add signature fields to the document.
     *
     * <p>Every given field is marked as needing an update and appended to the AcroForm field
     * list unless it is already part of it. For every field that already carries a signature
     * dictionary, the signature itself is added via
     * {@link #addSignature(PDSignature, SignatureInterface, SignatureOptions)}.</p>
     * 
     * @param sigFields are the PDSignatureFields that should be added to the document
     * @param signatureInterface is an interface which provides signing capabilities
     * @param options signature options; if {@code null}, default {@link SignatureOptions} are used
     * @throws IOException if there is an error creating required fields
     */
    public void addSignatureField(List<PDSignatureField> sigFields, SignatureInterface signatureInterface,
            SignatureOptions options) throws IOException {
        PDDocumentCatalog catalog = getDocumentCatalog();
        catalog.getCOSObject().setNeedToBeUpdated(true);

        PDAcroForm acroForm = catalog.getAcroForm();
        if (acroForm == null) {
            acroForm = new PDAcroForm(this);
            catalog.setAcroForm(acroForm);
        }
        COSDictionary acroFormDict = acroForm.getCOSObject();
        acroFormDict.setDirect(true);
        acroFormDict.setNeedToBeUpdated(true);
        if (!acroForm.isSignaturesExist()) {
            // 1 if at least one signature field is available
            acroForm.setSignaturesExist(true);
        }

        // Same guard as in addSignature: install an empty field list if the form has none,
        // then fetch the list again so that we work on the list backed by the form.
        if (acroForm.getFields() == null) {
            acroForm.setFields(new ArrayList<PDField>());
        }
        List<PDField> acroformFields = acroForm.getFields();

        // addSignature dereferences the options unconditionally, so never hand it null;
        // fall back to defaults exactly like the two-argument addSignature does
        SignatureOptions effectiveOptions = options != null ? options : new SignatureOptions();

        for (PDSignatureField sigField : sigFields) {
            sigField.getCOSObject().setNeedToBeUpdated(true);

            // Check if the field already exists, append it otherwise
            checkSignatureField(acroformFields, sigField);

            // Check if we need to add a signature
            PDSignature signature = sigField.getSignature();
            if (signature != null) {
                addSignature(signature, signatureInterface, effectiveOptions);
            }
        }
    }

    /**
     * Remove the page from the document by handing it to the page tree returned by
     * {@code getPages()}.
     * 
     * @param page The page to remove from the document.
     */
    public void removePage(PDPage page) {
        getPages().remove(page);
    }

    /**
     * Remove the page at the given position from the document.
     * 
     * @param pageNumber 0 based index of the page to remove.
     */
    public void removePage(int pageNumber) {
        getPages().remove(pageNumber);
    }

    /**
     * This will import and copy the contents from another location. Currently the content stream is stored in a scratch
     * file. The scratch file is associated with the document. If you are adding a page to this document from another
     * document and want to copy the contents to this document's scratch file then use this method otherwise just use
     * the addPage method.
     *
     * <p>The page dictionary is copied into a new page, the content stream (if any) is copied
     * into a new Flate-encoded stream owned by this document, and the new page is added to
     * this document.</p>
     * 
     * @param page The page to import.
     * @return The page that was imported.
     * 
     * @throws IOException If there is an error copying the page.
     */
    public PDPage importPage(PDPage page) throws IOException {
        PDPage importedPage = new PDPage(new COSDictionary(page.getCOSObject()), resourceCache);
        InputStream in = null;
        try {
            in = page.getContents();
            if (in != null) {
                // copy from the stream we already opened instead of opening a second one
                PDStream dest = new PDStream(this, in, COSName.FLATE_DECODE);
                importedPage.setContents(dest);
            }
            addPage(importedPage);
        } finally {
            // release the source stream on success and on failure; a failure is propagated
            // to the caller instead of returning a page that was never added
            IOUtils.closeQuietly(in);
        }

        return importedPage;
    }

    /**
     * Constructor that uses an existing document. The COSDocument that is passed in must be valid.
     * No source and no access permissions are associated with the new object.
     * 
     * @param doc The COSDocument that this document wraps.
     */
    public PDDocument(COSDocument doc) {
        this(doc, null);
    }

    /**
     * Constructor that uses an existing document. The COSDocument that is passed in must be valid.
     * No access permissions are associated with the new object.
     * 
     * @param doc The COSDocument that this document wraps.
     * @param source the source which is used to read the pdf
     */
    public PDDocument(COSDocument doc, RandomAccessRead source) {
        this(doc, source, null);
    }

    /**
     * Constructor that uses an existing document. The COSDocument that is passed in must be valid.
     * The arguments are stored as given; nothing is read or validated here.
     * 
     * @param doc The COSDocument that this document wraps.
     * @param source the source which is used to read the pdf
     * @param permission the access permissions of the pdf
     * 
     */
    public PDDocument(COSDocument doc, RandomAccessRead source, AccessPermission permission) {
        document = doc;
        pdfSource = source;
        accessPermission = permission;
    }

    /**
     * This will get the low level document.
     * 
     * @return The COSDocument that this layer sits on top of.
     */
    public COSDocument getDocument() {
        return document;
    }

    /**
     * This will get the document info dictionary. This is guaranteed to not return null.
     *
     * <p>The wrapper is created on first access and cached. If the trailer has no /Info entry,
     * or the entry is not a dictionary, a new empty dictionary is stored in the trailer and
     * used instead.</p>
     * 
     * @return The documents /Info dictionary
     */
    public PDDocumentInformation getDocumentInformation() {
        if (documentInformation == null) {
            COSDictionary trailer = document.getTrailer();
            COSBase infoBase = trailer.getDictionaryObject(COSName.INFO);
            COSDictionary infoDic;
            if (infoBase instanceof COSDictionary) {
                infoDic = (COSDictionary) infoBase;
            } else {
                // missing or malformed /Info entry: start over with an empty dictionary
                // rather than failing with a ClassCastException
                infoDic = new COSDictionary();
                trailer.setItem(COSName.INFO, infoDic);
            }
            documentInformation = new PDDocumentInformation(infoDic);
        }
        return documentInformation;
    }

    /**
     * This will set the document information for this document. The given object replaces the
     * cached one and its dictionary is stored as /Info entry of the trailer.
     * 
     * @param info The updated document information; must not be null, it is dereferenced.
     */
    public void setDocumentInformation(PDDocumentInformation info) {
        documentInformation = info;
        document.getTrailer().setItem(COSName.INFO, info.getCOSObject());
    }

    /**
     * This will get the document CATALOG. This is guaranteed to not return null.
     *
     * <p>The catalog wrapper is built on first access and cached. It wraps the trailer's /Root
     * entry if that is a dictionary; otherwise a catalog is created without one.</p>
     * 
     * @return The documents /Root dictionary
     */
    public PDDocumentCatalog getDocumentCatalog() {
        if (documentCatalog != null) {
            return documentCatalog;
        }
        COSBase root = document.getTrailer().getDictionaryObject(COSName.ROOT);
        documentCatalog = root instanceof COSDictionary
                ? new PDDocumentCatalog(this, (COSDictionary) root)
                : new PDDocumentCatalog(this);
        return documentCatalog;
    }

    /**
     * This will tell if this document is encrypted or not, as reported by the underlying
     * COSDocument.
     * 
     * @return true If this document is encrypted.
     */
    public boolean isEncrypted() {
        return document.isEncrypted();
    }

    /**
     * This will get the encryption dictionary for this document. Once an encryption object has
     * been cached (or set), it keeps being returned regardless of the document's current
     * encryption state, so the parameters remain available after decryption.
     *
     * @return The encryption dictionary, or null if none is cached and the document is not
     *         encrypted
     */
    public PDEncryption getEncryption() {
        if (encryption != null) {
            return encryption;
        }
        if (isEncrypted()) {
            // first access on an encrypted document: wrap and cache its encryption dictionary
            encryption = new PDEncryption(document.getEncryptionDictionary());
        }
        return encryption;
    }

    /**
     * This will set the encryption dictionary for this document. The object is only stored
     * on this PDDocument; the underlying COSDocument is not touched here.
     * 
     * @param encryption The encryption dictionary
     * 
     * @throws IOException declared for callers; not thrown by this implementation.
     */
    public void setEncryptionDictionary(PDEncryption encryption) throws IOException {
        this.encryption = encryption;
    }

    /**
     * This will return the last signature, i.e. the final element of
     * {@link #getSignatureDictionaries()}.
     * 
     * @return the last signature as <code>PDSignature</code>, or null if the document has none.
     * @throws IOException if no document catalog can be found.
     */
    public PDSignature getLastSignatureDictionary() throws IOException {
        List<PDSignature> allSignatures = getSignatureDictionaries();
        if (allSignatures.isEmpty()) {
            return null;
        }
        return allSignatures.get(allSignatures.size() - 1);
    }

    /**
     * Retrieve all signature fields from the document. Only the fields listed directly in the
     * AcroForm are inspected.
     * 
     * @return a <code>List</code> of <code>PDSignatureField</code>s; empty if the document has
     *         no AcroForm or the form has no fields
     * @throws IOException if no document catalog can be found.
     */
    public List<PDSignatureField> getSignatureFields() throws IOException {
        List<PDSignatureField> fields = new ArrayList<PDSignatureField>();
        PDAcroForm acroForm = getDocumentCatalog().getAcroForm();
        if (acroForm == null) {
            return fields;
        }
        // addSignature treats a null field list as possible, so guard against it here as well
        List<PDField> acroFormFields = acroForm.getFields();
        if (acroFormFields == null) {
            return fields;
        }
        // fixme: non-terminal fields are ignored, could have descendant signatures
        for (PDField field : acroFormFields) {
            if (field instanceof PDSignatureField) {
                fields.add((PDSignatureField) field);
            }
        }
        return fields;
    }

    /**
     * Retrieve all signature dictionaries from the document. For every signature field
     * returned by {@link #getSignatureFields()} the /V entry is wrapped, provided it is a
     * dictionary; fields without a value or with a malformed value are skipped.
     * 
     * @return a <code>List</code> of <code>PDSignature</code>s
     * @throws IOException if no document catalog can be found.
     */
    public List<PDSignature> getSignatureDictionaries() throws IOException {
        List<PDSignature> signatures = new ArrayList<PDSignature>();
        for (PDSignatureField field : getSignatureFields()) {
            COSBase value = field.getCOSObject().getDictionaryObject(COSName.V);
            // instanceof also covers null and protects against a non-dictionary /V entry,
            // which would otherwise end in a ClassCastException
            if (value instanceof COSDictionary) {
                signatures.add(new PDSignature((COSDictionary) value));
            }
        }
        return signatures;
    }

    /**
     * Returns the set of fonts which will be subset before the document is saved.
     * The internal set itself is returned, not a copy.
     *
     * @return the fonts to subset
     */
    Set<PDFont> getFontsToSubset() {
        return fontsToSubset;
    }

    /**
     * Parses a PDF using an empty password and no scratch files.
     * 
     * @param file file to be loaded
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file) throws IOException {
        return load(file, "", false);
    }

    /**
     * Parses a PDF using an empty password and neither key store nor alias.
     * 
     * @param file file to be loaded
     * @param useScratchFiles enables the usage of a scratch file if set to true
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, boolean useScratchFiles) throws IOException {
        return load(file, "", null, null, useScratchFiles);
    }

    /**
     * Parses a PDF using an empty password and neither key store nor alias.
     * 
     * @param file file to be loaded
     * @param memUsageSetting defines how memory is used for buffering PDF streams 
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, MemoryUsageSetting memUsageSetting) throws IOException {
        return load(file, "", null, null, memUsageSetting);
    }

    /**
     * Parses a PDF using the given password, neither key store nor alias, and no scratch files.
     * 
     * @param file file to be loaded
     * @param password password to be used for decryption
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, String password) throws IOException {
        return load(file, password, null, null, false);
    }

    /**
     * Parses a PDF using the given password and neither key store nor alias.
     * 
     * @param file file to be loaded
     * @param password password to be used for decryption
     * @param useScratchFiles enables the usage of a scratch file if set to true
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, String password, boolean useScratchFiles) throws IOException {
        return load(file, password, null, null, useScratchFiles);
    }

    /**
     * Parses a PDF using the given password and neither key store nor alias.
     * 
     * @param file file to be loaded
     * @param password password to be used for decryption
     * @param memUsageSetting defines how memory is used for buffering PDF streams 
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, String password, MemoryUsageSetting memUsageSetting)
            throws IOException {
        return load(file, password, null, null, memUsageSetting);
    }

    /**
     * Parses a PDF without using scratch files.
     * 
     * @param file file to be loaded
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security 
     * @param alias alias to be used for decryption when using public key security
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, String password, InputStream keyStore, String alias)
            throws IOException {
        return load(file, password, keyStore, alias, false);
    }

    /**
     * Parses a PDF.
     *
     * <p>The file is opened for random access and handed to a {@link PDFParser}. If creating
     * the parser or parsing fails, the opened file is closed again before the failure is
     * propagated.</p>
     * 
     * @param file file to be loaded
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security 
     * @param alias alias to be used for decryption when using public key security
     * @param useScratchFiles enables the usage of a scratch file if set to true
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, String password, InputStream keyStore, String alias,
            boolean useScratchFiles) throws IOException {
        RandomAccessBufferedFileInputStream raFile = new RandomAccessBufferedFileInputStream(file);
        boolean loaded = false;
        try {
            PDFParser parser = new PDFParser(raFile, password, keyStore, alias, useScratchFiles);
            parser.parse();
            PDDocument loadedDocument = parser.getPDDocument();
            loaded = true;
            return loadedDocument;
        } finally {
            if (!loaded) {
                // nobody else holds the file yet, so it would leak without this
                IOUtils.closeQuietly(raFile);
            }
        }
    }

    /**
     * Parses a PDF.
     * 
     * @param file file to be loaded
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security 
     * @param alias alias to be used for decryption when using public key security
     * @param memUsageSetting defines how memory is used for buffering PDF streams 
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, String password, InputStream keyStore, String alias,
            MemoryUsageSetting memUsageSetting) throws IOException {
        RandomAccessBufferedFileInputStream raFile = new RandomAccessBufferedFileInputStream(file);
        ScratchFile scratchFile = null;
        boolean parsed = false;
        try {
            scratchFile = new ScratchFile(memUsageSetting);
            PDFParser parser = new PDFParser(raFile, password, keyStore, alias, scratchFile);
            parser.parse();
            PDDocument loaded = parser.getPDDocument();
            parsed = true;
            return loaded;
        } finally {
            if (!parsed) {
                // No document was handed to the caller, so the scratch file and the file
                // handle opened above would otherwise never be released.
                if (scratchFile != null) {
                    try {
                        scratchFile.close();
                    } catch (IOException ignored) {
                        // the failure that brought us here is the one worth reporting
                    }
                }
                try {
                    raFile.close();
                } catch (IOException ignored) {
                    // the failure that brought us here is the one worth reporting
                }
            }
        }
    }

    /**
     * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf.
     * 
     * @param input stream that contains the document.
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input) throws IOException {
        // Defaults: empty password, no public-key material, no scratch files.
        final String emptyPassword = "";
        final InputStream noKeyStore = null;
        final String noAlias = null;
        final boolean useScratchFiles = false;
        return load(input, emptyPassword, noKeyStore, noAlias, useScratchFiles);
    }

    /**
     * Parses a PDF. Depending on the parameter useScratchFiles the given input
     * stream is either copied to the memory or to a temporary file to enable
     * random access to the pdf.
     * 
     * @param input stream that contains the document.
     * @param useScratchFiles enables the usage of a scratch file if set to true
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input, boolean useScratchFiles) throws IOException {
        // Defaults: empty password and no public-key material.
        final String emptyPassword = "";
        final InputStream noKeyStore = null;
        final String noAlias = null;
        return load(input, emptyPassword, noKeyStore, noAlias, useScratchFiles);
    }

    /**
     * Parses a PDF. Depending on the memory usage setting the given input
     * stream is either copied to the memory or to a temporary file to enable
     * random access to the pdf.
     * 
     * @param input stream that contains the document.
     * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams 
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input, MemoryUsageSetting memUsageSetting) throws IOException {
        // Defaults: empty password and no public-key material.
        final String emptyPassword = "";
        final InputStream noKeyStore = null;
        final String noAlias = null;
        return load(input, emptyPassword, noKeyStore, noAlias, memUsageSetting);
    }

    /**
     * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf.
     * 
     * @param input stream that contains the document.
     * @param password password to be used for decryption
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input, String password) throws IOException {
        // Password-only overload: no public-key material, no scratch files.
        final InputStream noKeyStore = null;
        final String noAlias = null;
        final boolean useScratchFiles = false;
        return load(input, password, noKeyStore, noAlias, useScratchFiles);
    }

    /**
     * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf.
     * 
     * @param input stream that contains the document.
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security 
     * @param alias alias to be used for decryption when using public key security
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input, String password, InputStream keyStore, String alias)
            throws IOException {
        // Delegate to the full overload with scratch files switched off.
        final boolean useScratchFiles = false;
        return load(input, password, keyStore, alias, useScratchFiles);
    }

    /**
     * Parses a PDF. Depending on the parameter useScratchFiles the given input
     * stream is either copied to the memory or to a temporary file to enable
     * random access to the pdf.
     * 
     * @param input stream that contains the document.
     * @param password password to be used for decryption
     * @param useScratchFiles enables the usage of a scratch file if set to true
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input, String password, boolean useScratchFiles) throws IOException {
        // This overload carries no public-key material, so both slots are passed as null.
        final InputStream noKeyStore = null;
        final String noAlias = null;
        return load(input, password, noKeyStore, noAlias, useScratchFiles);
    }

    /**
     * Parses a PDF. Depending on the memory usage setting the given input
     * stream is either copied to the memory or to a temporary file to enable
     * random access to the pdf.
     * 
     * @param input stream that contains the document.
     * @param password password to be used for decryption
     * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams 
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input, String password, MemoryUsageSetting memUsageSetting)
            throws IOException {
        // This overload carries no public-key material, so both slots are passed as null.
        final InputStream noKeyStore = null;
        final String noAlias = null;
        return load(input, password, noKeyStore, noAlias, memUsageSetting);
    }

    /**
     * Parses a PDF. Depending on the parameter useScratchFiles the given input
     * stream is either copied to the memory or to a temporary file to enable
     * random access to the pdf.
     * 
     * @param input stream that contains the document.
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security 
     * @param alias alias to be used for decryption when using public key security
     * @param useScratchFiles enables the usage of a scratch file if set to true
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input, String password, InputStream keyStore, String alias,
            boolean useScratchFiles) throws IOException {
        // Copy the stream into a random-access source: file-backed when scratch files
        // are requested, otherwise an in-memory buffer.
        RandomAccessRead source;
        if (useScratchFiles) {
            source = new RandomAccessBufferedFileInputStream(input);
        } else {
            source = new RandomAccessBuffer(input);
        }
        boolean parsed = false;
        try {
            PDFParser parser = new PDFParser(source, password, keyStore, alias, useScratchFiles);
            parser.parse();
            PDDocument loaded = parser.getPDDocument();
            parsed = true;
            return loaded;
        } finally {
            if (!parsed) {
                // No document was handed to the caller, so the buffered copy created
                // above would otherwise never be released.
                try {
                    source.close();
                } catch (IOException ignored) {
                    // the failure that brought us here is the one worth reporting
                }
            }
        }
    }

    /**
     * Parses a PDF. Depending on the memory usage setting the given input
     * stream is either copied to the memory or to a temporary file to enable
     * random access to the pdf.
     * 
     * @param input stream that contains the document.
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security 
     * @param alias alias to be used for decryption when using public key security
     * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams 
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input, String password, InputStream keyStore, String alias,
            MemoryUsageSetting memUsageSetting) throws IOException {
        ScratchFile scratchFile = new ScratchFile(memUsageSetting);
        boolean parsed = false;
        try {
            // The input is buffered through the scratch file, which the parser also receives.
            RandomAccessRead source = scratchFile.createBuffer(input);
            PDFParser parser = new PDFParser(source, password, keyStore, alias, scratchFile);
            parser.parse();
            PDDocument loaded = parser.getPDDocument();
            parsed = true;
            return loaded;
        } finally {
            if (!parsed) {
                // No document was handed to the caller, so the scratch file created
                // above would otherwise never be released.
                try {
                    scratchFile.close();
                } catch (IOException ignored) {
                    // the failure that brought us here is the one worth reporting
                }
            }
        }
    }

    /**
     * Parses a PDF.
     * 
     * @param input byte array that contains the document.
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(byte[] input) throws IOException {
        // No password supplied: delegate with the empty password.
        final String emptyPassword = "";
        return load(input, emptyPassword);
    }

    /**
     * Parses a PDF.
     * 
     * @param input byte array that contains the document.
     * @param password password to be used for decryption
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(byte[] input, String password) throws IOException {
        // This overload carries no public-key material, so both slots are passed as null.
        final InputStream noKeyStore = null;
        final String noAlias = null;
        return load(input, password, noKeyStore, noAlias);
    }

    /**
     * Parses a PDF.
     * 
     * @param input byte array that contains the document.
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security 
     * @param alias alias to be used for decryption when using public key security
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(byte[] input, String password, InputStream keyStore, String alias)
            throws IOException {
        // Wrap the byte array so the parser gets random access to it.
        final RandomAccessRead bytes = new RandomAccessBuffer(input);
        // This overload always parses with scratch files switched off.
        final boolean useScratchFiles = false;
        final PDFParser pdfParser = new PDFParser(bytes, password, keyStore, alias, useScratchFiles);
        pdfParser.parse();
        return pdfParser.getPDDocument();
    }

    /**
     * Save the document to a file.
     * 
     * @param fileName The file to save as.
     *
     * @throws IOException if the output could not be written
     */
    public void save(String fileName) throws IOException {
        // Resolve the name to a File and let the File overload do the work.
        final File target = new File(fileName);
        save(target);
    }

    /**
     * Save the document to a file.
     * 
     * @param file The file to save as.
     *
     * @throws IOException if the output could not be written
     */
    public void save(File file) throws IOException {
        OutputStream output = new FileOutputStream(file);
        boolean saved = false;
        try {
            save(output);
            saved = true;
        } finally {
            if (saved) {
                // Closing a stream that is already closed has no effect, so this is safe
                // whether or not the stream overload closed it.
                output.close();
            } else {
                // The stream overload can fail before it ever wraps the stream (closed
                // document, font subsetting error); release the file handle without
                // masking that failure.
                try {
                    output.close();
                } catch (IOException ignored) {
                    // the failure that brought us here is the one worth reporting
                }
            }
        }
    }

    /**
     * This will save the document to an output stream.
     * 
     * @param output The stream to write to.
     *
     * @throws IOException if the output could not be written
     */
    public void save(OutputStream output) throws IOException {
        if (document.isClosed()) {
            throw new IOException("Cannot save a document which has been closed");
        }

        // subset designated fonts, then forget them so a later save does not subset again
        for (PDFont font : fontsToSubset) {
            font.subset();
        }
        fontsToSubset.clear();

        // save PDF
        COSWriter writer = new COSWriter(output);
        try {
            writer.write(this);
        } finally {
            // Close exactly once, on both the success and the failure path.
            writer.close();
        }
    }

    /**
      * Save the PDF as an incremental update. This is only possible if the PDF was loaded from a file.
      *
      * @param output stream to write
      * @throws IOException if the output could not be written
      * @throws IllegalStateException if the document was not loaded from a file.
      */
    public void saveIncremental(OutputStream output) throws IOException {
        // An incremental update is written against the original source, so fail with
        // the documented exception when the document has none.
        if (pdfSource == null) {
            throw new IllegalStateException("document was not loaded from a file or a stream");
        }
        InputStream input = new RandomAccessInputStream(pdfSource);
        COSWriter writer = null;
        try {
            writer = new COSWriter(output, input);
            writer.write(this, signInterface);
        } finally {
            // Close exactly once; writer stays null if its construction failed.
            if (writer != null) {
                writer.close();
            }
        }
    }

    /**
     * Returns the page at the given index.
     *
     * @param pageIndex the page index
     * @return the page at the given index.
     */
    public PDPage getPage(int pageIndex) { // todo: REPLACE most calls to this method with BELOW method
        // Look the page up in the page tree obtained from the document catalog.
        final PDPageTree pageTree = getDocumentCatalog().getPages();
        return pageTree.get(pageIndex);
    }

    // todo: new!
    public PDPageTree getPages() {
        // Returns the page tree exactly as the document catalog provides it.
        final PDPageTree pageTree = getDocumentCatalog().getPages();
        return pageTree;
    }

    /**
     * This will return the total page count of the PDF document.
     * 
     * @return The total number of pages in the PDF document.
     */
    public int getNumberOfPages() {
        // The count is read from the page tree obtained from the document catalog.
        final PDPageTree pageTree = getDocumentCatalog().getPages();
        return pageTree.getCount();
    }

    /**
     * This will close the underlying COSDocument object.
     * 
     * @throws IOException If there is an error releasing resources.
     */
    @Override
    public void close() throws IOException {
        if (document.isClosed()) {
            // already closed: nothing to release
            return;
        }

        // Attempt to close every resource even if an earlier close fails, and
        // report the first failure to the caller.
        IOException firstFailure = null;

        // close all intermediate I/O streams
        try {
            document.close();
        } catch (IOException e) {
            firstFailure = e;
        }

        // close the source PDF stream, if we read from one
        if (pdfSource != null) {
            try {
                pdfSource.close();
            } catch (IOException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    // only one exception can be thrown; keep the other one visible in the log
                    LOG.error("Closing the source of the document failed as well", e);
                }
            }
        }

        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * Protects the document with a protection policy. The document content will be really
     * encrypted when it will be saved. This method only marks the document for encryption. It also
     * calls {@link #setAllSecurityToBeRemoved(boolean)} with a false argument if it was set to true
     * previously and logs a warning.
     *
     * @see org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy
     * @see org.apache.pdfbox.pdmodel.encryption.PublicKeyProtectionPolicy
     *
     * @param policy The protection policy.
     * @throws IOException if there isn't any suitable security handler.
     */
    public void protect(ProtectionPolicy policy) throws IOException {
        // Protecting and "remove all security" contradict each other; protect() wins
        // and the flag is reset with a warning.
        final boolean removalWasRequested = isAllSecurityToBeRemoved();
        if (removalWasRequested) {
            LOG.warn("do not call setAllSecurityToBeRemoved(true) before calling protect(), "
                    + "as protect() implies setAllSecurityToBeRemoved(false)");
            setAllSecurityToBeRemoved(false);
        }

        // A document that is not encrypted yet gets a new PDEncryption object.
        final boolean alreadyEncrypted = isEncrypted();
        if (!alreadyEncrypted) {
            encryption = new PDEncryption();
        }

        // Ask the factory for a handler matching the policy and attach it.
        final SecurityHandler handler = SecurityHandlerFactory.INSTANCE.newSecurityHandlerForPolicy(policy);
        if (handler != null) {
            getEncryption().setSecurityHandler(handler);
            return;
        }
        throw new IOException("No security handler for policy " + policy);
    }

    /**
     * Returns the access permissions granted when the document was decrypted. If the document was not decrypted this
     * method returns the access permission for a document owner (ie can do everything). The returned object is in read
     * only mode so that permissions cannot be changed. Methods providing access to content should rely on this object
     * to verify if the current user is allowed to proceed.
     * 
     * @return the access permissions for the current user on the document.
     */
    public AccessPermission getCurrentAccessPermission() {
        // Fast path: a permission object has already been set or created.
        if (accessPermission != null) {
            return accessPermission;
        }
        // Nothing set yet: fall back to owner permissions and remember them.
        accessPermission = AccessPermission.getOwnerAccessPermission();
        return accessPermission;
    }

    /**
     * Indicates if all security is removed or not when writing the pdf.
     * 
     * @return returns true if all security shall be removed otherwise false
     */
    public boolean isAllSecurityToBeRemoved() {
        // Plain accessor for the removal flag.
        final boolean removalRequested = this.allSecurityToBeRemoved;
        return removalRequested;
    }

    /**
     * Activates/Deactivates the removal of all security when writing the pdf.
     * 
     * @param removeAllSecurity remove all security if set to true
     */
    public void setAllSecurityToBeRemoved(boolean removeAllSecurity) {
        // Only records the flag; nothing else is touched here.
        this.allSecurityToBeRemoved = removeAllSecurity;
    }

    /**
     * Provides the document ID.
     *
     * @return the document ID
     */
    public Long getDocumentId() {
        // Plain accessor; the stored reference is returned as is.
        final Long currentId = this.documentId;
        return currentId;
    }

    /**
     * Sets the document ID to the given value.
     * 
     * @param docId the new document ID
     */
    public void setDocumentId(Long docId) {
        // Stores the given ID without validation.
        this.documentId = docId;
    }

    /**
     * Returns the PDF specification version this document conforms to.
     *
     * @return the PDF version (e.g. 1.4f)
     */
    public float getVersion() {
        final float headerVersion = getDocument().getVersion();

        // Only documents whose header says 1.4 or later may carry a second version
        // entry in the document catalog; otherwise the header is authoritative.
        final boolean catalogMayOverride = headerVersion >= 1.4f;
        if (!catalogMayOverride) {
            return headerVersion;
        }

        final String rawCatalogVersion = getDocumentCatalog().getVersion();
        // -1 stands for "no usable catalog version" and can never win the max() below
        float catalogVersion = -1;
        if (rawCatalogVersion != null) {
            try {
                catalogVersion = Float.parseFloat(rawCatalogVersion);
            } catch (NumberFormatException exception) {
                LOG.error("Can't extract the version number of the document catalog.", exception);
            }
        }

        // the most recent version is the correct one
        return Math.max(catalogVersion, headerVersion);
    }

    /**
     * Sets the PDF specification version for this document.
     *
     * @param newVersion the new PDF version (e.g. 1.4f)
     * 
     */
    public void setVersion(float newVersion) {
        final float effectiveVersion = getVersion();

        // same version requested: nothing to change
        if (effectiveVersion == newVersion) {
            return;
        }

        // a downgrade is refused and only logged
        if (effectiveVersion > newVersion) {
            LOG.error("It's not allowed to downgrade the version of a pdf.");
            return;
        }

        // From 1.4 on the new version is written to the catalog; older documents
        // have a version header only, so the header itself is updated.
        final boolean catalogCarriesVersion = getDocument().getVersion() >= 1.4f;
        if (catalogCarriesVersion) {
            final String versionText = Float.toString(newVersion);
            getDocumentCatalog().setVersion(versionText);
        } else {
            getDocument().setVersion(newVersion);
        }
    }

    /**
     * Returns the resource cache associated with this document, or null if there is none.
     */
    public ResourceCache getResourceCache() {
        // Plain accessor; the stored reference is returned as is.
        final ResourceCache currentCache = this.resourceCache;
        return currentCache;
    }

    /**
     * Sets the resource cache associated with this document.
     * 
     * @param resourceCache A resource cache, or null.
     */
    public void setResourceCache(ResourceCache resourceCache) {
        // The parameter shadows the field, so copy it into a distinctly named local first.
        final ResourceCache replacement = resourceCache;
        this.resourceCache = replacement;
    }
}