org.apache.padaf.preflight.helpers.TrailerValidationHelper.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.padaf.preflight.helpers.TrailerValidationHelper.java

Source

/*****************************************************************************
 * 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * 
 ****************************************************************************/

package org.apache.padaf.preflight.helpers;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.padaf.preflight.DocumentHandler;
import org.apache.padaf.preflight.ValidationConstants;
import org.apache.padaf.preflight.ValidationException;
import org.apache.padaf.preflight.ValidationResult;
import org.apache.padaf.preflight.ValidatorConfig;
import org.apache.padaf.preflight.ValidationResult.ValidationError;
import org.apache.padaf.preflight.utils.COSUtils;
import org.apache.padaf.preflight.utils.PdfElementParser;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.persistence.util.COSObjectKey;

/**
 * @author eric
 * 
 */
public class TrailerValidationHelper extends AbstractValidationHelper {

    public TrailerValidationHelper(ValidatorConfig cfg) throws ValidationException {
        super(cfg);
    }

    /*
     * (non-Javadoc)
     * 
     * @see
     * net.awl.edoc.pdfa.validation.helpers.AbstractValidationHelper#validate(
     * net.awl.edoc.pdfa.validation.DocumentHandler)
     */
    @Override
    public List<ValidationError> innerValidate(DocumentHandler handler) throws ValidationException {

        List<ValidationError> result = new ArrayList<ValidationError>(0);
        PDDocument pdfDoc = handler.getDocument();

        COSDictionary linearizedDict = isLinearizedPdf(pdfDoc);
        if (linearizedDict != null) {
            // it is a linearized PDF, check the linearized dictionary
            checkLinearizedDictionnary(linearizedDict, result);

            // if the pdf is a linearized pdf. the first trailer must be checked
            // and it must have the same ID than the last trailer.
            // According to the PDF version, trailers are available by the trailer key word (pdf <= 1.4)
            // or in the dictionary of the XRef stream ( PDF >= 1.5)
            String pdfVersion = pdfDoc.getDocument().getHeaderString();
            if (pdfVersion != null && pdfVersion.matches("%PDF-1\\.[1-4]")) {
                checkTrailersForLinearizedPDF14(handler, result);
            } else {
                checkTrailersForLinearizedPDF15(handler, result);
            }

        } else {
            // If the PDF isn't a linearized one, only the last trailer must be checked
            checkMainTrailer(pdfDoc.getDocument(), pdfDoc.getDocument().getTrailer(), result);

        }

        return result;
    }

    /**
     * Extracts and compares first and last trailers for PDF version between 1.1 and 1.4
     * @param handler
     * @param result
     */
    protected void checkTrailersForLinearizedPDF14(DocumentHandler handler, List<ValidationError> result) {
        PDDocument pdfDoc = handler.getDocument();
        List<String> lTrailers = handler.getPdfExtractor().getAllTrailers();

        if (lTrailers.isEmpty()) {
            result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                    "There are no trailer in the PDF file"));
        } else {
            String firstTrailer = lTrailers.get(0);
            String lastTrailer = lTrailers.get(lTrailers.size() - 1);

            COSDictionary first = null;
            COSDictionary last = null;
            COSDocument cd = null;
            try {
                cd = new COSDocument();

                PdfElementParser parser1 = new PdfElementParser(cd, firstTrailer.getBytes());
                first = parser1.parseAsDictionary();

                PdfElementParser parser2 = new PdfElementParser(cd, lastTrailer.getBytes());
                last = parser2.parseAsDictionary();

                checkMainTrailer(pdfDoc.getDocument(), first, result);
                if (!compareIds(first, last, pdfDoc.getDocument())) {
                    result.add(new ValidationResult.ValidationError(
                            ValidationConstants.ERROR_SYNTAX_TRAILER_ID_CONSISTENCY,
                            "ID is different in the first and the last trailer"));
                }

            } catch (IOException e) {
                result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                        "Unable to parse trailers of the linearized PDF"));
            } finally {
                COSUtils.closeDocumentQuietly(cd);
            }
        }
    }

    /**
     * Accesses and compares First and Last trailers for a PDF version higher than 1.4.
     * 
     * @param handler
     * @param result
     */
    protected void checkTrailersForLinearizedPDF15(DocumentHandler handler, List<ValidationError> result) {
        PDDocument pdfDoc = handler.getDocument();
        try {
            COSDocument cosDocument = pdfDoc.getDocument();
            List<COSObject> xrefs = cosDocument.getObjectsByType(COSName.XREF);

            if (xrefs.isEmpty()) {
                // no XRef CosObject, may by this pdf file used the PDF 1.4 syntaxe
                checkTrailersForLinearizedPDF14(handler, result);

            } else {

                long min = Long.MAX_VALUE;
                long max = Long.MIN_VALUE;
                COSDictionary firstTrailer = null;
                COSDictionary lastTrailer = null;

                // Search First and Last trailers according to offset position.
                for (COSObject co : xrefs) {
                    long offset = cosDocument.getXrefTable().get(new COSObjectKey(co));
                    if (offset < min) {
                        min = offset;
                        firstTrailer = (COSDictionary) co.getObject();
                    }

                    if (offset > max) {
                        max = offset;
                        lastTrailer = (COSDictionary) co.getObject();
                    }

                }

                checkMainTrailer(pdfDoc.getDocument(), firstTrailer, result);
                if (!compareIds(firstTrailer, lastTrailer, pdfDoc.getDocument())) {
                    result.add(new ValidationResult.ValidationError(
                            ValidationConstants.ERROR_SYNTAX_TRAILER_ID_CONSISTENCY,
                            "ID is different in the first and the last trailer"));
                }
            }
        } catch (IOException e) {
            result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                    "Unable to check PDF Trailers due to : " + e.getMessage()));
        }
    }

    /**
     * Return true if the ID of the first dictionary is the same as the id of the
     * last dictionary Return false otherwise.
     * 
     * @param first
     * @param last
     * @return
     */
    protected boolean compareIds(COSDictionary first, COSDictionary last, COSDocument doc) {
        COSBase idFirst = first.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_ID));
        COSBase idLast = last.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_ID));

        if (idFirst == null || idLast == null) {
            return false;
        }

        // ---- cast two COSBase to COSArray.
        COSArray af = COSUtils.getAsArray(idFirst, doc);
        COSArray al = COSUtils.getAsArray(idLast, doc);

        // ---- if one COSArray is null, the PDF/A isn't valid
        if ((af == null) || (al == null)) {
            return false;
        }

        // ---- compare both arrays
        boolean isEqual = true;
        for (Object of : af.toList()) {
            boolean oneIsEquals = false;
            for (Object ol : al.toList()) {
                // ---- according to PDF Reference 1-4, ID is an array containing two
                // strings
                if (!oneIsEquals)
                    oneIsEquals = ((COSString) ol).getString().equals(((COSString) of).getString());
            }
            isEqual = isEqual && oneIsEquals;
        }
        return isEqual;
    }

    /**
     * check if all keys are authorized in a trailer dictionary and if the type is
     * valid.
     * 
     * @param trailer
     * @param lErrors
     */
    protected void checkMainTrailer(COSDocument doc, COSDictionary trailer, List<ValidationError> lErrors) {
        boolean id = false;
        boolean root = false;
        boolean size = false;
        boolean prev = false;
        boolean info = false;
        boolean encrypt = false;

        for (Object key : trailer.keySet()) {
            if (!(key instanceof COSName)) {
                lErrors.add(new ValidationResult.ValidationError(
                        ValidationConstants.ERROR_SYNTAX_DICTIONARY_KEY_INVALID,
                        "Invalid key in The trailer dictionary"));
                return;
            }

            String cosName = ((COSName) key).getName();
            if (cosName.equals(TRAILER_DICTIONARY_KEY_ENCRYPT)) {
                encrypt = true;
            }
            if (cosName.equals(TRAILER_DICTIONARY_KEY_SIZE)) {
                size = true;
            }
            if (cosName.equals(TRAILER_DICTIONARY_KEY_PREV)) {
                prev = true;
            }
            if (cosName.equals(TRAILER_DICTIONARY_KEY_ROOT)) {
                root = true;
            }
            if (cosName.equals(TRAILER_DICTIONARY_KEY_INFO)) {
                info = true;
            }
            if (cosName.equals(TRAILER_DICTIONARY_KEY_ID)) {
                id = true;
            }
        }

        // ---- PDF/A Trailer dictionary must contain the ID key
        if (!id) {
            lErrors.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_MISSING_ID,
                    "The trailer dictionary doesn't contain ID"));
        } else {
            COSBase trailerId = trailer.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_ID));
            if (!COSUtils.isArray(trailerId, doc)) {
                lErrors.add(
                        new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_TYPE_INVALID,
                                "The trailer dictionary contains an id but it isn't an array"));
            }
        }
        // ---- PDF/A Trailer dictionary mustn't contain the Encrypt key
        if (encrypt) {
            lErrors.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_ENCRYPT,
                    "The trailer dictionary contains Encrypt"));
        }
        // ---- PDF Trailer dictionary must contain the Size key
        if (!size) {
            lErrors.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_MISSING_SIZE,
                    "The trailer dictionary doesn't contain Size"));
        } else {
            COSBase trailerSize = trailer.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_SIZE));
            if (!COSUtils.isInteger(trailerSize, doc)) {
                lErrors.add(
                        new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_TYPE_INVALID,
                                "The trailer dictionary contains a size but it isn't an integer"));
            }
        }

        // ---- PDF Trailer dictionary must contain the Root key
        if (!root) {
            lErrors.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_MISSING_ROOT,
                    "The trailer dictionary doesn't contain Root"));
        } else {
            COSBase trailerRoot = trailer.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_ROOT));
            if (!COSUtils.isDictionary(trailerRoot, doc)) {
                lErrors.add(
                        new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_TYPE_INVALID,
                                "The trailer dictionary contains a root but it isn't a dictionary"));
            }
        }
        // ---- PDF Trailer dictionary may contain the Prev key
        if (prev) {
            COSBase trailerPrev = trailer.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_PREV));
            if (!COSUtils.isInteger(trailerPrev, doc)) {
                lErrors.add(
                        new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_TYPE_INVALID,
                                "The trailer dictionary contains a prev but it isn't an integer"));
            }
        }
        // ---- PDF Trailer dictionary may contain the Info key
        if (info) {
            COSBase trailerInfo = trailer.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_INFO));
            if (!COSUtils.isDictionary(trailerInfo, doc)) {
                lErrors.add(
                        new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_TYPE_INVALID,
                                "The trailer dictionary contains an info but it isn't a dictionary"));
            }
        }
    }

    /**
     * According to the PDF Reference, A linearized PDF contain a dictionary as
     * first object (linearized dictionary) and only this one in the first
     * section.
     * 
     * @param document
     * @return
     */
    protected COSDictionary isLinearizedPdf(PDDocument document) {
        // ---- Get Ref to obj
        COSDocument cDoc = document.getDocument();
        List<?> lObj = cDoc.getObjects();
        for (Object object : lObj) {
            COSBase curObj = ((COSObject) object).getObject();
            if (curObj instanceof COSDictionary
                    && ((COSDictionary) curObj).keySet().contains(COSName.getPDFName(DICTIONARY_KEY_LINEARIZED))) {
                return (COSDictionary) curObj;
            }
        }
        return null;
    }

    /**
     * Check if mandatory keys of linearized dictionary are present.
     * 
     * @param lErrors
     */
    protected void checkLinearizedDictionnary(COSDictionary linearizedDict, List<ValidationError> lErrors) {
        // ---- check if all keys are authorized in a linearized dictionary
        // ---- Linearized dictionary must contain the lhoent keys
        boolean l = false;
        boolean h = false;
        boolean o = false;
        boolean e = false;
        boolean n = false;
        boolean t = false;

        for (Object key : linearizedDict.keySet()) {
            if (!(key instanceof COSName)) {
                lErrors.add(new ValidationResult.ValidationError(
                        ValidationConstants.ERROR_SYNTAX_DICTIONARY_KEY_INVALID,
                        "Invalid key in The Linearized dictionary"));
                return;
            }

            String cosName = ((COSName) key).getName();
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_L)) {
                l = true;
            }
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_H)) {
                h = true;
            }
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_O)) {
                o = true;
            }
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_E)) {
                e = true;
            }
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_N)) {
                n = true;
            }
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_T)) {
                t = true;
            }
        }

        if (!(l && h && o && e && t && n)) {
            lErrors.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_DICT_INVALID,
                    "Invalid key in The Linearized dictionary"));
        }

        return;
    }
}