org.apache.pdfbox.preflight.process.StreamValidationProcess.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.pdfbox.preflight.process.StreamValidationProcess.java

Source

/*****************************************************************************
 * 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * 
 ****************************************************************************/

package org.apache.pdfbox.preflight.process;

import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DAMAGED;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_FX_KEYS;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_INVALID_FILTER;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_MISSING;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.cos.COSObjectKey;
import org.apache.pdfbox.preflight.PreflightContext;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.exception.ValidationException;
import org.apache.pdfbox.preflight.utils.COSUtils;
import org.apache.pdfbox.preflight.utils.FilterHelper;
import org.apache.pdfbox.util.Charsets;

public class StreamValidationProcess extends AbstractProcess {

    @Override
    public void validate(PreflightContext ctx) throws ValidationException {
        PDDocument pdfDoc = ctx.getDocument();
        COSDocument cDoc = pdfDoc.getDocument();

        List<?> lCOSObj = cDoc.getObjects();
        for (Object o : lCOSObj) {
            COSObject cObj = (COSObject) o;

            // If this object represents a Stream, the Dictionary must contain the Length key
            COSBase cBase = cObj.getObject();
            if (cBase instanceof COSStream) {
                validateStreamObject(ctx, cObj);
            }
        }
    }

    public void validateStreamObject(PreflightContext context, COSObject cObj) throws ValidationException {
        COSStream streamObj = (COSStream) cObj.getObject();

        // ---- Check dictionary entries
        // ---- Only the Length entry is mandatory
        // ---- In a PDF/A file, F, FFilter and FDecodeParms are forbidden
        checkDictionaryEntries(context, streamObj);
        // ---- check stream length
        checkStreamLength(context, cObj);
        // ---- Check the Filter value(s)
        checkFilters(streamObj, context);
    }

    /**
     * This method checks if one of declared Filter is LZWdecode. If LZW is found, the result list is updated with an
     * error code.
     * 
     * @param stream the stream to check.
     * @param context the preflight context.
     */
    protected void checkFilters(COSStream stream, PreflightContext context) {
        COSBase bFilter = stream.getDictionaryObject(COSName.FILTER);
        if (bFilter != null) {
            COSDocument cosDocument = context.getDocument().getDocument();
            if (COSUtils.isArray(bFilter, cosDocument)) {
                COSArray afName = (COSArray) bFilter;
                for (int i = 0; i < afName.size(); ++i) {
                    FilterHelper.isAuthorizedFilter(context, afName.getString(i));
                }
            } else if (bFilter instanceof COSName) {
                String fName = ((COSName) bFilter).getName();
                FilterHelper.isAuthorizedFilter(context, fName);
            } else {
                // ---- The filter type is invalid
                addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_INVALID_FILTER,
                        "Filter should be a Name or an Array"));
            }
        }
        // else Filter entry is optional
    }

    private boolean readUntilStream(InputStream ra) throws IOException {
        boolean search = true;
        boolean maybe = false;
        int lastChar = -1;
        do {
            int c = ra.read();
            switch (c) {
            case 's':
                maybe = true;
                lastChar = c;
                break;
            case 't':
                if (maybe && lastChar == 's') {
                    lastChar = c;
                } else {
                    maybe = false;
                    lastChar = -1;
                }
                break;
            case 'r':
                if (maybe && lastChar == 't') {
                    lastChar = c;
                } else {
                    maybe = false;
                    lastChar = -1;
                }
                break;
            case 'e':
                if (maybe && lastChar == 'r') {
                    lastChar = c;
                } else {
                    maybe = false;
                }
                break;
            case 'a':
                if (maybe && lastChar == 'e') {
                    lastChar = c;
                } else {
                    maybe = false;
                }
                break;
            case 'm':
                if (maybe && lastChar == 'a') {
                    return true;
                } else {
                    maybe = false;
                }
                break;
            case -1:
                search = false;
                break;
            default:
                maybe = false;
                break;
            }
        } while (search);
        return false;
    }

    protected void checkStreamLength(PreflightContext context, COSObject cObj) throws ValidationException {
        COSStream streamObj = (COSStream) cObj.getObject();
        int length = streamObj.getInt(COSName.LENGTH);
        InputStream ra = null;
        try {
            ra = context.getSource().getInputStream();
            Long offset = context.getDocument().getDocument().getXrefTable().get(new COSObjectKey(cObj));

            // ---- go to the beginning of the object
            long skipped = 0;
            if (offset != null) {
                while (skipped != offset) {
                    long curSkip = ra.skip(offset - skipped);
                    if (curSkip < 0) {
                        addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_DAMAGED,
                                "Unable to skip bytes in the PDFFile to check stream length"));
                        return;
                    }
                    skipped += curSkip;
                }

                // ---- go to the stream key word
                if (readUntilStream(ra)) {
                    int c = ra.read();
                    if (c == '\r') {
                        ra.read();
                    }
                    // else c is '\n' no more character to read

                    // ---- Here is the true beginning of the Stream Content.
                    // ---- Read the given length of bytes and check the 10 next bytes
                    // ---- to see if there are endstream.
                    byte[] buffer = new byte[1024];
                    int nbBytesToRead = length;

                    do {
                        int cr;
                        if (nbBytesToRead > 1024) {
                            cr = ra.read(buffer, 0, 1024);
                        } else {
                            cr = ra.read(buffer, 0, nbBytesToRead);
                        }
                        if (cr == -1) {
                            addStreamLengthValidationError(context, cObj, length, "");
                            return;
                        } else {
                            nbBytesToRead -= cr;
                        }
                    } while (nbBytesToRead > 0);

                    int len = "endstream".length() + 2;
                    byte[] buffer2 = new byte[len];
                    for (int i = 0; i < len; ++i) {
                        buffer2[i] = (byte) ra.read();
                    }

                    // ---- check the content of 10 last characters
                    String endStream = new String(buffer2, Charsets.ISO_8859_1);
                    if (buffer2[0] == '\r' && buffer2[1] == '\n') {
                        if (!endStream.contains("endstream")) {
                            addStreamLengthValidationError(context, cObj, length, endStream);
                        }
                    } else if (buffer2[0] == '\r' && buffer2[1] == 'e') {
                        if (!endStream.contains("endstream")) {
                            addStreamLengthValidationError(context, cObj, length, endStream);
                        }
                    } else if (buffer2[0] == '\n' && buffer2[1] == 'e') {
                        if (!endStream.contains("endstream")) {
                            addStreamLengthValidationError(context, cObj, length, endStream);
                        }
                    } else {
                        if (!endStream.startsWith("endStream")) {
                            addStreamLengthValidationError(context, cObj, length, endStream);
                        }
                    }
                } else {
                    addStreamLengthValidationError(context, cObj, length, "");
                }
            }
        } catch (IOException e) {
            throw new ValidationException("Unable to read a stream to validate: " + e.getMessage(), e);
        } finally {
            IOUtils.closeQuietly(ra);
        }
    }

    /**
     * Check dictionary entries. Only the Length entry is mandatory. In a PDF/A file, F, FFilter and FDecodeParms are
     * forbidden
     * 
     * @param context the preflight context.
     * @param streamObj the stream to check.
     */
    protected void checkDictionaryEntries(PreflightContext context, COSStream streamObj) {
        boolean len = streamObj.containsKey(COSName.LENGTH);
        boolean f = streamObj.containsKey(COSName.F);
        boolean ffilter = streamObj.containsKey(COSName.F_FILTER);
        boolean fdecParams = streamObj.containsKey(COSName.F_DECODE_PARMS);

        if (!len) {
            addValidationError(context,
                    new ValidationError(ERROR_SYNTAX_STREAM_LENGTH_MISSING, "Stream length is missing"));
        }

        if (f || ffilter || fdecParams) {
            addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_FX_KEYS,
                    "F, FFilter or FDecodeParms keys are present in the stream dictionary"));
        }
    }

    private void addStreamLengthValidationError(PreflightContext context, COSObject cObj, int length,
            String endStream) {
        addValidationError(context,
                new ValidationError(ERROR_SYNTAX_STREAM_LENGTH_INVALID, "Stream length is invalid [cObj=" + cObj
                        + "; defined length=" + length + "; buffer2=" + endStream + "]"));
    }

}