// Java tutorial
/*****************************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 ****************************************************************************/

package org.apache.pdfbox.preflight.process;

import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DAMAGED;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_FX_KEYS;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_INVALID_FILTER;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_MISSING;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.cos.COSObjectKey;
import org.apache.pdfbox.preflight.PreflightContext;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.exception.ValidationException;
import org.apache.pdfbox.preflight.utils.COSUtils;
import org.apache.pdfbox.preflight.utils.FilterHelper;
import org.apache.pdfbox.util.Charsets;

/**
 * Validation process that inspects every stream object of the document: its dictionary entries, its declared
 * length compared with the raw bytes of the source file, and its declared filters.
 */
public class StreamValidationProcess extends AbstractProcess
{
    /** Keyword that introduces the raw content of a stream object. */
    private static final String STREAM_KEYWORD = "stream";

    /** Keyword that terminates the raw content of a stream object. */
    private static final String ENDSTREAM_KEYWORD = "endstream";

    /** Size of the scratch buffer used to consume the stream content. */
    private static final int READ_BUFFER_SIZE = 1024;

    /**
     * Walks through all objects of the COS document and validates each one whose resolved object is a
     * {@link COSStream}.
     *
     * @param ctx the preflight context giving access to the document.
     * @throws ValidationException if a stream cannot be read from the source.
     */
    @Override
    public void validate(PreflightContext ctx) throws ValidationException
    {
        PDDocument pdfDoc = ctx.getDocument();
        COSDocument cDoc = pdfDoc.getDocument();

        List<?> lCOSObj = cDoc.getObjects();
        for (Object o : lCOSObj)
        {
            COSObject cObj = (COSObject) o;
            // Only objects that resolve to a stream are of interest here.
            COSBase cBase = cObj.getObject();
            if (cBase instanceof COSStream)
            {
                validateStreamObject(ctx, cObj);
            }
        }
    }

    /**
     * Runs the three stream checks (dictionary entries, length, filters) on one stream object.
     *
     * @param context the preflight context.
     * @param cObj the indirect object whose resolved object is a {@link COSStream}.
     * @throws ValidationException if the stream cannot be read from the source.
     */
    public void validateStreamObject(PreflightContext context, COSObject cObj) throws ValidationException
    {
        COSStream streamObj = (COSStream) cObj.getObject();

        // ---- Check dictionary entries
        // ---- Only the Length entry is mandatory
        // ---- In a PDF/A file, F, FFilter and FDecodeParms are forbidden
        checkDictionaryEntries(context, streamObj);
        // ---- check stream length
        checkStreamLength(context, cObj);
        // ---- Check the Filter value(s)
        checkFilters(streamObj, context);
    }

    /**
     * Checks the Filter entry of the stream dictionary. Each declared filter name is handed to
     * {@link FilterHelper#isAuthorizedFilter}, which is responsible for reporting unauthorized filters. A Filter
     * entry that is neither a name nor an array is reported as {@code ERROR_SYNTAX_STREAM_INVALID_FILTER}.
     * The Filter entry is optional; nothing is reported when it is absent.
     *
     * @param stream the stream to check.
     * @param context the preflight context.
     */
    protected void checkFilters(COSStream stream, PreflightContext context)
    {
        COSBase bFilter = stream.getDictionaryObject(COSName.FILTER);
        if (bFilter != null)
        {
            COSDocument cosDocument = context.getDocument().getDocument();
            if (COSUtils.isArray(bFilter, cosDocument))
            {
                COSArray afName = (COSArray) bFilter;
                for (int i = 0; i < afName.size(); ++i)
                {
                    // NOTE(review): array entries are fetched with getString(i); confirm that this
                    // accessor yields the filter name when the entries are COSName objects.
                    FilterHelper.isAuthorizedFilter(context, afName.getString(i));
                }
            }
            else if (bFilter instanceof COSName)
            {
                String fName = ((COSName) bFilter).getName();
                FilterHelper.isAuthorizedFilter(context, fName);
            }
            else
            {
                // ---- The filter type is invalid
                addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_INVALID_FILTER,
                        "Filter should be a Name or an Array"));
            }
        }
        // else Filter entry is optional
    }

    /**
     * Consumes bytes from the given input until the character sequence {@code stream} has been read.
     * A partial match is restarted only when an {@code 's'} is read; any other unexpected byte drops the
     * partial match.
     *
     * @param ra the input to read from.
     * @return true if the keyword was found (the input is then positioned right after it), false if the end
     * of the input was reached first.
     * @throws IOException if reading fails.
     */
    private boolean readUntilStream(InputStream ra) throws IOException
    {
        // number of leading keyword characters matched so far
        int matched = 0;
        int c;
        while ((c = ra.read()) != -1)
        {
            if (c == STREAM_KEYWORD.charAt(0))
            {
                // the first letter always (re)starts a match
                matched = 1;
            }
            else if (matched > 0 && c == STREAM_KEYWORD.charAt(matched))
            {
                ++matched;
                if (matched == STREAM_KEYWORD.length())
                {
                    return true;
                }
            }
            else
            {
                matched = 0;
            }
        }
        return false;
    }

    /**
     * Verifies that the Length entry of the stream matches the raw bytes of the source file: starting at the
     * object's cross-reference offset, the {@code stream} keyword is located, the declared number of bytes is
     * consumed, and the following bytes are expected to hold the {@code endstream} keyword.
     * <p>
     * Nothing is checked when the cross-reference table holds no offset for the object. A source that cannot
     * be positioned at the offset is reported as {@code ERROR_SYNTAX_STREAM_DAMAGED}; every other mismatch,
     * including a negative declared length, is reported as {@code ERROR_SYNTAX_STREAM_LENGTH_INVALID}.
     *
     * @param context the preflight context.
     * @param cObj the indirect object whose resolved object is a {@link COSStream}.
     * @throws ValidationException if an I/O error occurs while reading the source.
     */
    protected void checkStreamLength(PreflightContext context, COSObject cObj) throws ValidationException
    {
        COSStream streamObj = (COSStream) cObj.getObject();
        int length = streamObj.getInt(COSName.LENGTH);
        InputStream ra = null;
        try
        {
            ra = context.getSource().getInputStream();
            Long offset = context.getDocument().getDocument().getXrefTable().get(new COSObjectKey(cObj));
            if (offset == null)
            {
                // no known position for this object in the source: nothing to compare against
                return;
            }

            // ---- go to the beginning of the object
            if (!skipToOffset(ra, offset))
            {
                addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_DAMAGED,
                        "Unable to skip bytes in the PDFFile to check stream length"));
                return;
            }

            // ---- go to the stream key word
            if (!readUntilStream(ra))
            {
                addStreamLengthValidationError(context, cObj, length, "");
                return;
            }

            int c = ra.read();
            if (c == '\r')
            {
                // consume the second byte of the presumed CR LF pair
                ra.read();
            }
            // else c is '\n' no more character to read

            // ---- Here is the true beginning of the Stream Content.
            // ---- Read the given length of bytes and check the 11 next bytes
            // ---- to see if there are endstream.
            // A negative length cannot be consumed; report it instead of passing it to read().
            if (length < 0 || !consumeStreamContent(ra, length))
            {
                addStreamLengthValidationError(context, cObj, length, "");
                return;
            }

            // room for an optional two-byte EOL followed by the keyword
            int len = ENDSTREAM_KEYWORD.length() + 2;
            byte[] trailer = new byte[len];
            for (int i = 0; i < len; ++i)
            {
                trailer[i] = (byte) ra.read();
            }

            // ---- check the content of the bytes following the stream data
            String endStream = new String(trailer, Charsets.ISO_8859_1);
            if (!isEndStreamKeywordValid(trailer, endStream))
            {
                addStreamLengthValidationError(context, cObj, length, endStream);
            }
        }
        catch (IOException e)
        {
            throw new ValidationException("Unable to read a stream to validate: " + e.getMessage(), e);
        }
        finally
        {
            IOUtils.closeQuietly(ra);
        }
    }

    /**
     * Advances the input by exactly {@code count} bytes. {@link InputStream#skip(long)} may skip fewer bytes
     * than requested or none at all, so a zero result is followed by a single-byte read to tell a slow
     * stream from the end of the input; this keeps the loop from spinning forever on a truncated source.
     *
     * @param in the input to advance.
     * @param count the number of bytes to skip.
     * @return true if all bytes were skipped, false if {@code count} is negative, if skip reported a negative
     * value, or if the end of the input was reached first.
     * @throws IOException if reading fails.
     */
    private static boolean skipToOffset(InputStream in, long count) throws IOException
    {
        if (count < 0)
        {
            return false;
        }
        long remaining = count;
        while (remaining > 0)
        {
            long skipped = in.skip(remaining);
            if (skipped > 0)
            {
                remaining -= skipped;
            }
            else if (skipped < 0 || in.read() == -1)
            {
                return false;
            }
            else
            {
                // one byte was consumed by the probing read
                remaining--;
            }
        }
        return true;
    }

    /**
     * Reads and discards {@code count} bytes of stream content.
     *
     * @param in the input positioned at the first byte of the stream content.
     * @param count the non-negative number of bytes to consume.
     * @return true if all bytes were read, false if the end of the input was reached first.
     * @throws IOException if reading fails.
     */
    private static boolean consumeStreamContent(InputStream in, int count) throws IOException
    {
        byte[] buffer = new byte[READ_BUFFER_SIZE];
        int remaining = count;
        while (remaining > 0)
        {
            int read = in.read(buffer, 0, Math.min(remaining, buffer.length));
            if (read == -1)
            {
                return false;
            }
            remaining -= read;
        }
        return true;
    }

    /**
     * Decides whether the bytes read right after the stream content hold the {@code endstream} keyword.
     *
     * @param trailer the raw bytes read after the stream content.
     * @param text the same bytes decoded as ISO-8859-1.
     * @return true if the trailer is accepted.
     */
    private static boolean isEndStreamKeywordValid(byte[] trailer, String text)
    {
        byte first = trailer[0];
        byte second = trailer[1];
        boolean startsWithEol = (first == '\r' && (second == '\n' || second == 'e'))
                || (first == '\n' && second == 'e');
        if (startsWithEol)
        {
            return text.contains(ENDSTREAM_KEYWORD);
        }
        // NOTE(review): the mixed-case literal below can never match the lower-case keyword, so a
        // trailer without a leading EOL is always rejected. Confirm whether that is the intended
        // rule before "correcting" the case, as doing so would accept such streams.
        return text.startsWith("endStream");
    }

    /**
     * Check dictionary entries. Only the Length entry is mandatory. In a PDF/A file, F, FFilter and FDecodeParms are
     * forbidden
     *
     * @param context the preflight context.
     * @param streamObj the stream to check.
     */
    protected void checkDictionaryEntries(PreflightContext context, COSStream streamObj)
    {
        boolean len = streamObj.containsKey(COSName.LENGTH);
        boolean f = streamObj.containsKey(COSName.F);
        boolean ffilter = streamObj.containsKey(COSName.F_FILTER);
        boolean fdecParams = streamObj.containsKey(COSName.F_DECODE_PARMS);

        if (!len)
        {
            addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_LENGTH_MISSING,
                    "Stream length is missing"));
        }

        if (f || ffilter || fdecParams)
        {
            addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_FX_KEYS,
                    "F, FFilter or FDecodeParms keys are present in the stream dictionary"));
        }
    }

    /**
     * Records an {@code ERROR_SYNTAX_STREAM_LENGTH_INVALID} error describing the object, its declared length
     * and the bytes found where the end of the stream was expected.
     *
     * @param context the preflight context.
     * @param cObj the stream object being validated.
     * @param length the length declared in the stream dictionary.
     * @param endStream the decoded bytes read after the stream content, or an empty string if none were read.
     */
    private void addStreamLengthValidationError(PreflightContext context, COSObject cObj, int length, String endStream)
    {
        addValidationError(context, new ValidationError(ERROR_SYNTAX_STREAM_LENGTH_INVALID,
                "Stream length is invalid [cObj=" + cObj + "; defined length=" + length + "; buffer2=" + endStream + "]"));
    }
}