Java tutorial
/* * $Id: PdfReader.java 3948 2009-06-03 15:17:22Z blowagie $ * * Copyright 2001, 2002 Paulo Soares * * The contents of this file are subject to the Mozilla Public License Version 1.1 * (the "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the License. * * The Original Code is 'iText, a free JAVA-PDF library'. * * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie. * All Rights Reserved. * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved. * * Contributor(s): all the names of the contributors are added in the source code * where applicable. * * Alternatively, the contents of this file may be used under the terms of the * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the * provisions of LGPL are applicable instead of those above. If you wish to * allow use of your version of this file only under the terms of the LGPL * License and not to allow others to use your version of this file under * the MPL, indicate your decision by deleting the provisions above and * replace them with the notice and other provisions required by the LGPL. * If you do not delete the provisions above, a recipient may use your version * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. * * This library is free software; you can redistribute it and/or modify it * under the terms of the MPL as stated above or under the terms of the GNU * Library General Public License as published by the Free Software Foundation; * either version 2 of the License, or any later version. * * This library is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more * details. * * If you didn't download this code from the following link, you should check if * you aren't using an obsolete version: * http://www.lowagie.com/iText/ */ package com.lowagie.text.pdf; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.zip.InflaterInputStream; import java.util.Stack; import java.security.Key; import java.security.MessageDigest; import java.security.cert.Certificate; import com.lowagie.text.ExceptionConverter; import com.lowagie.text.PageSize; import com.lowagie.text.Rectangle; import com.lowagie.text.exceptions.BadPasswordException; import com.lowagie.text.exceptions.InvalidPdfException; import com.lowagie.text.exceptions.UnsupportedPdfException; import com.lowagie.text.pdf.interfaces.PdfViewerPreferences; import com.lowagie.text.pdf.internal.PdfViewerPreferencesImp; import org.bouncycastle.cms.CMSEnvelopedData; import org.bouncycastle.cms.RecipientInformation; /** Reads a PDF document. * @author Paulo Soares (psoares@consiste.pt) * @author Kazuya Ujihara */ public class PdfReader implements PdfViewerPreferences { static final PdfName pageInhCandidates[] = { PdfName.MEDIABOX, PdfName.ROTATE, PdfName.RESOURCES, PdfName.CROPBOX }; static final byte endstream[] = PdfEncodings.convertToBytes("endstream", null); static final byte endobj[] = PdfEncodings.convertToBytes("endobj", null); protected PRTokeniser tokens; // Each xref pair is a position // type 0 -> -1, 0 // type 1 -> offset, 0 // type 2 -> index, obj num protected int xref[]; protected HashMap objStmMark; protected IntHashtable objStmToOffset; protected boolean newXrefType; private ArrayList xrefObj; PdfDictionary rootPages; protected PdfDictionary trailer; protected PdfDictionary catalog; protected PageRefs pageRefs; protected PRAcroForm acroForm = null; protected boolean acroFormParsed = false; protected boolean encrypted = false; protected boolean rebuilt = false; protected int freeXref; protected boolean tampered = false; protected int lastXref; protected int eofPos; protected char pdfVersion; protected PdfEncryption decrypt; protected byte password[] = null; //added by ujihara for decryption protected Key certificateKey = null; //added by Aiken Sam for certificate decryption protected Certificate certificate = null; //added by Aiken Sam for certificate decryption protected String certificateKeyProvider = null; //added by Aiken Sam for certificate decryption private boolean ownerPasswordUsed; protected ArrayList strings = new ArrayList(); protected boolean sharedStreams = true; protected boolean consolidateNamedDestinations = false; protected int rValue; protected int pValue; private int objNum; private int objGen; private int fileLength; private boolean hybridXref; private int lastXrefPartial = -1; private boolean partial; private PRIndirectReference cryptoRef; private PdfViewerPreferencesImp viewerPreferences = new PdfViewerPreferencesImp(); private boolean encryptionError; /** * Holds value of property appendable. */ private boolean appendable; protected PdfReader() { } /** Reads and parses a PDF document. * @param filename the file name of the document * @throws IOException on error */ public PdfReader(String filename) throws IOException { this(filename, null); } /** Reads and parses a PDF document. * @param filename the file name of the document * @param ownerPassword the password to read the document * @throws IOException on error */ public PdfReader(String filename, byte ownerPassword[]) throws IOException { password = ownerPassword; tokens = new PRTokeniser(filename); readPdf(); } /** Reads and parses a PDF document. * @param pdfIn the byte array with the document * @throws IOException on error */ public PdfReader(byte pdfIn[]) throws IOException { this(pdfIn, null); } /** Reads and parses a PDF document. * @param pdfIn the byte array with the document * @param ownerPassword the password to read the document * @throws IOException on error */ public PdfReader(byte pdfIn[], byte ownerPassword[]) throws IOException { password = ownerPassword; tokens = new PRTokeniser(pdfIn); readPdf(); } /** Reads and parses a PDF document. * @param filename the file name of the document * @param certificate the certificate to read the document * @param certificateKey the private key of the certificate * @param certificateKeyProvider the security provider for certificateKey * @throws IOException on error */ public PdfReader(String filename, Certificate certificate, Key certificateKey, String certificateKeyProvider) throws IOException { this.certificate = certificate; this.certificateKey = certificateKey; this.certificateKeyProvider = certificateKeyProvider; tokens = new PRTokeniser(filename); readPdf(); } /** Reads and parses a PDF document. * @param url the URL of the document * @throws IOException on error */ public PdfReader(URL url) throws IOException { this(url, null); } /** Reads and parses a PDF document. * @param url the URL of the document * @param ownerPassword the password to read the document * @throws IOException on error */ public PdfReader(URL url, byte ownerPassword[]) throws IOException { password = ownerPassword; tokens = new PRTokeniser(new RandomAccessFileOrArray(url)); readPdf(); } /** * Reads and parses a PDF document. * @param is the <CODE>InputStream</CODE> containing the document. The stream is read to the * end but is not closed * @param ownerPassword the password to read the document * @throws IOException on error */ public PdfReader(InputStream is, byte ownerPassword[]) throws IOException { password = ownerPassword; tokens = new PRTokeniser(new RandomAccessFileOrArray(is)); readPdf(); } /** * Reads and parses a PDF document. * @param is the <CODE>InputStream</CODE> containing the document. The stream is read to the * end but is not closed * @throws IOException on error */ public PdfReader(InputStream is) throws IOException { this(is, null); } /** * Reads and parses a pdf document. Contrary to the other constructors only the xref is read * into memory. The reader is said to be working in "partial" mode as only parts of the pdf * are read as needed. The pdf is left open but may be closed at any time with * <CODE>PdfReader.close()</CODE>, reopen is automatic. * @param raf the document location * @param ownerPassword the password or <CODE>null</CODE> for no password * @throws IOException on error */ public PdfReader(RandomAccessFileOrArray raf, byte ownerPassword[]) throws IOException { password = ownerPassword; partial = true; tokens = new PRTokeniser(raf); readPdfPartial(); } /** Creates an independent duplicate. * @param reader the <CODE>PdfReader</CODE> to duplicate */ public PdfReader(PdfReader reader) { this.appendable = reader.appendable; this.consolidateNamedDestinations = reader.consolidateNamedDestinations; this.encrypted = reader.encrypted; this.rebuilt = reader.rebuilt; this.sharedStreams = reader.sharedStreams; this.tampered = reader.tampered; this.password = reader.password; this.pdfVersion = reader.pdfVersion; this.eofPos = reader.eofPos; this.freeXref = reader.freeXref; this.lastXref = reader.lastXref; this.tokens = new PRTokeniser(reader.tokens.getSafeFile()); if (reader.decrypt != null) this.decrypt = new PdfEncryption(reader.decrypt); this.pValue = reader.pValue; this.rValue = reader.rValue; this.xrefObj = new ArrayList(reader.xrefObj); for (int k = 0; k < reader.xrefObj.size(); ++k) { this.xrefObj.set(k, duplicatePdfObject((PdfObject) reader.xrefObj.get(k), this)); } this.pageRefs = new PageRefs(reader.pageRefs, this); this.trailer = (PdfDictionary) duplicatePdfObject(reader.trailer, this); this.catalog = trailer.getAsDict(PdfName.ROOT); this.rootPages = catalog.getAsDict(PdfName.PAGES); this.fileLength = reader.fileLength; this.partial = reader.partial; this.hybridXref = reader.hybridXref; this.objStmToOffset = reader.objStmToOffset; this.xref = reader.xref; this.cryptoRef = (PRIndirectReference) duplicatePdfObject(reader.cryptoRef, this); this.ownerPasswordUsed = reader.ownerPasswordUsed; } /** Gets a new file instance of the original PDF * document. * @return a new file instance of the original PDF document */ public RandomAccessFileOrArray getSafeFile() { return tokens.getSafeFile(); } protected PdfReaderInstance getPdfReaderInstance(PdfWriter writer) { return new PdfReaderInstance(this, writer); } /** Gets the number of pages in the document. * @return the number of pages in the document */ public int getNumberOfPages() { return pageRefs.size(); } /** Returns the document's catalog. This dictionary is not a copy, * any changes will be reflected in the catalog. * @return the document's catalog */ public PdfDictionary getCatalog() { return catalog; } /** Returns the document's acroform, if it has one. * @return the document's acroform */ public PRAcroForm getAcroForm() { if (!acroFormParsed) { acroFormParsed = true; PdfObject form = catalog.get(PdfName.ACROFORM); if (form != null) { try { acroForm = new PRAcroForm(this); acroForm.readAcroForm((PdfDictionary) getPdfObject(form)); } catch (Exception e) { acroForm = null; } } } return acroForm; } /** * Gets the page rotation. This value can be 0, 90, 180 or 270. * @param index the page number. The first page is 1 * @return the page rotation */ public int getPageRotation(int index) { return getPageRotation(pageRefs.getPageNRelease(index)); } int getPageRotation(PdfDictionary page) { PdfNumber rotate = page.getAsNumber(PdfName.ROTATE); if (rotate == null) return 0; else { int n = rotate.intValue(); n %= 360; return n < 0 ? n + 360 : n; } } /** Gets the page size, taking rotation into account. This * is a <CODE>Rectangle</CODE> with the value of the /MediaBox and the /Rotate key. * @param index the page number. The first page is 1 * @return a <CODE>Rectangle</CODE> */ public Rectangle getPageSizeWithRotation(int index) { return getPageSizeWithRotation(pageRefs.getPageNRelease(index)); } /** * Gets the rotated page from a page dictionary. * @param page the page dictionary * @return the rotated page */ public Rectangle getPageSizeWithRotation(PdfDictionary page) { Rectangle rect = getPageSize(page); int rotation = getPageRotation(page); while (rotation > 0) { rect = rect.rotate(); rotation -= 90; } return rect; } /** Gets the page size without taking rotation into account. This * is the value of the /MediaBox key. * @param index the page number. The first page is 1 * @return the page size */ public Rectangle getPageSize(int index) { return getPageSize(pageRefs.getPageNRelease(index)); } /** * Gets the page from a page dictionary * @param page the page dictionary * @return the page */ public Rectangle getPageSize(PdfDictionary page) { PdfArray mediaBox = page.getAsArray(PdfName.MEDIABOX); return getNormalizedRectangle(mediaBox); } /** Gets the crop box without taking rotation into account. This * is the value of the /CropBox key. The crop box is the part * of the document to be displayed or printed. It usually is the same * as the media box but may be smaller. If the page doesn't have a crop * box the page size will be returned. * @param index the page number. The first page is 1 * @return the crop box */ public Rectangle getCropBox(int index) { PdfDictionary page = pageRefs.getPageNRelease(index); PdfArray cropBox = (PdfArray) getPdfObjectRelease(page.get(PdfName.CROPBOX)); if (cropBox == null) return getPageSize(page); return getNormalizedRectangle(cropBox); } /** Gets the box size. Allowed names are: "crop", "trim", "art", "bleed" and "media". * @param index the page number. The first page is 1 * @param boxName the box name * @return the box rectangle or null */ public Rectangle getBoxSize(int index, String boxName) { PdfDictionary page = pageRefs.getPageNRelease(index); PdfArray box = null; if (boxName.equals("trim")) box = (PdfArray) getPdfObjectRelease(page.get(PdfName.TRIMBOX)); else if (boxName.equals("art")) box = (PdfArray) getPdfObjectRelease(page.get(PdfName.ARTBOX)); else if (boxName.equals("bleed")) box = (PdfArray) getPdfObjectRelease(page.get(PdfName.BLEEDBOX)); else if (boxName.equals("crop")) box = (PdfArray) getPdfObjectRelease(page.get(PdfName.CROPBOX)); else if (boxName.equals("media")) box = (PdfArray) getPdfObjectRelease(page.get(PdfName.MEDIABOX)); if (box == null) return null; return getNormalizedRectangle(box); } /** Returns the content of the document information dictionary as a <CODE>HashMap</CODE> * of <CODE>String</CODE>. * @return content of the document information dictionary */ public HashMap getInfo() { HashMap map = new HashMap(); PdfDictionary info = trailer.getAsDict(PdfName.INFO); if (info == null) return map; for (Iterator it = info.getKeys().iterator(); it.hasNext();) { PdfName key = (PdfName) it.next(); PdfObject obj = getPdfObject(info.get(key)); if (obj == null) continue; String value = obj.toString(); switch (obj.type()) { case PdfObject.STRING: { value = ((PdfString) obj).toUnicodeString(); break; } case PdfObject.NAME: { value = PdfName.decodeName(value); break; } } map.put(PdfName.decodeName(key.toString()), value); } return map; } /** Normalizes a <CODE>Rectangle</CODE> so that llx and lly are smaller than urx and ury. * @param box the original rectangle * @return a normalized <CODE>Rectangle</CODE> */ public static Rectangle getNormalizedRectangle(PdfArray box) { float llx = ((PdfNumber) getPdfObjectRelease(box.getPdfObject(0))).floatValue(); float lly = ((PdfNumber) getPdfObjectRelease(box.getPdfObject(1))).floatValue(); float urx = ((PdfNumber) getPdfObjectRelease(box.getPdfObject(2))).floatValue(); float ury = ((PdfNumber) getPdfObjectRelease(box.getPdfObject(3))).floatValue(); return new Rectangle(Math.min(llx, urx), Math.min(lly, ury), Math.max(llx, urx), Math.max(lly, ury)); } protected void readPdf() throws IOException { try { fileLength = tokens.getFile().length(); pdfVersion = tokens.checkPdfHeader(); try { readXref(); } catch (Exception e) { try { rebuilt = true; rebuildXref(); lastXref = -1; } catch (Exception ne) { throw new InvalidPdfException( "Rebuild failed: " + ne.getMessage() + "; Original message: " + e.getMessage()); } } try { readDocObj(); } catch (Exception e) { if (e instanceof BadPasswordException) throw new BadPasswordException(e.getMessage()); if (rebuilt || encryptionError) throw new InvalidPdfException(e.getMessage()); rebuilt = true; encrypted = false; rebuildXref(); lastXref = -1; readDocObj(); } strings.clear(); readPages(); eliminateSharedStreams(); removeUnusedObjects(); } finally { try { tokens.close(); } catch (Exception e) { // empty on purpose } } } protected void readPdfPartial() throws IOException { try { fileLength = tokens.getFile().length(); pdfVersion = tokens.checkPdfHeader(); try { readXref(); } catch (Exception e) { try { rebuilt = true; rebuildXref(); lastXref = -1; } catch (Exception ne) { throw new InvalidPdfException( "Rebuild failed: " + ne.getMessage() + "; Original message: " + e.getMessage()); } } readDocObjPartial(); readPages(); } catch (IOException e) { try { tokens.close(); } catch (Exception ee) { } throw e; } } private boolean equalsArray(byte ar1[], byte ar2[], int size) { for (int k = 0; k < size; ++k) { if (ar1[k] != ar2[k]) return false; } return true; } /** * @throws IOException */ private void readDecryptedDocObj() throws IOException { if (encrypted) return; PdfObject encDic = trailer.get(PdfName.ENCRYPT); if (encDic == null || encDic.toString().equals("null")) return; encryptionError = true; byte[] encryptionKey = null; encrypted = true; PdfDictionary enc = (PdfDictionary) getPdfObject(encDic); String s; PdfObject o; PdfArray documentIDs = trailer.getAsArray(PdfName.ID); byte documentID[] = null; if (documentIDs != null) { o = documentIDs.getPdfObject(0); strings.remove(o); s = o.toString(); documentID = com.lowagie.text.DocWriter.getISOBytes(s); if (documentIDs.size() > 1) strings.remove(documentIDs.getPdfObject(1)); } // just in case we have a broken producer if (documentID == null) documentID = new byte[0]; byte uValue[] = null; byte oValue[] = null; int cryptoMode = PdfWriter.STANDARD_ENCRYPTION_40; int lengthValue = 0; PdfObject filter = getPdfObjectRelease(enc.get(PdfName.FILTER)); if (filter.equals(PdfName.STANDARD)) { s = enc.get(PdfName.U).toString(); strings.remove(enc.get(PdfName.U)); uValue = com.lowagie.text.DocWriter.getISOBytes(s); s = enc.get(PdfName.O).toString(); strings.remove(enc.get(PdfName.O)); oValue = com.lowagie.text.DocWriter.getISOBytes(s); o = enc.get(PdfName.P); if (!o.isNumber()) throw new InvalidPdfException("Illegal P value."); pValue = ((PdfNumber) o).intValue(); o = enc.get(PdfName.R); if (!o.isNumber()) throw new InvalidPdfException("Illegal R value."); rValue = ((PdfNumber) o).intValue(); switch (rValue) { case 2: cryptoMode = PdfWriter.STANDARD_ENCRYPTION_40; break; case 3: o = enc.get(PdfName.LENGTH); if (!o.isNumber()) throw new InvalidPdfException("Illegal Length value."); lengthValue = ((PdfNumber) o).intValue(); if (lengthValue > 128 || lengthValue < 40 || lengthValue % 8 != 0) throw new InvalidPdfException("Illegal Length value."); cryptoMode = PdfWriter.STANDARD_ENCRYPTION_128; break; case 4: PdfDictionary dic = (PdfDictionary) enc.get(PdfName.CF); if (dic == null) throw new InvalidPdfException("/CF not found (encryption)"); dic = (PdfDictionary) dic.get(PdfName.STDCF); if (dic == null) throw new InvalidPdfException("/StdCF not found (encryption)"); if (PdfName.V2.equals(dic.get(PdfName.CFM))) cryptoMode = PdfWriter.STANDARD_ENCRYPTION_128; else if (PdfName.AESV2.equals(dic.get(PdfName.CFM))) cryptoMode = PdfWriter.ENCRYPTION_AES_128; else throw new UnsupportedPdfException("No compatible encryption found"); PdfObject em = enc.get(PdfName.ENCRYPTMETADATA); if (em != null && em.toString().equals("false")) cryptoMode |= PdfWriter.DO_NOT_ENCRYPT_METADATA; break; default: throw new UnsupportedPdfException("Unknown encryption type R = " + rValue); } } else if (filter.equals(PdfName.PUBSEC)) { boolean foundRecipient = false; byte[] envelopedData = null; PdfArray recipients = null; o = enc.get(PdfName.V); if (!o.isNumber()) throw new InvalidPdfException("Illegal V value."); int vValue = ((PdfNumber) o).intValue(); switch (vValue) { case 1: cryptoMode = PdfWriter.STANDARD_ENCRYPTION_40; lengthValue = 40; recipients = (PdfArray) enc.get(PdfName.RECIPIENTS); break; case 2: o = enc.get(PdfName.LENGTH); if (!o.isNumber()) throw new InvalidPdfException("Illegal Length value."); lengthValue = ((PdfNumber) o).intValue(); if (lengthValue > 128 || lengthValue < 40 || lengthValue % 8 != 0) throw new InvalidPdfException("Illegal Length value."); cryptoMode = PdfWriter.STANDARD_ENCRYPTION_128; recipients = (PdfArray) enc.get(PdfName.RECIPIENTS); break; case 4: PdfDictionary dic = (PdfDictionary) enc.get(PdfName.CF); if (dic == null) throw new InvalidPdfException("/CF not found (encryption)"); dic = (PdfDictionary) dic.get(PdfName.DEFAULTCRYPTFILTER); if (dic == null) throw new InvalidPdfException("/DefaultCryptFilter not found (encryption)"); if (PdfName.V2.equals(dic.get(PdfName.CFM))) { cryptoMode = PdfWriter.STANDARD_ENCRYPTION_128; lengthValue = 128; } else if (PdfName.AESV2.equals(dic.get(PdfName.CFM))) { cryptoMode = PdfWriter.ENCRYPTION_AES_128; lengthValue = 128; } else throw new UnsupportedPdfException("No compatible encryption found"); PdfObject em = dic.get(PdfName.ENCRYPTMETADATA); if (em != null && em.toString().equals("false")) cryptoMode |= PdfWriter.DO_NOT_ENCRYPT_METADATA; recipients = (PdfArray) dic.get(PdfName.RECIPIENTS); break; default: throw new UnsupportedPdfException("Unknown encryption type V = " + rValue); } for (int i = 0; i < recipients.size(); i++) { PdfObject recipient = recipients.getPdfObject(i); strings.remove(recipient); CMSEnvelopedData data = null; try { data = new CMSEnvelopedData(recipient.getBytes()); Iterator recipientCertificatesIt = data.getRecipientInfos().getRecipients().iterator(); while (recipientCertificatesIt.hasNext()) { RecipientInformation recipientInfo = (RecipientInformation) recipientCertificatesIt.next(); if (recipientInfo.getRID().match(certificate) && !foundRecipient) { envelopedData = recipientInfo.getContent(certificateKey, certificateKeyProvider); foundRecipient = true; } } } catch (Exception f) { throw new ExceptionConverter(f); } } if (!foundRecipient || envelopedData == null) { throw new UnsupportedPdfException("Bad certificate and key."); } MessageDigest md = null; try { md = MessageDigest.getInstance("SHA-1"); md.update(envelopedData, 0, 20); for (int i = 0; i < recipients.size(); i++) { byte[] encodedRecipient = recipients.getPdfObject(i).getBytes(); md.update(encodedRecipient); } if ((cryptoMode & PdfWriter.DO_NOT_ENCRYPT_METADATA) != 0) md.update(new byte[] { (byte) 255, (byte) 255, (byte) 255, (byte) 255 }); encryptionKey = md.digest(); } catch (Exception f) { throw new ExceptionConverter(f); } } decrypt = new PdfEncryption(); decrypt.setCryptoMode(cryptoMode, lengthValue); if (filter.equals(PdfName.STANDARD)) { //check by owner password decrypt.setupByOwnerPassword(documentID, password, uValue, oValue, pValue); if (!equalsArray(uValue, decrypt.userKey, (rValue == 3 || rValue == 4) ? 16 : 32)) { //check by user password decrypt.setupByUserPassword(documentID, password, oValue, pValue); if (!equalsArray(uValue, decrypt.userKey, (rValue == 3 || rValue == 4) ? 16 : 32)) { throw new BadPasswordException("Bad user password"); } } else ownerPasswordUsed = true; } else if (filter.equals(PdfName.PUBSEC)) { decrypt.setupByEncryptionKey(encryptionKey, lengthValue); ownerPasswordUsed = true; } for (int k = 0; k < strings.size(); ++k) { PdfString str = (PdfString) strings.get(k); str.decrypt(this); } if (encDic.isIndirect()) { cryptoRef = (PRIndirectReference) encDic; xrefObj.set(cryptoRef.getNumber(), null); } encryptionError = false; } /** * @param obj * @return a PdfObject */ public static PdfObject getPdfObjectRelease(PdfObject obj) { PdfObject obj2 = getPdfObject(obj); releaseLastXrefPartial(obj); return obj2; } /** * Reads a <CODE>PdfObject</CODE> resolving an indirect reference * if needed. * @param obj the <CODE>PdfObject</CODE> to read * @return the resolved <CODE>PdfObject</CODE> */ public static PdfObject getPdfObject(PdfObject obj) { if (obj == null) return null; if (!obj.isIndirect()) return obj; try { PRIndirectReference ref = (PRIndirectReference) obj; int idx = ref.getNumber(); boolean appendable = ref.getReader().appendable; obj = ref.getReader().getPdfObject(idx); if (obj == null) { return null; } else { if (appendable) { switch (obj.type()) { case PdfObject.NULL: obj = new PdfNull(); break; case PdfObject.BOOLEAN: obj = new PdfBoolean(((PdfBoolean) obj).booleanValue()); break; case PdfObject.NAME: obj = new PdfName(obj.getBytes()); break; } obj.setIndRef(ref); } return obj; } } catch (Exception e) { throw new ExceptionConverter(e); } } /** * Reads a <CODE>PdfObject</CODE> resolving an indirect reference * if needed. If the reader was opened in partial mode the object will be released * to save memory. * @param obj the <CODE>PdfObject</CODE> to read * @param parent * @return a PdfObject */ public static PdfObject getPdfObjectRelease(PdfObject obj, PdfObject parent) { PdfObject obj2 = getPdfObject(obj, parent); releaseLastXrefPartial(obj); return obj2; } /** * @param obj * @param parent * @return a PdfObject */ public static PdfObject getPdfObject(PdfObject obj, PdfObject parent) { if (obj == null) return null; if (!obj.isIndirect()) { PRIndirectReference ref = null; if (parent != null && (ref = parent.getIndRef()) != null && ref.getReader().isAppendable()) { switch (obj.type()) { case PdfObject.NULL: obj = new PdfNull(); break; case PdfObject.BOOLEAN: obj = new PdfBoolean(((PdfBoolean) obj).booleanValue()); break; case PdfObject.NAME: obj = new PdfName(obj.getBytes()); break; } obj.setIndRef(ref); } return obj; } return getPdfObject(obj); } /** * @param idx * @return a PdfObject */ public PdfObject getPdfObjectRelease(int idx) { PdfObject obj = getPdfObject(idx); releaseLastXrefPartial(); return obj; } /** * @param idx * @return aPdfObject */ public PdfObject getPdfObject(int idx) { try { lastXrefPartial = -1; if (idx < 0 || idx >= xrefObj.size()) return null; PdfObject obj = (PdfObject) xrefObj.get(idx); if (!partial || obj != null) return obj; if (idx * 2 >= xref.length) return null; obj = readSingleObject(idx); lastXrefPartial = -1; if (obj != null) lastXrefPartial = idx; return obj; } catch (Exception e) { throw new ExceptionConverter(e); } } /** * */ public void resetLastXrefPartial() { lastXrefPartial = -1; } /** * */ public void releaseLastXrefPartial() { if (partial && lastXrefPartial != -1) { xrefObj.set(lastXrefPartial, null); lastXrefPartial = -1; } } /** * @param obj */ public static void releaseLastXrefPartial(PdfObject obj) { if (obj == null) return; if (!obj.isIndirect()) return; if (!(obj instanceof PRIndirectReference)) return; PRIndirectReference ref = (PRIndirectReference) obj; PdfReader reader = ref.getReader(); if (reader.partial && reader.lastXrefPartial != -1 && reader.lastXrefPartial == ref.getNumber()) { reader.xrefObj.set(reader.lastXrefPartial, null); } reader.lastXrefPartial = -1; } private void setXrefPartialObject(int idx, PdfObject obj) { if (!partial || idx < 0) return; xrefObj.set(idx, obj); } /** * @param obj * @return an indirect reference */ public PRIndirectReference addPdfObject(PdfObject obj) { xrefObj.add(obj); return new PRIndirectReference(this, xrefObj.size() - 1); } protected void readPages() throws IOException { catalog = trailer.getAsDict(PdfName.ROOT); rootPages = catalog.getAsDict(PdfName.PAGES); pageRefs = new PageRefs(this); } protected void readDocObjPartial() throws IOException { xrefObj = new ArrayList(xref.length / 2); xrefObj.addAll(Collections.nCopies(xref.length / 2, null)); readDecryptedDocObj(); if (objStmToOffset != null) { int keys[] = objStmToOffset.getKeys(); for (int k = 0; k < keys.length; ++k) { int n = keys[k]; objStmToOffset.put(n, xref[n * 2]); xref[n * 2] = -1; } } } protected PdfObject readSingleObject(int k) throws IOException { strings.clear(); int k2 = k * 2; int pos = xref[k2]; if (pos < 0) return null; if (xref[k2 + 1] > 0) pos = objStmToOffset.get(xref[k2 + 1]); if (pos == 0) return null; tokens.seek(pos); tokens.nextValidToken(); if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) tokens.throwError("Invalid object number."); objNum = tokens.intValue(); tokens.nextValidToken(); if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) tokens.throwError("Invalid generation number."); objGen = tokens.intValue(); tokens.nextValidToken(); if (!tokens.getStringValue().equals("obj")) tokens.throwError("Token 'obj' expected."); PdfObject obj; try { obj = readPRObject(); for (int j = 0; j < strings.size(); ++j) { PdfString str = (PdfString) strings.get(j); str.decrypt(this); } if (obj.isStream()) { checkPRStreamLength((PRStream) obj); } } catch (Exception e) { obj = null; } if (xref[k2 + 1] > 0) { obj = readOneObjStm((PRStream) obj, xref[k2]); } xrefObj.set(k, obj); return obj; } protected PdfObject readOneObjStm(PRStream stream, int idx) throws IOException { int first = stream.getAsNumber(PdfName.FIRST).intValue(); byte b[] = getStreamBytes(stream, tokens.getFile()); PRTokeniser saveTokens = tokens; tokens = new PRTokeniser(b); try { int address = 0; boolean ok = true; ++idx; for (int k = 0; k < idx; ++k) { ok = tokens.nextToken(); if (!ok) break; if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) { ok = false; break; } ok = tokens.nextToken(); if (!ok) break; if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) { ok = false; break; } address = tokens.intValue() + first; } if (!ok) throw new InvalidPdfException("Error reading ObjStm"); tokens.seek(address); return readPRObject(); } finally { tokens = saveTokens; } } /** * @return the percentage of the cross reference table that has been read */ public double dumpPerc() { int total = 0; for (int k = 0; k < xrefObj.size(); ++k) { if (xrefObj.get(k) != null) ++total; } return (total * 100.0 / xrefObj.size()); } protected void readDocObj() throws IOException { ArrayList streams = new ArrayList(); xrefObj = new ArrayList(xref.length / 2); xrefObj.addAll(Collections.nCopies(xref.length / 2, null)); for (int k = 2; k < xref.length; k += 2) { int pos = xref[k]; if (pos <= 0 || xref[k + 1] > 0) continue; tokens.seek(pos); tokens.nextValidToken(); if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) tokens.throwError("Invalid object number."); objNum = tokens.intValue(); tokens.nextValidToken(); if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) tokens.throwError("Invalid generation number."); objGen = tokens.intValue(); tokens.nextValidToken(); if (!tokens.getStringValue().equals("obj")) tokens.throwError("Token 'obj' expected."); PdfObject obj; try { obj = readPRObject(); if (obj.isStream()) { streams.add(obj); } } catch (Exception e) { obj = null; } xrefObj.set(k / 2, obj); } for (int k = 0; k < streams.size(); ++k) { checkPRStreamLength((PRStream) streams.get(k)); } readDecryptedDocObj(); if (objStmMark != null) { for (Iterator i = objStmMark.entrySet().iterator(); i.hasNext();) { Map.Entry entry = (Map.Entry) i.next(); int n = ((Integer) entry.getKey()).intValue(); IntHashtable h = (IntHashtable) entry.getValue(); readObjStm((PRStream) xrefObj.get(n), h); xrefObj.set(n, null); } objStmMark = null; } xref = null; } private void checkPRStreamLength(PRStream stream) throws IOException { int fileLength = tokens.length(); int start = stream.getOffset(); boolean calc = false; int streamLength = 0; PdfObject obj = getPdfObjectRelease(stream.get(PdfName.LENGTH)); if (obj != null && obj.type() == PdfObject.NUMBER) { streamLength = ((PdfNumber) obj).intValue(); if (streamLength + start > fileLength - 20) calc = true; else { tokens.seek(start + streamLength); String line = tokens.readString(20); if (!line.startsWith("\nendstream") && !line.startsWith("\r\nendstream") && !line.startsWith("\rendstream") && !line.startsWith("endstream")) calc = true; } } else calc = true; if (calc) { byte tline[] = new byte[16]; tokens.seek(start); while (true) { int pos = tokens.getFilePointer(); if (!tokens.readLineSegment(tline)) break; if (equalsn(tline, endstream)) { streamLength = pos - start; break; } if (equalsn(tline, endobj)) { tokens.seek(pos - 16); String s = tokens.readString(16); int index = s.indexOf("endstream"); if (index >= 0) pos = pos - 16 + index; streamLength = pos - start; break; } } } stream.setLength(streamLength); } protected void readObjStm(PRStream stream, IntHashtable map) throws IOException { int first = stream.getAsNumber(PdfName.FIRST).intValue(); int n = stream.getAsNumber(PdfName.N).intValue(); byte b[] = getStreamBytes(stream, tokens.getFile()); PRTokeniser saveTokens = tokens; tokens = new PRTokeniser(b); try { int address[] = new int[n]; int objNumber[] = new int[n]; boolean ok = true; for (int k = 0; k < n; ++k) { ok = tokens.nextToken(); if (!ok) break; if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) { ok = false; break; } objNumber[k] = tokens.intValue(); ok = tokens.nextToken(); if (!ok) break; if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) { ok = false; break; } address[k] = tokens.intValue() + first; } if (!ok) throw new InvalidPdfException("Error reading ObjStm"); for (int k = 0; k < n; ++k) { if (map.containsKey(k)) { tokens.seek(address[k]); PdfObject obj = readPRObject(); xrefObj.set(objNumber[k], obj); } } } finally { tokens = saveTokens; } } /** * Eliminates the reference to the object freeing the memory used by it and clearing * the xref entry. * @param obj the object. If it's an indirect reference it will be eliminated * @return the object or the already erased dereferenced object */ public static PdfObject killIndirect(PdfObject obj) { if (obj == null || obj.isNull()) return null; PdfObject ret = getPdfObjectRelease(obj); if (obj.isIndirect()) { PRIndirectReference ref = (PRIndirectReference) obj; PdfReader reader = ref.getReader(); int n = ref.getNumber(); reader.xrefObj.set(n, null); if (reader.partial) reader.xref[n * 2] = -1; } return ret; } private void ensureXrefSize(int size) { if (size == 0) return; if (xref == null) xref = new int[size]; else { if (xref.length < size) { int xref2[] = new int[size]; System.arraycopy(xref, 0, xref2, 0, xref.length); xref = xref2; } } } protected void readXref() throws IOException { hybridXref = false; newXrefType = false; tokens.seek(tokens.getStartxref()); tokens.nextToken(); if (!tokens.getStringValue().equals("startxref")) throw new InvalidPdfException("startxref not found."); tokens.nextToken(); if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) throw new InvalidPdfException("startxref is not followed by a number."); int startxref = tokens.intValue(); lastXref = startxref; eofPos = tokens.getFilePointer(); try { if (readXRefStream(startxref)) { newXrefType = true; return; } } catch (Exception e) { } xref = null; tokens.seek(startxref); trailer = readXrefSection(); PdfDictionary trailer2 = trailer; while (true) { PdfNumber prev = (PdfNumber) trailer2.get(PdfName.PREV); if (prev == null) break; tokens.seek(prev.intValue()); trailer2 = readXrefSection(); } } protected PdfDictionary readXrefSection() throws IOException { tokens.nextValidToken(); if (!tokens.getStringValue().equals("xref")) tokens.throwError("xref subsection not found"); int start = 0; int end = 0; int pos = 0; int gen = 0; while (true) { tokens.nextValidToken(); if (tokens.getStringValue().equals("trailer")) break; if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) tokens.throwError("Object number of the first object in this xref subsection not found"); start = tokens.intValue(); tokens.nextValidToken(); if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) tokens.throwError("Number of entries in this xref subsection not found"); end = tokens.intValue() + start; if (start == 1) { // fix incorrect start number int back = tokens.getFilePointer(); tokens.nextValidToken(); pos = tokens.intValue(); tokens.nextValidToken(); gen = tokens.intValue(); if (pos == 0 && gen == PdfWriter.GENERATION_MAX) { --start; --end; } tokens.seek(back); } ensureXrefSize(end * 2); for (int k = start; k < end; ++k) { tokens.nextValidToken(); pos = tokens.intValue(); tokens.nextValidToken(); gen = tokens.intValue(); tokens.nextValidToken(); int p = k * 2; if (tokens.getStringValue().equals("n")) { if (xref[p] == 0 && xref[p + 1] == 0) { // if (pos == 0) // tokens.throwError("File position 0 cross-reference entry in this xref subsection"); xref[p] = pos; } } else if (tokens.getStringValue().equals("f")) { if (xref[p] == 0 && xref[p + 1] == 0) xref[p] = -1; } else tokens.throwError("Invalid cross-reference entry in this xref subsection"); } } PdfDictionary trailer = (PdfDictionary) readPRObject(); PdfNumber xrefSize = (PdfNumber) trailer.get(PdfName.SIZE); ensureXrefSize(xrefSize.intValue() * 2); PdfObject xrs = trailer.get(PdfName.XREFSTM); if (xrs != null && xrs.isNumber()) { int loc = ((PdfNumber) xrs).intValue(); try { readXRefStream(loc); newXrefType = true; hybridXref = true; } catch (IOException e) { xref = null; throw e; } } return trailer; } protected boolean readXRefStream(int ptr) throws IOException { tokens.seek(ptr); int thisStream = 0; if (!tokens.nextToken()) return false; if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) return false; thisStream = tokens.intValue(); if (!tokens.nextToken() || tokens.getTokenType() != PRTokeniser.TK_NUMBER) return false; if (!tokens.nextToken() || !tokens.getStringValue().equals("obj")) return false; PdfObject object = readPRObject(); PRStream stm = null; if (object.isStream()) { stm = (PRStream) object; if (!PdfName.XREF.equals(stm.get(PdfName.TYPE))) return false; } else return false; if (trailer == null) { trailer = new PdfDictionary(); trailer.putAll(stm); } stm.setLength(((PdfNumber) stm.get(PdfName.LENGTH)).intValue()); int size = ((PdfNumber) stm.get(PdfName.SIZE)).intValue(); PdfArray index; PdfObject obj = stm.get(PdfName.INDEX); if (obj == null) { index = new PdfArray(); index.add(new int[] { 0, size }); } else index = (PdfArray) obj; PdfArray w = (PdfArray) stm.get(PdfName.W); int prev = -1; obj = stm.get(PdfName.PREV); if (obj != null) prev = ((PdfNumber) obj).intValue(); // Each xref pair is a position // type 0 -> -1, 0 // type 1 -> offset, 0 // type 2 -> index, obj num ensureXrefSize(size * 2); if (objStmMark == null && !partial) objStmMark = new HashMap(); if (objStmToOffset == null && partial) objStmToOffset = new IntHashtable(); byte b[] = getStreamBytes(stm, tokens.getFile()); int bptr = 0; int wc[] = new int[3]; for (int k = 0; k < 3; ++k) wc[k] = w.getAsNumber(k).intValue(); for (int idx = 0; idx < index.size(); idx += 2) { int start = index.getAsNumber(idx).intValue(); int length = index.getAsNumber(idx + 1).intValue(); ensureXrefSize((start + length) * 2); while (length-- > 0) { int type = 1; if (wc[0] > 0) { type = 0; for (int k = 0; k < wc[0]; ++k) type = (type << 8) + (b[bptr++] & 0xff); } int field2 = 0; for (int k = 0; k < wc[1]; ++k) field2 = (field2 << 8) + (b[bptr++] & 0xff); int field3 = 0; for (int k = 0; k < wc[2]; ++k) field3 = (field3 << 8) + (b[bptr++] & 0xff); int base = start * 2; if (xref[base] == 0 && xref[base + 1] == 0) { switch (type) { case 0: xref[base] = -1; break; case 1: xref[base] = field2; break; case 2: xref[base] = field3; xref[base + 1] = field2; if (partial) { objStmToOffset.put(field2, 0); } else { Integer on = new Integer(field2); IntHashtable seq = (IntHashtable) objStmMark.get(on); if (seq == null) { seq = new IntHashtable(); seq.put(field3, 1); objStmMark.put(on, seq); } else seq.put(field3, 1); } break; } } ++start; } } thisStream *= 2; if (thisStream < xref.length) xref[thisStream] = -1; if (prev == -1) return true; return readXRefStream(prev); } protected void rebuildXref() throws IOException { hybridXref = false; newXrefType = false; tokens.seek(0); int xr[][] = new int[1024][]; int top = 0; trailer = null; byte line[] = new byte[64]; for (;;) { int pos = tokens.getFilePointer(); if (!tokens.readLineSegment(line)) break; if (line[0] == 't') { if (!PdfEncodings.convertToString(line, null).startsWith("trailer")) continue; tokens.seek(pos); tokens.nextToken(); pos = tokens.getFilePointer(); try { PdfDictionary dic = (PdfDictionary) readPRObject(); if (dic.get(PdfName.ROOT) != null) trailer = dic; else tokens.seek(pos); } catch (Exception e) { tokens.seek(pos); } } else if (line[0] >= '0' && line[0] <= '9') { int obj[] = PRTokeniser.checkObjectStart(line); if (obj == null) continue; int num = obj[0]; int gen = obj[1]; if (num >= xr.length) { int newLength = num * 2; int xr2[][] = new int[newLength][]; System.arraycopy(xr, 0, xr2, 0, top); xr = xr2; } if (num >= top) top = num + 1; if (xr[num] == null || gen >= xr[num][1]) { obj[0] = pos; xr[num] = obj; } } } if (trailer == null) throw new InvalidPdfException("trailer not found."); xref = new int[top * 2]; for (int k = 0; k < top; ++k) { int obj[] = xr[k]; if (obj != null) xref[k * 2] = obj[0]; } } protected PdfDictionary readDictionary() throws IOException { PdfDictionary dic = new PdfDictionary(); while (true) { tokens.nextValidToken(); if (tokens.getTokenType() == PRTokeniser.TK_END_DIC) break; if (tokens.getTokenType() != PRTokeniser.TK_NAME) tokens.throwError("Dictionary key is not a name."); PdfName name = new PdfName(tokens.getStringValue(), false); PdfObject obj = readPRObject(); int type = obj.type(); if (-type == PRTokeniser.TK_END_DIC) tokens.throwError("Unexpected '>>'"); if (-type == PRTokeniser.TK_END_ARRAY) tokens.throwError("Unexpected ']'"); dic.put(name, obj); } return dic; } protected PdfArray readArray() throws IOException { PdfArray array = new PdfArray(); while (true) { PdfObject obj = readPRObject(); int type = obj.type(); if (-type == PRTokeniser.TK_END_ARRAY) break; if (-type == PRTokeniser.TK_END_DIC) tokens.throwError("Unexpected '>>'"); array.add(obj); } return array; } // Track how deeply nested the current object is, so // we know when to return an individual null or boolean, or // reuse one of the static ones. private int readDepth = 0; protected PdfObject readPRObject() throws IOException { tokens.nextValidToken(); int type = tokens.getTokenType(); switch (type) { case PRTokeniser.TK_START_DIC: { ++readDepth; PdfDictionary dic = readDictionary(); --readDepth; int pos = tokens.getFilePointer(); // be careful in the trailer. May not be a "next" token. boolean hasNext; do { hasNext = tokens.nextToken(); } while (hasNext && tokens.getTokenType() == PRTokeniser.TK_COMMENT); if (hasNext && tokens.getStringValue().equals("stream")) { //skip whitespaces int ch; do { ch = tokens.read(); } while (ch == 32 || ch == 9 || ch == 0 || ch == 12); if (ch != '\n') ch = tokens.read(); if (ch != '\n') tokens.backOnePosition(ch); PRStream stream = new PRStream(this, tokens.getFilePointer()); stream.putAll(dic); // crypto handling stream.setObjNum(objNum, objGen); return stream; } else { tokens.seek(pos); return dic; } } case PRTokeniser.TK_START_ARRAY: { ++readDepth; PdfArray arr = readArray(); --readDepth; return arr; } case PRTokeniser.TK_NUMBER: return new PdfNumber(tokens.getStringValue()); case PRTokeniser.TK_STRING: PdfString str = new PdfString(tokens.getStringValue(), null).setHexWriting(tokens.isHexString()); // crypto handling str.setObjNum(objNum, objGen); if (strings != null) strings.add(str); return str; case PRTokeniser.TK_NAME: { PdfName cachedName = (PdfName) PdfName.staticNames.get(tokens.getStringValue()); if (readDepth > 0 && cachedName != null) { return cachedName; } else { // an indirect name (how odd...), or a non-standard one return new PdfName(tokens.getStringValue(), false); } } case PRTokeniser.TK_REF: int num = tokens.getReference(); PRIndirectReference ref = new PRIndirectReference(this, num, tokens.getGeneration()); return ref; default: String sv = tokens.getStringValue(); if ("null".equals(sv)) { if (readDepth == 0) { return new PdfNull(); } //else return PdfNull.PDFNULL; } else if ("true".equals(sv)) { if (readDepth == 0) { return new PdfBoolean(true); } //else return PdfBoolean.PDFTRUE; } else if ("false".equals(sv)) { if (readDepth == 0) { return new PdfBoolean(false); } //else return PdfBoolean.PDFFALSE; } return new PdfLiteral(-type, tokens.getStringValue()); } } /** Decodes a stream that has the FlateDecode filter. * @param in the input data * @return the decoded data */ public static byte[] FlateDecode(byte in[]) { byte b[] = FlateDecode(in, true); if (b == null) return FlateDecode(in, false); return b; } /** * @param in * @param dicPar * @return a byte array */ public static byte[] decodePredictor(byte in[], PdfObject dicPar) { if (dicPar == null || !dicPar.isDictionary()) return in; PdfDictionary dic = (PdfDictionary) dicPar; PdfObject obj = getPdfObject(dic.get(PdfName.PREDICTOR)); if (obj == null || !obj.isNumber()) return in; int predictor = ((PdfNumber) obj).intValue(); if (predictor < 10) return in; int width = 1; obj = getPdfObject(dic.get(PdfName.COLUMNS)); if (obj != null && obj.isNumber()) width = ((PdfNumber) obj).intValue(); int colors = 1; obj = getPdfObject(dic.get(PdfName.COLORS)); if (obj != null && obj.isNumber()) colors = ((PdfNumber) obj).intValue(); int bpc = 8; obj = getPdfObject(dic.get(PdfName.BITSPERCOMPONENT)); if (obj != null && obj.isNumber()) bpc = ((PdfNumber) obj).intValue(); DataInputStream dataStream = new DataInputStream(new ByteArrayInputStream(in)); ByteArrayOutputStream fout = new ByteArrayOutputStream(in.length); int bytesPerPixel = colors * bpc / 8; int bytesPerRow = (colors * width * bpc + 7) / 8; byte[] curr = new byte[bytesPerRow]; byte[] prior = new byte[bytesPerRow]; // Decode the (sub)image row-by-row while (true) { // Read the filter type byte and a row of data int filter = 0; try { filter = dataStream.read(); if (filter < 0) { return fout.toByteArray(); } dataStream.readFully(curr, 0, bytesPerRow); } catch (Exception e) { return fout.toByteArray(); } switch (filter) { case 0: //PNG_FILTER_NONE break; case 1: //PNG_FILTER_SUB for (int i = bytesPerPixel; i < bytesPerRow; i++) { curr[i] += curr[i - bytesPerPixel]; } break; case 2: //PNG_FILTER_UP for (int i = 0; i < bytesPerRow; i++) { curr[i] += prior[i]; } break; case 3: //PNG_FILTER_AVERAGE for (int i = 0; i < bytesPerPixel; i++) { curr[i] += prior[i] / 2; } for (int i = bytesPerPixel; i < bytesPerRow; i++) { curr[i] += ((curr[i - bytesPerPixel] & 0xff) + (prior[i] & 0xff)) / 2; } break; case 4: //PNG_FILTER_PAETH for (int i = 0; i < bytesPerPixel; i++) { curr[i] += prior[i]; } for (int i = bytesPerPixel; i < bytesPerRow; i++) { int a = curr[i - bytesPerPixel] & 0xff; int b = prior[i] & 0xff; int c = prior[i - bytesPerPixel] & 0xff; int p = a + b - c; int pa = Math.abs(p - a); int pb = Math.abs(p - b); int pc = Math.abs(p - c); int ret; if ((pa <= pb) && (pa <= pc)) { ret = a; } else if (pb <= pc) { ret = b; } else { ret = c; } curr[i] += (byte) (ret); } break; default: // Error -- unknown filter type throw new RuntimeException("PNG filter unknown."); } try { fout.write(curr); } catch (IOException ioe) { // Never happens } // Swap curr and prior byte[] tmp = prior; prior = curr; curr = tmp; } } /** A helper to FlateDecode. * @param in the input data * @param strict <CODE>true</CODE> to read a correct stream. <CODE>false</CODE> * to try to read a corrupted stream * @return the decoded data */ public static byte[] FlateDecode(byte in[], boolean strict) { ByteArrayInputStream stream = new ByteArrayInputStream(in); InflaterInputStream zip = new InflaterInputStream(stream); ByteArrayOutputStream out = new ByteArrayOutputStream(); byte b[] = new byte[strict ? 4092 : 1]; try { int n; while ((n = zip.read(b)) >= 0) { out.write(b, 0, n); } zip.close(); out.close(); return out.toByteArray(); } catch (Exception e) { if (strict) return null; return out.toByteArray(); } } /** Decodes a stream that has the ASCIIHexDecode filter. * @param in the input data * @return the decoded data */ public static byte[] ASCIIHexDecode(byte in[]) { ByteArrayOutputStream out = new ByteArrayOutputStream(); boolean first = true; int n1 = 0; for (int k = 0; k < in.length; ++k) { int ch = in[k] & 0xff; if (ch == '>') break; if (PRTokeniser.isWhitespace(ch)) continue; int n = PRTokeniser.getHex(ch); if (n == -1) throw new RuntimeException("Illegal character in ASCIIHexDecode."); if (first) n1 = n; else out.write((byte) ((n1 << 4) + n)); first = !first; } if (!first) out.write((byte) (n1 << 4)); return out.toByteArray(); } /** Decodes a stream that has the ASCII85Decode filter. * @param in the input data * @return the decoded data */ public static byte[] ASCII85Decode(byte in[]) { ByteArrayOutputStream out = new ByteArrayOutputStream(); int state = 0; int chn[] = new int[5]; for (int k = 0; k < in.length; ++k) { int ch = in[k] & 0xff; if (ch == '~') break; if (PRTokeniser.isWhitespace(ch)) continue; if (ch == 'z' && state == 0) { out.write(0); out.write(0); out.write(0); out.write(0); continue; } if (ch < '!' || ch > 'u') throw new RuntimeException("Illegal character in ASCII85Decode."); chn[state] = ch - '!'; ++state; if (state == 5) { state = 0; int r = 0; for (int j = 0; j < 5; ++j) r = r * 85 + chn[j]; out.write((byte) (r >> 24)); out.write((byte) (r >> 16)); out.write((byte) (r >> 8)); out.write((byte) r); } } int r = 0; // We'll ignore the next two lines for the sake of perpetuating broken PDFs // if (state == 1) // throw new RuntimeException("Illegal length in ASCII85Decode."); if (state == 2) { r = chn[0] * 85 * 85 * 85 * 85 + chn[1] * 85 * 85 * 85 + 85 * 85 * 85 + 85 * 85 + 85; out.write((byte) (r >> 24)); } else if (state == 3) { r = chn[0] * 85 * 85 * 85 * 85 + chn[1] * 85 * 85 * 85 + chn[2] * 85 * 85 + 85 * 85 + 85; out.write((byte) (r >> 24)); out.write((byte) (r >> 16)); } else if (state == 4) { r = chn[0] * 85 * 85 * 85 * 85 + chn[1] * 85 * 85 * 85 + chn[2] * 85 * 85 + chn[3] * 85 + 85; out.write((byte) (r >> 24)); out.write((byte) (r >> 16)); out.write((byte) (r >> 8)); } return out.toByteArray(); } /** Decodes a stream that has the LZWDecode filter. * @param in the input data * @return the decoded data */ public static byte[] LZWDecode(byte in[]) { ByteArrayOutputStream out = new ByteArrayOutputStream(); LZWDecoder lzw = new LZWDecoder(); lzw.decode(in, out); return out.toByteArray(); } /** Checks if the document had errors and was rebuilt. * @return true if rebuilt. * */ public boolean isRebuilt() { return this.rebuilt; } /** Gets the dictionary that represents a page. * @param pageNum the page number. 1 is the first * @return the page dictionary */ public PdfDictionary getPageN(int pageNum) { PdfDictionary dic = pageRefs.getPageN(pageNum); if (dic == null) return null; if (appendable) dic.setIndRef(pageRefs.getPageOrigRef(pageNum)); return dic; } /** * @param pageNum * @return a Dictionary object */ public PdfDictionary getPageNRelease(int pageNum) { PdfDictionary dic = getPageN(pageNum); pageRefs.releasePage(pageNum); return dic; } /** * @param pageNum */ public void releasePage(int pageNum) { pageRefs.releasePage(pageNum); } /** * */ public void resetReleasePage() { pageRefs.resetReleasePage(); } /** Gets the page reference to this page. * @param pageNum the page number. 1 is the first * @return the page reference */ public PRIndirectReference getPageOrigRef(int pageNum) { return pageRefs.getPageOrigRef(pageNum); } /** Gets the contents of the page. * @param pageNum the page number. 1 is the first * @param file the location of the PDF document * @throws IOException on error * @return the content */ public byte[] getPageContent(int pageNum, RandomAccessFileOrArray file) throws IOException { PdfDictionary page = getPageNRelease(pageNum); if (page == null) return null; PdfObject contents = getPdfObjectRelease(page.get(PdfName.CONTENTS)); if (contents == null) return new byte[0]; ByteArrayOutputStream bout = null; if (contents.isStream()) { return getStreamBytes((PRStream) contents, file); } else if (contents.isArray()) { PdfArray array = (PdfArray) contents; bout = new ByteArrayOutputStream(); for (int k = 0; k < array.size(); ++k) { PdfObject item = getPdfObjectRelease(array.getPdfObject(k)); if (item == null || !item.isStream()) continue; byte[] b = getStreamBytes((PRStream) item, file); bout.write(b); if (k != array.size() - 1) bout.write('\n'); } return bout.toByteArray(); } else return new byte[0]; } /** Gets the contents of the page. * @param pageNum the page number. 1 is the first * @throws IOException on error * @return the content */ public byte[] getPageContent(int pageNum) throws IOException { RandomAccessFileOrArray rf = getSafeFile(); try { rf.reOpen(); return getPageContent(pageNum, rf); } finally { try { rf.close(); } catch (Exception e) { } } } protected void killXref(PdfObject obj) { if (obj == null) return; if ((obj instanceof PdfIndirectReference) && !obj.isIndirect()) return; switch (obj.type()) { case PdfObject.INDIRECT: { int xr = ((PRIndirectReference) obj).getNumber(); obj = (PdfObject) xrefObj.get(xr); xrefObj.set(xr, null); freeXref = xr; killXref(obj); break; } case PdfObject.ARRAY: { PdfArray t = (PdfArray) obj; for (int i = 0; i < t.size(); ++i) killXref(t.getPdfObject(i)); break; } case PdfObject.STREAM: case PdfObject.DICTIONARY: { PdfDictionary dic = (PdfDictionary) obj; for (Iterator i = dic.getKeys().iterator(); i.hasNext();) { killXref(dic.get((PdfName) i.next())); } break; } } } /** Sets the contents of the page. * @param content the new page content * @param pageNum the page number. 1 is the first */ public void setPageContent(int pageNum, byte content[]) { setPageContent(pageNum, content, PdfStream.DEFAULT_COMPRESSION); } /** Sets the contents of the page. * @param content the new page content * @param pageNum the page number. 1 is the first * @since 2.1.3 (the method already existed without param compressionLevel) */ public void setPageContent(int pageNum, byte content[], int compressionLevel) { PdfDictionary page = getPageN(pageNum); if (page == null) return; PdfObject contents = page.get(PdfName.CONTENTS); freeXref = -1; killXref(contents); if (freeXref == -1) { xrefObj.add(null); freeXref = xrefObj.size() - 1; } page.put(PdfName.CONTENTS, new PRIndirectReference(this, freeXref)); xrefObj.set(freeXref, new PRStream(this, content, compressionLevel)); } /** Get the content from a stream applying the required filters. * @param stream the stream * @param file the location where the stream is * @throws IOException on error * @return the stream content */ public static byte[] getStreamBytes(PRStream stream, RandomAccessFileOrArray file) throws IOException { PdfObject filter = getPdfObjectRelease(stream.get(PdfName.FILTER)); byte[] b = getStreamBytesRaw(stream, file); ArrayList filters = new ArrayList(); if (filter != null) { if (filter.isName()) filters.add(filter); else if (filter.isArray()) filters = ((PdfArray) filter).getArrayList(); } ArrayList dp = new ArrayList(); PdfObject dpo = getPdfObjectRelease(stream.get(PdfName.DECODEPARMS)); if (dpo == null || (!dpo.isDictionary() && !dpo.isArray())) dpo = getPdfObjectRelease(stream.get(PdfName.DP)); if (dpo != null) { if (dpo.isDictionary()) dp.add(dpo); else if (dpo.isArray()) dp = ((PdfArray) dpo).getArrayList(); } String name; for (int j = 0; j < filters.size(); ++j) { name = ((PdfName) getPdfObjectRelease((PdfObject) filters.get(j))).toString(); if (name.equals("/FlateDecode") || name.equals("/Fl")) { b = FlateDecode(b); PdfObject dicParam = null; if (j < dp.size()) { dicParam = (PdfObject) dp.get(j); b = decodePredictor(b, dicParam); } } else if (name.equals("/ASCIIHexDecode") || name.equals("/AHx")) b = ASCIIHexDecode(b); else if (name.equals("/ASCII85Decode") || name.equals("/A85")) b = ASCII85Decode(b); else if (name.equals("/LZWDecode")) { b = LZWDecode(b); PdfObject dicParam = null; if (j < dp.size()) { dicParam = (PdfObject) dp.get(j); b = decodePredictor(b, dicParam); } } else if (name.equals("/Crypt")) { } else throw new UnsupportedPdfException("The filter " + name + " is not supported."); } return b; } /** Get the content from a stream applying the required filters. * @param stream the stream * @throws IOException on error * @return the stream content */ public static byte[] getStreamBytes(PRStream stream) throws IOException { RandomAccessFileOrArray rf = stream.getReader().getSafeFile(); try { rf.reOpen(); return getStreamBytes(stream, rf); } finally { try { rf.close(); } catch (Exception e) { } } } /** Get the content from a stream as it is without applying any filter. * @param stream the stream * @param file the location where the stream is * @throws IOException on error * @return the stream content */ public static byte[] getStreamBytesRaw(PRStream stream, RandomAccessFileOrArray file) throws IOException { PdfReader reader = stream.getReader(); byte b[]; if (stream.getOffset() < 0) b = stream.getBytes(); else { b = new byte[stream.getLength()]; file.seek(stream.getOffset()); file.readFully(b); PdfEncryption decrypt = reader.getDecrypt(); if (decrypt != null) { PdfObject filter = getPdfObjectRelease(stream.get(PdfName.FILTER)); ArrayList filters = new ArrayList(); if (filter != null) { if (filter.isName()) filters.add(filter); else if (filter.isArray()) filters = ((PdfArray) filter).getArrayList(); } boolean skip = false; for (int k = 0; k < filters.size(); ++k) { PdfObject obj = getPdfObjectRelease((PdfObject) filters.get(k)); if (obj != null && obj.toString().equals("/Crypt")) { skip = true; break; } } if (!skip) { decrypt.setHashKey(stream.getObjNum(), stream.getObjGen()); b = decrypt.decryptByteArray(b); } } } return b; } /** Get the content from a stream as it is without applying any filter. * @param stream the stream * @throws IOException on error * @return the stream content */ public static byte[] getStreamBytesRaw(PRStream stream) throws IOException { RandomAccessFileOrArray rf = stream.getReader().getSafeFile(); try { rf.reOpen(); return getStreamBytesRaw(stream, rf); } finally { try { rf.close(); } catch (Exception e) { } } } /** Eliminates shared streams if they exist. */ public void eliminateSharedStreams() { if (!sharedStreams) return; sharedStreams = false; if (pageRefs.size() == 1) return; ArrayList newRefs = new ArrayList(); ArrayList newStreams = new ArrayList(); IntHashtable visited = new IntHashtable(); for (int k = 1; k <= pageRefs.size(); ++k) { PdfDictionary page = pageRefs.getPageN(k); if (page == null) continue; PdfObject contents = getPdfObject(page.get(PdfName.CONTENTS)); if (contents == null) continue; if (contents.isStream()) { PRIndirectReference ref = (PRIndirectReference) page.get(PdfName.CONTENTS); if (visited.containsKey(ref.getNumber())) { // need to duplicate newRefs.add(ref); newStreams.add(new PRStream((PRStream) contents, null)); } else visited.put(ref.getNumber(), 1); } else if (contents.isArray()) { PdfArray array = (PdfArray) contents; for (int j = 0; j < array.size(); ++j) { PRIndirectReference ref = (PRIndirectReference) array.getPdfObject(j); if (visited.containsKey(ref.getNumber())) { // need to duplicate newRefs.add(ref); newStreams.add(new PRStream((PRStream) getPdfObject(ref), null)); } else visited.put(ref.getNumber(), 1); } } } if (newStreams.isEmpty()) return; for (int k = 0; k < newStreams.size(); ++k) { xrefObj.add(newStreams.get(k)); PRIndirectReference ref = (PRIndirectReference) newRefs.get(k); ref.setNumber(xrefObj.size() - 1, 0); } } /** Checks if the document was changed. * @return <CODE>true</CODE> if the document was changed, * <CODE>false</CODE> otherwise */ public boolean isTampered() { return tampered; } /** * Sets the tampered state. A tampered PdfReader cannot be reused in PdfStamper. * @param tampered the tampered state */ public void setTampered(boolean tampered) { this.tampered = tampered; pageRefs.keepPages(); } /** Gets the XML metadata. * @throws IOException on error * @return the XML metadata */ public byte[] getMetadata() throws IOException { PdfObject obj = getPdfObject(catalog.get(PdfName.METADATA)); if (!(obj instanceof PRStream)) return null; RandomAccessFileOrArray rf = getSafeFile(); byte b[] = null; try { rf.reOpen(); b = getStreamBytes((PRStream) obj, rf); } finally { try { rf.close(); } catch (Exception e) { // empty on purpose } } return b; } /** * Gets the byte address of the last xref table. * @return the byte address of the last xref table */ public int getLastXref() { return lastXref; } /** * Gets the number of xref objects. * @return the number of xref objects */ public int getXrefSize() { return xrefObj.size(); } /** * Gets the byte address of the %%EOF marker. * @return the byte address of the %%EOF marker */ public int getEofPos() { return eofPos; } /** * Gets the PDF version. Only the last version char is returned. For example * version 1.4 is returned as '4'. * @return the PDF version */ public char getPdfVersion() { return pdfVersion; } /** * Returns <CODE>true</CODE> if the PDF is encrypted. * @return <CODE>true</CODE> if the PDF is encrypted */ public boolean isEncrypted() { return encrypted; } /** * Gets the encryption permissions. It can be used directly in * <CODE>PdfWriter.setEncryption()</CODE>. * @return the encryption permissions */ public int getPermissions() { return pValue; } /** * Returns <CODE>true</CODE> if the PDF has a 128 bit key encryption. * @return <CODE>true</CODE> if the PDF has a 128 bit key encryption */ public boolean is128Key() { return rValue == 3; } /** * Gets the trailer dictionary * @return the trailer dictionary */ public PdfDictionary getTrailer() { return trailer; } PdfEncryption getDecrypt() { return decrypt; } static boolean equalsn(byte a1[], byte a2[]) { int length = a2.length; for (int k = 0; k < length; ++k) { if (a1[k] != a2[k]) return false; } return true; } static boolean existsName(PdfDictionary dic, PdfName key, PdfName value) { PdfObject type = getPdfObjectRelease(dic.get(key)); if (type == null || !type.isName()) return false; PdfName name = (PdfName) type; return name.equals(value); } static String getFontName(PdfDictionary dic) { if (dic == null) return null; PdfObject type = getPdfObjectRelease(dic.get(PdfName.BASEFONT)); if (type == null || !type.isName()) return null; return PdfName.decodeName(type.toString()); } static String getSubsetPrefix(PdfDictionary dic) { if (dic == null) return null; String s = getFontName(dic); if (s == null) return null; if (s.length() < 8 || s.charAt(6) != '+') return null; for (int k = 0; k < 6; ++k) { char c = s.charAt(k); if (c < 'A' || c > 'Z') return null; } return s; } /** Finds all the font subsets and changes the prefixes to some * random values. * @return the number of font subsets altered */ public int shuffleSubsetNames() { int total = 0; for (int k = 1; k < xrefObj.size(); ++k) { PdfObject obj = getPdfObjectRelease(k); if (obj == null || !obj.isDictionary()) continue; PdfDictionary dic = (PdfDictionary) obj; if (!existsName(dic, PdfName.TYPE, PdfName.FONT)) continue; if (existsName(dic, PdfName.SUBTYPE, PdfName.TYPE1) || existsName(dic, PdfName.SUBTYPE, PdfName.MMTYPE1) || existsName(dic, PdfName.SUBTYPE, PdfName.TRUETYPE)) { String s = getSubsetPrefix(dic); if (s == null) continue; String ns = BaseFont.createSubsetPrefix() + s.substring(7); PdfName newName = new PdfName(ns); dic.put(PdfName.BASEFONT, newName); setXrefPartialObject(k, dic); ++total; PdfDictionary fd = dic.getAsDict(PdfName.FONTDESCRIPTOR); if (fd == null) continue; fd.put(PdfName.FONTNAME, newName); } else if (existsName(dic, PdfName.SUBTYPE, PdfName.TYPE0)) { String s = getSubsetPrefix(dic); PdfArray arr = dic.getAsArray(PdfName.DESCENDANTFONTS); if (arr == null) continue; if (arr.isEmpty()) continue; PdfDictionary desc = arr.getAsDict(0); String sde = getSubsetPrefix(desc); if (sde == null) continue; String ns = BaseFont.createSubsetPrefix(); if (s != null) dic.put(PdfName.BASEFONT, new PdfName(ns + s.substring(7))); setXrefPartialObject(k, dic); PdfName newName = new PdfName(ns + sde.substring(7)); desc.put(PdfName.BASEFONT, newName); ++total; PdfDictionary fd = desc.getAsDict(PdfName.FONTDESCRIPTOR); if (fd == null) continue; fd.put(PdfName.FONTNAME, newName); } } return total; } /** Finds all the fonts not subset but embedded and marks them as subset. * @return the number of fonts altered */ public int createFakeFontSubsets() { int total = 0; for (int k = 1; k < xrefObj.size(); ++k) { PdfObject obj = getPdfObjectRelease(k); if (obj == null || !obj.isDictionary()) continue; PdfDictionary dic = (PdfDictionary) obj; if (!existsName(dic, PdfName.TYPE, PdfName.FONT)) continue; if (existsName(dic, PdfName.SUBTYPE, PdfName.TYPE1) || existsName(dic, PdfName.SUBTYPE, PdfName.MMTYPE1) || existsName(dic, PdfName.SUBTYPE, PdfName.TRUETYPE)) { String s = getSubsetPrefix(dic); if (s != null) continue; s = getFontName(dic); if (s == null) continue; String ns = BaseFont.createSubsetPrefix() + s; PdfDictionary fd = (PdfDictionary) getPdfObjectRelease(dic.get(PdfName.FONTDESCRIPTOR)); if (fd == null) continue; if (fd.get(PdfName.FONTFILE) == null && fd.get(PdfName.FONTFILE2) == null && fd.get(PdfName.FONTFILE3) == null) continue; fd = dic.getAsDict(PdfName.FONTDESCRIPTOR); PdfName newName = new PdfName(ns); dic.put(PdfName.BASEFONT, newName); fd.put(PdfName.FONTNAME, newName); setXrefPartialObject(k, dic); ++total; } } return total; } private static PdfArray getNameArray(PdfObject obj) { if (obj == null) return null; obj = getPdfObjectRelease(obj); if (obj == null) return null; if (obj.isArray()) return (PdfArray) obj; else if (obj.isDictionary()) { PdfObject arr2 = getPdfObjectRelease(((PdfDictionary) obj).get(PdfName.D)); if (arr2 != null && arr2.isArray()) return (PdfArray) arr2; } return null; } /** * Gets all the named destinations as an <CODE>HashMap</CODE>. The key is the name * and the value is the destinations array. * @return gets all the named destinations */ public HashMap getNamedDestination() { return getNamedDestination(false); } /** * Gets all the named destinations as an <CODE>HashMap</CODE>. The key is the name * and the value is the destinations array. * @param keepNames true if you want the keys to be real PdfNames instead of Strings * @return gets all the named destinations * @since 2.1.6 */ public HashMap getNamedDestination(boolean keepNames) { HashMap names = getNamedDestinationFromNames(keepNames); names.putAll(getNamedDestinationFromStrings()); return names; } /** * Gets the named destinations from the /Dests key in the catalog as an <CODE>HashMap</CODE>. The key is the name * and the value is the destinations array. * @return gets the named destinations */ public HashMap getNamedDestinationFromNames() { return getNamedDestinationFromNames(false); } /** * Gets the named destinations from the /Dests key in the catalog as an <CODE>HashMap</CODE>. The key is the name * and the value is the destinations array. * @param keepNames true if you want the keys to be real PdfNames instead of Strings * @return gets the named destinations * @since 2.1.6 */ public HashMap getNamedDestinationFromNames(boolean keepNames) { HashMap names = new HashMap(); if (catalog.get(PdfName.DESTS) != null) { PdfDictionary dic = (PdfDictionary) getPdfObjectRelease(catalog.get(PdfName.DESTS)); if (dic == null) return names; Set keys = dic.getKeys(); for (Iterator it = keys.iterator(); it.hasNext();) { PdfName key = (PdfName) it.next(); PdfArray arr = getNameArray(dic.get(key)); if (arr == null) continue; if (keepNames) { names.put(key, arr); } else { String name = PdfName.decodeName(key.toString()); names.put(name, arr); } } } return names; } /** * Gets the named destinations from the /Names key in the catalog as an <CODE>HashMap</CODE>. The key is the name * and the value is the destinations array. * @return gets the named destinations */ public HashMap getNamedDestinationFromStrings() { if (catalog.get(PdfName.NAMES) != null) { PdfDictionary dic = (PdfDictionary) getPdfObjectRelease(catalog.get(PdfName.NAMES)); if (dic != null) { dic = (PdfDictionary) getPdfObjectRelease(dic.get(PdfName.DESTS)); if (dic != null) { HashMap names = PdfNameTree.readTree(dic); for (Iterator it = names.entrySet().iterator(); it.hasNext();) { Map.Entry entry = (Map.Entry) it.next(); PdfArray arr = getNameArray((PdfObject) entry.getValue()); if (arr != null) entry.setValue(arr); else it.remove(); } return names; } } } return new HashMap(); } private boolean replaceNamedDestination(PdfObject obj, HashMap names) { obj = getPdfObject(obj); int objIdx = lastXrefPartial; releaseLastXrefPartial(); if (obj != null && obj.isDictionary()) { PdfObject ob2 = getPdfObjectRelease(((PdfDictionary) obj).get(PdfName.DEST)); Object name = null; if (ob2 != null) { if (ob2.isName()) name = ob2; else if (ob2.isString()) name = ob2.toString(); PdfArray dest = (PdfArray) names.get(name); if (dest != null) { ((PdfDictionary) obj).put(PdfName.DEST, dest); setXrefPartialObject(objIdx, obj); return true; } } else if ((ob2 = getPdfObject(((PdfDictionary) obj).get(PdfName.A))) != null) { int obj2Idx = lastXrefPartial; releaseLastXrefPartial(); PdfDictionary dic = (PdfDictionary) ob2; PdfName type = (PdfName) getPdfObjectRelease(dic.get(PdfName.S)); if (PdfName.GOTO.equals(type)) { PdfObject ob3 = getPdfObjectRelease(dic.get(PdfName.D)); if (ob3 != null) { if (ob3.isName()) name = ob3; else if (ob3.isString()) name = ob3.toString(); } PdfArray dest = (PdfArray) names.get(name); if (dest != null) { dic.put(PdfName.D, dest); setXrefPartialObject(obj2Idx, ob2); setXrefPartialObject(objIdx, obj); return true; } } } } return false; } /** * Removes all the fields from the document. */ public void removeFields() { pageRefs.resetReleasePage(); for (int k = 1; k <= pageRefs.size(); ++k) { PdfDictionary page = pageRefs.getPageN(k); PdfArray annots = page.getAsArray(PdfName.ANNOTS); if (annots == null) { pageRefs.releasePage(k); continue; } for (int j = 0; j < annots.size(); ++j) { PdfObject obj = getPdfObjectRelease(annots.getPdfObject(j)); if (obj == null || !obj.isDictionary()) continue; PdfDictionary annot = (PdfDictionary) obj; if (PdfName.WIDGET.equals(annot.get(PdfName.SUBTYPE))) annots.remove(j--); } if (annots.isEmpty()) page.remove(PdfName.ANNOTS); else pageRefs.releasePage(k); } catalog.remove(PdfName.ACROFORM); pageRefs.resetReleasePage(); } /** * Removes all the annotations and fields from the document. */ public void removeAnnotations() { pageRefs.resetReleasePage(); for (int k = 1; k <= pageRefs.size(); ++k) { PdfDictionary page = pageRefs.getPageN(k); if (page.get(PdfName.ANNOTS) == null) pageRefs.releasePage(k); else page.remove(PdfName.ANNOTS); } catalog.remove(PdfName.ACROFORM); pageRefs.resetReleasePage(); } public ArrayList getLinks(int page) { pageRefs.resetReleasePage(); ArrayList result = new ArrayList(); PdfDictionary pageDic = pageRefs.getPageN(page); if (pageDic.get(PdfName.ANNOTS) != null) { PdfArray annots = pageDic.getAsArray(PdfName.ANNOTS); for (int j = 0; j < annots.size(); ++j) { PdfDictionary annot = (PdfDictionary) getPdfObjectRelease(annots.getPdfObject(j)); if (PdfName.LINK.equals(annot.get(PdfName.SUBTYPE))) { result.add(new PdfAnnotation.PdfImportedLink(annot)); } } } pageRefs.releasePage(page); pageRefs.resetReleasePage(); return result; } private void iterateBookmarks(PdfObject outlineRef, HashMap names) { while (outlineRef != null) { replaceNamedDestination(outlineRef, names); PdfDictionary outline = (PdfDictionary) getPdfObjectRelease(outlineRef); PdfObject first = outline.get(PdfName.FIRST); if (first != null) { iterateBookmarks(first, names); } outlineRef = outline.get(PdfName.NEXT); } } /** Replaces all the local named links with the actual destinations. */ public void consolidateNamedDestinations() { if (consolidateNamedDestinations) return; consolidateNamedDestinations = true; HashMap names = getNamedDestination(true); if (names.isEmpty()) return; for (int k = 1; k <= pageRefs.size(); ++k) { PdfDictionary page = pageRefs.getPageN(k); PdfObject annotsRef; PdfArray annots = (PdfArray) getPdfObject(annotsRef = page.get(PdfName.ANNOTS)); int annotIdx = lastXrefPartial; releaseLastXrefPartial(); if (annots == null) { pageRefs.releasePage(k); continue; } boolean commitAnnots = false; for (int an = 0; an < annots.size(); ++an) { PdfObject objRef = annots.getPdfObject(an); if (replaceNamedDestination(objRef, names) && !objRef.isIndirect()) commitAnnots = true; } if (commitAnnots) setXrefPartialObject(annotIdx, annots); if (!commitAnnots || annotsRef.isIndirect()) pageRefs.releasePage(k); } PdfDictionary outlines = (PdfDictionary) getPdfObjectRelease(catalog.get(PdfName.OUTLINES)); if (outlines == null) return; iterateBookmarks(outlines.get(PdfName.FIRST), names); } protected static PdfDictionary duplicatePdfDictionary(PdfDictionary original, PdfDictionary copy, PdfReader newReader) { if (copy == null) copy = new PdfDictionary(); for (Iterator it = original.getKeys().iterator(); it.hasNext();) { PdfName key = (PdfName) it.next(); copy.put(key, duplicatePdfObject(original.get(key), newReader)); } return copy; } protected static PdfObject duplicatePdfObject(PdfObject original, PdfReader newReader) { if (original == null) return null; switch (original.type()) { case PdfObject.DICTIONARY: { return duplicatePdfDictionary((PdfDictionary) original, null, newReader); } case PdfObject.STREAM: { PRStream org = (PRStream) original; PRStream stream = new PRStream(org, null, newReader); duplicatePdfDictionary(org, stream, newReader); return stream; } case PdfObject.ARRAY: { PdfArray arr = new PdfArray(); for (Iterator it = ((PdfArray) original).listIterator(); it.hasNext();) { arr.add(duplicatePdfObject((PdfObject) it.next(), newReader)); } return arr; } case PdfObject.INDIRECT: { PRIndirectReference org = (PRIndirectReference) original; return new PRIndirectReference(newReader, org.getNumber(), org.getGeneration()); } default: return original; } } /** * Closes the reader */ public void close() { if (!partial) return; try { tokens.close(); } catch (IOException e) { throw new ExceptionConverter(e); } } protected void removeUnusedNode(PdfObject obj, boolean hits[]) { Stack state = new Stack(); state.push(obj); while (!state.empty()) { Object current = state.pop(); if (current == null) continue; ArrayList ar = null; PdfDictionary dic = null; PdfName[] keys = null; Object[] objs = null; int idx = 0; if (current instanceof PdfObject) { obj = (PdfObject) current; switch (obj.type()) { case PdfObject.DICTIONARY: case PdfObject.STREAM: dic = (PdfDictionary) obj; keys = new PdfName[dic.size()]; dic.getKeys().toArray(keys); break; case PdfObject.ARRAY: ar = ((PdfArray) obj).getArrayList(); break; case PdfObject.INDIRECT: PRIndirectReference ref = (PRIndirectReference) obj; int num = ref.getNumber(); if (!hits[num]) { hits[num] = true; state.push(getPdfObjectRelease(ref)); } continue; default: continue; } } else { objs = (Object[]) current; if (objs[0] instanceof ArrayList) { ar = (ArrayList) objs[0]; idx = ((Integer) objs[1]).intValue(); } else { keys = (PdfName[]) objs[0]; dic = (PdfDictionary) objs[1]; idx = ((Integer) objs[2]).intValue(); } } if (ar != null) { for (int k = idx; k < ar.size(); ++k) { PdfObject v = (PdfObject) ar.get(k); if (v.isIndirect()) { int num = ((PRIndirectReference) v).getNumber(); if (num >= xrefObj.size() || (!partial && xrefObj.get(num) == null)) { ar.set(k, PdfNull.PDFNULL); continue; } } if (objs == null) state.push(new Object[] { ar, new Integer(k + 1) }); else { objs[1] = new Integer(k + 1); state.push(objs); } state.push(v); break; } } else { for (int k = idx; k < keys.length; ++k) { PdfName key = keys[k]; PdfObject v = dic.get(key); if (v.isIndirect()) { int num = ((PRIndirectReference) v).getNumber(); if (num >= xrefObj.size() || (!partial && xrefObj.get(num) == null)) { dic.put(key, PdfNull.PDFNULL); continue; } } if (objs == null) state.push(new Object[] { keys, dic, new Integer(k + 1) }); else { objs[2] = new Integer(k + 1); state.push(objs); } state.push(v); break; } } } } /** Removes all the unreachable objects. * @return the number of indirect objects removed */ public int removeUnusedObjects() { boolean hits[] = new boolean[xrefObj.size()]; removeUnusedNode(trailer, hits); int total = 0; if (partial) { for (int k = 1; k < hits.length; ++k) { if (!hits[k]) { xref[k * 2] = -1; xref[k * 2 + 1] = 0; xrefObj.set(k, null); ++total; } } } else { for (int k = 1; k < hits.length; ++k) { if (!hits[k]) { xrefObj.set(k, null); ++total; } } } return total; } /** Gets a read-only version of <CODE>AcroFields</CODE>. * @return a read-only version of <CODE>AcroFields</CODE> */ public AcroFields getAcroFields() { return new AcroFields(this, null); } /** * Gets the global document JavaScript. * @param file the document file * @throws IOException on error * @return the global document JavaScript */ public String getJavaScript(RandomAccessFileOrArray file) throws IOException { PdfDictionary names = (PdfDictionary) getPdfObjectRelease(catalog.get(PdfName.NAMES)); if (names == null) return null; PdfDictionary js = (PdfDictionary) getPdfObjectRelease(names.get(PdfName.JAVASCRIPT)); if (js == null) return null; HashMap jscript = PdfNameTree.readTree(js); String sortedNames[] = new String[jscript.size()]; sortedNames = (String[]) jscript.keySet().toArray(sortedNames); Arrays.sort(sortedNames); StringBuffer buf = new StringBuffer(); for (int k = 0; k < sortedNames.length; ++k) { PdfDictionary j = (PdfDictionary) getPdfObjectRelease( (PdfIndirectReference) jscript.get(sortedNames[k])); if (j == null) continue; PdfObject obj = getPdfObjectRelease(j.get(PdfName.JS)); if (obj != null) { if (obj.isString()) buf.append(((PdfString) obj).toUnicodeString()).append('\n'); else if (obj.isStream()) { byte bytes[] = getStreamBytes((PRStream) obj, file); if (bytes.length >= 2 && bytes[0] == (byte) 254 && bytes[1] == (byte) 255) buf.append(PdfEncodings.convertToString(bytes, PdfObject.TEXT_UNICODE)); else buf.append(PdfEncodings.convertToString(bytes, PdfObject.TEXT_PDFDOCENCODING)); buf.append('\n'); } } } return buf.toString(); } /** * Gets the global document JavaScript. * @throws IOException on error * @return the global document JavaScript */ public String getJavaScript() throws IOException { RandomAccessFileOrArray rf = getSafeFile(); try { rf.reOpen(); return getJavaScript(rf); } finally { try { rf.close(); } catch (Exception e) { } } } /** * Selects the pages to keep in the document. The pages are described as * ranges. The page ordering can be changed but * no page repetitions are allowed. Note that it may be very slow in partial mode. * @param ranges the comma separated ranges as described in {@link SequenceList} */ public void selectPages(String ranges) { selectPages(SequenceList.expand(ranges, getNumberOfPages())); } /** * Selects the pages to keep in the document. The pages are described as a * <CODE>List</CODE> of <CODE>Integer</CODE>. The page ordering can be changed but * no page repetitions are allowed. Note that it may be very slow in partial mode. * @param pagesToKeep the pages to keep in the document */ public void selectPages(List pagesToKeep) { pageRefs.selectPages(pagesToKeep); removeUnusedObjects(); } /** Sets the viewer preferences as the sum of several constants. * @param preferences the viewer preferences * @see PdfViewerPreferences#setViewerPreferences */ public void setViewerPreferences(int preferences) { this.viewerPreferences.setViewerPreferences(preferences); setViewerPreferences(this.viewerPreferences); } /** Adds a viewer preference * @param key a key for a viewer preference * @param value a value for the viewer preference * @see PdfViewerPreferences#addViewerPreference */ public void addViewerPreference(PdfName key, PdfObject value) { this.viewerPreferences.addViewerPreference(key, value); setViewerPreferences(this.viewerPreferences); } void setViewerPreferences(PdfViewerPreferencesImp vp) { vp.addToCatalog(catalog); } /** * Returns a bitset representing the PageMode and PageLayout viewer preferences. * Doesn't return any information about the ViewerPreferences dictionary. * @return an int that contains the Viewer Preferences. */ public int getSimpleViewerPreferences() { return PdfViewerPreferencesImp.getViewerPreferences(catalog).getPageLayoutAndMode(); } /** * Getter for property appendable. * @return Value of property appendable. */ public boolean isAppendable() { return this.appendable; } /** * Setter for property appendable. * @param appendable New value of property appendable. */ public void setAppendable(boolean appendable) { this.appendable = appendable; if (appendable) getPdfObject(trailer.get(PdfName.ROOT)); } /** * Getter for property newXrefType. * @return Value of property newXrefType. */ public boolean isNewXrefType() { return newXrefType; } /** * Getter for property fileLength. * @return Value of property fileLength. */ public int getFileLength() { return fileLength; } /** * Getter for property hybridXref. * @return Value of property hybridXref. */ public boolean isHybridXref() { return hybridXref; } static class PageRefs { private PdfReader reader; private IntHashtable refsp; private ArrayList refsn; private ArrayList pageInh; private int lastPageRead = -1; private int sizep; private boolean keepPages; private PageRefs(PdfReader reader) throws IOException { this.reader = reader; if (reader.partial) { refsp = new IntHashtable(); PdfNumber npages = (PdfNumber) PdfReader.getPdfObjectRelease(reader.rootPages.get(PdfName.COUNT)); sizep = npages.intValue(); } else { readPages(); } } PageRefs(PageRefs other, PdfReader reader) { this.reader = reader; this.sizep = other.sizep; if (other.refsn != null) { refsn = new ArrayList(other.refsn); for (int k = 0; k < refsn.size(); ++k) { refsn.set(k, duplicatePdfObject((PdfObject) refsn.get(k), reader)); } } else this.refsp = (IntHashtable) other.refsp.clone(); } int size() { if (refsn != null) return refsn.size(); else return sizep; } void readPages() throws IOException { if (refsn != null) return; refsp = null; refsn = new ArrayList(); pageInh = new ArrayList(); iteratePages((PRIndirectReference) reader.catalog.get(PdfName.PAGES)); pageInh = null; reader.rootPages.put(PdfName.COUNT, new PdfNumber(refsn.size())); } void reReadPages() throws IOException { refsn = null; readPages(); } /** Gets the dictionary that represents a page. * @param pageNum the page number. 1 is the first * @return the page dictionary */ public PdfDictionary getPageN(int pageNum) { PRIndirectReference ref = getPageOrigRef(pageNum); return (PdfDictionary) PdfReader.getPdfObject(ref); } /** * @param pageNum * @return a dictionary object */ public PdfDictionary getPageNRelease(int pageNum) { PdfDictionary page = getPageN(pageNum); releasePage(pageNum); return page; } /** * @param pageNum * @return an indirect reference */ public PRIndirectReference getPageOrigRefRelease(int pageNum) { PRIndirectReference ref = getPageOrigRef(pageNum); releasePage(pageNum); return ref; } /** Gets the page reference to this page. * @param pageNum the page number. 1 is the first * @return the page reference */ public PRIndirectReference getPageOrigRef(int pageNum) { try { --pageNum; if (pageNum < 0 || pageNum >= size()) return null; if (refsn != null) return (PRIndirectReference) refsn.get(pageNum); else { int n = refsp.get(pageNum); if (n == 0) { PRIndirectReference ref = getSinglePage(pageNum); if (reader.lastXrefPartial == -1) lastPageRead = -1; else lastPageRead = pageNum; reader.lastXrefPartial = -1; refsp.put(pageNum, ref.getNumber()); if (keepPages) lastPageRead = -1; return ref; } else { if (lastPageRead != pageNum) lastPageRead = -1; if (keepPages) lastPageRead = -1; return new PRIndirectReference(reader, n); } } } catch (Exception e) { throw new ExceptionConverter(e); } } void keepPages() { if (refsp == null || keepPages) return; keepPages = true; refsp.clear(); } /** * @param pageNum */ public void releasePage(int pageNum) { if (refsp == null) return; --pageNum; if (pageNum < 0 || pageNum >= size()) return; if (pageNum != lastPageRead) return; lastPageRead = -1; reader.lastXrefPartial = refsp.get(pageNum); reader.releaseLastXrefPartial(); refsp.remove(pageNum); } /** * */ public void resetReleasePage() { if (refsp == null) return; lastPageRead = -1; } void insertPage(int pageNum, PRIndirectReference ref) { --pageNum; if (refsn != null) { if (pageNum >= refsn.size()) refsn.add(ref); else refsn.add(pageNum, ref); } else { ++sizep; lastPageRead = -1; if (pageNum >= size()) { refsp.put(size(), ref.getNumber()); } else { IntHashtable refs2 = new IntHashtable((refsp.size() + 1) * 2); for (Iterator it = refsp.getEntryIterator(); it.hasNext();) { IntHashtable.Entry entry = (IntHashtable.Entry) it.next(); int p = entry.getKey(); refs2.put(p >= pageNum ? p + 1 : p, entry.getValue()); } refs2.put(pageNum, ref.getNumber()); refsp = refs2; } } } private void pushPageAttributes(PdfDictionary nodePages) { PdfDictionary dic = new PdfDictionary(); if (!pageInh.isEmpty()) { dic.putAll((PdfDictionary) pageInh.get(pageInh.size() - 1)); } for (int k = 0; k < pageInhCandidates.length; ++k) { PdfObject obj = nodePages.get(pageInhCandidates[k]); if (obj != null) dic.put(pageInhCandidates[k], obj); } pageInh.add(dic); } private void popPageAttributes() { pageInh.remove(pageInh.size() - 1); } private void iteratePages(PRIndirectReference rpage) throws IOException { PdfDictionary page = (PdfDictionary) getPdfObject(rpage); PdfArray kidsPR = page.getAsArray(PdfName.KIDS); if (kidsPR == null) { page.put(PdfName.TYPE, PdfName.PAGE); PdfDictionary dic = (PdfDictionary) pageInh.get(pageInh.size() - 1); PdfName key; for (Iterator i = dic.getKeys().iterator(); i.hasNext();) { key = (PdfName) i.next(); if (page.get(key) == null) page.put(key, dic.get(key)); } if (page.get(PdfName.MEDIABOX) == null) { PdfArray arr = new PdfArray( new float[] { 0, 0, PageSize.LETTER.getRight(), PageSize.LETTER.getTop() }); page.put(PdfName.MEDIABOX, arr); } refsn.add(rpage); } else { page.put(PdfName.TYPE, PdfName.PAGES); pushPageAttributes(page); for (int k = 0; k < kidsPR.size(); ++k) { PdfObject obj = kidsPR.getPdfObject(k); if (!obj.isIndirect()) { while (k < kidsPR.size()) kidsPR.remove(k); break; } iteratePages((PRIndirectReference) obj); } popPageAttributes(); } } protected PRIndirectReference getSinglePage(int n) { PdfDictionary acc = new PdfDictionary(); PdfDictionary top = reader.rootPages; int base = 0; while (true) { for (int k = 0; k < pageInhCandidates.length; ++k) { PdfObject obj = top.get(pageInhCandidates[k]); if (obj != null) acc.put(pageInhCandidates[k], obj); } PdfArray kids = (PdfArray) PdfReader.getPdfObjectRelease(top.get(PdfName.KIDS)); for (Iterator it = kids.listIterator(); it.hasNext();) { PRIndirectReference ref = (PRIndirectReference) it.next(); PdfDictionary dic = (PdfDictionary) getPdfObject(ref); int last = reader.lastXrefPartial; PdfObject count = getPdfObjectRelease(dic.get(PdfName.COUNT)); reader.lastXrefPartial = last; int acn = 1; if (count != null && count.type() == PdfObject.NUMBER) acn = ((PdfNumber) count).intValue(); if (n < base + acn) { if (count == null) { dic.mergeDifferent(acc); return ref; } reader.releaseLastXrefPartial(); top = dic; break; } reader.releaseLastXrefPartial(); base += acn; } } } private void selectPages(List pagesToKeep) { IntHashtable pg = new IntHashtable(); ArrayList finalPages = new ArrayList(); int psize = size(); for (Iterator it = pagesToKeep.iterator(); it.hasNext();) { Integer pi = (Integer) it.next(); int p = pi.intValue(); if (p >= 1 && p <= psize && pg.put(p, 1) == 0) finalPages.add(pi); } if (reader.partial) { for (int k = 1; k <= psize; ++k) { getPageOrigRef(k); resetReleasePage(); } } PRIndirectReference parent = (PRIndirectReference) reader.catalog.get(PdfName.PAGES); PdfDictionary topPages = (PdfDictionary) PdfReader.getPdfObject(parent); ArrayList newPageRefs = new ArrayList(finalPages.size()); PdfArray kids = new PdfArray(); for (int k = 0; k < finalPages.size(); ++k) { int p = ((Integer) finalPages.get(k)).intValue(); PRIndirectReference pref = getPageOrigRef(p); resetReleasePage(); kids.add(pref); newPageRefs.add(pref); getPageN(p).put(PdfName.PARENT, parent); } AcroFields af = reader.getAcroFields(); boolean removeFields = (af.getFields().size() > 0); for (int k = 1; k <= psize; ++k) { if (!pg.containsKey(k)) { if (removeFields) af.removeFieldsFromPage(k); PRIndirectReference pref = getPageOrigRef(k); int nref = pref.getNumber(); reader.xrefObj.set(nref, null); if (reader.partial) { reader.xref[nref * 2] = -1; reader.xref[nref * 2 + 1] = 0; } } } topPages.put(PdfName.COUNT, new PdfNumber(finalPages.size())); topPages.put(PdfName.KIDS, kids); refsp = null; refsn = newPageRefs; } } PdfIndirectReference getCryptoRef() { if (cryptoRef == null) return null; return new PdfIndirectReference(0, cryptoRef.getNumber(), cryptoRef.getGeneration()); } /** * Removes any usage rights that this PDF may have. Only Adobe can grant usage rights * and any PDF modification with iText will invalidate them. Invalidated usage rights may * confuse Acrobat and it's advisable to remove them altogether. */ public void removeUsageRights() { PdfDictionary perms = catalog.getAsDict(PdfName.PERMS); if (perms == null) return; perms.remove(PdfName.UR); perms.remove(PdfName.UR3); if (perms.size() == 0) catalog.remove(PdfName.PERMS); } /** * Gets the certification level for this document. The return values can be <code>PdfSignatureAppearance.NOT_CERTIFIED</code>, * <code>PdfSignatureAppearance.CERTIFIED_NO_CHANGES_ALLOWED</code>, * <code>PdfSignatureAppearance.CERTIFIED_FORM_FILLING</code> and * <code>PdfSignatureAppearance.CERTIFIED_FORM_FILLING_AND_ANNOTATIONS</code>. * <p> * No signature validation is made, use the methods available for that in <CODE>AcroFields</CODE>. * </p> * @return gets the certification level for this document */ public int getCertificationLevel() { PdfDictionary dic = catalog.getAsDict(PdfName.PERMS); if (dic == null) return PdfSignatureAppearance.NOT_CERTIFIED; dic = dic.getAsDict(PdfName.DOCMDP); if (dic == null) return PdfSignatureAppearance.NOT_CERTIFIED; PdfArray arr = dic.getAsArray(PdfName.REFERENCE); if (arr == null || arr.size() == 0) return PdfSignatureAppearance.NOT_CERTIFIED; dic = arr.getAsDict(0); if (dic == null) return PdfSignatureAppearance.NOT_CERTIFIED; dic = dic.getAsDict(PdfName.TRANSFORMPARAMS); if (dic == null) return PdfSignatureAppearance.NOT_CERTIFIED; PdfNumber p = dic.getAsNumber(PdfName.P); if (p == null) return PdfSignatureAppearance.NOT_CERTIFIED; return p.intValue(); } /** * Checks if the document was opened with the owner password so that the end application * can decide what level of access restrictions to apply. If the document is not encrypted * it will return <CODE>true</CODE>. * @return <CODE>true</CODE> if the document was opened with the owner password or if it's not encrypted, * <CODE>false</CODE> if the document was opened with the user password */ public final boolean isOpenedWithFullPermissions() { return !encrypted || ownerPasswordUsed; } public int getCryptoMode() { if (decrypt == null) return -1; else return decrypt.getCryptoMode(); } public boolean isMetadataEncrypted() { if (decrypt == null) return false; else return decrypt.isMetadataEncrypted(); } public byte[] computeUserPassword() { if (!encrypted || !ownerPasswordUsed) return null; return decrypt.computeUserPassword(password); } }