Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pdfbox.pdmodel; import java.util.ArrayDeque; import java.util.Iterator; import java.util.Queue; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.COSObjectable; import java.util.ArrayList; import java.util.List; import java.util.NoSuchElementException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * The page tree, which defines the ordering of pages in the document in an efficient manner. * * @author John Hewson */ public class PDPageTree implements COSObjectable, Iterable<PDPage> { private static final Log LOG = LogFactory.getLog(PDPageTree.class); private final COSDictionary root; private final PDDocument document; // optional /** * Constructor for embedding. */ public PDPageTree() { root = new COSDictionary(); root.setItem(COSName.TYPE, COSName.PAGES); root.setItem(COSName.KIDS, new COSArray()); root.setItem(COSName.COUNT, COSInteger.ZERO); document = null; } /** * Constructor for reading. * * @param root A page tree root. */ public PDPageTree(COSDictionary root) { this(root, null); } /** * Constructor for reading. * * @param root A page tree root. * @param document The document which contains "root". */ PDPageTree(COSDictionary root, PDDocument document) { if (root == null) { throw new IllegalArgumentException("page tree root cannot be null"); } // repair bad PDFs which contain a Page dict instead of a page tree, see PDFBOX-3154 if (COSName.PAGE.equals(root.getCOSName(COSName.TYPE))) { COSArray kids = new COSArray(); kids.add(root); this.root = new COSDictionary(); this.root.setItem(COSName.KIDS, kids); this.root.setInt(COSName.COUNT, 1); } else { this.root = root; } this.document = document; } /** * Returns the given attribute, inheriting from parent tree nodes if necessary. * * @param node page object * @param key the key to look up * @return COS value for the given key */ public static COSBase getInheritableAttribute(COSDictionary node, COSName key) { COSBase value = node.getDictionaryObject(key); if (value != null) { return value; } COSBase base = node.getDictionaryObject(COSName.PARENT, COSName.P); if (base instanceof COSDictionary) { COSDictionary parent = (COSDictionary) base; if (COSName.PAGES.equals(parent.getDictionaryObject(COSName.TYPE))) { return getInheritableAttribute(parent, key); } } return null; } /** * Returns an iterator which walks all pages in the tree, in order. */ @Override public Iterator<PDPage> iterator() { return new PageIterator(root); } /** * Helper to get kids from malformed PDFs. * @param node page tree node * @return list of kids */ private List<COSDictionary> getKids(COSDictionary node) { List<COSDictionary> result = new ArrayList<>(); COSArray kids = node.getCOSArray(COSName.KIDS); if (kids == null) { // probably a malformed PDF return result; } for (int i = 0, size = kids.size(); i < size; i++) { COSBase base = kids.getObject(i); if (base instanceof COSDictionary) { result.add((COSDictionary) base); } else { LOG.warn("COSDictionary expected, but got " + (base == null ? "null" : base.getClass().getSimpleName())); } } return result; } /** * Iterator which walks all pages in the tree, in order. */ private final class PageIterator implements Iterator<PDPage> { private final Queue<COSDictionary> queue = new ArrayDeque<>(); private PageIterator(COSDictionary node) { enqueueKids(node); } private void enqueueKids(COSDictionary node) { if (isPageTreeNode(node)) { List<COSDictionary> kids = getKids(node); for (COSDictionary kid : kids) { enqueueKids(kid); } } else { queue.add(node); } } @Override public boolean hasNext() { return !queue.isEmpty(); } @Override public PDPage next() { if (!hasNext()) { throw new NoSuchElementException(); } COSDictionary next = queue.poll(); sanitizeType(next); ResourceCache resourceCache = document != null ? document.getResourceCache() : null; return new PDPage(next, resourceCache); } @Override public void remove() { throw new UnsupportedOperationException(); } } /** * Returns the page at the given index. * * @param index zero-based index */ public PDPage get(int index) { COSDictionary dict = get(index + 1, root, 0); sanitizeType(dict); ResourceCache resourceCache = document != null ? document.getResourceCache() : null; return new PDPage(dict, resourceCache); } private static void sanitizeType(COSDictionary dictionary) { COSName type = dictionary.getCOSName(COSName.TYPE); if (type == null) { dictionary.setItem(COSName.TYPE, COSName.PAGE); return; } if (!COSName.PAGE.equals(type)) { throw new IllegalStateException("Expected 'Page' but found " + type); } } /** * Returns the given COS page using a depth-first search. * * @param pageNum 1-based page number * @param node page tree node to search * @param encountered number of pages encountered so far * @return COS dictionary of the Page object */ private COSDictionary get(int pageNum, COSDictionary node, int encountered) { if (pageNum < 0) { throw new IndexOutOfBoundsException("Index out of bounds: " + pageNum); } if (isPageTreeNode(node)) { int count = node.getInt(COSName.COUNT, 0); if (pageNum <= encountered + count) { // it's a kid of this node for (COSDictionary kid : getKids(node)) { // which kid? if (isPageTreeNode(kid)) { int kidCount = kid.getInt(COSName.COUNT, 0); if (pageNum <= encountered + kidCount) { // it's this kid return get(pageNum, kid, encountered); } else { encountered += kidCount; } } else { // single page encountered++; if (pageNum == encountered) { // it's this page return get(pageNum, kid, encountered); } } } throw new IllegalStateException("1-based index not found: " + pageNum); } else { throw new IndexOutOfBoundsException("1-based index out of bounds: " + pageNum); } } else { if (encountered == pageNum) { return node; } else { throw new IllegalStateException("1-based index not found: " + pageNum); } } } /** * Returns true if the node is a page tree node (i.e. and intermediate). */ private boolean isPageTreeNode(COSDictionary node) { // some files such as PDFBOX-2250-229205.pdf don't have Pages set as the Type, so we have // to check for the presence of Kids too return node != null && (node.getCOSName(COSName.TYPE) == COSName.PAGES || node.containsKey(COSName.KIDS)); } /** * Returns the index of the given page, or -1 if it does not exist. * * @param page The page to search for. * @return the zero-based index of the given page, or -1 if the page is not found. */ public int indexOf(PDPage page) { SearchContext context = new SearchContext(page); if (findPage(context, root)) { return context.index; } return -1; } private boolean findPage(SearchContext context, COSDictionary node) { for (COSDictionary kid : getKids(node)) { if (context.found) { break; } if (isPageTreeNode(kid)) { findPage(context, kid); } else { context.visitPage(kid); } } return context.found; } private static final class SearchContext { private final COSDictionary searched; private int index = -1; private boolean found; private SearchContext(PDPage page) { this.searched = page.getCOSObject(); } private void visitPage(COSDictionary current) { index++; found = searched == current; } } /** * Returns the number of leaf nodes (page objects) that are descendants of this root within the * page tree. */ public int getCount() { return root.getInt(COSName.COUNT, 0); } @Override public COSDictionary getCOSObject() { return root; } /** * Removes the page with the given index from the page tree. * @param index zero-based page index */ public void remove(int index) { COSDictionary node = get(index + 1, root, 0); remove(node); } /** * Removes the given page from the page tree. * * @param page The page to remove. */ public void remove(PDPage page) { remove(page.getCOSObject()); } /** * Removes the given COS page. */ private void remove(COSDictionary node) { // remove from parent's kids COSDictionary parent = (COSDictionary) node.getDictionaryObject(COSName.PARENT, COSName.P); COSArray kids = (COSArray) parent.getDictionaryObject(COSName.KIDS); if (kids.removeObject(node)) { // update ancestor counts do { node = (COSDictionary) node.getDictionaryObject(COSName.PARENT, COSName.P); if (node != null) { node.setInt(COSName.COUNT, node.getInt(COSName.COUNT) - 1); } } while (node != null); } } /** * Adds the given page to this page tree. * * @param page The page to add. */ public void add(PDPage page) { // set parent COSDictionary node = page.getCOSObject(); node.setItem(COSName.PARENT, root); // todo: re-balance tree? (or at least group new pages into tree nodes of e.g. 20) // add to parent's kids COSArray kids = (COSArray) root.getDictionaryObject(COSName.KIDS); kids.add(node); // update ancestor counts do { node = (COSDictionary) node.getDictionaryObject(COSName.PARENT, COSName.P); if (node != null) { node.setInt(COSName.COUNT, node.getInt(COSName.COUNT) + 1); } } while (node != null); } /** * Insert a page before another page within a page tree. * * @param newPage the page to be inserted. * @param nextPage the page that is to be after the new page. * @throws IllegalArgumentException if one attempts to insert a page that isn't part of a page * tree. */ public void insertBefore(PDPage newPage, PDPage nextPage) { COSDictionary nextPageDict = nextPage.getCOSObject(); COSDictionary parentDict = (COSDictionary) nextPageDict.getDictionaryObject(COSName.PARENT); COSArray kids = (COSArray) parentDict.getDictionaryObject(COSName.KIDS); boolean found = false; for (int i = 0; i < kids.size(); ++i) { COSDictionary pageDict = (COSDictionary) kids.getObject(i); if (pageDict.equals(nextPage.getCOSObject())) { kids.add(i, newPage.getCOSObject()); newPage.getCOSObject().setItem(COSName.PARENT, parentDict); found = true; break; } } if (!found) { throw new IllegalArgumentException("attempted to insert before orphan page"); } increaseParents(parentDict); } /** * Insert a page after another page within a page tree. * * @param newPage the page to be inserted. * @param prevPage the page that is to be before the new page. * @throws IllegalArgumentException if one attempts to insert a page that isn't part of a page * tree. */ public void insertAfter(PDPage newPage, PDPage prevPage) { COSDictionary prevPageDict = prevPage.getCOSObject(); COSDictionary parentDict = (COSDictionary) prevPageDict.getDictionaryObject(COSName.PARENT); COSArray kids = (COSArray) parentDict.getDictionaryObject(COSName.KIDS); boolean found = false; for (int i = 0; i < kids.size(); ++i) { COSDictionary pageDict = (COSDictionary) kids.getObject(i); if (pageDict.equals(prevPage.getCOSObject())) { kids.add(i + 1, newPage.getCOSObject()); newPage.getCOSObject().setItem(COSName.PARENT, parentDict); found = true; break; } } if (!found) { throw new IllegalArgumentException("attempted to insert before orphan page"); } increaseParents(parentDict); } private void increaseParents(COSDictionary parentDict) { do { int cnt = parentDict.getInt(COSName.COUNT); parentDict.setInt(COSName.COUNT, cnt + 1); parentDict = (COSDictionary) parentDict.getDictionaryObject(COSName.PARENT); } while (parentDict != null); } }