Inspects a PDF file : Introduction « PDF « Java Tutorial






import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.Iterator;

import com.lowagie.text.pdf.PRStream;
import com.lowagie.text.pdf.PRTokeniser;
import com.lowagie.text.pdf.PdfArray;
import com.lowagie.text.pdf.PdfDictionary;
import com.lowagie.text.pdf.PdfIndirectReference;
import com.lowagie.text.pdf.PdfLister;
import com.lowagie.text.pdf.PdfName;
import com.lowagie.text.pdf.PdfObject;
import com.lowagie.text.pdf.PdfReader;

/**
 * Example program that inspects the PDF file {@code 2.pdf} and writes what it
 * finds to the text file {@code 2.txt}.
 *
 * <p>In order, it lists: the trailer dictionary, the document information
 * entry, the catalog, the outlines dictionary and its first entry, the page
 * tree root, every entry of the page tree's {@code /Kids} array, and finally
 * the content stream of the last kid (raw text followed by every string token
 * found in it).
 */
public class MainClass {

  /**
   * Opens the input PDF and the output text file, runs the inspection and
   * releases both resources even when the inspection fails.
   *
   * @param args ignored
   * @throws Exception if the PDF cannot be read or the output cannot be written
   */
  public static void main(String[] args) throws Exception {
    PdfReader reader = new PdfReader("2.pdf");
    try {
      PrintStream list = new PrintStream(new FileOutputStream("2.txt"));
      try {
        inspect(reader, list);
      } finally {
        // Closing the stream flushes pending output and releases the file handle.
        list.close();
      }
    } finally {
      reader.close();
    }
  }

  /**
   * Walks the document structure reachable from the trailer and writes it to
   * {@code list}. Entries that are absent from the file ({@code /Info},
   * {@code /Outlines}, {@code /First}, {@code /Contents}) are skipped instead
   * of causing a {@code NullPointerException}.
   *
   * @param reader the opened PDF
   * @param list destination for the listing
   * @throws Exception if reading or tokenising the content stream fails
   */
  private static void inspect(PdfReader reader, PrintStream list) throws Exception {
    // The lister writes straight to the output stream; no extra wrapper is needed.
    PdfLister lister = new PdfLister(list);

    PdfDictionary trailer = reader.getTrailer();
    lister.listDict(trailer);

    // List both the reference itself and the object it points to.
    PdfIndirectReference info = (PdfIndirectReference) trailer.get(PdfName.INFO);
    if (info != null) {
      lister.listAnyObject(info);
      lister.listAnyObject(reader.getPdfObject(info.getNumber()));
    }

    PdfDictionary root = reader.getCatalog();
    lister.listDict(root);

    PdfIndirectReference outlinesRef = (PdfIndirectReference) root.get(PdfName.OUTLINES);
    if (outlinesRef != null) {
      PdfDictionary outlines = (PdfDictionary) reader.getPdfObject(outlinesRef.getNumber());
      lister.listDict(outlines);
      PdfIndirectReference firstRef = (PdfIndirectReference) outlines.get(PdfName.FIRST);
      if (firstRef != null) {
        lister.listAnyObject(reader.getPdfObject(firstRef.getNumber()));
      }
    }

    PdfDictionary pages = (PdfDictionary) reader.getPdfObject(((PdfIndirectReference) root
        .get(PdfName.PAGES)).getNumber());
    lister.listDict(pages);

    // List every kid; the last one is kept so its content stream can be dumped below.
    PdfArray kids = (PdfArray) pages.get(PdfName.KIDS);
    PdfDictionary lastKid = null;
    for (Iterator i = kids.getArrayList().iterator(); i.hasNext();) {
      PdfIndirectReference kidRef = (PdfIndirectReference) i.next();
      lastKid = (PdfDictionary) reader.getPdfObject(kidRef.getNumber());
      lister.listDict(lastKid);
    }
    if (lastKid == null) {
      // Empty /Kids array: there is no content stream to dump.
      return;
    }

    // NOTE(review): this assumes /Contents is a single indirect reference to a
    // stream; a /Contents array would fail the cast below - confirm for your input.
    PdfIndirectReference contentRef = (PdfIndirectReference) lastKid.get(PdfName.CONTENTS);
    if (contentRef == null) {
      return;
    }
    PRStream content = (PRStream) reader.getPdfObject(contentRef.getNumber());
    lister.listDict(content);

    byte[] contentBytes = PdfReader.getStreamBytes(content);
    // Decoded with the platform default charset, as in the original example.
    list.println(new String(contentBytes));

    // Print only the string tokens of the content stream.
    PRTokeniser tokenizer = new PRTokeniser(contentBytes);
    while (tokenizer.nextToken()) {
      if (tokenizer.getTokenType() == PRTokeniser.TK_STRING) {
        list.println(tokenizer.getStringValue());
      }
    }
  }
}








29.1.Introduction
29.1.1.Create your first Pdf document with Java
29.1.2.Create Empty page
29.1.3.Update a Pdf document
29.1.4.Read Pdf document to string
29.1.5.PRTokeniser
29.1.6.PdfDictionary
29.1.7.Inspects a PDF file
29.1.8.Tagged PDF