import java.io.FileOutputStream; import java.io.PrintStream; import java.util.Iterator; import com.lowagie.text.pdf.PRStream; import com.lowagie.text.pdf.PRTokeniser; import com.lowagie.text.pdf.PdfArray; import com.lowagie.text.pdf.PdfDictionary; import com.lowagie.text.pdf.PdfIndirectReference; import com.lowagie.text.pdf.PdfLister; import com.lowagie.text.pdf.PdfName; import com.lowagie.text.pdf.PdfObject; import com.lowagie.text.pdf.PdfReader; public class MainClass { public static void main(String[] args) throws Exception { PdfReader reader = new PdfReader("2.pdf"); PrintStream list = new PrintStream(new FileOutputStream("2.txt")); PdfLister lister = new PdfLister(new PrintStream(list)); PdfDictionary trailer = reader.getTrailer(); lister.listDict(trailer); PdfIndirectReference info = (PdfIndirectReference) trailer.get(PdfName.INFO); lister.listAnyObject(info); lister.listAnyObject(reader.getPdfObject(info.getNumber())); PdfDictionary root = reader.getCatalog(); lister.listDict(root); PdfDictionary outlines = (PdfDictionary) reader.getPdfObject(((PdfIndirectReference) root .get(PdfName.OUTLINES)).getNumber()); lister.listDict(outlines); PdfObject first = reader.getPdfObject(((PdfIndirectReference) outlines.get(PdfName.FIRST)) .getNumber()); lister.listAnyObject(first); PdfDictionary pages = (PdfDictionary) reader.getPdfObject(((PdfIndirectReference) root .get(PdfName.PAGES)).getNumber()); lister.listDict(pages); PdfArray kids = (PdfArray) pages.get(PdfName.KIDS); PdfIndirectReference kid_ref; PdfDictionary kid = null; for (Iterator i = kids.getArrayList().iterator(); i.hasNext();) { kid_ref = (PdfIndirectReference) i.next(); kid = (PdfDictionary) reader.getPdfObject(kid_ref.getNumber()); lister.listDict(kid); } PdfIndirectReference content_ref = (PdfIndirectReference) kid.get(PdfName.CONTENTS); PRStream content = (PRStream) reader.getPdfObject(content_ref.getNumber()); lister.listDict(content); byte[] contentstream = PdfReader.getStreamBytes(content); list.println(new String(contentstream)); PRTokeniser tokenizer = new PRTokeniser(contentstream); while (tokenizer.nextToken()) { if (tokenizer.getTokenType() == PRTokeniser.TK_STRING) { list.println(tokenizer.getStringValue()); } } } }