List of usage examples for com.itextpdf.text.pdf PdfReader getNumberOfPages
public int getNumberOfPages()
From source file:org.archive.modules.extractor.ExtractorPDFContent.java
License:Apache License
protected boolean innerExtract(CrawlURI curi) { PdfReader documentReader; ArrayList<String> uris = new ArrayList<String>(); try {//from w w w.j av a 2 s. co m documentReader = new PdfReader(curi.getRecorder().getContentReplayInputStream()); for (int i = 1; i <= documentReader.getNumberOfPages(); i++) { //Page numbers start at 1 String pageParseText = extractPageText(documentReader, i); Matcher matcher = URLPattern.matcher(pageParseText); while (matcher.find()) { String prospectiveURL = pageParseText.substring(matcher.start(), matcher.end()).trim(); //handle URLs wrapped in parentheses if (prospectiveURL.startsWith("(")) { prospectiveURL = prospectiveURL.substring(1, prospectiveURL.length()); if (prospectiveURL.endsWith(")")) prospectiveURL = prospectiveURL.substring(0, prospectiveURL.length() - 1); } uris.add(prospectiveURL); //parsetext URLs tend to end in a '.' if they are in a sentence, queue without trailing '.' if (prospectiveURL.endsWith(".") && prospectiveURL.length() > 2) uris.add(prospectiveURL.substring(0, prospectiveURL.length() - 1)); //Full regex allows newlines which seem to be common, also add match without newline in case we are wrong if (matcher.group(19) != null) { String alternateURL = matcher.group(1) + "://" + (matcher.group(2) != null ? 
matcher.group(2) : "") + matcher.group(6) + matcher.group(13); //Again, handle URLs wrapped in parentheses if (prospectiveURL.startsWith("(") && alternateURL.endsWith(")")) alternateURL = alternateURL.substring(0, alternateURL.length() - 1); uris.add(alternateURL); } } } } catch (IOException e) { curi.getNonFatalFailures().add(e); return false; } catch (RuntimeException e) { curi.getNonFatalFailures().add(e); return false; } if (uris.size() < 1) { return true; } for (String uri : uris) { try { LinkContext lc = LinkContext.NAVLINK_MISC; Hop hop = Hop.NAVLINK; CrawlURI out = curi.createCrawlURI(uri, lc, hop); curi.getOutLinks().add(out); } catch (URIException e1) { logUriError(e1, curi.getUURI(), uri); } } numberOfLinksExtracted.addAndGet(uris.size()); LOGGER.fine(curi + " has " + uris.size() + " links."); // Set flag to indicate that link extraction is completed. return true; }
From source file:org.ednovo.gooru.domain.service.resource.ResourceServiceImpl.java
License:Open Source License
/**
 * Splits the PDF at {@code newLocalUrl} into one file per top-level bookmark and
 * returns the URLs produced by {@code splitAndSaveChapter}.
 *
 * <p>Each bookmark's start page is parsed from the text before the first space of
 * its {@code PAGE} entry. A chapter runs from its own start page to the start page
 * of the next bookmark; the last chapter runs to the document's page count.
 *
 * @param newLocalUrl location of the PDF, as accepted by {@code PdfReader(String)}
 * @return the non-null chapter URLs in bookmark order; an empty list when the
 *         document has no bookmarks or when any step fails (best effort)
 */
private static List<String> splitToChaptersAndSaveFiles(String newLocalUrl) {
    PdfReader reader = null;
    try {
        HashMap<Integer, String> chapters = new HashMap<Integer, String>();
        ArrayList<Integer> pages = new ArrayList<Integer>();

        reader = new PdfReader(newLocalUrl);
        reader.consolidateNamedDestinations();
        List<HashMap<String, Object>> list = SimpleBookmark.getBookmark(reader);
        if (list == null) {
            // No outline in the document: nothing to split on.
            return new ArrayList<String>();
        }

        for (HashMap<String, Object> bookmark : list) {
            String page = bookmark.get(PAGE).toString();
            Integer num = Integer.parseInt(page.substring(0, page.indexOf(' ')));
            chapters.put(num, (String) bookmark.get(_TITLE));
            pages.add(num);
        }

        // 'index' always points at the bookmark after the one being processed.
        int index = 1;
        List<String> chaptersUrls = new ArrayList<String>();
        for (Integer i : pages) {
            String chapterUrl = null;
            if (pages.size() != index) {
                chapterUrl = splitAndSaveChapter(newLocalUrl, i, pages.get(index), chapters.get(i));
            } else {
                // Last chapter: extend to the end of the document.
                chapterUrl = splitAndSaveChapter(newLocalUrl, i, reader.getNumberOfPages(), chapters.get(i));
            }
            index++;
            if (chapterUrl != null) {
                chaptersUrls.add(chapterUrl);
            }
        }
        return chaptersUrls;
    } catch (Exception ex) {
        // Deliberate best effort: any failure yields "no chapters".
        return new ArrayList<String>();
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (RuntimeException ignored) {
                // Closing is best effort; the result has already been decided.
            }
        }
    }
}
From source file:org.gmdev.pdftrick.engine.MergeFiles.java
License:Open Source License
/** * Materially multiple pdf files are written merged file on a disk * @param list// w ww .j a v a2 s.c o m * @param outputStream * @throws DocumentException * @throws IOException */ private void doMerge(List<StreamPwdContainer> list, OutputStream outputStream) throws DocumentException, IOException { HashMap<Integer, String> rotationFromPages = factory.getRotationFromPages(); Document document = new Document(); PdfWriter writer = PdfWriter.getInstance(document, outputStream); document.open(); PdfContentByte cb = writer.getDirectContent(); int z = 0; for (StreamPwdContainer boom : list) { InputStream in = boom.getIn(); PdfReader reader = null; if (!boom.getPwd().equalsIgnoreCase("")) { reader = new PdfReader(in, boom.getPwd().getBytes()); } else { reader = new PdfReader(in); } for (int i = 1; i <= reader.getNumberOfPages(); i++) { z++; int rotation = reader.getPageRotation(i); //set size Rectangle pageSize_ = reader.getPageSize(i); Rectangle pageSize = null; if (rotation == 270 || rotation == 90) { pageSize = new Rectangle(pageSize_.getHeight(), pageSize_.getWidth()); } else { pageSize = pageSize_; } document.setPageSize(pageSize); writer.setCropBoxSize(pageSize); document.newPage(); // import the page from source pdf PdfImportedPage page = writer.getImportedPage(reader, i); // add the page to the destination pdf if (rotation == 270) { cb.addTemplate(page, 0, 1.0f, -1.0f, 0, reader.getPageSizeWithRotation(i).getWidth(), 0); rotationFromPages.put(z, "" + rotation); } else if (rotation == 180) { cb.addTemplate(page, -1f, 0, 0, -1f, 0, 0); rotationFromPages.put(z, "" + rotation); } else if (rotation == 90) { cb.addTemplate(page, 0, -1f, 1f, 0, 0, reader.getPageSizeWithRotation(i).getHeight()); rotationFromPages.put(z, "" + rotation); } else { cb.addTemplate(page, 1f, 0, 0, 1f, 0, 0); } } in.close(); } outputStream.flush(); document.close(); outputStream.close(); }
From source file:org.gmdev.pdftrick.render.PdfRenderLeft.java
License:Open Source License
/** * Render pdf resultfile thumbs in a left_panel, using threads pool *///from www .ja v a 2 s .c o m public void pdfRender() { String imgPath = PdfTrickUtils.createImgFolder(); int totPages = 0; // get the page number of the new generated pdf try { PdfReader reader = new PdfReader(factory.getResultFile()); totPages = reader.getNumberOfPages(); factory.setNumPages(totPages); reader.close(); } catch (Exception e) { logger.error("Exception", e); PdfTrickMessages.append("ERROR", Consts.SENDLOG_MSG); } // system of thread that call native function and renderizing pdf cover in png images boolean runPool = true; int division = totPages / 3; if (totPages < 3) { runPool = false; division = totPages; } DivisionThumb divisionThumbs = new DivisionThumb(division, imgPath); factory.gettContainer().setDivisionThumbs(divisionThumbs); Thread divisionThumbsThread = new Thread(divisionThumbs, "divisionThumbsThread"); factory.gettContainer().setDivisionThumbsThread(divisionThumbsThread); divisionThumbsThread.start(); if (runPool) { ExecPool execPool = new ExecPool(totPages, division, imgPath); factory.gettContainer().setExecPool(execPool); Thread execPoolThread = new Thread(execPool, "execPoolThread"); factory.gettContainer().setExecPoolThread(execPoolThread); execPoolThread.start(); } // thread that search and showing thumbnails ShowThumbs showThumbs = new ShowThumbs(); factory.gettContainer().setShowThumbs(showThumbs); Thread showThumbsThread = new Thread(showThumbs, "showThumbsThread"); factory.gettContainer().setShowThumbsThread(showThumbsThread); showThumbsThread.start(); }
From source file:org.h819.commons.file.MyPDFUtils.java
/**
 * Stamps a semi-transparent watermark on every page of a PDF and writes the
 * result to {@code destPdf}.
 *
 * <p>With only text, the text is drawn centred on each page, rotated 45 degrees.
 * With only an image, the image is drawn centred on each page. With both, odd
 * pages get the text and even pages get the image.
 *
 * <p>If {@code srcPdf} does not have a {@code pdf} extension the method returns
 * without doing anything.
 *
 * @param srcPdf source PDF; must be an existing regular file
 * @param destPdf destination file for the watermarked PDF
 * @param waterMarkText watermark text, or {@code null} for none
 * @param waterMarkImage PNG watermark image, or {@code null} for none
 * @throws FileNotFoundException if both watermarks are {@code null}, if
 *         {@code srcPdf} or {@code waterMarkImage} is missing, or if the image
 *         does not have a {@code png} extension
 * @throws IOException if reading or writing fails
 * @throws DocumentException if iText fails while stamping
 */
public static void addWaterMarkFile(File srcPdf, File destPdf, String waterMarkText, File waterMarkImage)
        throws IOException, DocumentException {

    if (waterMarkText == null && waterMarkImage == null)
        throw new FileNotFoundException(waterMarkText + " " + waterMarkImage + " all null.");

    if (srcPdf == null || !srcPdf.exists() || !srcPdf.isFile())
        throw new FileNotFoundException("pdf file : '" + srcPdf + "' does not exsit.");

    if (!FilenameUtils.getExtension(srcPdf.getAbsolutePath()).toLowerCase().equals("pdf"))
        return;

    if (waterMarkImage != null) {
        // Report the image file itself (the original reported srcPdf here).
        if (!waterMarkImage.exists() || !waterMarkImage.isFile())
            throw new FileNotFoundException("img file : '" + waterMarkImage + "' does not exsit.");

        if (!FilenameUtils.getExtension(waterMarkImage.getAbsolutePath()).toLowerCase().equals("png"))
            throw new FileNotFoundException("image file '" + waterMarkImage + "' not png.(???? pdf )");
    }

    PdfReader reader = getPdfReader(srcPdf);
    try {
        int n = reader.getNumberOfPages();
        PdfStamper stamper = getPdfStamper(srcPdf, destPdf);
        try {
            // Text watermark: light grey, size 35.
            Phrase text = null;
            if (waterMarkText != null) {
                Font bfont = getPdfFont();
                bfont.setSize(35);
                bfont.setColor(new BaseColor(192, 192, 192));
                text = new Phrase(waterMarkText, bfont);
            }

            // Image watermark.
            Image img = null;
            float w = 0;
            float h = 0;
            if (waterMarkImage != null) {
                img = Image.getInstance(waterMarkImage.getAbsolutePath());
                w = img.getScaledWidth();
                h = img.getScaledHeight();
                img.setRotationDegrees(45);
            }

            // 50% fill opacity for whatever is drawn.
            PdfGState gs1 = new PdfGState();
            gs1.setFillOpacity(0.5f);

            PdfContentByte over;
            Rectangle pageSize;
            float x, y;
            for (int i = 1; i <= n; i++) {
                // Centre of the page, taking its rotation into account.
                pageSize = reader.getPageSizeWithRotation(i);
                x = (pageSize.getLeft() + pageSize.getRight()) / 2;
                y = (pageSize.getTop() + pageSize.getBottom()) / 2;

                // Draw above the existing page content.
                over = stamper.getOverContent(i);
                // Save/restore so the transparency state does not leak out of this page.
                over.saveState();
                over.setGState(gs1);

                if (waterMarkText != null && waterMarkImage != null) {
                    // Both given: alternate, text on odd pages and image on even pages.
                    if (i % 2 == 1) {
                        ColumnText.showTextAligned(over, Element.ALIGN_CENTER, text, x, y, 45);
                    } else {
                        over.addImage(img, w, 0, 0, h, x - (w / 2), y - (h / 2));
                    }
                } else if (waterMarkText != null) {
                    ColumnText.showTextAligned(over, Element.ALIGN_CENTER, text, x, y, 45);
                } else {
                    over.addImage(img, w, 0, 0, h, x - (w / 2), y - (h / 2));
                }

                over.restoreState();
            }
        } finally {
            // Always release the stamper, even if stamping failed part-way.
            stamper.close();
        }
    } finally {
        reader.close();
    }
}
From source file:org.h819.commons.file.MyPDFUtils.java
/**
 * Rewrites a PDF as version 1.7 with compression level 9 and full compression
 * enabled.
 *
 * <p>Every page's content is set back onto the reader unchanged before the
 * stamper is closed (presumably so the streams are rewritten with the writer's
 * compression settings; confirm against the iText documentation).
 *
 * @param srcPdfFile the original PDF
 * @param descPdfFile the resulting PDF
 * @throws java.io.IOException if {@code srcPdfFile} is {@code null} or missing,
 *         or if reading/writing fails
 * @throws DocumentException if iText fails while stamping
 */
public static void compressPdf(File srcPdfFile, File descPdfFile) throws IOException, DocumentException {

    if (srcPdfFile == null || !srcPdfFile.exists())
        throw new IOException("src pdf file '" + srcPdfFile + "' does not exsit.");

    PdfReader reader = getPdfReader(srcPdfFile);
    try {
        FileOutputStream out = new FileOutputStream(descPdfFile.getAbsoluteFile());
        try {
            PdfStamper stamper = new PdfStamper(reader, out, PdfWriter.VERSION_1_7);
            try {
                stamper.getWriter().setCompressionLevel(9);
                int pageCount = reader.getNumberOfPages();
                for (int i = 1; i <= pageCount; i++) {
                    reader.setPageContent(i, reader.getPageContent(i));
                }
                stamper.setFullCompression();
            } finally {
                stamper.close();
            }
        } finally {
            // Covers the case where the stamper could not be created; a second close is harmless.
            out.close();
        }
    } finally {
        reader.close();
    }
}
From source file:org.h819.commons.file.MyPDFUtils.java
/** * ?pdf/*w w w.j av a2s . co m*/ * * @param srcPdfFile the original PDF * @param descPdfFile the resulting PDF * @throws java.io.IOException * @throws DocumentException */ public static void decompressPdf(File srcPdfFile, File descPdfFile) throws IOException, DocumentException { if (srcPdfFile == null || !srcPdfFile.exists()) throw new IOException("src pdf file '" + srcPdfFile + "' does not exsit."); PdfReader reader = getPdfReader(srcPdfFile); PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(descPdfFile.getAbsoluteFile())); stamper.getWriter().setCompressionLevel(PdfStream.NO_COMPRESSION); int total = reader.getNumberOfPages() + 1; for (int i = 1; i < total; i++) { reader.setPageContent(i, reader.getPageContent(i)); } stamper.close(); reader.close(); }
From source file:org.h819.commons.file.MyPDFUtils.java
/** * ?// ww w .java2 s. c o m * <p> * ? iText in Action 2nd EditionChapter 6: Working with existing PDFs * Concatenate * </p> * * @param files ? * @param result ?? * @throws DocumentException * @throws java.io.IOException */ public static void merge(File[] files, File result) throws DocumentException, IOException { // step 1 Document document = new Document(); // step 2 /** * PdfCopy PdfSmartCopy * * PdfCopy??? * * PdfSmartCopy?????? */ // // PdfCopy copy = new PdfCopy(document, new FileOutputStream(result)); PdfSmartCopy copy = new PdfSmartCopy(document, new FileOutputStream(result)); // step 3 document.open(); // step 4 PdfReader reader; int n; // loop over the documents you want to concatenate for (int i = 0; i < files.length; i++) { reader = getPdfReader(files[i]); // loop over the pages in that document n = reader.getNumberOfPages(); for (int page = 0; page < n;) { copy.addPage(copy.getImportedPage(reader, ++page)); } copy.freeReader(reader); } // step 5 document.close(); }
From source file:org.h819.commons.file.MyPDFUtils.java
/** * pdf??? getPageCount()/*from w w w .j ava 2 s .c o m*/ * * @param srcPDFFileDir * @return * @throws IOException */ private static void countNumberOfPagesOfDir(File srcPDFFileDir) throws IOException { if (srcPDFFileDir == null || !srcPDFFileDir.isDirectory()) throw new FileNotFoundException(srcPDFFileDir + "'is null or dose not exist."); Collection<File> listPDFs = FileUtils.listFiles(srcPDFFileDir, null, true); for (File f : listPDFs) { // if (f.isDirectory()) countNumberOfPagesOfDir(f); if (!FilenameUtils.getExtension(f.getName().toUpperCase()).equals("PDF")) continue; // System.out.println(f.getPath()); // we create a reader for a certain document PdfReader reader = getPdfReader(f); // we retrieve the total number of pages // if (reader.isEncrypted()) continue; numberOfPagesOfDirectory = numberOfPagesOfDirectory + reader.getNumberOfPages(); } }
From source file:org.jfree.chart.swt.ChartPdf.java
License:Open Source License
public static void saveChartAsPDF(File file, JFreeChart chart, int width, int height) throws DocumentException, FileNotFoundException, IOException { if (chart != null) { boolean success = false; String old = null;/* ww w . j a va 2 s . c o m*/ File oldFile = null; boolean append = file.exists(); if (append) { old = file.getAbsolutePath() + ".old"; //$NON-NLS-1$ oldFile = new File(old); oldFile.delete(); file.renameTo(oldFile); } try (BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(file))) { // convert chart to PDF with iText: Rectangle pagesize = new Rectangle(width, height); if (append) { PdfReader reader = new PdfReader(old); PdfStamper stamper = new PdfStamper(reader, out); try { int n = reader.getNumberOfPages() + 1; stamper.insertPage(n, pagesize); PdfContentByte overContent = stamper.getOverContent(n); writeChart(chart, width, height, overContent); ColumnText ct = new ColumnText(overContent); ct.setSimpleColumn(width - 50, 50, width - 12, height, 150, Element.ALIGN_RIGHT); Paragraph paragraph = new Paragraph(String.valueOf(n), new Font(FontFamily.HELVETICA, 9, Font.NORMAL, BaseColor.DARK_GRAY)); paragraph.setAlignment(Element.ALIGN_RIGHT); ct.addElement(paragraph); ct.go(); success = true; } finally { stamper.close(); reader.close(); oldFile.delete(); } } else { Document document = new Document(pagesize, 50, 50, 50, 50); document.addCreationDate(); document.addCreator(Constants.APPLICATION_NAME); document.addAuthor(System.getProperty("user.name")); //$NON-NLS-1$ try { PdfWriter writer = PdfWriter.getInstance(document, out); document.open(); writeChart(chart, width, height, writer.getDirectContent()); success = true; } finally { document.close(); } } } if (!success) { file.delete(); if (oldFile != null) oldFile.renameTo(file); } } }