gov.lanl.adore.djatoka.plugin.ExtractPDF.java Source code

Introduction

Here is the source code for gov.lanl.adore.djatoka.plugin.ExtractPDF.java
Source

/*
 * Copyright (c) 2010 Brasiliana Digital Library (http://brasiliana.usp.br).
 * Based on similar source code from Adore Djatoka.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 * 
 */

package gov.lanl.adore.djatoka.plugin;

import gov.lanl.adore.djatoka.DjatokaDecodeParam;
import gov.lanl.adore.djatoka.DjatokaException;
import gov.lanl.adore.djatoka.IExtract;
import gov.lanl.adore.djatoka.openurl.OpenURLJP2KService;
import gov.lanl.adore.djatoka.util.IOUtils;
import gov.lanl.adore.djatoka.util.ImageProcessingUtils;
import gov.lanl.adore.djatoka.util.ImageRecord;

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.awt.Dimension;
import java.awt.Rectangle;

import javax.imageio.ImageIO;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.HashMap;
import java.util.Properties;

import java.util.Enumeration;
import java.util.StringTokenizer;
import java.util.logging.Level;

import org.apache.log4j.Logger;

import gov.lanl.util.ConfigurationManager;

/**
 * Uses Poppler PDF commands to extract PDF pages to PNG files.
 * @author Fabio N. Kepler
 */
public class ExtractPDF implements IExtract {

    private static Logger logger = Logger.getLogger(ExtractPDF.class);

    // maximum size of either preview image dimension
    private static final int MAX_PX = 1000;

    // maximum DPI
    private static final int MAX_DPI = 180;

    private static int DEFAULT_DENSITY = 72;
    //private static String DEFAULT_COLORSPACE = "RGB";
    private static int DEFAULT_LEVELS = 4;

    // command to get image from PDF; @FILE@, @OUTPUT@ etc are placeholders
    private static final String PDFTOPPM_COMMAND[] = { "@COMMAND@", "-q", "-png", "-f", "@FIRSTPAGE@", "-l",
            "@LASTPAGE@", "-r", "@DPI@", "@FILE@", "@OUTPUTFILE@" };
    private static final int PDFTOPPM_COMMAND_POSITION_BIN = 0;
    private static final int PDFTOPPM_COMMAND_POSITION_FIRSTPAGE = 4;
    private static final int PDFTOPPM_COMMAND_POSITION_LASTPAGE = 6;
    private static final int PDFTOPPM_COMMAND_POSITION_DPI = 8;
    private static final int PDFTOPPM_COMMAND_POSITION_OPTIONAL_EXTRAS = 9; // Must insert at this position instead of just setting it.
    private static final int PDFTOPPM_COMMAND_POSITION_FILE = 9;
    private static final int PDFTOPPM_COMMAND_POSITION_OUTPUTFILE = 10;

    // command to get page count and page boxes from PDF (pdfinfo); @FILE@ etc are placeholders
    private static final String PDFINFO_COMMAND[] = { "@COMMAND@", "-f", "@FIRSTPAGE@", "-l", "@LASTPAGE@", "-box",
            "@FILE@" };
    private static final int PDFINFO_COMMAND_POSITION_BIN = 0;
    private static final int PDFINFO_COMMAND_POSITION_FIRSTPAGE = 2;
    private static final int PDFINFO_COMMAND_POSITION_LASTPAGE = 4;
    private static final int PDFINFO_COMMAND_POSITION_FILE = 6;

    // executable path for "pdftoppm", comes from DSpace config at runtime.
    private static String pdftoppmPath = null;

    // executable path for "pdfinfo", comes from DSpace config at runtime.
    private static String pdfinfoPath = null;

    // match line in pdfinfo output that describes file's MediaBox
    private static final Pattern MEDIABOX_PATT = Pattern.compile(
            //"^Page\\s+(\\d+)\\s+size:\\s+([\\.\\d]+)\\s+x\\s+([\\.\\d]+)\\s+pts\\.*$"); // Does not seem to match "Page   41 size: 595 x 842 pts (A4)".
            "^Page\\s+(\\d+)\\s+MediaBox:\\s+([\\.\\d-]+)\\s+([\\.\\d-]+)\\s+([\\.\\d-]+)\\s+([\\.\\d-]+)"); // For use with -box switch
    /* Without the -box switch:
    Page    1 size: 444.72 x 771.12 pts
    Page    2 size: 416.16 x 743.52 pts
    */
    /* With -box switch:
    Page    1 size: 444.72 x 771.12 pts
    Page    2 size: 416.16 x 743.52 pts
    Page    1 MediaBox:     0.00     0.00   444.72   771.12
    Page    1 CropBox:      0.00     0.00   444.72   771.12
    Page    1 BleedBox:     0.00     0.00   444.72   771.12
    Page    1 TrimBox:      0.00     0.00   444.72   771.12
    Page    1 ArtBox:       0.00     0.00   444.72   771.12
    Page    2 MediaBox:     0.00     0.00   416.16   743.52
    Page    2 CropBox:      0.00     0.00   416.16   743.52
    Page    2 BleedBox:     0.00     0.00   416.16   743.52
    Page    2 TrimBox:      0.00     0.00   416.16   743.52
    Page    2 ArtBox:       0.00     0.00   416.16   743.52
    */

    // match line in pdfinfo output that gives the file's number of pages
    private static final Pattern PAGES_PATT = Pattern.compile("^Pages:\\s+([\\d-]+)");

    private static Properties props = new Properties();

    private static final String DEFAULT_PDFTOPPM_PATH = "/usr/bin/pdftoppm";
    private static final String DEFAULT_PDFINFO_PATH = "/usr/bin/pdfinfo";

    private static final String PROPS_PDF_PDFTOPPM_PATH = "PDF.pdftoppmPath";
    private static final String PROPS_PDF_PDFINFO_PATH = "PDF.pdfinfoPath";

    /**
     * Fills the given ImageRecord with metadata describing the PDF it points to.
     * <p>
     * A page is rendered with a freshly constructed DjatokaDecodeParam and the
     * resulting raster supplies width, height, bit depth and channel count.
     * The page count reported by pdfinfo becomes the compositing layer count,
     * and the remaining pdfinfo entries (one "Page N" entry per page holding its
     * scaled size) are stored as the record's instProps, so that a viewer can
     * look up per-page sizes without asking for metadata again.
     *
     * @param r ImageRecord containing absolute file path of PDF file, or an input object.
     * @return the same ImageRecord instance, populated
     * @throws DjatokaException if the record references neither an existing file nor an
     *         object, or if anything fails while rendering or querying the PDF
     */
    @Override
    public final ImageRecord getMetadata(ImageRecord r) throws DjatokaException {
        final boolean hasExistingFile = r.getImageFile() != null && new File(r.getImageFile()).exists();
        if (!hasExistingFile && r.getObject() == null) {
            throw new DjatokaException("Image Does Not Exist: " + r.toString());
        }
        logger.debug("Get metadata: " + r.toString());

        try {
            // Render one page to learn the raster characteristics.
            final BufferedImage rendered = process(r, new DjatokaDecodeParam());
            r.setWidth(rendered.getWidth());
            r.setHeight(rendered.getHeight());
            r.setDWTLevels(DEFAULT_LEVELS);
            r.setLevels(DEFAULT_LEVELS);
            r.setBitDepth(rendered.getColorModel().getPixelSize());
            r.setNumChannels(rendered.getColorModel().getNumColorComponents());

            // From Djatoka's point of view a PDF is a single image whose compositing
            // layers are the pages, so the page count goes into the layer count.
            final HashMap<String, String> pdfInfo = (HashMap<String, String>) getPDFProperties(r);
            final String pageCount = pdfInfo.remove("Pages");
            r.setCompositingLayerCount(Integer.parseInt(pageCount));

            // What is left in the map are the per-page sizes; they travel to the
            // viewer through instProps.
            r.setInstProps(pdfInfo);
            logger.debug("instProps: " + r.getInstProps());

            logger.debug("Get metadata: " + r.toString());
        } catch (Exception e) {
            throw new DjatokaException(e);
        }

        return r;
    }

    //*
    /**
     * Creates a new ImageRecord describing an already rendered image.
     * <p>
     * Only raster characteristics are set (width, height, levels, bit depth and
     * channel count); the compositing layer count is left untouched because
     * 'bi' refers to just one page extracted from the PDF file.
     *
     * @param bi the rendered image to describe
     * @return a new ImageRecord populated from the image
     * @throws DjatokaException if bi is null or reading its properties fails
     */
    public final ImageRecord getMetadata(BufferedImage bi) throws DjatokaException {
        if (bi == null) {
            throw new DjatokaException("Image Does Not Exist");
        }
        logger.debug("getMetadata(BufferedImage): " + bi.getWidth());

        ImageRecord described;
        try {
            described = new ImageRecord();

            described.setWidth(bi.getWidth());
            described.setHeight(bi.getHeight());

            described.setDWTLevels(DEFAULT_LEVELS);
            described.setLevels(DEFAULT_LEVELS);

            described.setBitDepth(bi.getColorModel().getPixelSize());
            described.setNumChannels(bi.getColorModel().getNumColorComponents());

            //TODO
        } catch (Exception e) {
            throw new DjatokaException(e);
        }
        return described;
    }
    //*/

    /**
     * Placeholder for XML box extraction: no XML documents are read and the
     * result is always null. The stream/file distinction is kept so the two
     * JP2-style lookups noted below can be filled in later.
     *
     * @param r ImageRecord referencing the source file or input stream
     * @return always null
     * @throws DjatokaException declared by the interface; failures are only logged here
     */
    // TODO
    // FIXME
    @Override
    public final String[] getXMLBox(ImageRecord r) throws DjatokaException {
        String[] xmlDocs = null;
        try {
            final boolean streamOnly = r.getImageFile() == null && r.getObject() instanceof InputStream;
            if (streamOnly) {
                // was: xml = new JP2ImageInfo((InputStream) r.getObject()).getXmlDocs();
            } else {
                // was: xml = new JP2ImageInfo(new File(r.getImageFile())).getXmlDocs();
            }
        } catch (Exception e) {
            logger.error(e, e);
        }
        return xmlDocs;
    }

    /**
     * Renders one page of a PDF file to a BufferedImage by running pdftoppm.
     * <p>
     * The page is chosen from the compositing layer in params (0-based layer,
     * 1-based page). The resolution comes from getScaledDPI, an optional crop
     * rectangle from getCropParam, and a rotation is applied afterwards when
     * the rotation degree is positive. The PNG written by pdftoppm is deleted
     * once it has been read.
     *
     * @param input absolute file path of PDF file.
     * @param params DjatokaDecodeParam instance containing region and transform settings.
     * @return extracted region as a BufferedImage
     * @throws DjatokaException if input is null
     * @throws IllegalStateException if the pdftoppm/pdfinfo paths cannot be resolved
     * @throws IllegalArgumentException if running pdftoppm or reading its output fails
     */
    @Override
    public BufferedImage process(String input, DjatokaDecodeParam params) throws DjatokaException {

        logger.debug("ExtractPDF.process:\n\tinput: " + input + "\n\tparams: " + params);

        if (input == null)
            throw new DjatokaException("Unknown failure while converting file: no image produced.");

        try {
            setPDFCommandsPath();
        } catch (IllegalStateException e) {
            logger.error("Failed to set PDF commands path: ", e);
            throw e;
        }

        int page_number = 1 + params.getCompositingLayer(); // From 0-based to 1-based.
        int status = 0;
        BufferedImage processedImage = null;
        try {
            // Scale
            int dpi = getScaledDPI(params);

            // Only the unique name is needed: pdftoppm appends "-<page>.png"
            // (with a variable amount of zero padding) to this prefix.
            File outPrefixF = File.createTempFile("pdftopng", "out");
            String outPrefix = outPrefixF.toString();
            outPrefixF.delete();

            ArrayList<String> pdfCmd = new ArrayList<String>(Arrays.asList(PDFTOPPM_COMMAND));
            pdfCmd.set(PDFTOPPM_COMMAND_POSITION_BIN, pdftoppmPath);
            pdfCmd.set(PDFTOPPM_COMMAND_POSITION_FIRSTPAGE, "" + page_number);
            pdfCmd.set(PDFTOPPM_COMMAND_POSITION_LASTPAGE, "" + page_number);
            pdfCmd.set(PDFTOPPM_COMMAND_POSITION_DPI, String.valueOf(dpi));
            pdfCmd.set(PDFTOPPM_COMMAND_POSITION_FILE, input);
            pdfCmd.set(PDFTOPPM_COMMAND_POSITION_OUTPUTFILE, outPrefix);

            // Crop: the options are inserted before the file arguments, which
            // shifts the input/output file entries to the end of the command.
            Rectangle crop = getCropParam(params);
            if (crop != null) {
                String[] cropParams = { "-x", "" + (int) crop.getX(), "-y", "" + (int) crop.getY(), "-W",
                        "" + (int) crop.getWidth(), "-H", "" + (int) crop.getHeight() };
                pdfCmd.addAll(PDFTOPPM_COMMAND_POSITION_OPTIONAL_EXTRAS, Arrays.asList(cropParams));
            }

            String[] pdfCmdA = pdfCmd.toArray(new String[pdfCmd.size()]);
            logger.debug("Running pdftoppm command: " + Arrays.deepToString(pdfCmdA));

            File outf = null;
            Process pdfProc = null;
            try {
                pdfProc = Runtime.getRuntime().exec(pdfCmdA);
                status = pdfProc.waitFor();
                logger.debug("status: " + status);

                outf = findPdftoppmOutput(outPrefix, page_number);
                // Parenthesised on purpose: without the parentheses the string
                // concatenation is evaluated before the null comparison.
                logger.debug("PDFTOPPM output is: " + outf + ", exists="
                        + (outf != null ? String.valueOf(outf.exists()) : "!"));
                if (outf == null) {
                    throw new FileNotFoundException("pdftoppm produced no output file for page " + page_number
                            + " using prefix " + outPrefix + " (exit status=" + status + ")");
                }
                processedImage = ImageIO.read(outf);

                // Rotate
                if (params.getRotationDegree() > 0) {
                    processedImage = ImageProcessingUtils.rotate(processedImage, params.getRotationDegree());
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers up the stack can react to it.
                Thread.currentThread().interrupt();
                logger.error("Failed converting PDF file to image: ", e);
                throw new IllegalArgumentException("Failed converting PDF file to image: ", e);
            } finally {
                if (outf != null)
                    outf.delete();
                // Our exec() should not produce any output, but we want to stay safe.
                // http://mark.koli.ch/2011/01/leaky-pipes-remember-to-close-your-streams-when-using-javas-runtimegetruntimeexec.html
                // pdfProc is still null when exec() itself failed.
                if (pdfProc != null) {
                    org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getOutputStream());
                    org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getInputStream());
                    org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getErrorStream());
                }
            }
        } catch (Exception e) {
            logger.error("Failed converting PDF file to image: ", e);
            throw new IllegalArgumentException("Failed converting PDF file to image: ", e);
        } finally {
            if (status != 0)
                logger.error("PDF conversion proc failed, exit status=" + status + ", file=" + input);
        }

        return processedImage;
    }

    /**
     * Finds the PNG file written by pdftoppm for the given page.
     * <p>
     * pdftoppm pads the page number with a varying number of zeros (e.g.
     * prefix-2.png, prefix-02.png, prefix-002.png), so names with zero to five
     * leading zeros are probed in that order.
     *
     * @param outPrefix output prefix that was handed to pdftoppm
     * @param pageNumber 1-based page number that was rendered
     * @return the first existing candidate file, or null if none exists
     */
    private static File findPdftoppmOutput(String outPrefix, int pageNumber) {
        StringBuilder padding = new StringBuilder();
        for (int zeros = 0; zeros <= 5; zeros++) {
            File candidate = new File(outPrefix + "-" + padding + pageNumber + ".png");
            if (candidate.exists()) {
                return candidate;
            }
            padding.append('0');
        }
        return null;
    }

    /**
     * Copies the PDF bitstream to a temporary file and renders it through
     * process(String, DjatokaDecodeParam).
     * <p>
     * The temporary file is created in the OpenURL cache directory when one is
     * configured, otherwise in the default temp directory. It is removed when
     * this method returns, whether rendering succeeded or failed.
     *
     * @param input InputStream containing a PDF bitstream.
     * @param params DjatokaDecodeParam instance containing region and transform settings.
     * @return extracted region as a BufferedImage
     * @throws DjatokaException if the stream cannot be copied to the temporary file
     */
    public BufferedImage processUsingTemp(InputStream input, DjatokaDecodeParam params) throws DjatokaException {
        File in = null;
        try {
            // Copy to tmp file
            FileOutputStream fos = null;
            try {
                String cacheDir = OpenURLJP2KService.getCacheDir();
                if (cacheDir != null) {
                    in = File.createTempFile("tmp", ".pdf", new File(cacheDir));
                } else {
                    in = File.createTempFile("tmp", ".pdf");
                }
                in.deleteOnExit();
                fos = new FileOutputStream(in);
                IOUtils.copyStream(input, fos);
            } catch (IOException e) {
                logger.error(e, e);
                throw new DjatokaException(e);
            } finally {
                // Make sure the copy is closed before pdftoppm reads the file;
                // closing an already closed stream is harmless.
                org.apache.commons.io.IOUtils.closeQuietly(fos);
            }

            return process(in.getAbsolutePath(), params);
        } finally {
            // Remove the temp copy even when rendering throws; deleteOnExit alone
            // would keep it around for the lifetime of the JVM.
            if (in != null) {
                in.delete();
            }
        }
    }

    /**
     * Renders the region described by params from a PDF supplied as a stream.
     * The work is delegated to processUsingTemp, which goes through a
     * temporary file.
     *
     * @param input InputStream containing a PDF bitstream.
     * @param params DjatokaDecodeParam instance containing region and transform settings.
     * @return extracted region as a BufferedImage
     * @throws DjatokaException if processUsingTemp fails
     */
    @Override
    public BufferedImage process(InputStream input, DjatokaDecodeParam params) throws DjatokaException {
        final BufferedImage rendered = processUsingTemp(input, params);
        return rendered;
    }

    /**
     * Extracts region defined in DjatokaDecodeParam as BufferedImage.
     * <p>
     * The record's image file path is used when present; otherwise the
     * record's object is used if it is an InputStream.
     *
     * @param input ImageRecord wrapper containing file reference, inputstream, etc.
     * @param params DjatokaDecodeParam instance containing region and transform settings.
     * @return extracted region as a BufferedImage
     * @throws DjatokaException if the record has no file path and its object is
     *         missing or not an InputStream
     */
    @Override
    public BufferedImage process(ImageRecord input, DjatokaDecodeParam params) throws DjatokaException {
        logger.debug("in imagerecord;");
        if (input.getImageFile() != null)
            return process(input.getImageFile(), params);

        Object source = input.getObject();
        if (source instanceof InputStream)
            return process((InputStream) source, params);

        // The object may be null here, so its class must not be dereferenced blindly.
        String typeName = (source == null) ? "null" : source.getClass().getName();
        throw new DjatokaException("File not defined and Input Object Type " + typeName + " is not supported");
    }

    /**
     * Get PDF information with pdfinfo (run with the -box switch over all pages):
     * - "Pages: X": number of pages;
     * - "Page X MediaBox: x0 y0 x1 y1": box of each page, in pts.
     * <p>
     * Page sizes are derived from the MediaBox and scaled by
     * MAX_DPI / DEFAULT_DENSITY before being stored. When the record carries an
     * InputStream instead of a file path, the stream is copied to a temporary
     * file which is deleted again before returning.
     *
     * @param input ImageRecord referencing the PDF by file path or InputStream
     * @return a map (a HashMap instance, which getMetadata relies on):
     * - [Pages][n] ("0" if pdfinfo printed no "Pages" line)
     * - [Page 1][111.11 222.22]
     * - [Page i][www.ww hhh.hh]
     * - [Page n][999.99 1000.00]
     * @throws DjatokaException if the input is unsupported, the stream cannot be
     *         copied, or pdfinfo fails or reports no page boxes
     * @throws IllegalStateException if the pdftoppm/pdfinfo paths cannot be resolved
     */
    private static Map<String, String> getPDFProperties(ImageRecord input) throws DjatokaException {
        logger.debug("Getting PDF info");

        try {
            setPDFCommandsPath();
        } catch (IllegalStateException e) {
            logger.error("Failed to set PDF commands path: ", e);
            throw e;
        }

        HashMap<String, String> pdfProperties = new HashMap<String, String>();

        String sourcePath = null;
        // Non-null only when the PDF had to be spooled from a stream.
        File tempCopy = null;

        if (input.getImageFile() != null) {
            logger.debug("PDFInfo image file: " + input.getImageFile());
            sourcePath = input.getImageFile();
        } else if (input.getObject() instanceof InputStream) {
            // Any InputStream is accepted; it does not have to be a FileInputStream.
            // NOTE(review): if a caller has already read this stream (getMetadata
            // renders a page first), the copy made here may be empty -- confirm.
            InputStream is = (InputStream) input.getObject();
            FileOutputStream fos = null;

            // Copy to tmp file
            try {
                String cacheDir = OpenURLJP2KService.getCacheDir();
                if (cacheDir != null) {
                    tempCopy = File.createTempFile("tmp", ".pdf", new File(cacheDir));
                } else {
                    tempCopy = File.createTempFile("tmp", ".pdf");
                }
                tempCopy.deleteOnExit();

                fos = new FileOutputStream(tempCopy);
                IOUtils.copyStream(is, fos);
            } catch (IOException e) {
                logger.error(e, e);
                if (tempCopy != null)
                    tempCopy.delete();
                throw new DjatokaException(e);
            } finally {
                // Closing an already closed stream is harmless.
                org.apache.commons.io.IOUtils.closeQuietly(fos);
            }
            sourcePath = tempCopy.getAbsolutePath();
        } else {
            throw new DjatokaException("File not defined and Input Object Type " + input //.getObject().getClass().getName()
                    + " is not supported");
        }

        String pdfinfoCmd[] = PDFINFO_COMMAND.clone();
        pdfinfoCmd[PDFINFO_COMMAND_POSITION_BIN] = pdfinfoPath;
        pdfinfoCmd[PDFINFO_COMMAND_POSITION_FIRSTPAGE] = "1";
        pdfinfoCmd[PDFINFO_COMMAND_POSITION_LASTPAGE] = "-1"; // Last page even we not knowing its number.
        pdfinfoCmd[PDFINFO_COMMAND_POSITION_FILE] = sourcePath;
        Process pdfProc = null;
        try {
            ArrayList<MatchResult> pageSizes = new ArrayList<MatchResult>();
            MatchResult pages = null;

            pdfProc = Runtime.getRuntime().exec(pdfinfoCmd);
            BufferedReader lr = new BufferedReader(new InputStreamReader(pdfProc.getInputStream()));
            String line;
            for (line = lr.readLine(); line != null; line = lr.readLine()) {
                Matcher mm1 = PAGES_PATT.matcher(line);
                if (mm1.matches())
                    pages = mm1.toMatchResult();
                Matcher mm2 = MEDIABOX_PATT.matcher(line);
                if (mm2.matches())
                    pageSizes.add(mm2.toMatchResult());
            }

            int istatus = pdfProc.waitFor();
            if (istatus != 0)
                logger.error("pdfinfo proc failed, exit status=" + istatus + ", file=" + sourcePath);

            if (pages == null) {
                logger.error("Did not find 'Pages' line in output of pdfinfo command: "
                        + Arrays.deepToString(pdfinfoCmd));
                pdfProperties.put("Pages", "0");
            } else {
                pdfProperties.put("Pages", pages.group(1));
            }

            if (pageSizes.isEmpty()) {
                logger.error("Did not find \"Page X size\" lines in output of pdfinfo command: "
                        + Arrays.deepToString(pdfinfoCmd));
                throw new IllegalArgumentException("Failed to get pages size of PDF with pdfinfo.");
            } else {
                for (MatchResult mr : pageSizes) {
                    String page = mr.group(1);

                    float x0 = Float.parseFloat(mr.group(2));
                    float y0 = Float.parseFloat(mr.group(3));
                    float x1 = Float.parseFloat(mr.group(4));
                    float y1 = Float.parseFloat(mr.group(5));
                    float w = Math.abs(x1 - x0);
                    float h = Math.abs(y1 - y0);
                    // Have to scale page sizes by max dpi (MAX_DPI / DEFAULT_DENSITY). Otherwise, BookReader.js will request the wrong zoom level (svc.level).
                    float ws = w * MAX_DPI / DEFAULT_DENSITY;
                    float hs = h * MAX_DPI / DEFAULT_DENSITY;
                    String width = "" + ws;
                    String height = "" + hs;
                    pdfProperties.put("Page " + page, width + " " + height);
                }
            }

        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers up the stack can react to it.
            Thread.currentThread().interrupt();
            logger.error("Failed getting PDF information: ", e);
            throw new DjatokaException("Failed getting PDF information: ", e);
        } catch (Exception e) {
            logger.error("Failed getting PDF information: ", e);
            throw new DjatokaException("Failed getting PDF information: ", e);
        } finally {
            // Our exec() should just consume one of the streams, but we want to stay safe.
            // http://mark.koli.ch/2011/01/leaky-pipes-remember-to-close-your-streams-when-using-javas-runtimegetruntimeexec.html
            // pdfProc is still null when exec() itself failed.
            if (pdfProc != null) {
                org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getOutputStream());
                org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getInputStream());
                org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getErrorStream());
            }
            // The spooled copy is only needed while pdfinfo runs.
            if (tempCopy != null)
                tempCopy.delete();
        }

        return pdfProperties;
    }

    /*
        private static Dimension getPDFPageSize(String source, int page_number) throws DjatokaException {
    logger.debug("Getting PDF info for size of page '" + page_number + "'.");
        
    Dimension pageDimension = null;
        
          try {
      setPDFCommandsPath();
           } catch (IllegalStateException e) {
        logger.error("Failed to set PDF commands path: ",e);
        throw e;
    }
        
    String pdfinfoCmd[] = PDFINFO_COMMAND.clone();
    pdfinfoCmd[PDFINFO_COMMAND_POSITION_BIN] = pdfinfoPath;
    pdfinfoCmd[PDFINFO_COMMAND_POSITION_FIRSTPAGE] = "" + page_number;
    pdfinfoCmd[PDFINFO_COMMAND_POSITION_LASTPAGE] = "" + page_number; // Last page even we not knowing its number.
    pdfinfoCmd[PDFINFO_COMMAND_POSITION_FILE] = source;
    Process pdfProc = null;
    try
    {
        MatchResult pageSize = null;;
            
        pdfProc = Runtime.getRuntime().exec(pdfinfoCmd);
        BufferedReader lr = new BufferedReader(new InputStreamReader(pdfProc.getInputStream()));
        String line;
        for (line = lr.readLine(); line != null; line = lr.readLine())
        {
            Matcher mm = MEDIABOX_PATT.matcher(line);
            if (mm.matches())
                pageSize = mm.toMatchResult();
        }
        
        int istatus = pdfProc.waitFor();
        if (istatus != 0)
            logger.error("pdfinfo proc failed, exit status=" + istatus + ", file=" + source);
                
        if (pageSize == null)
        {
            logger.error("Did not find 'Page " + page_number + " size' line in output of pdfinfo command: " + pdfinfoCmd);
            //throw new IllegalArgumentException("Failed to get pages size of PDF with pdfinfo.");
            pageDimension = new Dimension(0, 0);
        }
        else
        {
            String page = pageSize.group(1);
            double x0 = Double.parseDouble(pageSize.group(2));
            double y0 = Double.parseDouble(pageSize.group(3));
            double x1 = Double.parseDouble(pageSize.group(4));
            double y1 = Double.parseDouble(pageSize.group(5));
            double width = Math.abs(x1 - x0);
            double height = Math.abs(y1 - y0);
            pageDimension = new Dimension();
            pageDimension.setSize(width, height);
        }
            
    }
    catch (Exception e)
    {
        logger.error("Failed getting PDF page size: ", e);
        throw new DjatokaException("Failed getting PDF page size: ", e);
    } finally {
        // Our exec() should just consume one of the streams, but we want to stay safe.
        // http://mark.koli.ch/2011/01/leaky-pipes-remember-to-close-your-streams-when-using-javas-runtimegetruntimeexec.html
        org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getOutputStream());
        org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getInputStream());
        org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getErrorStream());
    }
    return pageDimension;
        }
    */

    /**
     * Derives the rendering resolution for pdftoppm from the decode parameters.
     * <p>
     * As a side effect the level reduction factor of params is updated: from
     * the requested level when it is non-negative, or from the scaling
     * dimensions when the level is -1, no region is set and scaling
     * dimensions are present. The returned DPI is MAX_DPI divided by
     * 2^(reduction factor), or MAX_DPI itself when the factor is not positive.
     *
     * @param params decode parameters; its level reduction factor may be modified
     * @return the DPI value to pass to pdftoppm
     */
    private int getScaledDPI(DjatokaDecodeParam params) {
        final int requestedLevel = params.getLevel();

        if (requestedLevel >= 0) {
            params.setLevelReductionFactor(Math.max(0, DEFAULT_LEVELS - requestedLevel));
        } else if (requestedLevel == -1 && params.getRegion() == null && params.getScalingDimensions() != null) {
            final int targetWidth = params.getScalingDimensions()[0];
            final int targetHeight = params.getScalingDimensions()[1];
            final int longestSide = Math.max(Math.abs(targetWidth), Math.abs(targetHeight));
            final int scaleLevel = Math.min(MAX_DPI, (MAX_PX * DEFAULT_DENSITY / longestSide));
            params.setLevelReductionFactor(Math.max(0, DEFAULT_LEVELS - scaleLevel));
        }

        final int reductionFactor = params.getLevelReductionFactor();
        if (reductionFactor <= 0) {
            return MAX_DPI;
        }

        // image.size() / 2^r: factor 1 halves the resolution, factor 2 quarters it, etc.
        final int divisor = 1 << reductionFactor;
        final double scale = 1.0 / divisor;
        return (int) (MAX_DPI * scale);
    }

    /**
     * Parses the region of the decode parameters into a crop rectangle.
     * <p>
     * The region string is split on '{', '}' and ',' and its first four tokens
     * are read, in this order, as top (y), left (x), height and width. Every
     * token is parsed as an integer, so a token containing a decimal point
     * makes Integer.parseInt throw a NumberFormatException.
     *
     * @param params decode parameters holding the region string
     * @return the crop rectangle, or null when no region is set
     */
    private Rectangle getCropParam(DjatokaDecodeParam params) {
        final String region = params.getRegion();
        if (region == null) {
            return null;
        }

        final StringTokenizer parts = new StringTokenizer(region, "{},");
        logger.debug("Region params: " + params.getRegion());

        final int top = Integer.parseInt(parts.nextToken());
        final int left = Integer.parseInt(parts.nextToken());
        final int height = Integer.parseInt(parts.nextToken());
        final int width = Integer.parseInt(parts.nextToken());

        return new Rectangle(left, top, width, height);
    }

    /**
     * Resolves the paths of the pdftoppm and pdfinfo executables once and
     * keeps them in the static fields. Values are read through
     * ConfigurationManager with built-in defaults; nothing is re-read when
     * both paths are already set.
     *
     * @throws IllegalStateException if either path is still null after the lookup
     */
    private static void setPDFCommandsPath() throws IllegalStateException {
        // sanity check: poppler paths are required. can cache since it won't change
        if (pdftoppmPath != null && pdfinfoPath != null) {
            return;
        }

        pdftoppmPath = ConfigurationManager.getProperty(PROPS_PDF_PDFTOPPM_PATH, DEFAULT_PDFTOPPM_PATH);
        pdfinfoPath = ConfigurationManager.getProperty(PROPS_PDF_PDFINFO_PATH, DEFAULT_PDFINFO_PATH);

        if (pdftoppmPath == null) {
            throw new IllegalStateException("No value for key \"" + PROPS_PDF_PDFTOPPM_PATH
                    + "\" in djatoka.properties! Should be path to pdftoppm executable.");
        }
        if (pdfinfoPath == null) {
            throw new IllegalStateException("No value for key \"" + PROPS_PDF_PDFINFO_PATH
                    + "\" in djatoka.properties! Should be path to pdfinfo executable.");
        }
    }
}