com.giaybac.traprange.test.TESTPDFBox.java Source code

Java tutorial

Introduction

Here is the source code for com.giaybac.traprange.test.TESTPDFBox.java

Source

/**
* Copyright (C) 2015, GIAYBAC
*
* Released under the MIT license
*/
package com.giaybac.traprange.test;

import com.giaybac.traprange.TrapRangeBuilder;
import com.google.common.collect.Range;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.pdfbox.util.TextPosition;
import org.junit.Test;

/**
 * Manual exploration test: runs the content stream of the first page of a PDF
 * through this stripper, records the vertical extent of every text position
 * it receives, and prints both the raw extents and the ranges produced by
 * {@link TrapRangeBuilder}.
 *
 * <p>The class extends {@link PDFTextStripper} only so that it can override
 * {@link #processTextPosition(TextPosition)}.
 *
 * @author THOQ LUONG Mar 21, 2015 11:23:40 PM
 */
public class TESTPDFBox extends PDFTextStripper {

    //--------------------------------------------------------------------------
    //  Members
    /** System property that, when set, overrides the PDF opened by {@link #test()}. */
    private static final String PDF_PATH_PROPERTY = "traprange.test.pdf";

    /** PDF opened by {@link #test()} when {@link #PDF_PATH_PROPERTY} is not set. */
    private static final String DEFAULT_PDF_PATH = "D:\\traprange\\_Docs\\TK0976-AB5-0-2014042211.pdf";

    /** Vertical extent of every text position seen so far, in arrival order. */
    private final List<Range<Integer>> ranges = new ArrayList<>();

    /** Receives the same extents as {@link #ranges}; its result is printed by {@link #test()}. */
    private final TrapRangeBuilder trapRangeBuilder = new TrapRangeBuilder();

    //--------------------------------------------------------------------------
    //  Initialization and release
    /**
     * Creates the stripper with position sorting switched on.
     *
     * @throws IOException if the superclass constructor fails
     */
    public TESTPDFBox() throws IOException {
        super.setSortByPosition(true);
    }

    //--------------------------------------------------------------------------
    //  Getter N Setter
    //--------------------------------------------------------------------------
    //  Method binding
    /**
     * Processes the first page of the configured PDF, then prints the collected
     * extents sorted by lower endpoint followed by the ranges returned by
     * {@link TrapRangeBuilder#build()}.
     *
     * <p>The PDF path is read from the {@code traprange.test.pdf} system
     * property and falls back to the original hard-coded location.
     *
     * @throws IOException if the PDF cannot be loaded, processed or closed
     */
    @Test
    public void test() throws IOException {
        File pdfFile = new File(System.getProperty(PDF_PATH_PROPERTY, DEFAULT_PDF_PATH));
        PDDocument pdDocument = PDDocument.load(pdfFile);
        try {
            // getAllPages() is declared with a raw List, hence the wildcard and the cast.
            List<?> pages = pdDocument.getDocumentCatalog().getAllPages();
            PDPage page = (PDPage) pages.get(0);
            PDStream stream = page.getContents();

            this.processStream(page, page.findResources(), stream.getStream());
        } finally {
            // Always release the document, even when processing fails.
            pdDocument.close();
        }

        //Print out every collected extent, ordered by lower endpoint
        ranges.sort(Comparator.comparing(Range<Integer>::lowerEndpoint));
        for (Range<Integer> range : ranges) {
            System.out.println("> " + range);
        }

        //Print out the ranges computed by the builder
        List<Range<Integer>> trapRanges = trapRangeBuilder.build();
        for (Range<Integer> trapRange : trapRanges) {
            System.out.println("TrapRange: " + trapRange);
        }
    }

    //--------------------------------------------------------------------------
    //  Implement N Override
    /**
     * Prints the text of the given position and records its vertical extent,
     * {@code [y, y + height]} truncated to integers, in both {@link #ranges}
     * and the {@link TrapRangeBuilder}.
     *
     * @param text the text position handed over by the stripper
     */
    @Override
    protected void processTextPosition(TextPosition text) {
        int lower = (int) text.getY();
        int upper = (int) (text.getY() + text.getHeight());
        Range<Integer> range = Range.closed(lower, upper);
        System.out.println("Text: " + text.getCharacter());
        // Keep the raw extent as well; otherwise the list printed by test() stays empty.
        ranges.add(range);
        trapRangeBuilder.addRange(range);
    }
    //--------------------------------------------------------------------------
    //  Utils
    //--------------------------------------------------------------------------
    //  Inner class
}