io.github.karols.hocr4j.Page.java Source code

Java tutorial

Introduction

Here is the source code for io.github.karols.hocr4j.Page.java

Source

/* Copyright (c) 2014 Karol Stasiak
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*/

package io.github.karols.hocr4j;

import io.github.karols.hocr4j.dom.HocrElement;
import io.github.karols.hocr4j.dom.HocrParser;
import io.github.karols.hocr4j.dom.HocrTag;
import io.github.karols.hocr4j.utils.CollectionUtils;
import io.github.karols.hocr4j.utils.DelegatingUnmodifiableList;
import io.github.karols.hocr4j.utils.TextUtils;

import com.google.common.base.Function;
import com.google.common.base.Predicate;
import org.apache.commons.lang3.ObjectUtils;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.annotation.concurrent.Immutable;

/**
 * Represents a page in the OCR'd document.
 *
 * Corresponding hOCR class: <code>ocr_page</code>.
 */
@Immutable
public class Page extends DelegatingUnmodifiableList<Area> implements Bounded {

    /**
     * Creates a list of pages from a list of HOCR documents.
     * A document can contain several pages.
     * The pages are numbered consecutively, starting from 1.
     *
     * @param hocr list of HOCR documents
     * @return list of pages
     */
    public static List<Page> fromHocr(@Nonnull List<String> hocr) {
        List<Page> pages = new ArrayList<Page>();
        int pageNo = 1;
        for (String h : hocr) {
            List<HocrElement> parseResult = HocrParser.createAst(h);
            List<Page> ps = HocrParser.parse(parseResult, pageNo);
            pages.addAll(ps);
            pageNo += ps.size();
        }
        return pages;
    }

    /**
     * Creates a list of pages with new page numbers
     *
     * @param startFrom new page number for the first page
     * @param pages     list of pages
     * @return list of pages with new page numbers
     */
    @Nonnull
    public static List<Page> changePageNumbers(int startFrom, @Nonnull List<Page> pages) {
        List<Page> result = new ArrayList<Page>();
        int pageNo = startFrom;
        for (Page p : pages) {
            result.add(p.changePageNumber(pageNo));
            pageNo++;
        }
        return result;
    }

    /**
     * Creates a list of pages with new page numbers
     *
     * @param startFrom new page number for the first page
     * @param pages     pages
     * @return list of pages with new page numbers
     */
    @Nonnull
    public static List<Page> changePageNumbers(int startFrom, @Nonnull Page... pages) {
        List<Page> result = new ArrayList<Page>();
        int pageNo = startFrom;
        for (Page p : pages) {
            result.add(p.changePageNumber(pageNo));
            pageNo++;
        }
        return result;
    }

    /**
     * Merges all pages by stacking them vertically into one long page.
     * @param pages input pages
     * @return resulting merged page
     */
    public static Page merge(List<Page> pages) {
        List<Area> resultAreas = new ArrayList<Area>();
        int verticalShift = 0;
        int maxWidth = 0;
        for (Page p : pages) {
            for (Area a : p.areas) {
                resultAreas.add(a.translate(0, verticalShift));
            }
            verticalShift += p.getBounds().getHeight();
            maxWidth = Math.max(maxWidth, p.getBounds().getWidth());
        }
        return new Page(null, pages.get(0).pageNo, resultAreas, new Bounds(0, 0, maxWidth, verticalShift));
    }

    private final List<Area> areas;
    private final Bounds bounds;
    private final int pageNo;

    /**
     * Creates a page from the corresponding HOCR &lt;div&gt; tag
     *
     * @param pageNo page number
     * @param e      HOCR tag
     * @throws IllegalArgumentException if not a valid &lt;div&gt; tag
     */
    public Page(int pageNo, @Nonnull HocrElement e) {
        this.pageNo = pageNo;
        areas = new ArrayList<Area>();
        if (e instanceof HocrTag) {
            HocrTag tag = (HocrTag) e;
            if (tag.name.equals("div")) {
                for (HocrElement k : tag.elements) {
                    if (k.isNotBlank()) {
                        areas.add(new Area(k));
                    }
                }
                Bounds b = Bounds.fromHocrTitleValue(tag.title);
                if (b == null) {
                    b = Bounds.ofAll(areas);
                }
                bounds = b;
                return;
            }
        }
        throw new IllegalArgumentException(e.mkString());
    }

    /**
     * Creates a page containing given areas.
     * The page bounds are calculated.
     *
     * @param pageNo page number
     * @param a      list of areas (not empty)
     */
    public Page(int pageNo, @Nonnull List<Area> a) {
        if (a == null || a.isEmpty())
            throw new IllegalArgumentException();
        this.pageNo = pageNo;
        areas = new ArrayList<Area>(a);
        bounds = Bounds.ofAll(a);
    }

    /**
     * Creates a page containing given areas.
     *
     * @param pageNo page number
     * @param a      list of areas (can be empty)
     * @param b      bounds of the page
     */
    public Page(int pageNo, @Nonnull List<Area> a, Bounds b) {
        if (a == null)
            throw new IllegalArgumentException();
        this.pageNo = pageNo;
        areas = new ArrayList<Area>(a);
        bounds = b;
    }

    private Page(Void v, int pageNo, @Nonnull List<Area> a) {
        if (a == null || a.isEmpty())
            throw new IllegalArgumentException();
        this.pageNo = pageNo;
        areas = a;
        bounds = Bounds.ofAll(a);
    }

    private Page(Void v, int pageNo, Bounds b) {
        this.pageNo = pageNo;
        areas = new ArrayList<Area>();
        bounds = b;
    }

    private Page(Void v, int pageNo, @Nonnull List<Area> a, Bounds b) {
        if (a == null)
            throw new IllegalArgumentException();
        this.pageNo = pageNo;
        areas = a;
        bounds = b;
    }

    /**
     * Returns a copy of this page with all tiny print removed.
     * If the page contains less that 10 words, nothing is removed.
     * Tiny print is defined as having height lower than
     * 1/6th of median word height, or 1/12th if the word is smaller.
     *
     * @return page without tiny print
     * @see TextUtils#isSmaller(String)
     */
    @Nonnull
    public Page cleanTinyPrint() {
        List<Integer> heightList = new ArrayList<Integer>();
        int bad = 0;
        for (Word w : getAllWords()) {
            Bounds b = w.getBounds();
            if (b != null) {
                heightList.add(b.getHeight());
            } else {
                bad++;
            }
        }
        if (heightList.size() < 10) {
            return this;
        }
        if (bad > heightList.size()) {
            return this;
        }

        Collections.sort(heightList);
        int medianHeight = heightList.get(heightList.size() / 2);
        final int cutoffHeight = medianHeight / 6;

        final Predicate<Word> requirement = new Predicate<Word>() {
            public boolean apply(Word input) {
                if (TextUtils.isSmaller(input.getText())) {
                    return input.getBounds().getHeight() * 2 > cutoffHeight;
                } else {
                    return input.getBounds().getHeight() > cutoffHeight;
                }
            }
        };
        final Function<Line, Line> lineCleaner = new Function<Line, Line>() {
            public Line apply(@Nullable Line line) {
                if (line == null)
                    throw new IllegalStateException();
                return new Line(CollectionUtils.listFilter(line, requirement), line.bounds);
            }
        };
        final Function<Paragraph, Paragraph> paragraphCleaner = new Function<Paragraph, Paragraph>() {
            public Paragraph apply(@Nullable Paragraph paragraph) {
                if (paragraph == null)
                    throw new IllegalStateException();
                return paragraph.map(lineCleaner);
            }
        };
        final Function<Area, Area> areaCleaner = new Function<Area, Area>() {
            public Area apply(@Nullable Area area) {
                if (area == null)
                    throw new IllegalStateException();
                return area.map(paragraphCleaner);
            }
        };
        return map(areaCleaner);
    }

    /**
     * Creates copy of this page containing
     * only the words that are contained in given rectangle.
     *
     * @param rectangle bounding rectangle
     * @return page cropped to the bounding rectangle
     */
    @Nonnull
    public Page createBounded(@Nonnull Bounds rectangle) {
        Page p = new Page(null, pageNo, bounds.intersection(rectangle));
        for (Area a : areas) {
            Area a2 = a.createBounded(rectangle);
            if (!a2.isBlank()) {
                p.areas.add(a2);
            }
        }
        return p;
    }

    /**
     * Creates copy of this page containing
     * only the lines that are contained in and/or touch given rectangle.
     *
     * @param rectangle bounding rectangle
     * @return page with lines touching the bounding rectangle
     */
    @Nonnull
    public Page createTouching(@Nonnull Bounds rectangle) {
        ArrayList<Area> result = new ArrayList<Area>();
        for (Area a : areas) {
            Area a2 = a.createTouching(rectangle);
            if (!a2.isBlank()) {
                result.add(a2);
            }
        }
        return new Page(null, pageNo, result);
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final Page other = (Page) obj;
        return ObjectUtils.equals(this.areas, other.areas) && ObjectUtils.equals(this.bounds, other.bounds);
    }

    /**
     * Finds all lines that satisfy the given predicate.
     * The line order is unspecified.
     *
     * @param predicate predicate to satisfy
     * @return lines that satisfy the predicate
     */
    @Nonnull
    public List<Line> findAllLines(@Nonnull Predicate<Line> predicate) {
        ArrayList<Line> result = new ArrayList<Line>();
        for (Area a : areas) {
            for (Paragraph p : a) {
                for (Line l : p) {
                    if (predicate.apply(l)) {
                        result.add(l);
                    }
                }
            }
        }
        return result;
    }

    /**
     * Finds a line that satisfies given predicate and according to the given comparator is the "largest".
     * If not found, returns <code>null</code>.
     *
     * @param comparatorForMaximizing comparator to choose the "largest" line
     * @param predicate               predicate the found line has to satisfy
     * @return a line that satisfies the predicate, or <code>null</code> if there are none
     */
    @Nullable
    public Line findLine(@Nonnull Comparator<Line> comparatorForMaximizing, @Nonnull Predicate<Line> predicate) {
        Line result = null;
        for (Area a : areas) {
            Line l = a.findLine(comparatorForMaximizing, predicate);
            if (l != null) {
                if (result == null || comparatorForMaximizing.compare(l, result) > 0) {
                    result = l;
                }
            }
        }
        return result;
    }

    /**
     * Finds a line in this page that when applied to the given function
     * gives the highest <b>non-negative</b> result.
     * Lines that yield negative or <code>null</code> result are considered not matching.
     * If no line is found, returns <code>null</code>.
     *
     * @param scoreFunction score function
     * @return a line with the highest non-negative score,
     *         or <code>null</code> if none found
     */
    @Nullable
    public Line findLineMaximizing(@Nonnull Function<Line, Double> scoreFunction) {
        return findLineMaximizingImpl(scoreFunction, null, false);
    }

    /**
     * Finds a line in this page that when applied to the given function
     * gives one of the highest <b>non-negative</b> results,
     * biased towards lines that are closer to <code>header</code>.
     * Lines that yield negative or <code>null</code> result are considered not matching.
     * If no line is found, returns <code>null</code>.
     *
     * @param scoreFunction score function
     * @param header        object in proximity of which the result is preferred
     * @return a line with one of the highest non-negative scores near <code>header</code>,
     *         or <code>null</code> if none found
     */
    @Nullable
    public Line findLineMaximizingCloseTo(Function<Line, Double> scoreFunction, @Nonnull Bounds header) {
        return findLineMaximizingImpl(scoreFunction, header, false);
    }

    @Nullable
    private Line findLineMaximizingImpl(@Nonnull Function<Line, Double> scoreFunction, @Nullable Bounded header,
            boolean slightlyBelow) {
        Line result = null;
        double maxScore = Double.NEGATIVE_INFINITY;
        for (Area a : areas) {
            for (Paragraph p : a) {
                for (Line l : p) {
                    Double thisScore = scoreFunction.apply(l);
                    if (thisScore == null || thisScore < 0) {
                        continue;
                    }
                    if (header != null) {
                        thisScore += 0.1;
                        double multiplier = header.getBounds().distance(l.bounds) + bounds.getHeight() / 10.0;
                        multiplier = Math.sqrt(multiplier);
                        multiplier = Math.sqrt(multiplier);
                        thisScore /= multiplier;
                        if (slightlyBelow && header.getBounds().isBelow(l.bounds)) {
                            thisScore /= 2;
                        }
                    }
                    if (thisScore > maxScore) {
                        result = l;
                        maxScore = thisScore;
                    }
                }
            }
        }
        return result;
    }

    /**
     * Finds a line in this page that when applied to the given function
     * gives one of the highest <b>non-negative</b> results,
     * biased towards lines that are close to and below <code>header</code>.
     * Lines that yield negative or <code>null</code> result are considered not matching.
     * If no line is found, returns <code>null</code>.
     *
     * @param scoreFunction score function
     * @param header        object in proximity of which the result is preferred
     * @return a line with one of the highest non-negative scores near <code>header</code>,
     *         or <code>null</code> if none found
     */
    @Nullable
    public Line findLineMaximizingPreferablySlightlyBelow(@Nonnull Function<Line, Double> scoreFunction,
            @Nonnull Bounds header) {
        return findLineMaximizingImpl(scoreFunction, header, true);
    }

    /**
     * Finds a line in this page that when applied to the given function
     * gives the lowest <b>non-negative</b> result.
     * Lines that yield negative or <code>null</code> result are considered not matching.
     * If no line is found, returns <code>null</code>.
     *
     * @param scoreFunction score function
     * @return a line with the lowest non-negative score,
     *         or <code>null</code> if none found
     */
    @Nullable
    public Line findLineMinimizing(@Nonnull Function<Line, Double> scoreFunction) {
        return findLineMinimizingImpl(scoreFunction, null, false);
    }

    /**
     * Finds a line in this page that when applied to the given function
     * gives one of the lowest <b>non-negative</b> results,
     * biased towards lines that are closer to <code>header</code>.
     * Lines that yield negative or <code>null</code> result are considered not matching.
     * If no line is found, returns <code>null</code>.
     *
     * @param scoreFunction score function
     * @param header        object in proximity of which the result is preferred
     * @return a line with one of the lowest non-negative scores near <code>header</code>,
     *         or <code>null</code> if none found
     */
    @Nullable
    public Line findLineMinimizingCloseTo(Function<Line, Double> scoreFunction, @Nonnull Bounds header) {
        return findLineMinimizingImpl(scoreFunction, header, false);
    }

    @Nullable
    private Line findLineMinimizingImpl(@Nonnull Function<Line, Double> scoreFunction, @Nullable Bounded header,
            boolean slightlyBelow) {
        Line result = null;
        double minScore = Double.POSITIVE_INFINITY;
        for (Area a : areas) {
            for (Paragraph p : a) {
                for (Line l : p) {
                    Double thisScore = scoreFunction.apply(l);
                    if (thisScore == null || thisScore < 0) {
                        continue;
                    }
                    if (header != null) {
                        thisScore += 0.1;
                        double multiplier = header.getBounds().distance(l.bounds) + bounds.getHeight() / 10.0;
                        multiplier = Math.sqrt(multiplier);
                        multiplier = Math.sqrt(multiplier);
                        thisScore *= multiplier;
                        if (slightlyBelow && header.getBounds().isBelow(l.bounds)) {
                            thisScore *= 2;
                        }
                    }
                    if (thisScore < minScore) {
                        result = l;
                        minScore = thisScore;
                    }
                }
            }
        }
        return result;
    }

    /**
     * Finds a line in this page that when applied to the given function
     * gives one of the lowest <b>non-negative</b> results,
     * biased towards lines that are close to and below <code>header</code>.
     * Lines that yield negative or <code>null</code> result are considered not matching.
     * If no line is found, returns <code>null</code>.
     *
     * @param scoreFunction score function
     * @param header        object in proximity of which the result is preferred
     * @return a line with one of the lowest non-negative scores near <code>header</code>,
     *         or <code>null</code> if none found
     */
    @Nullable
    public Line findLineMinimizingPreferablySlightlyBelow(@Nonnull Function<Line, Double> scoreFunction,
            @Nonnull Bounds header) {
        return findLineMinimizingImpl(scoreFunction, header, true);
    }

    /**
     * Returns the list of all lines in the page
     * in the natural left-to-right reading order.
     *
     * @return all lines
     */
    @Nonnull
    public List<Line> getAllLines() {
        ArrayList<Line> result = new ArrayList<Line>();
        for (Area a : areas) {
            for (Paragraph p : a) {
                result.addAll(p);
            }
        }
        Collections.sort(result, OrderedBy.flowOrder());
        return result;
    }

    /**
     * Returns the list of all lines in the page
     * in the natural left-to-right reading order,
     * converted to mutable lists of words.
     *
     * @return all lines as lists of words
     */
    @Nonnull
    public List<List<Word>> getAllLinesAsListsOfWords() {
        ArrayList<Line> result = new ArrayList<Line>();
        for (Area a : areas) {
            for (Paragraph p : a) {
                result.addAll(p);
            }
        }
        Collections.sort(result, OrderedBy.flowOrder());
        return CollectionUtils.listMap(result, Line.GET_WORDS);
    }

    /**
     * Returns the list of all lines in the page
     * in the natural left-to-right reading order,
     * converted to strings.
     *
     * @return all lines as strings
     * @see Line#mkString()
     */
    @Nonnull
    public List<String> getAllLinesAsStrings() {
        ArrayList<Line> result = new ArrayList<Line>();
        for (Area a : areas) {
            for (Paragraph p : a) {
                result.addAll(p);
            }
        }
        Collections.sort(result, OrderedBy.flowOrder());
        return CollectionUtils.listMap(result, Line.MK_STRING);
    }

    /**
     * Returns the list of all paragraphs in the page.
     * The order is unspecified.
     *
     * @return list of all the words
     */
    @Nonnull
    public List<Paragraph> getAllParagraphs() {
        ArrayList<Paragraph> paragraphs = new ArrayList<Paragraph>();
        for (Area a : areas) {
            paragraphs.addAll(a);
        }
        return paragraphs;
    }

    /**
     * Returns the list of all words in the page.
     * The order is unspecified.
     *
     * @return list of all the words
     */
    @Nonnull
    public List<Word> getAllWords() {
        ArrayList<Word> words = new ArrayList<Word>();
        for (Area a : areas) {
            for (Paragraph p : a) {
                for (Line l : p) {
                    for (Word w : l.words) {
                        words.add(w);
                    }
                }
            }
        }
        return words;
    }

    @Override
    public Bounds getBounds() {
        return bounds;
    }

    /**
     * Returns height of this page, or -1 if not known.
     *
     * @return height of this page
     */
    public double getHeight() {
        if (bounds != null) {
            return bounds.getHeight();
        }
        return -1;
    }

    /**
     * Calculates the number of lines in this page.
     *
     * @return number of lines
     */
    public int getLineCount() {
        int sum = 0;
        for (Area a : areas) {
            sum += a.getLineCount();
        }
        return sum;
    }

    /**
     * Returns the number of this page.
     *
     * @return page number
     */
    public int getPageNo() {
        return pageNo;
    }

    /**
     * Calculates the number of paragraphs in this page.
     *
     * @return number of paragraphs
     */
    public int getParagraphCount() {
        int sum = 0;
        for (Area a : areas) {
            sum += a.getParagraphCount();
        }
        return sum;
    }

    @Override
    protected List<Area> getUnderlying() {
        return areas;
    }

    /**
     * Returns width of this page, or -1 if not known.
     *
     * @return width of this page
     */
    public double getWidth() {
        if (bounds != null) {
            return bounds.getWidth();
        }
        return -1;
    }

    /**
     * Calculates the number of words in this page,
     * calculated as the sum of numbers of words in all areas.
     *
     * @return number of words
     */
    public int getWordCount() {
        int sum = 0;
        for (Area a : areas) {
            sum += a.getWordCount();
        }
        return sum;
    }

    /**
     * Creates bounds for the entire left-aligned column
     * given bounds that span its entire height.
     * The resulting bounds do not cut words.
     * If there are few lines longer than the rest of the column,
     * the bounds are expanded to the right until they meet the page edge
     * or another column of text.
     * May behave unexpectedly if the column has only few lines.
     *
     * @param b bounds that span the entire height of the column
     * @return bounds of the column
     * @see Page#growBoundsUntilTheyStopCuttingWords(Bounds)
     */
    @Nullable
    public Bounds growBoundsForLeftAlignedColumn(@Nullable final Bounds b) {
        if (b == null) {
            return null;
        }
        Bounds rect = growBoundsUntilTheyStopCuttingWords(b);
        assert rect != null;
        List<Line> lines = findAllLines(LineThat.hasWordsIntersecting(rect));
        double sum = 0;
        int count = 0;
        for (Line l : lines) {
            for (int i = 1; i < l.words.size(); i++) {

                Word leftE = l.words.get(i - 1);
                Word rightE = l.words.get(i);
                if (leftE.getBounds().in(rect) && rightE.getBounds().in(rect)) {
                    sum += rightE.getBounds().getLeft() - leftE.getBounds().getRight();
                    count++;
                }
            }
        }
        double spaceWidth = sum / count;
        Bounds movingEdge = rect.getRightEdge();
        Bounds nearTheRightEdge = movingEdge.moveToTheLeft((int) (spaceWidth / 4));
        int noofLongLines = findAllLines(LineThat.hasWordsIntersecting(nearTheRightEdge)).size();
        if (noofLongLines < 2) {
            noofLongLines = 2;
        }
        Bounds lastGoodPosition = movingEdge;
        int step = (int) (spaceWidth / 2);
        if (step < 1) {
            step = 1;
        }
        while (movingEdge.in(this.bounds) && movingEdge.getLeft() - rect.getRight() < rect.getWidth()) {
            movingEdge = movingEdge.moveToTheRight(step);
            int cutLinesCount = findAllLines(LineThat.hasWordsIntersecting(movingEdge)).size();
            if (cutLinesCount > noofLongLines) {
                break;
            }
            if (cutLinesCount == 0) {
                lastGoodPosition = movingEdge;
            }
        }
        // TODO: move left edge until it gets into the gap between columns
        return rect.union(lastGoodPosition);
    }

    /**
     * Returns the smallest possible bounds
     * that contain the given bounds and do not cut any word,
     * Bounds cut a word if the word is neither inside the bounds nor outside the bounds.
     * Returns <code>null</code> if given <code>null</code>.
     *
     * @param b bounds to expand
     * @return expanded bounds, or <code>null</code> if <code>b</code> is <code>null</code>
     */
    @Nullable
    public Bounds growBoundsUntilTheyStopCuttingWords(@Nullable final Bounds b) {
        if (b == null) {
            return null;
        }
        List<Word> words = getAllWords();
        boolean modified = true;
        Bounds rect = b;
        while (modified) {
            modified = false;
            for (Word w : words) {
                if (rect.cuts(w.getBounds())) {
                    rect = rect.union(w.getBounds());
                    modified = true;
                }
            }
        }
        return rect;
    }

    @Override
    public int hashCode() {
        return ObjectUtils.hashCodeMulti(areas, bounds);
    }

    /**
     * Checks if this page is blank, i.e. all of its areas are blank.
     *
     * @return <code>true</code> if this page is blank, <code>false</code> otherwise
     */
    public boolean isBlank() {
        for (Area a : areas) {
            if (!a.isBlank()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Creates a new page with all areas modified by the given function.
     * Bounds are recalculated unless this page contains no areas.
     *
     * @param f area-modifying function
     * @return modified page
     */
    @Nonnull
    public Page map(@Nonnull Function<Area, Area> f) {
        List<Area> areaList = CollectionUtils.listMap(areas, f);
        if (areaList.isEmpty()) {
            return new Page(null, pageNo, areaList, bounds);
        } else {
            return new Page(null, pageNo, areaList);
        }
    }

    /**
     * Creates a new page with all bounds modified by the given function.
     * Bounds are recalculated unless this page contains no areas;
     * If there are no areas, the bounds of this page
     * are modified using the given function.
     *
     * @param f bounds-modifying function
     * @return modified page
     */
    @Nonnull
    public Page mapBounds(@Nonnull final Function<Bounds, Bounds> f) {
        List<Area> areaList = CollectionUtils.listMap(areas, new Function<Area, Area>() {
            @Nullable
            public Area apply(@Nullable Area area) {
                assert area != null;
                return area.mapBounds(f);
            }
        });
        if (areaList.isEmpty()) {
            return new Page(null, pageNo, areaList, f.apply(bounds));
        } else {
            return new Page(null, pageNo, areaList);
        }
    }

    /**
     * Creates a new page with all lines modified by the given function.
     * Bounds are recalculated unless this page contains no lines.
     *
     * @param lineFunction line-modifying function
     * @return modified paragraph
     */
    @Nonnull
    public Page mapLines(@Nonnull final Function<Line, Line> lineFunction) {
        final Function<Paragraph, Paragraph> paragraphFunction = new Function<Paragraph, Paragraph>() {
            public Paragraph apply(@Nullable Paragraph paragraph) {
                if (paragraph == null)
                    throw new IllegalStateException();
                return paragraph.map(lineFunction);
            }
        };
        final Function<Area, Area> areaFunction = new Function<Area, Area>() {
            public Area apply(@Nullable Area area) {
                if (area == null)
                    throw new IllegalStateException();
                return area.map(paragraphFunction);
            }
        };
        return new Page(pageNo, CollectionUtils.listMap(areas, areaFunction));
    }

    /**
     * Creates a copy of this page with new page number.
     *
     * @param newPageNo new page number
     * @return copy with new page number
     */
    public Page changePageNumber(int newPageNo) {
        return new Page(null, newPageNo, areas, bounds);
    }

    /**
     * Returns a new page with only one area containing all the paragraphs.
     *
     * @return a page with merged areas
     */
    @Nonnull
    public Page mergeAllAreas() {
        Area a = new Area(getAllParagraphs());
        return new Page(pageNo, Collections.singletonList(a));
    }

    @Nonnull
    @Override
    public Page subList(int i, int j) {
        return new Page(pageNo, areas.subList(i, j));
    }

    @Nonnull
    public String toString() {
        return "PAGE #" + pageNo + ": " + areas;
    }

}