// Java tutorial
/* Copyright (c) 2014 Karol Stasiak * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. */ package io.github.karols.hocr4j; import io.github.karols.hocr4j.dom.HocrElement; import io.github.karols.hocr4j.dom.HocrTag; import io.github.karols.hocr4j.utils.CollectionUtils; import io.github.karols.hocr4j.utils.DelegatingUnmodifiableList; import com.google.common.base.Function; import com.google.common.base.Predicate; import org.apache.commons.lang3.ObjectUtils; import java.util.ArrayList; import java.util.Comparator; import java.util.List; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.annotation.concurrent.Immutable; /** * Represents a paragraph in the OCR'd document. * * Corresponding hOCR class: <code>ocr_par</code>. */ @Immutable public class Paragraph extends DelegatingUnmodifiableList<Line> implements Bounded { private final Bounds bounds; private final List<Line> lines; /** * Creates a paragraph from the corresponding HOCR <p> tag * * @param e HOCR tag * @throws IllegalArgumentException if not a valid <p> tag */ public Paragraph(@Nonnull HocrElement e) { lines = new ArrayList<Line>(); if (e instanceof HocrTag) { HocrTag tag = (HocrTag) e; if (tag.name.equals("p")) { for (HocrElement k : tag.elements) { if (k.isNotBlank()) { lines.add(new Line(k)); } } Bounds b = Bounds.fromHocrTitleValue(tag.title); if (b == null) { b = Bounds.ofAll(lines); } bounds = b; return; } } throw new IllegalArgumentException(e.mkString()); } /** * Creates a paragraph containing given lines. 
* The paragraph bounds are calculated. * * @param l list of lines (not empty) */ public Paragraph(@Nonnull List<Line> l) { if (l.isEmpty()) throw new IllegalArgumentException(); lines = new ArrayList<Line>(l); bounds = Bounds.ofAll(l); } /** * Creates a paragraph containing given lines. * * @param l list of lines * @param b bounds of the paragraph */ public Paragraph(List<Line> l, Bounds b) { lines = new ArrayList<Line>(l); bounds = b; } private Paragraph(Void v, List<Line> l) { if (l.isEmpty()) throw new IllegalArgumentException(); lines = l; bounds = Bounds.ofAll(l); } private Paragraph(Void v, List<Line> l, Bounds b) { lines = l; bounds = b; } private Paragraph(Void v, Bounds b) { lines = new ArrayList<Line>(); bounds = b; } /** * Creates copy of this paragraph containing * only the words that are contained in given rectangle. * * @param rectangle bounding rectangle * @return paragraph cropped to the bounding rectangle */ @Nonnull public Paragraph createBounded(@Nonnull Bounds rectangle) { Paragraph p = new Paragraph((Void) null, bounds.intersection(rectangle)); for (Line l : lines) { Line l2 = l.createBounded(rectangle); if (!l2.isBlank()) { p.lines.add(l2); } } return p; } /** * Creates copy of this paragraph containing * only the lines that are contained in and/or touch given rectangle. 
* * @param rectangle bounding rectangle * @return paragraph lines words touching the bounding rectangle */ @Nonnull public Paragraph createTouching(Bounds rectangle) { Paragraph p = new Paragraph((Void) null, bounds); //TODO for (Line l : lines) { if (l.bounds != null && l.bounds.touches(rectangle)) { p.lines.add(l); } } return p; } @Override public boolean equals(Object obj) { if (this == obj) { return true; } if (obj == null || getClass() != obj.getClass()) { return false; } final Paragraph other = (Paragraph) obj; return ObjectUtils.equals(this.lines, other.lines) && ObjectUtils.equals(this.bounds, other.bounds); } /** * Finds a line that satisfies given predicate and according to the given comparator is the "largest". * If not found, returns <code>null</code>. * * @param comparatorForMaximizing comparator to choose the "largest" line * @param predicate predicate the found line has to satisfy * @return a line that satisfies the predicate, or <code>null</code> if there are none */ @Nullable public Line findLine(@Nonnull Comparator<Line> comparatorForMaximizing, @Nonnull Predicate<Line> predicate) { Line result = null; for (Line l : lines) { if (predicate.apply(l)) { if (result == null || comparatorForMaximizing.compare(l, result) > 0) { result = l; } } } return result; } /** * Returns the list of all lines in this paragraph converted to strings. * * @return all lines as strings * @see Line#mkString() */ @Nonnull public List<String> getAllLinesAsString() { List<String> result = new ArrayList<String>(size()); for (Line l : lines) { result.add(l.mkString()); } return result; } @Override public Bounds getBounds() { return bounds; } /** * Calculates the number of lines in this paragraph. * * @return number of lines */ public int getLineCount() { return lines.size(); } @Override protected List<Line> getUnderlying() { return lines; } /** * Calculates the number of words in this paragraph, calculated as the sum of numbers of words in all lines. 
* * @return number of words */ public int getWordCount() { int sum = 0; for (Line l : lines) { sum += l.words.size(); } return sum; } @Override public int hashCode() { return ObjectUtils.hashCodeMulti(lines, bounds); } /** * Checks if this paragraph is blank, i.e. all of its lines are blank. * * @return <code>true</code> if this paragraph is blank, <code>false</code> otherwise */ public boolean isBlank() { for (Line l : lines) { if (!l.isBlank()) { return false; } } return true; } /** * Creates a new paragraph with all lines modified by the given function. * Bounds are recalculated unless this paragraph contains no lines. * * @param f line-modifying function * @return modified paragraph */ @Nonnull public Paragraph map(@Nonnull Function<Line, Line> f) { List<Line> lineList = CollectionUtils.listMap(lines, f); if (lineList.isEmpty()) { return new Paragraph(null, lineList, bounds); } else { return new Paragraph(null, lineList); } } /** * Creates a new paragraph with all bounds modified by the given function. * Bounds are recalculated unless this paragraph contains no lines; * If there are no lines, the bounds of this paragraph * are modified using the given function. * * @param f bounds-modifying function * @return modified paragraph */ @Nonnull public Paragraph mapBounds(@Nonnull final Function<Bounds, Bounds> f) { List<Line> lineList = CollectionUtils.listMap(lines, new Function<Line, Line>() { @Nullable public Line apply(@Nullable Line line) { assert line != null; return line.mapBounds(f); } }); if (lineList.isEmpty()) { return new Paragraph(null, lineList, f.apply(bounds)); } else { return new Paragraph(null, lineList); } } @Override @Nonnull public Paragraph subList(int i, int j) { return new Paragraph(lines.subList(i, j)); } @Nonnull public String toString() { return lines.toString(); } /** * Translates the paragraph by given vector. 
* * @param dx x displacement * @param dy y displacement * @return translated paragraph */ public Paragraph translate(int dx, int dy) { List<Line> ls = new ArrayList<Line>(lines.size()); for (Line l : lines) { ls.add(l.translate(dx, dy)); } return new Paragraph(null, ls, bounds.translate(dx, dy)); } }