com.zilbo.flamingSailor.TE.model.TextLine.java Source code

Java tutorial

Introduction

Here is the source code for com.zilbo.flamingSailor.TE.model.TextLine.java

Source

package com.zilbo.flamingSailor.TE.model;

import org.apache.commons.lang3.StringUtils;

import java.io.PrintStream;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

/*
 * Copyright 2012 Zilbo.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * A {@link Component} whose text is assembled from its {@link TextPiece} children.
 *
 * <p>Besides exposing the concatenated text, the class computes a character density, a
 * text-length-weighted height, and a "line is regular" score that is derived from font and
 * size frequency tables supplied by the caller.
 */
public class TextLine extends Component {

    /** Matches every character that is not a regex word character ({@code \w}). */
    private static final Pattern NON_WORD = Pattern.compile("[^\\w]");

    /** Matches a single ASCII letter; used with {@code find()} so line breaks do not matter. */
    private static final Pattern ASCII_LETTER = Pattern.compile("[A-Za-z]");

    /** Longest text (in chars) printed by {@link #dumpChildren(PrintStream, int)}. */
    private static final int MAX_DUMP_TEXT = 256;

    /** Marker appended to text that was truncated for the dump. */
    private static final String ELLIPSIS = " ...";

    /**
     * Score produced by {@link #calcLineHeightProbability} and then scaled by
     * {@link #categorizeLine}; {@code 0.0} until one of those has run.
     */
    double lineIsRegularProbability = 0.0;

    /**
     * Creates a line and registers {@code tp} as its first child.
     *
     * @param id identifier passed to the {@link Component} constructor
     * @param tp component added as a child via {@code addChild}
     */
    public TextLine(long id, Component tp) {
        super(id);
        addChild(tp);
    }

    /**
     * @return the size of the inherited {@code pieces} collection
     */
    public int size() {
        return pieces.size();
    }

    /**
     * @return the same value as {@link #getText()}
     */
    @Override
    public String toString() {
        return this.getText();
    }

    /**
     * Concatenates the text of every {@link TextPiece} child in child order. Children of any
     * other type are skipped. A single space is inserted between two consecutive pieces
     * unless {@code previous.isNextTo(current)} returns {@code true}.
     *
     * @return the assembled text; empty when there are no {@link TextPiece} children
     */
    public String getText() {
        StringBuilder sb = new StringBuilder();
        TextPiece prev = null;
        for (Component component : this.getChildren()) {
            if (!(component instanceof TextPiece)) {
                continue;
            }
            TextPiece piece = (TextPiece) component;
            if (prev != null && !prev.isNextTo(piece)) {
                sb.append(' ');
            }
            sb.append(piece.getText());
            prev = piece;
        }
        return sb.toString();
    }

    /**
     * Number of word characters ({@code \w}) in the text divided by {@link #width()}.
     *
     * @return the ratio, or {@code 1.0} when the width is zero
     */
    @Override
    public double density() {
        if (width() == 0) {
            return 1.0;
        }
        String wordChars = NON_WORD.matcher(this.getText()).replaceAll("");
        return (1.0) * (wordChars.length()) / this.width();
    }

    /**
     * Average child height, weighted by the length of each child's text.
     *
     * @return the weighted average, or {@code super.height()} when the children carry no text
     */
    @Override
    public double height() {
        double weightedHeight = 0.0;
        double totalLength = 0.0;
        for (Component c : getChildren()) {
            int len = c.getText().length();
            weightedHeight += c.height() * len;
            totalLength += len;
        }
        if (totalLength == 0.0) {
            return super.height();
        }
        return weightedHeight / totalLength;
    }

    /**
     * Computes {@link #lineIsRegularProbability} from the supplied frequency tables.
     *
     * <p>For each {@link TextPiece} child the font value (looked up by font name) is multiplied
     * by the size value (looked up by the rounded piece height); if only one of the two is
     * present, that one is used alone. Each piece's value is weighted by its text length, the
     * sum is divided by {@code textLength}, and the result is scaled by how close this line's
     * {@link #height()} is to {@code highestFreqSize} relative to the font-size range.
     *
     * @param highestFreqSize      size the line height is compared against
     * @param minFontSize          lower end of the font-size range
     * @param maxFontSize          upper end of the font-size range
     * @param normalizedFontCounts currently unused (the per-font size lookup that read it is
     *                             disabled)
     * @param normalizedFonts      value per font name
     * @param normalizedSizes      value per rounded font size
     * @param textLength           divisor for the weighted sum; {@code 0} yields a score of 0
     */
    protected void calcLineHeightProbability(double highestFreqSize, double minFontSize, double maxFontSize,
            Map<String, Map<Integer, Double>> normalizedFontCounts, Map<String, Double> normalizedFonts,
            Map<Integer, Double> normalizedSizes, double textLength) {
        if (textLength == 0) {
            lineIsRegularProbability = 0;
            return;
        }
        // Primitive accumulator: avoids boxing a new Double on every iteration.
        double probability = 0.0;
        for (Component c : getChildren()) {
            if (!(c instanceof TextPiece)) {
                continue;
            }
            TextPiece tp = (TextPiece) c;
            Double pieceProb = normalizedFonts.get(tp.getFontName());
            Double sizeProb = normalizedSizes.get((int) Math.round(tp.getHeight()));
            if (sizeProb != null) {
                pieceProb = (pieceProb != null) ? Double.valueOf(pieceProb * sizeProb) : sizeProb;
            }
            if (pieceProb != null) {
                probability += pieceProb * tp.getText().length();
            }
        }
        probability /= textLength;

        // With a single font size the range is zero; dividing by it would yield NaN or
        // -Infinity. There is no size variation to penalize, so apply no height penalty.
        double fontRange = maxFontSize - minFontSize;
        double heightDiff;
        if (fontRange == 0.0) {
            heightDiff = 1.0;
        } else {
            heightDiff = 1 - Math.abs((this.height() - highestFreqSize) / fontRange);
        }
        lineIsRegularProbability = probability * heightDiff;
    }

    /**
     * Computes the line's regularity score and then reduces it for lines that are entirely
     * upper case (factor 0.90) and for lines without any ASCII letter (factor 0.8).
     *
     * <p>The parameters {@code avgLeft}, {@code avgRight}, {@code avgWidth},
     * {@code charDensity} and {@code linesPerPage} are accepted but not read by this method.
     * A heading rule based on the score and the rounded line height is disabled.
     *
     * @param highestFreqSize      forwarded to {@link #calcLineHeightProbability}
     * @param minFontSize          forwarded to {@link #calcLineHeightProbability}
     * @param maxFontSize          forwarded to {@link #calcLineHeightProbability}
     * @param normalizedFontCounts forwarded to {@link #calcLineHeightProbability}
     * @param normalizedFonts      forwarded to {@link #calcLineHeightProbability}
     * @param normalizedSizes      forwarded to {@link #calcLineHeightProbability}
     * @param avgLeft              unused
     * @param avgRight             unused
     * @param avgWidth             unused
     * @param charDensity          unused
     * @param linesPerPage         unused
     */
    public void categorizeLine(double highestFreqSize, double minFontSize, double maxFontSize,
            Map<String, Map<Integer, Double>> normalizedFontCounts, Map<String, Double> normalizedFonts,
            Map<Integer, Double> normalizedSizes, double avgLeft, double avgRight, double avgWidth,
            double charDensity, double linesPerPage) {
        String text = getText();

        calcLineHeightProbability(highestFreqSize, minFontSize, maxFontSize, normalizedFontCounts, normalizedFonts,
                normalizedSizes, text.length());

        // Locale.ROOT keeps the all-caps test independent of the JVM's default locale.
        if (text.toUpperCase(Locale.ROOT).equals(text)) {
            lineIsRegularProbability *= 0.90;
        }

        // find() rather than matches(".*[A-Za-z].*"): '.' does not match line terminators,
        // so the old form reported "no letters" for any text containing a newline.
        if (!ASCII_LETTER.matcher(text).find()) {
            // headings need some text in there.
            lineIsRegularProbability *= 0.8;
        }
    }

    /**
     * @return the score last computed by {@link #categorizeLine} or
     *         {@link #calcLineHeightProbability}
     */
    public double getLineIsRegularProbability() {
        return lineIsRegularProbability;
    }

    /**
     * Prints a one-line debug summary of this line: indentation by {@code level}, class name,
     * heading marker, rectangle, histogram, height/width/density/score, and the text
     * (truncated to 256 characters). Recursive dumping of the children is disabled.
     *
     * @param out   stream to print to
     * @param level nesting depth; each level adds {@code ".."} to the prefix
     */
    @Override
    public void dumpChildren(PrintStream out, int level) {
        StringBuilder sb = new StringBuilder();

        sb.append(StringUtils.repeat("..", level));
        sb.append(getClass().getSimpleName());
        if (isHeading()) {
            sb.append("      (H) ");
        }
        // Pad the label column to a fixed width of 20 characters.
        if (sb.length() < 20) {
            sb.append(StringUtils.repeat(' ', 20 - sb.length()));
        }
        sb.append('\t');

        sb.append(getRectangleDebug()).append("\t");

        out.print(sb.toString() + " " + normHistoGramToString() + String.format(" H:%5.1f W:%6.1f D:%4.2f P:%4.2f",
                height(), width(), density(), getLineIsRegularProbability()) + "\t");

        // Indent continuation lines so embedded newlines stay under the text column.
        String text = getText().replace("\n", "\n" + StringUtils.repeat(' ', 43));

        if (text.length() > MAX_DUMP_TEXT) {
            text = text.substring(0, MAX_DUMP_TEXT - ELLIPSIS.length()) + ELLIPSIS;
        }

        out.println(text);
    }

}