List of usage examples for java.awt.Shape toString()
public String toString()
From source file: com.joliciel.jochre.graphics.SegmenterImpl.java
void removeOversizedShapes(List<Shape> shapes) { LOG.debug("########## removeOversizedShapes #########"); Mean shapeHeightMean = new Mean(); Mean shapeWidthMean = new Mean(); for (Shape shape : shapes) { shapeHeightMean.increment(shape.getHeight()); shapeWidthMean.increment(shape.getWidth()); }/*from w ww.ja v a 2s. com*/ double heightMean = shapeHeightMean.getResult(); double widthMean = shapeWidthMean.getResult(); LOG.debug("heightMean: " + heightMean); LOG.debug("widthMean: " + widthMean); shapeHeightMean = new Mean(); shapeWidthMean = new Mean(); StandardDeviation shapeHeightStdDev = new StandardDeviation(); for (Shape shape : shapes) { if (shape.getHeight() > heightMean && shape.getHeight() < (heightMean * 2.0) && shape.getWidth() > widthMean && shape.getWidth() < (widthMean * 2.0)) { shapeHeightMean.increment(shape.getHeight()); shapeHeightStdDev.increment(shape.getHeight()); shapeWidthMean.increment(shape.getWidth()); } } heightMean = shapeHeightMean.getResult(); widthMean = shapeWidthMean.getResult(); LOG.debug("average shape heightMean: " + heightMean); LOG.debug("average shape widthMean: " + widthMean); double minHeightBigShape = heightMean * 6; double minWidthWideShape = widthMean * 6; double minHeightWideShape = heightMean * 1.5; double minHeightTallShape = heightMean * 2.5; double maxWidthTallShape = widthMean / 2; LOG.debug("minHeightBigShape: " + minHeightBigShape); LOG.debug("minWidthWideShape: " + minWidthWideShape); LOG.debug("minHeightWideShape: " + minHeightWideShape); LOG.debug("minHeightTallShape: " + minHeightTallShape); LOG.debug("maxWidthTallShape: " + maxWidthTallShape); List<Shape> largeShapes = new ArrayList<Shape>(); List<Shape> horizontalRules = new ArrayList<Shape>(); for (Shape shape : shapes) { if (shape.getHeight() > minHeightBigShape) { LOG.debug("Removing " + shape + " (height)"); largeShapes.add(shape); } else if (shape.getWidth() > minWidthWideShape && shape.getHeight() > minHeightWideShape) { // we don't want to remove 
horizontal bars, but we do want to remove other shapes. // why not? I suppose horizontal bars are easily represented as characters? LOG.debug("Removing " + shape + " (width)"); largeShapes.add(shape); } else if (shape.getWidth() > minWidthWideShape) { // ok, we will remove horizontal rules after all LOG.debug("Removing " + shape + " (horizontal rule)"); largeShapes.add(shape); horizontalRules.add(shape); } else if (shape.getWidth() <= maxWidthTallShape && shape.getHeight() > minHeightTallShape) { LOG.debug("Removing " + shape + " (narrow)"); largeShapes.add(shape); } } // Only want to remove enclosed shapes if the large shape isn't a frame/grid // A) first reduce the shape by 5 percent and see it's cardinality reduces vastly (in which case it's a frame) // if so, don't remove enclosed shapes // B) next, detect white rectangles within the shape - if they're big enough, don't remove enclosed shapes LOG.debug("Are large shapes frames or illustrations?"); double maxFrameCardinalityRatio = 0.5; double minFrameWhiteAreaSizeRatio = 0.9; List<Shape> illustrations = new ArrayList<Shape>(largeShapes); for (Shape largeShape : largeShapes) { LOG.debug(largeShape.toString()); int xOrigin = largeShape.getStartingPoint()[0] - largeShape.getLeft(); int yOrigin = largeShape.getStartingPoint()[1] - largeShape.getTop(); Shape dummyShape = graphicsService.getDot(sourceImage, xOrigin, yOrigin); // We want to fill up a mirror of the contiguous pixels within this shape, // which is what we'll use for further analysis to know // if it's a frame or not. 
WritableImageGrid mirror = graphicsService.getEmptyMirror(largeShape); this.findContiguousPixels(largeShape, mirror, dummyShape, xOrigin, yOrigin, sourceImage.getSeparationThreshold()); int adjustedLeft = (int) Math.round((double) mirror.getWidth() * 0.05); int adjustedRight = (int) Math.round((double) mirror.getWidth() * 0.95); int adjustedTop = (int) Math.round((double) mirror.getHeight() * 0.05); int adjustedBottom = (int) Math.round((double) mirror.getHeight() * 0.95); int cardinality = 0; int innerCardinality = 0; for (int x = 0; x < mirror.getWidth(); x++) { for (int y = 0; y < mirror.getHeight(); y++) { if (mirror.getPixel(x, y) > 0) { cardinality++; if (x >= adjustedLeft && x <= adjustedRight && y >= adjustedTop && y <= adjustedBottom) innerCardinality++; } } } LOG.debug("cardinality: " + cardinality); LOG.debug("innerCardinality: " + innerCardinality); double ratio = (double) innerCardinality / (double) cardinality; LOG.debug("ratio: " + ratio); if (ratio <= maxFrameCardinalityRatio) { LOG.debug("maxFrameCardinalityRatio: " + maxFrameCardinalityRatio); LOG.debug("Frame by cardinality! Removing from illustrations"); illustrations.remove(largeShape); } else { // Now, it could still be a grid // to find this out we need to detect white areas inside the shape. 
WhiteAreaFinder whiteAreaFinder = new WhiteAreaFinder(); double minWhiteAreaWidth = widthMean * 10; double minWhiteAreaHeight = heightMean * 4; List<Rectangle> whiteAreas = whiteAreaFinder.getWhiteAreas(mirror, 0, 0, 0, mirror.getWidth() - 1, mirror.getHeight() - 1, minWhiteAreaWidth, minWhiteAreaHeight); int whiteAreaSize = 0; for (Rectangle whiteArea : whiteAreas) { whiteAreaSize += (whiteArea.getWidth() * whiteArea.getHeight()); } int totalSize = mirror.getWidth() * mirror.getHeight(); LOG.debug("whiteAreaSize: " + whiteAreaSize); LOG.debug("totalSize: " + totalSize); double sizeRatio = (double) whiteAreaSize / (double) totalSize; LOG.debug("sizeRatio: " + sizeRatio); if (sizeRatio >= minFrameWhiteAreaSizeRatio) { LOG.debug("minFrameWhiteAreaSizeRatio: " + minFrameWhiteAreaSizeRatio); LOG.debug("Frame by white area size! Removing from illustrations"); illustrations.remove(largeShape); } } } for (Shape largeShape : illustrations) { // Add this to large shapes if it's not a "frame" // large shapes are used for paragraph detection sourceImage.getLargeShapes().add(largeShape); } // remove shapes that are enclosed inside illustrations List<Shape> enclosedShapesToDelete = new ArrayList<Shape>(); int extension = 5; for (Shape shape : shapes) { for (Shape shapeToDelete : illustrations) { if (shape.getLeft() >= shapeToDelete.getLeft() - extension && shape.getRight() <= shapeToDelete.getRight() + extension && shape.getTop() >= shapeToDelete.getTop() - extension && shape.getBottom() <= shapeToDelete.getBottom() + extension) { LOG.debug("Enclosed shape: " + shape); LOG.debug(" enclosed by " + shapeToDelete); enclosedShapesToDelete.add(shape); } } } shapes.removeAll(largeShapes); shapes.removeAll(enclosedShapesToDelete); // remove shapes that are practically touching horizontal rules (probably segments of the rule that got split) extension = 3; List<Shape> listToTestAgainst = horizontalRules; for (int i = 0; i < 3; i++) { List<Shape> horizontalRuleSegments = new 
ArrayList<Shape>(); for (Shape horizontalRule : listToTestAgainst) { for (Shape shape : shapes) { if ((shape.getLeft() <= horizontalRule.getRight() + extension || shape.getRight() >= horizontalRule.getLeft() - extension) && shape.getTop() >= horizontalRule.getTop() - extension && shape.getBottom() <= horizontalRule.getBottom() + extension) { LOG.debug("Horizontal rule segment: " + shape); LOG.debug(" touching " + horizontalRule); horizontalRuleSegments.add(shape); enclosedShapesToDelete.add(shape); } } } shapes.removeAll(horizontalRuleSegments); listToTestAgainst = horizontalRuleSegments; if (listToTestAgainst.size() == 0) break; } }
From source file: com.joliciel.jochre.graphics.SegmenterImpl.java
/** * Split a shape into 2 or more shapes, in the case where two letters have been mistakenly joined together. * @param shape the shape to split//w w w .j ava 2s . c o m * @param sourceImage the source image containing this shape * @param maxBridgeWidth maximum width of a bridge between the two letters (measured vertically) * @param minLetterWeight minimum pixel count for a shape portion to be counted a separate letter * @param maxOverlap maximum vertical overlap (in pixels) between a right-hand and left-hand shape to be counted as separate letters * @return List of Shape, where the list is empty if no split was performed */ List<Shape> splitShape(Shape shape, SourceImage sourceImage, int maxBridgeWidth, int minLetterWeight, int maxOverlap) { LOG.debug("Trying to split shape: " + shape.toString()); LOG.debug("maxBridgeWidth " + maxBridgeWidth); LOG.debug("minLetterWeight " + minLetterWeight); LOG.debug("maxOverlap " + maxOverlap); Collection<BridgeCandidate> bridgeCandidates = ((ShapeInternal) shape).getBridgeCandidates(maxBridgeWidth); if (bridgeCandidates.size() > 0) { // (B) weight of right shape & weight of left shape > a certain threshold // (C) little overlap right boundary of left shape, left boundary of right shape // check if the right and left weight of each bridge candidate is sufficiently big LOG.debug("minLetterWeight: " + minLetterWeight); LOG.debug("maxOverlap: " + maxOverlap); LOG.debug("Eliminating candidates based on pixel count and overlap"); Set<BridgeCandidate> candidatesToEliminate = new HashSet<BridgeCandidate>(); for (BridgeCandidate candidate : bridgeCandidates) { LOG.debug("Bridge candidate: leftPixels = " + candidate.leftPixels + ", rightPixels = " + candidate.rightPixels); LOG.debug("leftShapeRightBoundary = " + candidate.leftShapeRightBoundary + ", rightShapeLeftBoundary = " + candidate.rightShapeLeftBoundary); boolean isBridge = true; if (candidate.rightPixels < minLetterWeight || candidate.leftPixels < minLetterWeight) isBridge = 
false; if (candidate.leftShapeRightBoundary - candidate.rightShapeLeftBoundary > maxOverlap) isBridge = false; if (!isBridge) candidatesToEliminate.add(candidate); } bridgeCandidates.removeAll(candidatesToEliminate); LOG.debug("Remaining bridge candidate size: " + bridgeCandidates.size()); } // have candidates List<Shape> shapes = new ArrayList<Shape>(); // apply any splits detected if (bridgeCandidates.size() > 0) { int[] startingPoint = shape.getStartingPoint(); int startX = startingPoint[0]; int startY = startingPoint[1]; for (BridgeCandidate bridge : bridgeCandidates) { bridge.leftGroup.touched = false; bridge.rightGroup.touched = false; } // perform split for (BridgeCandidate bridge : bridgeCandidates) { Shape leftShape = graphicsService.getDot(sourceImage, startX, startY); leftShape.setLeft(shape.getRight()); leftShape.setRight(shape.getLeft()); leftShape.setTop(shape.getBottom()); leftShape.setBottom(shape.getTop()); Shape rightShape = graphicsService.getDot(sourceImage, startX, startY); rightShape.setLeft(shape.getRight()); rightShape.setRight(shape.getLeft()); rightShape.setTop(shape.getBottom()); rightShape.setBottom(shape.getTop()); Stack<VerticalLineGroup> groupStack = new Stack<VerticalLineGroup>(); groupStack.push(bridge.leftGroup); while (!groupStack.isEmpty()) { VerticalLineGroup lineGroup = groupStack.pop(); if (lineGroup.touched) continue; lineGroup.touched = true; LOG.debug("Touching group, pixelCount: " + lineGroup.pixelCount + ", leftBoundary: " + lineGroup.leftBoundary + ", rightBoundary: " + lineGroup.rightBoundary); if (shape.getLeft() + lineGroup.leftBoundary < leftShape.getLeft()) leftShape.setLeft(shape.getLeft() + lineGroup.leftBoundary); if (shape.getLeft() + lineGroup.rightBoundary > leftShape.getRight()) leftShape.setRight(shape.getLeft() + lineGroup.rightBoundary); if (shape.getTop() + lineGroup.topBoundary < leftShape.getTop()) leftShape.setTop(shape.getTop() + lineGroup.topBoundary); if (shape.getTop() + lineGroup.bottomBoundary > 
leftShape.getBottom()) leftShape.setBottom(shape.getTop() + lineGroup.bottomBoundary); for (BridgeCandidate leftCandidate : lineGroup.leftCandidates) { if (!bridge.equals(leftCandidate) && !(bridgeCandidates.contains(leftCandidate))) { groupStack.push(leftCandidate.leftGroup); } } for (BridgeCandidate rightCandidate : lineGroup.rightCandidates) { if (!bridge.equals(rightCandidate) && !(bridgeCandidates.contains(rightCandidate))) { groupStack.push(rightCandidate.rightGroup); } } } // next left group groupStack.push(bridge.rightGroup); while (!groupStack.isEmpty()) { VerticalLineGroup lineGroup = groupStack.pop(); if (lineGroup.touched) continue; lineGroup.touched = true; LOG.debug("Touching group, pixelCount: " + lineGroup.pixelCount + ", leftBoundary: " + lineGroup.leftBoundary + ", rightBoundary: " + lineGroup.rightBoundary); if (shape.getLeft() + lineGroup.leftBoundary < rightShape.getLeft()) rightShape.setLeft(shape.getLeft() + lineGroup.leftBoundary); if (shape.getLeft() + lineGroup.rightBoundary > rightShape.getRight()) rightShape.setRight(shape.getLeft() + lineGroup.rightBoundary); if (shape.getTop() + lineGroup.topBoundary < rightShape.getTop()) rightShape.setTop(shape.getTop() + lineGroup.topBoundary); if (shape.getTop() + lineGroup.bottomBoundary > rightShape.getBottom()) rightShape.setBottom(shape.getTop() + lineGroup.bottomBoundary); for (BridgeCandidate leftCandidate : lineGroup.leftCandidates) { if (!bridge.equals(leftCandidate) && !(bridgeCandidates.contains(leftCandidate))) { groupStack.push(leftCandidate.leftGroup); } } for (BridgeCandidate rightCandidate : lineGroup.rightCandidates) { if (!bridge.equals(rightCandidate) && !(bridgeCandidates.contains(rightCandidate))) { groupStack.push(rightCandidate.rightGroup); } } } // next right group if (leftShape.getWidth() > 0) { LOG.debug("Adding left split: " + leftShape); shapes.add(leftShape); } if (rightShape.getWidth() > 0) { LOG.debug("Adding right split: " + rightShape); shapes.add(rightShape); } } // 
next bridge } // do we have any bridges? // TODO: we need to join split shapes back together when more than 1 split is applied // and the shape in the middle is too small on its own (< minPixelCount) return shapes; }
From source file: com.joliciel.jochre.graphics.RowOfShapesImpl.java
/** * Assign guidelines for a certain subset of shapes, and return the x-height. * @param startShape/* w w w. j a v a2 s .c om*/ * @param endShape * @return */ int assignGuideLines(List<GroupOfShapes> groupsToAssign) { LOG.debug("assignGuideLines internal"); double meanHorizontalSlope = this.getContainer().getMeanHorizontalSlope(); // the base-line and mean-line will be at a fixed distance away from the midpoint // the question is, which distance! // To find this out, we count number of black pixels on each row above this line // And then start analysing from the top and the bottom until the number drops off sharply // The notion of "groupsToAssign" is used to only assign guidelines // to a subset of the groups on the line // when the line contains two different font sizes List<Shape> shapes = new ArrayList<Shape>(); if (groupsToAssign != null) { for (GroupOfShapes group : groupsToAssign) { shapes.addAll(group.getShapes()); } } else { shapes = this.getShapes(); } int i = 0; DescriptiveStatistics shapeWidthStats = new DescriptiveStatistics(); DescriptiveStatistics shapeHeightStats = new DescriptiveStatistics(); for (Shape shape : this.getShapes()) { shapeWidthStats.addValue(shape.getWidth()); shapeHeightStats.addValue(shape.getHeight()); } double minWidth = shapeWidthStats.getPercentile(25); double maxWidth = shapeWidthStats.getPercentile(75); double minHeight = shapeHeightStats.getPercentile(45); double maxHeight = shapeHeightStats.getPercentile(75); double rowMidPointX = (double) (this.getLeft() + this.getRight()) / 2.0; // calculating the Y midpoint by the shapes in the row, instead of by the top & bottom of row Mean rowMidPointYMean = new Mean(); for (Shape shape : this.getShapes()) { // only add points whose shape is of "average" width and height (to leave out commas, etc.) 
if (shape.getWidth() >= minWidth && shape.getWidth() <= maxWidth && shape.getHeight() >= minHeight && shape.getHeight() <= maxHeight) { rowMidPointYMean.increment((double) (shape.getBottom() + shape.getTop()) / 2.0); } } double rowMidPointY = (double) (this.getTop() + this.getBottom()) / 2.0; if (rowMidPointYMean.getN() > 0) rowMidPointY = rowMidPointYMean.getResult(); LOG.debug("rowMidPointX: " + rowMidPointX); LOG.debug("rowMidPointY: " + rowMidPointY); // figure out where the top-most shape starts and the bottom-most shape ends, relative to the y midline int minTop = Integer.MAX_VALUE; int maxBottom = Integer.MIN_VALUE; List<Integer> rowYMidPoints = new ArrayList<Integer>(shapes.size()); for (Shape shape : shapes) { double shapeMidPointX = (double) (shape.getLeft() + shape.getRight()) / 2.0; int shapeMidPointY = (int) Math .round(rowMidPointY + (meanHorizontalSlope * (shapeMidPointX - rowMidPointX))); rowYMidPoints.add(shapeMidPointY); int relativeTop = shape.getTop() - shapeMidPointY; int relativeBottom = shape.getBottom() - shapeMidPointY; if (relativeTop < minTop) minTop = relativeTop; if (relativeBottom > maxBottom) maxBottom = relativeBottom; } if (minTop > 0) minTop = 0; if (maxBottom < 0) maxBottom = 0; int yIntervalTop = 0 - minTop; int yIntervalBottom = maxBottom; int yInterval = yIntervalTop + 1 + yIntervalBottom; LOG.debug("yIntervalTop: " + yIntervalTop); LOG.debug("yIntervalBottom: " + yIntervalBottom); LOG.debug("yInterval: " + yInterval); int[] pixelCounts = new int[yInterval]; // Get the pixel count for each row // examining one shape at a time to limit ourselves to the pixels that are // actually considered to be in this row int blackThreshold = this.getContainer().getSeparationThreshold(); int shapeIndex = 0; int shapeCount = 0; for (Shape shape : shapes) { if (shape.getHeight() >= minHeight) { LOG.trace(shape.toString()); shapeCount++; int shapeMidPointY = rowYMidPoints.get(shapeIndex); int zeroLine = shapeMidPointY - yIntervalTop; int 
topIndex = shape.getTop() - zeroLine; for (int x = 0; x < shape.getWidth(); x++) { for (int y = 0; y < shape.getHeight(); y++) { int yIndex = topIndex + y; if (yIndex >= 0 && yIndex < pixelCounts.length && shape.isPixelBlack(x, y, blackThreshold)) { pixelCounts[yIndex]++; } } } } shapeIndex++; } LOG.debug("Got pixels from " + shapeCount + " shapes."); boolean notEnoughShapes = shapeCount < 3; LOG.debug("notEnoughShapes? " + notEnoughShapes); // We start at the top // As soon as we reach a line with more pixels than the mean, we assume this is the mean-line Mean pixelCountMeanTop = new Mean(); StandardDeviation pixelCountStdDevTop = new StandardDeviation(); for (i = 0; i <= yIntervalTop; i++) { pixelCountMeanTop.increment(pixelCounts[i]); pixelCountStdDevTop.increment(pixelCounts[i]); } LOG.debug("Top: pixel count mean: " + pixelCountMeanTop.getResult() + ", std dev: " + pixelCountStdDevTop.getResult()); double threshold = pixelCountMeanTop.getResult() * 1.1; if (notEnoughShapes) { threshold = threshold / 2.0; } double lowerThreshold = threshold / 2.0; LOG.debug("Top threshold: " + threshold); LOG.debug("Top lowerThreshold: " + lowerThreshold); int meanLine = 0; boolean findMeanLine = true; for (i = 0; i <= yIntervalTop; i++) { int pixelCount = pixelCounts[i]; if (findMeanLine && pixelCount > threshold) { meanLine = i; findMeanLine = false; } else if (!findMeanLine && pixelCount < lowerThreshold) { findMeanLine = true; } } // We start at the bottom // As soon as we reach a line with more pixels than the mean, we assume this is the base-line Mean pixelCountMeanBottom = new Mean(); StandardDeviation pixelCountStdDevBottom = new StandardDeviation(); for (i = pixelCounts.length - 1; i >= yIntervalTop; i--) { pixelCountMeanBottom.increment(pixelCounts[i]); pixelCountStdDevBottom.increment(pixelCounts[i]); } LOG.debug("Bottom: pixel count mean: " + pixelCountMeanBottom.getResult() + ", std dev: " + pixelCountStdDevBottom.getResult()); threshold = 
pixelCountMeanBottom.getResult() * 1.1; if (notEnoughShapes) { threshold = threshold / 2.0; } lowerThreshold = threshold / 2.0; LOG.debug("Bottom threshold: " + threshold); LOG.debug("Bottom lowerThreshold: " + lowerThreshold); int baseLine = meanLine; boolean findBaseLine = true; for (i = pixelCounts.length - 1; i >= yIntervalTop; i--) { int pixelCount = pixelCounts[i]; if (findBaseLine && pixelCount > threshold) { baseLine = i; findBaseLine = false; } else if (!findBaseLine && pixelCount < lowerThreshold) { findBaseLine = true; } } for (i = 0; i < yInterval; i++) { int pixelCount = pixelCounts[i]; if (i == meanLine) LOG.trace("======= MEAN LINE " + i + " =========="); LOG.trace("pixel row " + i + ". pixel count " + pixelCount); if (i == baseLine) LOG.trace("======= BASE LINE " + i + " =========="); } // assign base lines and mean lines to each shape shapeIndex = 0; for (Shape shape : shapes) { int shapeMidPointY = rowYMidPoints.get(shapeIndex); int yMeanline = (shapeMidPointY - yIntervalTop) + meanLine; int yBaseline = (shapeMidPointY - yIntervalTop) + baseLine; LOG.trace(shape.toString() + ", meanLine: " + (yMeanline - shape.getTop()) + ", baseLine: " + (yBaseline - shape.getTop())); shape.setBaseLine(yBaseline - shape.getTop()); shape.setMeanLine(yMeanline - shape.getTop()); shapeIndex++; } // next shape int xHeight = baseLine - meanLine; return xHeight; }