com.mitzuli.core.ocr.OcrPreprocessor.java Source code

Java tutorial

Introduction

Here is the source code for com.mitzuli.core.ocr.OcrPreprocessor.java

Source

/*
 * Copyright (C) 2014 Mikel Artetxe <artetxem@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

package com.mitzuli.core.ocr;

import java.io.File;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.opencv.core.Core;
import org.opencv.core.CvType;
import org.opencv.core.Mat;
import org.opencv.core.MatOfPoint;
import org.opencv.core.Rect;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.imgproc.Imgproc;

import com.googlecode.leptonica.android.Dewarp;
import com.googlecode.leptonica.android.Pix;
import com.mitzuli.Image;

public class OcrPreprocessor {

    // Scalar constants
    private static final Scalar WHITE = new Scalar(255);
    private static final Scalar BLACK = new Scalar(0);
    private static final Scalar BLUE = new Scalar(0, 0, 255);
    private static final Scalar GREEN = new Scalar(0, 255, 0);
    private static final Scalar PURPLE = new Scalar(128, 0, 128);
    private static final Scalar DARK_RED = new Scalar(128, 0, 0);
    private static final Scalar DARK_GREEN = new Scalar(0, 128, 0);

    // Structuring element constants
    private static final Mat KERNEL_3X3 = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(3, 3));
    private static final Mat KERNEL_15X15 = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(15, 15));
    private static final Mat KERNEL_30X30 = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(30, 30));

    // Debugging stuff
    private static final boolean DEBUG = false;
    private static final SimpleDateFormat TIMESTAMP = new SimpleDateFormat("yyyy_MM_dd_HH_mm_ss");

    /**
     * Binarizes and cleans the input image for OCR.
     *
     * @param input the input image, which is recycled by this method, so the caller should make a defensive copy of it if necessary.
     * @return the preprocessed image.
     */
    public static Image preprocess(final Image input) {
        return preprocess(input, null);
    }

    /**
     * Binarizes and cleans the input image for OCR, saving debugging images in the given directory.
     *
     * @param input the input image, which is recycled by this method, so the caller should make a defensive copy of it if necessary.
     * @param debugDir the directory to write the debugging images to, or null to disable debugging.
     * @return the preprocessed image.
     */
    static Image preprocess(final Image input, final File debugDir) {
        // TODO Temporary workaround to allow to manually enable debugging (the global final variable should be used)
        boolean DEBUG = debugDir != null;

        // Initialization
        final Mat mat = input.toGrayscaleMat();
        final Mat debugMat = DEBUG ? input.toRgbMat() : null;
        input.recycle();
        final Mat aux = new Mat(mat.size(), CvType.CV_8UC1);
        final Mat binary = new Mat(mat.size(), CvType.CV_8UC1);
        if (DEBUG)
            Image.fromMat(mat).write(new File(debugDir, "1_input.jpg"));

        // Binarize the input image in mat through adaptive Gaussian thresholding
        Imgproc.adaptiveThreshold(mat, binary, 255, Imgproc.ADAPTIVE_THRESH_GAUSSIAN_C, Imgproc.THRESH_BINARY, 51,
                13);
        // Imgproc.adaptiveThreshold(mat, binary, 255, Imgproc.ADAPTIVE_THRESH_GAUSSIAN_C, Imgproc.THRESH_BINARY, 31, 7);

        // Edge detection
        Imgproc.morphologyEx(mat, mat, Imgproc.MORPH_OPEN, KERNEL_3X3); // Open
        Imgproc.morphologyEx(mat, aux, Imgproc.MORPH_CLOSE, KERNEL_3X3); // Close
        Core.addWeighted(mat, 0.5, aux, 0.5, 0, mat); // Average
        Imgproc.morphologyEx(mat, mat, Imgproc.MORPH_GRADIENT, KERNEL_3X3); // Gradient
        Imgproc.threshold(mat, mat, 0, 255, Imgproc.THRESH_BINARY | Imgproc.THRESH_OTSU); // Edge map
        if (DEBUG)
            Image.fromMat(mat).write(new File(debugDir, "2_edges.jpg"));

        // Extract word level connected-components from the dilated edge map
        Imgproc.dilate(mat, mat, KERNEL_3X3);
        if (DEBUG)
            Image.fromMat(mat).write(new File(debugDir, "3_dilated_edges.jpg"));
        final List<MatOfPoint> wordCCs = new ArrayList<MatOfPoint>();
        Imgproc.findContours(mat, wordCCs, new Mat(), Imgproc.RETR_EXTERNAL, Imgproc.CHAIN_APPROX_SIMPLE);

        // Filter word level connected-components individually and calculate their average attributes
        final List<MatOfPoint> individuallyFilteredWordCCs = new ArrayList<MatOfPoint>();
        final List<MatOfPoint> removedWordCCs = new ArrayList<MatOfPoint>();
        double avgWidth = 0, avgHeight = 0, avgArea = 0;
        for (MatOfPoint cc : wordCCs) {
            final Rect boundingBox = Imgproc.boundingRect(cc);
            if (boundingBox.height >= 6 // bounding box height >= 6
                    && boundingBox.area() >= 50 // bounding box area >= 50
                    && (double) boundingBox.width / (double) boundingBox.height >= 0.25 // bounding box aspect ratio >= 1:4
                    && boundingBox.width <= 0.75 * mat.width() // bounding box width <= 0.75 image width
                    && boundingBox.height <= 0.75 * mat.height()) // bounding box height <= 0.75 image height
            {
                individuallyFilteredWordCCs.add(cc);
                avgWidth += boundingBox.width;
                avgHeight += boundingBox.height;
                avgArea += boundingBox.area();
            } else {
                if (DEBUG)
                    removedWordCCs.add(cc);
            }
        }
        wordCCs.clear();
        avgWidth /= individuallyFilteredWordCCs.size();
        avgHeight /= individuallyFilteredWordCCs.size();
        avgArea /= individuallyFilteredWordCCs.size();
        if (DEBUG) {
            Imgproc.drawContours(debugMat, removedWordCCs, -1, BLUE, -1);
            removedWordCCs.clear();
        }

        // Filter word level connected-components in relation to their average attributes
        final List<MatOfPoint> filteredWordCCs = new ArrayList<MatOfPoint>();
        for (MatOfPoint cc : individuallyFilteredWordCCs) {
            final Rect boundingBox = Imgproc.boundingRect(cc);
            if (boundingBox.width >= 0.125 * avgWidth // bounding box width >= 0.125 average width
                    && boundingBox.width <= 8 * avgWidth // bounding box width <= 8 average width
                    && boundingBox.height >= 0.25 * avgHeight // bounding box height >= 0.25 average height
                    && boundingBox.height <= 4 * avgHeight) // bounding box height <= 4 average height
            {
                filteredWordCCs.add(cc);
            } else {
                if (DEBUG)
                    removedWordCCs.add(cc);
            }
        }
        individuallyFilteredWordCCs.clear();
        if (DEBUG) {
            Imgproc.drawContours(debugMat, filteredWordCCs, -1, GREEN, -1);
            Imgproc.drawContours(debugMat, removedWordCCs, -1, PURPLE, -1);
            removedWordCCs.clear();
        }

        // Extract paragraph level connected-components
        mat.setTo(BLACK);
        Imgproc.drawContours(mat, filteredWordCCs, -1, WHITE, -1);
        final List<MatOfPoint> paragraphCCs = new ArrayList<MatOfPoint>();
        Imgproc.morphologyEx(mat, aux, Imgproc.MORPH_CLOSE, KERNEL_30X30);
        Imgproc.findContours(aux, paragraphCCs, new Mat(), Imgproc.RETR_EXTERNAL, Imgproc.CHAIN_APPROX_SIMPLE);

        // Filter paragraph level connected-components according to the word level connected-components inside
        final List<MatOfPoint> textCCs = new ArrayList<MatOfPoint>();
        for (MatOfPoint paragraphCC : paragraphCCs) {
            final List<MatOfPoint> wordCCsInParagraphCC = new ArrayList<MatOfPoint>();
            aux.setTo(BLACK);
            Imgproc.drawContours(aux, Collections.singletonList(paragraphCC), -1, WHITE, -1);
            Core.bitwise_and(mat, aux, aux);
            Imgproc.findContours(aux, wordCCsInParagraphCC, new Mat(), Imgproc.RETR_EXTERNAL,
                    Imgproc.CHAIN_APPROX_SIMPLE);
            final Rect boundingBox = Imgproc.boundingRect(paragraphCC);
            final double center = mat.size().width / 2;
            final double distToCenter = center > boundingBox.x + boundingBox.width
                    ? center - boundingBox.x - boundingBox.width
                    : center < boundingBox.x ? boundingBox.x - center : 0.0;
            if (DEBUG) {
                System.err.println("****************************************");
                System.err.println("\tArea:                " + boundingBox.area());
                System.err.println("\tDistance to center:  " + distToCenter);
                System.err.println("\tCCs inside:          " + wordCCsInParagraphCC.size());
            }
            if ((wordCCsInParagraphCC.size() >= 10 || wordCCsInParagraphCC.size() >= 0.3 * filteredWordCCs.size())
                    && mat.size().width / distToCenter >= 4) {
                textCCs.addAll(wordCCsInParagraphCC);
                if (DEBUG) {
                    System.err.println("\tText:                YES");
                    Imgproc.drawContours(debugMat, Collections.singletonList(paragraphCC), -1, DARK_GREEN, 5);
                }
            } else {
                if (DEBUG) {
                    System.err.println("\tText:                NO");
                    Imgproc.drawContours(debugMat, Collections.singletonList(paragraphCC), -1, DARK_RED, 5);
                }
            }
        }
        filteredWordCCs.clear();
        paragraphCCs.clear();
        mat.setTo(WHITE);
        Imgproc.drawContours(mat, textCCs, -1, BLACK, -1);
        textCCs.clear();
        if (DEBUG)
            Image.fromMat(debugMat).write(new File(debugDir, "4_filtering.jpg"));

        // Obtain the final text mask from the filtered connected-components
        Imgproc.erode(mat, mat, KERNEL_15X15);
        Imgproc.morphologyEx(mat, mat, Imgproc.MORPH_OPEN, KERNEL_30X30);
        if (DEBUG)
            Image.fromMat(mat).write(new File(debugDir, "5_text_mask.jpg"));

        // Apply the text mask to the binarized image
        if (DEBUG)
            Image.fromMat(binary).write(new File(debugDir, "6_binary.jpg"));
        binary.setTo(WHITE, mat);
        if (DEBUG)
            Image.fromMat(binary).write(new File(debugDir, "7_binary_text.jpg"));

        // Dewarp the text using Leptonica
        Pix pixs = Image.fromMat(binary).toGrayscalePix();
        Pix pixsDewarp = Dewarp.dewarp(pixs, 0, Dewarp.DEFAULT_SAMPLING, 5, true);
        final Image result = Image.fromGrayscalePix(pixsDewarp);
        if (DEBUG)
            result.write(new File(debugDir, "8_dewarp.jpg"));

        // Clean up
        pixs.recycle();
        mat.release();
        aux.release();
        binary.release();
        if (debugMat != null)
            debugMat.release();

        return result;
    }

}