com.fun.util.TesseractUtil.java Source code

Java tutorial

Introduction

Here is the source code for com.fun.util.TesseractUtil.java

Source

/*
 * Copyright 2014 Qunar.com All right reserved. This software is the
 * confidential and proprietary information of Qunar.com ("Confidential
 * Information"). You shall not disclose such Confidential Information and shall
 * use it only in accordance with the terms of the license agreement you entered
 * into with Qunar.com.
 */
package com.fun.util;

import com.google.common.base.Charsets;
import com.google.common.io.Files;
import org.apache.commons.lang3.StringUtils;

import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

/**
 * Utility for running the tesseract OCR command-line tool on image files.
 *
 * @author: reeboo
 * @since: 2016-08-16 19:25
 */
public class TesseractUtil {

    /** Scale factor used by {@link #recognize(File)}. */
    private static final int DEFAULT_ENLARGE_TIMES = 3;

    /**
     * Recognizes the text in an image file after enlarging it by the default
     * factor of 3.
     *
     * @param imageFile image to run OCR on
     * @return the first line of recognized text with whitespace and commas
     *         removed, or an empty string if nothing was recognized
     * @throws java.io.IOException if the image cannot be read, tesseract cannot
     *         be run, or its output cannot be read
     */
    public static String recognize(File imageFile) throws IOException {
        return recognize(imageFile, DEFAULT_ENLARGE_TIMES, true);
    }

    /**
     * Downloads the image behind {@code url} into a temporary file and
     * recognizes the text in it. The temporary file is removed afterwards,
     * whether or not recognition succeeds.
     *
     * @param url location of the image
     * @return the recognized text, see {@link #recognize(File)}
     * @throws IOException if the download or the recognition fails
     */
    public static String recognize(URL url) throws IOException {
        File tmpFile = getFileFromUrl(url);
        try {
            return recognize(tmpFile);
        } finally {
            // Clean up even when recognition throws.
            tmpFile.delete();
        }
    }

    /**
     * Recognizes the text in an image file by invoking the {@code tesseract}
     * command, optionally enlarging the image first.
     *
     * @param imageFile    image to run OCR on
     * @param enlargeTimes factor by which width and height are multiplied;
     *                     only used when {@code isEnlarge} is {@code true}
     * @param isEnlarge    {@code true} to run OCR on an enlarged copy,
     *                     {@code false} to run it on {@code imageFile} as is
     * @return the first line of tesseract's output with whitespace and commas
     *         removed, or an empty string if that line is missing or blank
     * @throws IOException if the image cannot be read, tesseract cannot be
     *         started, the wait for it is interrupted, or it produced no
     *         output file
     */
    public static String recognize(File imageFile, int enlargeTimes, boolean isEnlarge) throws IOException {
        // The temp file doubles as a unique name stem for tesseract's output.
        File tmpScaledImage = File.createTempFile("tesseract-ocr-scaled", null);
        File tmpOutputBase = new File(tmpScaledImage.getAbsolutePath() + ".out");
        // The result is read from "<outputbase>.txt".
        File tmpOutputText = new File(tmpScaledImage.getAbsolutePath() + ".out.txt");

        try {
            // Without enlarging, OCR must run on the caller's image; the temp
            // file is empty in that case.
            File ocrInput = imageFile;
            if (isEnlarge) {
                scaled(imageFile, enlargeTimes, tmpScaledImage);
                ocrInput = tmpScaledImage;
            }

            int exitCode = runTesseract(ocrInput, tmpOutputBase);
            if (!tmpOutputText.isFile()) {
                throw new java.io.FileNotFoundException("tesseract produced no output for "
                        + ocrInput.getAbsolutePath() + " (exit code " + exitCode + ")");
            }

            String text = Files.readFirstLine(tmpOutputText, Charsets.UTF_8);
            if (StringUtils.isNotBlank(text)) {
                // Strip every whitespace character and comma from the result.
                return text.trim().replaceAll("\\s|,", "");
            }
            return StringUtils.EMPTY;
        } finally {
            // Remove temporary artifacts on every path, including failures.
            tmpScaledImage.delete();
            tmpOutputText.delete();
        }
    }

    /**
     * Runs {@code tesseract} on {@code input} and waits for it to finish.
     *
     * @param input      image passed to tesseract
     * @param outputBase output base name passed to tesseract
     * @return the exit code of the tesseract process
     * @throws IOException if the process cannot be started, its output cannot
     *         be read, or the wait is interrupted
     */
    private static int runTesseract(File input, File outputBase) throws IOException {
        // NOTE(review): some tesseract releases spell the page segmentation
        // option "--psm"; confirm against the installed version.
        ProcessBuilder builder = new ProcessBuilder("tesseract", // command
                input.getAbsolutePath(), // imagename
                outputBase.getAbsolutePath(), // outputbase
                "-psm", "8"); // pagesegmode, treat the image as a single word
        builder.redirectErrorStream(true);

        Process process = builder.start();
        try {
            // Drain the merged stdout/stderr so the child cannot block on a
            // full pipe; the content itself is not needed.
            try (InputStream console = process.getInputStream()) {
                byte[] sink = new byte[1024];
                while (console.read(sink) != -1) {
                    // discard
                }
            }
            return process.waitFor();
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while waiting for tesseract", e);
        } finally {
            process.destroy();
        }
    }

    /**
     * Writes a copy of {@code imageFile}, enlarged {@code times} times in both
     * dimensions with bicubic interpolation, to {@code targetFile} as PNG.
     *
     * @param imageFile  source image
     * @param times      positive factor applied to width and height
     * @param targetFile file that receives the enlarged PNG
     * @throws IOException if the source cannot be decoded or the target cannot
     *         be written
     * @throws IllegalArgumentException if {@code times} is not positive
     */
    private static void scaled(File imageFile, int times, File targetFile) throws IOException {
        if (times <= 0) {
            throw new IllegalArgumentException("times must be positive: " + times);
        }
        BufferedImage image = ImageIO.read(imageFile);
        if (image == null) {
            // ImageIO.read returns null when no registered reader can decode the file.
            throw new IOException("Unsupported or unreadable image: " + imageFile);
        }

        int targetWidth = image.getWidth() * times;
        int targetHeight = image.getHeight() * times;
        int type = (image.getTransparency() == Transparency.OPAQUE) ? BufferedImage.TYPE_INT_RGB
                : BufferedImage.TYPE_INT_ARGB;
        BufferedImage tmp = new BufferedImage(targetWidth, targetHeight, type);

        Graphics2D g2 = tmp.createGraphics();
        try {
            g2.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
            g2.drawImage(image, 0, 0, targetWidth, targetHeight, null);
        } finally {
            g2.dispose();
        }

        if (!ImageIO.write(tmp, "png", targetFile)) {
            throw new IOException("No PNG writer available for " + targetFile);
        }
    }

    /**
     * Downloads the content behind {@code url} into a new temporary file.
     * The caller is responsible for deleting the returned file.
     *
     * @param url location to download
     * @return temporary file holding the downloaded bytes
     * @throws IOException if the connection or the copy fails; the temporary
     *         file is deleted in that case
     */
    private static File getFileFromUrl(URL url) throws IOException {
        File tmpImage = File.createTempFile("tesseract-ocr-download", null);
        try {
            try (InputStream in = url.openConnection().getInputStream();
                 FileOutputStream fos = new FileOutputStream(tmpImage)) {
                byte[] buf = new byte[1024];
                int len;
                while ((len = in.read(buf)) != -1) {
                    fos.write(buf, 0, len);
                }
                fos.flush();
            }
        } catch (IOException | RuntimeException e) {
            // Do not leave a partial download behind.
            tmpImage.delete();
            throw e;
        }
        return tmpImage;
    }

}