com.app.azza.ocr.OcrCaptureActivity.java Source code

Java tutorial

Introduction

Here is the source code for com.app.azza.ocr.OcrCaptureActivity.java.
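
The activity is driven by two boolean intent extras, AutoFocus and UseFlash, whose keys are defined as constants in the class below (a TextBlockObject result key is also defined, but this version speaks the detected text aloud rather than returning it). Here is a minimal, hypothetical launcher sketch; only OcrCaptureActivity and its extra keys come from the listing, everything else is a placeholder.

import android.content.Context;
import android.content.Intent;

public final class OcrCaptureLauncher {
    private OcrCaptureLauncher() {}

    // Launches the capture screen with continuous autofocus on and the torch off.
    public static void launch(Context context) {
        Intent intent = new Intent(context, OcrCaptureActivity.class);
        intent.putExtra(OcrCaptureActivity.AutoFocus, true);
        intent.putExtra(OcrCaptureActivity.UseFlash, false);
        context.startActivity(intent);
    }
}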

Source

/*
 * Copyright (C) The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.app.azza.ocr;

import android.Manifest;
import android.annotation.SuppressLint;
import android.app.Activity;
import android.app.AlertDialog;
import android.app.Dialog;
import android.content.Context;
import android.content.DialogInterface;
import android.content.Intent;
import android.content.IntentFilter;
import android.content.pm.PackageManager;
import android.hardware.Camera;
import android.os.Bundle;
import android.speech.tts.TextToSpeech;
import android.support.annotation.NonNull;
import android.support.design.widget.Snackbar;
import android.support.v4.app.ActivityCompat;
import android.support.v7.app.AppCompatActivity;
import android.util.Log;
import android.view.GestureDetector;
import android.view.KeyEvent;
import android.view.MotionEvent;
import android.view.ScaleGestureDetector;
import android.view.View;
import android.widget.Toast;

import com.app.azza.ocr.ui.camera.CameraSource;
import com.app.azza.ocr.ui.camera.CameraSourcePreview;
import com.app.azza.ocr.ui.camera.GraphicOverlay;
import com.google.android.gms.common.ConnectionResult;
import com.google.android.gms.common.GoogleApiAvailability;
import com.google.android.gms.vision.text.Text;
import com.google.android.gms.vision.text.TextBlock;
import com.google.android.gms.vision.text.TextRecognizer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;

/**
 * Activity for the OCR detection app.  This app detects text with the rear-facing camera
 * and reads the detected values out loud.  During detection, overlay graphics are drawn
 * to indicate the position, size, and contents of each TextBlock.
 */
public final class OcrCaptureActivity extends AppCompatActivity {
    private static final String TAG = "OcrCaptureActivity";

    // Intent request code to handle updating play services if needed.
    private static final int RC_HANDLE_GMS = 9001;

    // Permission request codes need to be < 256
    private static final int RC_HANDLE_CAMERA_PERM = 2;

    // Constants used to pass extra data in the intent
    public static final String AutoFocus = "AutoFocus";
    public static final String UseFlash = "UseFlash";
    public static final String TextBlockObject = "String";

    private CameraSource mCameraSource;
    private CameraSourcePreview mPreview;
    private GraphicOverlay<OcrGraphic> mGraphicOverlay;

    // Helper objects for detecting taps and pinches.
    private ScaleGestureDetector scaleGestureDetector;
    private GestureDetector gestureDetector;

    // A TextToSpeech engine for speaking a String value.
    private TextToSpeech tts;

    // When true, the next tap starts speaking the detected text; when false, it stops speech.
    private boolean screenClickedToStop = true;

    // Index of the next line to speak when stepping through the text with the volume keys.
    private int lineNumberToSpeak = 0;

    /**
     * Initializes the UI and creates the detector pipeline.
     */
    @Override
    public void onCreate(Bundle bundle) {
        super.onCreate(bundle);
        setContentView(R.layout.ocr_capture);

        mPreview = (CameraSourcePreview) findViewById(R.id.preview);
        mGraphicOverlay = (GraphicOverlay<OcrGraphic>) findViewById(R.id.graphicOverlay);

        // Read the settings from the launching intent, with good defaults for capturing text.
        boolean autoFocus = getIntent().getBooleanExtra(AutoFocus, true);
        boolean useFlash = getIntent().getBooleanExtra(UseFlash, false);

        // Check for the camera permission before accessing the camera.  If the
        // permission is not granted yet, request permission.
        int rc = ActivityCompat.checkSelfPermission(this, Manifest.permission.CAMERA);
        if (rc == PackageManager.PERMISSION_GRANTED) {
            createCameraSource(autoFocus, useFlash);
        } else {
            requestCameraPermission();
        }

        gestureDetector = new GestureDetector(this, new CaptureGestureListener());
        scaleGestureDetector = new ScaleGestureDetector(this, new ScaleListener());

        Snackbar.make(mGraphicOverlay, "Tap to Speak. Pinch/Stretch to zoom", Snackbar.LENGTH_LONG).show();

        // Set up the Text To Speech engine.
        TextToSpeech.OnInitListener listener = new TextToSpeech.OnInitListener() {
            @Override
            public void onInit(final int status) {
                if (status == TextToSpeech.SUCCESS) {
                    Log.d("OnInitListener", "Text to speech engine started successfully.");
                    tts.setLanguage(Locale.US);
                } else {
                    Log.d("OnInitListener", "Error starting the text to speech engine.");
                }
            }
        };
        tts = new TextToSpeech(this.getApplicationContext(), listener);

    }

    /**
     * Handles the requesting of the camera permission.  This includes
     * showing a "Snackbar" message of why the permission is needed then
     * sending the request.
     */
    private void requestCameraPermission() {
        Log.w(TAG, "Camera permission is not granted. Requesting permission");

        final String[] permissions = new String[] { Manifest.permission.CAMERA };

        if (!ActivityCompat.shouldShowRequestPermissionRationale(this, Manifest.permission.CAMERA)) {
            ActivityCompat.requestPermissions(this, permissions, RC_HANDLE_CAMERA_PERM);
            return;
        }

        final Activity thisActivity = this;

        View.OnClickListener listener = new View.OnClickListener() {
            @Override
            public void onClick(View view) {
                ActivityCompat.requestPermissions(thisActivity, permissions, RC_HANDLE_CAMERA_PERM);
            }
        };

        Snackbar.make(mGraphicOverlay, R.string.permission_camera_rationale, Snackbar.LENGTH_INDEFINITE)
                .setAction(R.string.ok, listener).show();
    }

    @Override
    public boolean onTouchEvent(MotionEvent e) {
        boolean b = scaleGestureDetector.onTouchEvent(e);

        boolean c = gestureDetector.onTouchEvent(e);

        return b || c || super.onTouchEvent(e);
    }

    /**
     * Creates and starts the camera.  Note that this uses a higher resolution in comparison
     * to other detection examples to enable the ocr detector to detect small text samples
     * at long distances.
     *
     * Suppressing InlinedApi since there is a check that the minimum version is met before using
     * the constant.
     */
    @SuppressLint("InlinedApi")
    private void createCameraSource(boolean autoFocus, boolean useFlash) {
        Context context = getApplicationContext();

        // A text recognizer is created to find text.  An associated multi-processor instance
        // is set to receive the text recognition results, track the text, and maintain
        // graphics for each text block on screen.  The factory is used by the multi-processor to
        // create a separate tracker instance for each text block.
        TextRecognizer textRecognizer = new TextRecognizer.Builder(context).build();
        textRecognizer.setProcessor(new OcrDetectorProcessor(mGraphicOverlay));

        if (!textRecognizer.isOperational()) {
            // Note: The first time that an app using a Vision API is installed on a
            // device, GMS will download the native libraries needed to do detection.
            // Usually this completes before the app is run for the first time.  But if that
            // download has not yet completed, then the above call will not detect any text,
            // barcodes, or faces.
            //
            // isOperational() can be used to check if the required native libraries are currently
            // available.  The detectors will automatically become operational once the library
            // downloads complete on device.
            Log.w(TAG, "Detector dependencies are not yet available.");

            // Check for low storage.  If there is low storage, the native library will not be
            // downloaded, so detection will not become operational.
            IntentFilter lowstorageFilter = new IntentFilter(Intent.ACTION_DEVICE_STORAGE_LOW);
            boolean hasLowStorage = registerReceiver(null, lowstorageFilter) != null;

            if (hasLowStorage) {
                Toast.makeText(this, R.string.low_storage_error, Toast.LENGTH_LONG).show();
                Log.w(TAG, getString(R.string.low_storage_error));
            }
        }

        // Creates and starts the camera.  Note that this uses a higher resolution in comparison
        // to other detection examples to enable the text recognizer to detect small pieces of text.
        mCameraSource = new CameraSource.Builder(getApplicationContext(), textRecognizer)
                .setFacing(CameraSource.CAMERA_FACING_BACK).setRequestedPreviewSize(1280, 1024)
                .setRequestedFps(2.0f).setFlashMode(useFlash ? Camera.Parameters.FLASH_MODE_TORCH : null)
                .setFocusMode(autoFocus ? Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE : null).build();
    }

    /**
     * Restarts the camera.
     */
    @Override
    protected void onResume() {
        super.onResume();
        startCameraSource();
    }

    /**
     * Stops the camera.
     */
    @Override
    protected void onPause() {
        super.onPause();
        if (mPreview != null) {
            mPreview.stop();
        }
    }

    /**
     * Releases the resources associated with the camera source, the associated detectors, the
     * rest of the processing pipeline, and the Text To Speech engine.
     */
    @Override
    protected void onDestroy() {
        super.onDestroy();
        if (mPreview != null) {
            mPreview.release();
        }
        // Shut down the Text To Speech engine so it releases its service connection.
        if (tts != null) {
            tts.stop();
            tts.shutdown();
        }
    }

    /**
     * Callback for the result from requesting permissions. This method
     * is invoked for every call on {@link #requestPermissions(String[], int)}.
     * <p>
     * <strong>Note:</strong> It is possible that the permissions request interaction
     * with the user is interrupted. In this case you will receive empty permissions
     * and results arrays which should be treated as a cancellation.
     * </p>
     *
     * @param requestCode  The request code passed in {@link #requestPermissions(String[], int)}.
     * @param permissions  The requested permissions. Never null.
     * @param grantResults The grant results for the corresponding permissions
     *                     which is either {@link PackageManager#PERMISSION_GRANTED}
     *                     or {@link PackageManager#PERMISSION_DENIED}. Never null.
     * @see #requestPermissions(String[], int)
     */
    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions,
            @NonNull int[] grantResults) {
        if (requestCode != RC_HANDLE_CAMERA_PERM) {
            Log.d(TAG, "Got unexpected permission result: " + requestCode);
            super.onRequestPermissionsResult(requestCode, permissions, grantResults);
            return;
        }

        if (grantResults.length != 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
            Log.d(TAG, "Camera permission granted - initialize the camera source");
            // We have permission, so create the camera source with the same settings as onCreate.
            boolean autoFocus = getIntent().getBooleanExtra(AutoFocus, true);
            boolean useFlash = getIntent().getBooleanExtra(UseFlash, false);
            createCameraSource(autoFocus, useFlash);
            return;
        }

        Log.e(TAG, "Permission not granted: results len = " + grantResults.length + " Result code = "
                + (grantResults.length > 0 ? grantResults[0] : "(empty)"));

        DialogInterface.OnClickListener listener = new DialogInterface.OnClickListener() {
            public void onClick(DialogInterface dialog, int id) {
                finish();
            }
        };

        AlertDialog.Builder builder = new AlertDialog.Builder(this);
        builder.setTitle("OCR Capture").setMessage(R.string.no_camera_permission)
                .setPositiveButton(R.string.ok, listener).show();
    }

    /**
     * Starts or restarts the camera source, if it exists.  If the camera source doesn't exist yet
     * (e.g., because onResume was called before the camera source was created), this will be called
     * again when the camera source is created.
     */
    private void startCameraSource() throws SecurityException {
        // check that the device has play services available.
        int code = GoogleApiAvailability.getInstance().isGooglePlayServicesAvailable(getApplicationContext());
        if (code != ConnectionResult.SUCCESS) {
            Dialog dlg = GoogleApiAvailability.getInstance().getErrorDialog(this, code, RC_HANDLE_GMS);
            dlg.show();
        }

        if (mCameraSource != null) {
            try {
                mPreview.start(mCameraSource, mGraphicOverlay);
            } catch (IOException e) {
                Log.e(TAG, "Unable to start camera source.", e);
                mCameraSource.release();
                mCameraSource = null;
            }
        }
    }

    /**
     * onTap toggles speech: one tap speaks all of the detected text lines, ordered from the
     * top of the screen to the bottom, and the next tap stops the speech.
     *
     * @param rawX - the raw position of the tap
     * @param rawY - the raw position of the tap.
     * @return true if any text was found
     */
    private boolean onTap(float rawX, float rawY) {
        Log.v("tap", "clicks");
        // Flip the flag so taps alternate between speaking and stopping.
        screenClickedToStop = !screenClickedToStop;

        Text text = null;
        List<Text> textList = getAllGraphicsText();

        if (!screenClickedToStop) {
            Log.v("tap", "speak");
            if (textList.size() > 0) {
                for (int i = 0; i < textList.size(); i++) {
                    text = textList.get(i);
                    if (text != null && text.getValue() != null) {
                        tts.speak(text.getValue(), TextToSpeech.QUEUE_ADD, null, "DEFAULT");
                    } else {
                        Log.d(TAG, "text data is null");
                    }
                }
            } else {
                Log.d(TAG, "no text detected");
            }
        } else {
            Log.v("tap", "stop");
            tts.stop();
        }

        return text != null;
    }

    // Fetch all the graphics from the overlay, then get the text block of each graphic.
    // From each text block, get the list of lines forming that block; after merging all of
    // these lists we have a list of lines as Text objects, whose bounding rects are available.
    private List<Text> getAllGraphicsText() {
        List<Text> allBlockText = new ArrayList<>();

        for (Object item : mGraphicOverlay.getmGraphics()) {
            OcrGraphic graphic = (OcrGraphic) item;
            TextBlock textBlock = graphic.getTextBlock();
            if (textBlock != null) {
                List<Text> texts = (List<Text>) textBlock.getComponents();
                allBlockText.addAll(texts);
            }
        }

        // Sort the list by the y-axis coordinate so the app speaks the text in order from
        // the top of the page to its bottom; the graphics are stored in detection order,
        // which is effectively random.
        Comparator<Text> comparator = new Comparator<Text>() {
            @Override
            public int compare(Text o1, Text o2) {
                // Bounding-box tops are small non-negative pixel values, so the
                // subtraction cannot overflow.
                return o1.getBoundingBox().top - o2.getBoundingBox().top;
            }
        };
        Collections.sort(allBlockText, comparator);

        return allBlockText;
    }

    private class CaptureGestureListener extends GestureDetector.SimpleOnGestureListener {

        @Override
        public boolean onSingleTapConfirmed(MotionEvent e) {
            return onTap(e.getRawX(), e.getRawY()) || super.onSingleTapConfirmed(e);
        }
    }

    private class ScaleListener implements ScaleGestureDetector.OnScaleGestureListener {

        /**
         * Responds to scaling events for a gesture in progress.
         * Reported by pointer motion.
         *
         * @param detector The detector reporting the event - use this to
         *                 retrieve extended info about event state.
         * @return Whether or not the detector should consider this event
         * as handled. If an event was not handled, the detector
         * will continue to accumulate movement until an event is
         * handled. This can be useful if an application, for example,
         * only wants to update scaling factors if the change is
         * greater than 0.01.
         */
        @Override
        public boolean onScale(ScaleGestureDetector detector) {
            return false;
        }

        /**
         * Responds to the beginning of a scaling gesture. Reported by
         * new pointers going down.
         *
         * @param detector The detector reporting the event - use this to
         *                 retrieve extended info about event state.
         * @return Whether or not the detector should continue recognizing
         * this gesture. For example, if a gesture is beginning
         * with a focal point outside of a region where it makes
         * sense, onScaleBegin() may return false to ignore the
         * rest of the gesture.
         */
        @Override
        public boolean onScaleBegin(ScaleGestureDetector detector) {
            return true;
        }

        /**
         * Responds to the end of a scale gesture. Reported by existing
         * pointers going up.
         * <p/>
         * Once a scale has ended, {@link ScaleGestureDetector#getFocusX()}
         * and {@link ScaleGestureDetector#getFocusY()} will return focal point
         * of the pointers remaining on the screen.
         *
         * @param detector The detector reporting the event - use this to
         *                 retrieve extended info about event state.
         */
        @Override
        public void onScaleEnd(ScaleGestureDetector detector) {
            if (mCameraSource != null) {
                mCameraSource.doZoom(detector.getScaleFactor());
            }
        }
    }

    // Make the volume buttons trigger an action:
    // volume up speaks the next line, volume down speaks the previous one.
    // Because the list of lines is fetched anew on every key press, its size can vary with
    // each detection pass, which can leave lineNumberToSpeak pointing at a stale index.
    @Override
    public boolean dispatchKeyEvent(KeyEvent event) {
        int action = event.getAction();
        int keyCode = event.getKeyCode();

        Text text;
        List<Text> textList = getAllGraphicsText();
        switch (keyCode) {
        case KeyEvent.KEYCODE_VOLUME_UP:
            if (action == KeyEvent.ACTION_DOWN) {
                Toast.makeText(this, "volume up pressed", Toast.LENGTH_SHORT).show();
                Log.v("upVolume", "up " + lineNumberToSpeak);
                if (textList.size() > 0) {
                    if (lineNumberToSpeak < textList.size()) {
                        text = textList.get(lineNumberToSpeak);
                        if (text != null && text.getValue() != null) {
                            tts.speak(text.getValue(), TextToSpeech.QUEUE_ADD, null, "DEFAULT");
                            lineNumberToSpeak++;
                        } else {
                            Log.d(TAG, "text data is null");
                        }
                    } else {
                        // The newly fetched list has fewer lines than before, so start over.
                        lineNumberToSpeak = 0;
                    }
                } else {
                    Log.d(TAG, "no text detected");
                }
            }
            return true;
        case KeyEvent.KEYCODE_VOLUME_DOWN:
            if (action == KeyEvent.ACTION_DOWN) {
                Toast.makeText(this, "volume down pressed", Toast.LENGTH_SHORT).show();
                if (textList.size() > 0) {
                    // Clamp the index in case the newly fetched list is shorter than before.
                    if (lineNumberToSpeak >= textList.size()) {
                        lineNumberToSpeak = textList.size() - 1;
                    }
                    text = textList.get(lineNumberToSpeak);
                    if (text != null && text.getValue() != null) {
                        tts.speak(text.getValue(), TextToSpeech.QUEUE_ADD, null, "DEFAULT");
                        if (lineNumberToSpeak > 0) {
                            lineNumberToSpeak--;
                        }
                    } else {
                        Log.d(TAG, "text data is null");
                    }
                } else {
                    Log.d(TAG, "no text detected");
                }
            }
            return true;
        default:
            return super.dispatchKeyEvent(event);
        }
    }
}
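
The camera pipeline above hands its results to an OcrDetectorProcessor, a companion class that is not part of this listing. For reference, here is a minimal sketch consistent with how the class is used in createCameraSource() and with the Google Mobile Vision ocr-reader sample this code follows; the OcrGraphic constructor and the overlay's clear()/add() methods are assumptions borrowed from that sample.

import android.util.SparseArray;

import com.app.azza.ocr.ui.camera.GraphicOverlay;
import com.google.android.gms.vision.Detector;
import com.google.android.gms.vision.text.TextBlock;

// Receives frames of detection results and mirrors them onto the overlay.
public class OcrDetectorProcessor implements Detector.Processor<TextBlock> {

    private final GraphicOverlay<OcrGraphic> mGraphicOverlay;

    OcrDetectorProcessor(GraphicOverlay<OcrGraphic> ocrGraphicOverlay) {
        mGraphicOverlay = ocrGraphicOverlay;
    }

    // Called for each processed camera frame with the currently detected text blocks.
    @Override
    public void receiveDetections(Detector.Detections<TextBlock> detections) {
        mGraphicOverlay.clear();
        SparseArray<TextBlock> items = detections.getDetectedItems();
        for (int i = 0; i < items.size(); ++i) {
            TextBlock item = items.valueAt(i);
            mGraphicOverlay.add(new OcrGraphic(mGraphicOverlay, item));
        }
    }

    // Frees resources and clears the overlay when the detector is released.
    @Override
    public void release() {
        mGraphicOverlay.clear();
    }
}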