Python install tesseract

說明

這篇要說明安裝 tesseract 這個套件的一些步驟,這個套件主要在做文字的偵測跟辨識,目前想用於偵測影像中的文字並視為雜訊而去除,所以安裝這個套件

不過因為我本身使用 PyCharm 當作 IDE,在安裝的過程中似乎無法以平常的方式直接安裝,所以參考了網路上的作法

屬於Python 安裝套件的基本方式再修正一些步驟,基本的流程是直接下載套件包,然後引入環境變數,之後再用 pip 來裝

但這邊前面兩個步驟相同,最後pip 的方式則省略

操作流程

下載EXE

先到 下載地址 下載安裝檔,下載的版本要與電腦本身的位元數相符,否則會安裝失敗,例如電腦為 64 位元則下載 w64 的版本

下載成功會有下載好的 exe檔案

EXE 安裝

依照圖片中的說明安裝,大部分都是按下一步,其中有一個多國語言要打勾,還有實際安裝的路徑要記錄下來,等下環境變數會用到

設定環境變數

因為安裝的時間會有點久,可以先來設定環境變數

依照圖片的方式設定,在3的時候 在Path 新增剛剛記錄下來的路徑

另外參考其他作者提供的方法,如果系統變數下也有 Path,也要把同一個路徑加入

加入系統變數設定新資料夾

在系統變數下,新增一個變數名稱為TESSDATA_PREFIX,變數值為剛剛記錄下來的路徑加上/tessdata

安裝完成

Demo偵測文字

Code

# Import required modules
import argparse
import math
import os

import cv2
import cv2 as cv
import numpy as np

# Command-line interface for the text-detection demo. The options are
# registered in the same order as before so the generated --help output is
# unchanged.
parser = argparse.ArgumentParser(
    description="Use this script to run text detection deep learning networks using OpenCV.",
)

# Optional source path; it has no default, so args.input is None when omitted.
parser.add_argument(
    "--input",
    help="Path to input image or video file. Skip this argument to capture frames from a camera.",
)

# Location of the frozen network weights.
parser.add_argument(
    "--model",
    default="./model/frozen_east_text_detection.pb",
    help="Path to a binary .pb file of model contains trained weights.",
)

# Network input width in pixels.
parser.add_argument(
    "--width",
    default=320,
    type=int,
    help="Preprocess input image by resizing to a specific width. It should be multiple by 32.",
)

# Network input height in pixels.
parser.add_argument(
    "--height",
    default=320,
    type=int,
    help="Preprocess input image by resizing to a specific height. It should be multiple by 32.",
)

# Minimum score a detection needs in order to be kept.
parser.add_argument(
    "--thr",
    default=0.5,
    type=float,
    help="Confidence threshold.",
)

# Threshold handed to non-maximum suppression.
parser.add_argument(
    "--nms",
    default=0.4,
    type=float,
    help="Non-maximum suppression threshold.",
)

# Parsed once at module level; the script body below reads from `args`.
args = parser.parse_args()


############ Utility functions ############
def _east_cell_to_box(col, row, d0, d1, d2, d3, angle):
    """Build one rotated-rectangle tuple for the score-map cell at (col, row).

    ``d0``..``d3`` are the four geometry channel values for the cell and
    ``angle`` is the fifth channel (radians, since it is fed to math.cos/sin).
    NOTE(review): d0..d3 are presumably the EAST distances from the cell to
    the box edges - confirm against the model's output definition.

    Returns ``((center_x, center_y), (width, height), angle_in_degrees)``.
    """
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    box_h = d0 + d2
    box_w = d1 + d3

    # Each score-map cell is mapped to input coordinates with a factor of 4.0.
    anchor_x = col * 4.0 + cos_a * d1 + sin_a * d2
    anchor_y = row * 4.0 - sin_a * d1 + cos_a * d2

    # Two reference corners; the box centre is their midpoint.
    p1_x = -sin_a * box_h + anchor_x
    p1_y = -cos_a * box_h + anchor_y
    p3_x = -cos_a * box_w + anchor_x
    p3_y = sin_a * box_w + anchor_y

    centre = (0.5 * (p1_x + p3_x), 0.5 * (p1_y + p3_y))
    return (centre, (box_w, box_h), -1 * angle * 180.0 / math.pi)


def decode(scores, geometry, scoreThresh):
    """Convert raw score/geometry maps into rotated boxes and confidences.

    Args:
        scores: 4-D array of shape (1, 1, H, W) holding one score per cell.
        geometry: 4-D array of shape (1, 5, H, W); channels 0-3 feed the box
            size/position and channel 4 is the rotation angle in radians.
        scoreThresh: cells whose score is below this value are skipped.

    Returns:
        A two-element list ``[detections, confidences]``. Each detection is
        ``((center_x, center_y), (width, height), angle_in_degrees)`` and
        ``confidences`` holds the matching scores as Python floats.

    Raises:
        AssertionError: if the array shapes do not match the layout above.
    """
    # Shape validation (same checks, order and messages as before).
    assert len(scores.shape) == 4, "Incorrect dimensions of scores"
    assert len(geometry.shape) == 4, "Incorrect dimensions of geometry"
    assert scores.shape[0] == 1, "Invalid dimensions of scores"
    assert geometry.shape[0] == 1, "Invalid dimensions of geometry"
    assert scores.shape[1] == 1, "Invalid dimensions of scores"
    assert geometry.shape[1] == 5, "Invalid dimensions of geometry"
    assert scores.shape[2] == geometry.shape[2], "Invalid dimensions of scores and geometry"
    assert scores.shape[3] == geometry.shape[3], "Invalid dimensions of scores and geometry"

    rows, cols = scores.shape[2], scores.shape[3]
    found_boxes = []
    found_scores = []

    for row in range(rows):
        # Slice out the current row of the score map and of every geometry channel.
        row_scores = scores[0][0][row]
        row_d0 = geometry[0][0][row]
        row_d1 = geometry[0][1][row]
        row_d2 = geometry[0][2][row]
        row_d3 = geometry[0][3][row]
        row_angles = geometry[0][4][row]

        for col in range(cols):
            cell_score = row_scores[col]

            # Weak cells contribute nothing.
            if cell_score < scoreThresh:
                continue

            found_boxes.append(
                _east_cell_to_box(
                    col, row,
                    row_d0[col], row_d1[col], row_d2[col], row_d3[col],
                    row_angles[col],
                )
            )
            found_scores.append(float(cell_score))

    return [found_boxes, found_scores]


if __name__ == "__main__":
    # Script entry point: load the EAST network, detect text in one image,
    # outline every detection in green and show the result in a window.

    # Read and store arguments
    confThreshold = args.thr
    nmsThreshold = args.nms
    inpWidth = args.width
    inpHeight = args.height

    # Honour --model (it used to be parsed but ignored). If that file is
    # missing, fall back to the location that was previously hard-coded so
    # existing setups keep working.
    legacyModel = './frozen_east_text_detection.pb'
    model = args.model
    if not os.path.isfile(model) and os.path.isfile(legacyModel):
        model = legacyModel

    # Honour --input as well; without it, use the bundled sample image as
    # before. Only still images are handled here (cv.imread).
    imagePath = args.input or "./images/car_wash.jpg"

    # Load network
    net = cv.dnn.readNet(model)

    # The first layer yields the score map, the second the geometry map
    # (they are read back below as output[0] and output[1]).
    outputLayers = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3",
    ]

    # Read frame; cv.imread returns None instead of raising on failure.
    frame = cv.imread(imagePath)
    if frame is None:
        raise SystemExit("Could not read image: %s" % imagePath)

    # Ratios used to map boxes from network-input size back to the image.
    height_ = frame.shape[0]
    width_ = frame.shape[1]
    rW = width_ / float(inpWidth)
    rH = height_ / float(inpHeight)

    # Create a 4D blob from frame: scale 1.0, resized to the network input
    # size, mean subtraction, swapRB=True, crop=False.
    blob = cv.dnn.blobFromImage(frame, 1.0, (inpWidth, inpHeight), (123.68, 116.78, 103.94), True, False)

    # Run the model
    net.setInput(blob)
    output = net.forward(outputLayers)

    # Get scores and geometry
    scores = output[0]
    geometry = output[1]
    [boxes, confidences] = decode(scores, geometry, confThreshold)

    # Apply NMS. Depending on the OpenCV version the result is an Nx1 array,
    # a flat array, or an empty tuple; flattening handles all three.
    indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold)
    for i in np.array(indices).flatten():
        # get 4 corners of the rotated rect
        vertices = cv.boxPoints(boxes[i])

        # scale the bounding box coordinates based on the respective ratios
        vertices[:, 0] *= rW
        vertices[:, 1] *= rH

        # Drawing functions need integer pixel coordinates.
        corners = [(int(round(vx)), int(round(vy))) for vx, vy in vertices]
        for j in range(4):
            cv.line(frame, corners[j], corners[(j + 1) % 4], (0, 255, 0), 2, cv.LINE_AA)

        # NOTE: to mask the detected text instead of outlining it, fill the
        # area between corners[0] and corners[2], e.g.
        # cv.rectangle(frame, corners[0], corners[2], (0, 0, 0), -1).

    # Display the frame
    cv.namedWindow('result', cv.WINDOW_NORMAL)
    cv.resizeWindow("result", 640, 480)
    cv.imshow("result", frame)
    cv.waitKey(0)

留言