markdown
# 說明
前兩篇文章都與這個主題相關。之所以會寫成三篇,是因為在尋找這個問題的解答時,剛好找到了三種方法,所以把它們都記錄下來。這一篇的方法是「偵測文字」:使用已經訓練好的 model 辨識圖片中的文字區域,找到文字後用方框框起來。這裡示範的是把方框填成藍色的版本,方便確認真的有找到文字;實際使用時直接改用黑色的方框即可。不過目前這個方法有一個問題:如果文字是傾斜的,就無法偵測出來。
# 操作流程
## Code
```python
import math
import argparse
import os
import cv2
# Root directory that is scanned for input pictures.
ImgPath = './old'
# Output location for the processed pictures.
OutputPath = './new'

# Command-line options for the text detection run.
parser = argparse.ArgumentParser(
    description='Use this script to run text detection deep learning networks using OpenCV.')
# Input argument. The option is still accepted so existing command lines keep
# working, but nothing in this script reads args.input, so the help text says
# so instead of promising image/video/camera capture.
parser.add_argument('--input',
                    help='Currently ignored: images are always read from the ImgPath directory tree.')
# Model argument: path of the frozen EAST graph.
parser.add_argument('--model', default="./model/frozen_east_text_detection.pb",
                    help='Path to a binary .pb file of model contains trained weights.'
                    )
# Width the image is resized to before it is fed to the network.
parser.add_argument('--width', type=int, default=320,
                    help='Preprocess input image by resizing to a specific width. It should be multiple by 32.'
                    )
# Height the image is resized to before it is fed to the network.
parser.add_argument('--height', type=int, default=320,
                    help='Preprocess input image by resizing to a specific height. It should be multiple by 32.'
                    )
# Confidence threshold: detections scoring below it are dropped.
parser.add_argument('--thr', type=float, default=0.5,
                    help='Confidence threshold.'
                    )
# Non-maximum suppression threshold.
parser.add_argument('--nms', type=float, default=0.4,
                    help='Non-maximum suppression threshold.'
                    )
# NOTE: parsing happens at module level, so it runs on import as well.
args = parser.parse_args()
############ Utility functions ############
def decode(scores, geometry, scoreThresh):
    """Turn the raw score/geometry maps into rotated boxes with confidences.

    Args:
        scores: 4-D array of shape (1, 1, H, W) holding one score per cell.
        geometry: 4-D array of shape (1, 5, H, W). Channels 0 and 2 are
            summed into the box height, channels 1 and 3 into the box width,
            and channel 4 is the box angle in radians.
        scoreThresh: cells whose score is below this value are skipped.

    Returns:
        A two-element list ``[detections, confidences]``. Each detection is
        ``((cx, cy), (w, h), angle_in_degrees)`` and ``confidences`` holds
        the matching scores as Python floats.

    Raises:
        AssertionError: if either array does not have the shape given above.
    """
    # Validate the layout of both maps before reading any element.
    assert len(scores.shape) == 4, "Incorrect dimensions of scores"
    assert len(geometry.shape) == 4, "Incorrect dimensions of geometry"
    assert scores.shape[0] == 1, "Invalid dimensions of scores"
    assert geometry.shape[0] == 1, "Invalid dimensions of geometry"
    assert scores.shape[1] == 1, "Invalid dimensions of scores"
    assert geometry.shape[1] == 5, "Invalid dimensions of geometry"
    assert scores.shape[2] == geometry.shape[2], "Invalid dimensions of scores and geometry"
    assert scores.shape[3] == geometry.shape[3], "Invalid dimensions of scores and geometry"

    rows = scores.shape[2]
    cols = scores.shape[3]
    boxes = []
    box_scores = []

    for row in range(rows):
        # Slice out the current row of every map once.
        row_scores = scores[0][0][row]
        d0_row = geometry[0][0][row]
        d1_row = geometry[0][1][row]
        d2_row = geometry[0][2][row]
        d3_row = geometry[0][3][row]
        angle_row = geometry[0][4][row]

        for col in range(cols):
            confidence = row_scores[col]
            if (confidence < scoreThresh):
                # Not confident enough: ignore this cell.
                continue

            d0 = d0_row[col]
            d1 = d1_row[col]
            d2 = d2_row[col]
            d3 = d3_row[col]
            theta = angle_row[col]
            cos_t = math.cos(theta)
            sin_t = math.sin(theta)

            box_h = d0 + d2
            box_w = d1 + d3

            # Grid indices are multiplied by 4.0 to get input-image
            # coordinates, then shifted along the rotated box axes.
            anchor_x = col * 4.0 + cos_t * d1 + sin_t * d2
            anchor_y = row * 4.0 - sin_t * d1 + cos_t * d2

            # Two points derived from the anchor; their midpoint is the
            # centre of the rotated rectangle.
            p1 = (-sin_t * box_h + anchor_x, -cos_t * box_h + anchor_y)
            p3 = (-cos_t * box_w + anchor_x, sin_t * box_w + anchor_y)
            center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1]))

            # The angle is negated and converted from radians to degrees.
            boxes.append((center, (box_w, box_h), -1 * theta * 180.0 / math.pi))
            box_scores.append(float(confidence))

    return [boxes, box_scores]
if __name__ == "__main__":
    # Copy the parsed command-line options into the names used below.
    confThreshold = args.thr
    nmsThreshold = args.nms
    inpWidth = args.width
    inpHeight = args.height
    model = args.model
    # The network used to be loaded from a hard-coded file in the working
    # directory, which silently ignored --model. Honour the option, but fall
    # back to the old location when only that file exists so existing
    # set-ups keep working.
    legacy_model = './frozen_east_text_detection.pb'
    if not os.path.isfile(model) and os.path.isfile(legacy_model):
        model = legacy_model
    # Module-level list of image paths, kept for code that refers to it by name.
    img_list = []
    # Load network
    net = cv2.dnn.readNet(model)
    # Window title string (not referenced elsewhere in this file).
    kWinName = "EAST: An Efficient and Accurate Scene Text Detector"
    # Layers requested from the network: the first output is consumed as the
    # score map and the second as the geometry map.
    outputLayers = []
    outputLayers.append("feature_fusion/Conv_7/Sigmoid")
    outputLayers.append("feature_fusion/concat_3")
    def read_directory(directory_name):
        """Return the files stored exactly three levels below *directory_name*.

        The expected layout is ``directory_name/<dir>/<dir>/<file>``. Entries
        in the first two levels that are not directories are skipped instead
        of raising, and only regular files are collected from the third level.

        Args:
            directory_name: root directory to scan.

        Returns:
            A new list of paths of the form
            ``directory_name + '/' + a + '/' + b + '/' + file``, sorted at
            every level so the order is reproducible. A fresh list is built
            on each call, so repeated calls never accumulate duplicates.
        """
        found = []
        for first in sorted(os.listdir(directory_name)):
            first_dir = directory_name + '/' + first
            if not os.path.isdir(first_dir):
                # Stray file next to the first-level folders.
                continue
            for second in sorted(os.listdir(first_dir)):
                second_dir = first_dir + '/' + second
                if not os.path.isdir(second_dir):
                    continue
                for leaf in sorted(os.listdir(second_dir)):
                    leaf_path = second_dir + '/' + leaf
                    # Deeper directories cannot be read as images.
                    if os.path.isfile(leaf_path):
                        found.append(leaf_path)
        return found
    def detectwords(ImgPath):
        """Detect text in every image under *ImgPath* and paint over it.

        For each path returned by ``read_directory(ImgPath)`` the image is
        run through the network, the surviving boxes are covered with a
        filled rectangle, the result is shown in a window (waiting for a key
        press) and then written into ``OutputPath`` under the original file
        name.

        Args:
            ImgPath: root directory handed to ``read_directory``.

        Note:
            Output files are named after the input file name only, so two
            inputs with the same name in different folders overwrite each
            other.
        """
        print("start")
        # Extra margin, in pixels, added around every detected box.
        pad = 50
        # Fill colour in OpenCV's BGR order, i.e. blue.
        fill_color = (255, 0, 0)
        # Create the output directory once, before any image is written.
        os.makedirs(OutputPath, exist_ok=True)

        for image_file in read_directory(ImgPath):
            frame = cv2.imread(image_file)
            if frame is None:
                # imread returns None for files it cannot decode.
                print("skip unreadable image: " + image_file)
                continue

            # Ratios used to map network-input coordinates back to the frame.
            height_, width_ = frame.shape[:2]
            rW = width_ / float(inpWidth)
            rH = height_ / float(inpHeight)

            # Create a 4D blob from frame and run the model.
            blob = cv2.dnn.blobFromImage(frame, 1.0, (inpWidth, inpHeight), (123.68, 116.78, 103.94), True, False)
            net.setInput(blob)
            output = net.forward(outputLayers)

            # Get scores and geometry, then decode and apply NMS.
            scores = output[0]
            geometry = output[1]
            [boxes, confidences] = decode(scores, geometry, confThreshold)
            indices = cv2.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold)

            for entry in indices:
                # Depending on the OpenCV build, each entry is either a
                # one-element array or a plain integer; accept both.
                try:
                    box_index = int(entry[0])
                except (IndexError, TypeError):
                    box_index = int(entry)

                # Get the 4 corners of the rotated rect and scale them back
                # to the size of the original frame.
                vertices = cv2.boxPoints(boxes[box_index])
                for j in range(4):
                    vertices[j][0] *= rW
                    vertices[j][1] *= rH

                # Corners 0 and 2 are used as the two ends of the padded,
                # filled rectangle that covers the text.
                first_corner = (int(vertices[0][0]) - pad, int(vertices[0][1]) + pad)
                opposite_corner = (int(vertices[2][0] + pad), int(vertices[2][1]) - pad)
                cv2.rectangle(frame, first_corner, opposite_corner, fill_color, -1)

            # Display the frame and block until a key is pressed.
            cv2.namedWindow('result', cv2.WINDOW_NORMAL)
            cv2.resizeWindow("result", 640, 480)
            cv2.imshow("result", frame)
            cv2.waitKey(-1)

            file_name = os.path.basename(image_file)
            print(file_name)
            # Join with a separator so the file lands inside OutputPath;
            # plain string concatenation produced './new<name>' instead.
            cv2.imwrite(os.path.join(OutputPath, file_name), frame)
    # Process every image found under ImgPath.
    detectwords(ImgPath)
    # Kept inside the main guard so importing the module prints nothing.
    print("finish")
```
## 引用 package
- 參考 : https://github.com/oyyd/frozen_east_text_detection.pb
- 下載點 : https://drive.google.com/file/d/1uGoSKfWKU1h8O89s04Gr2JrI3iMB79I8/view?usp=sharing
## DEMO

留言
張貼留言