Python 偵測文字的方式去除白色無效資訊:以mammography為例

markdown #說明前兩篇皆與這個相關，會寫三篇的原因也是因為在尋找這個問題解答的同時，有搜尋到這樣的三種方法，所以就把他們都記錄下來，這一篇的方法為，偵測文字，所以用已經訓練好的model，去辨識圖片中的文字部分，找到圖片後用方框框起來，這裡有示範一個框成藍色的版本，方便確認真的找到文字，另外實際用的時候直接採用黑色的框就可以了，不過目前用這個方法有遇到一個問題是，如果文字為傾斜的，就無法去偵測出來 #操作流程 ##Code ``` import math import argparse import os import cv2 ImgPath = './old' OutputPath = './new' parser = argparse.ArgumentParser( description='Use this script to run text detection deep learning networks using OpenCV.') # Input argument parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') # Model argument parser.add_argument('--model', default="./model/frozen_east_text_detection.pb", help='Path to a binary .pb file of model contains trained weights.' ) # Width argument parser.add_argument('--width', type=int, default=320, help='Preprocess input image by resizing to a specific width. It should be multiple by 32.' ) # Height argument parser.add_argument('--height', type=int, default=320, help='Preprocess input image by resizing to a specific height. It should be multiple by 32.' ) # Confidence threshold parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold.' ) # Non-maximum suppression threshold parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold.' 
) args = parser.parse_args() ############ Utility functions ############ def decode(scores, geometry, scoreThresh): detections = [] confidences = [] ############ CHECK DIMENSIONS AND SHAPES OF geometry AND scores ############ assert len(scores.shape) == 4, "Incorrect dimensions of scores" assert len(geometry.shape) == 4, "Incorrect dimensions of geometry" assert scores.shape[0] == 1, "Invalid dimensions of scores" assert geometry.shape[0] == 1, "Invalid dimensions of geometry" assert scores.shape[1] == 1, "Invalid dimensions of scores" assert geometry.shape[1] == 5, "Invalid dimensions of geometry" assert scores.shape[2] == geometry.shape[2], "Invalid dimensions of scores and geometry" assert scores.shape[3] == geometry.shape[3], "Invalid dimensions of scores and geometry" height = scores.shape[2] width = scores.shape[3] for y in range(0, height): # Extract data from scores scoresData = scores[0][0][y] x0_data = geometry[0][0][y] x1_data = geometry[0][1][y] x2_data = geometry[0][2][y] x3_data = geometry[0][3][y] anglesData = geometry[0][4][y] for x in range(0, width): score = scoresData[x] # If score is lower than threshold score, move to next x if (score < scoreThresh): continue # Calculate offset offsetX = x * 4.0 offsetY = y * 4.0 angle = anglesData[x] # Calculate cos and sin of angle cosA = math.cos(angle) sinA = math.sin(angle) h = x0_data[x] + x2_data[x] w = x1_data[x] + x3_data[x] # Calculate offset offset = ( [offsetX + cosA * x1_data[x] + sinA * x2_data[x], offsetY - sinA * x1_data[x] + cosA * x2_data[x]]) # Find points for rectangle p1 = (-sinA * h + offset[0], -cosA * h + offset[1]) p3 = (-cosA * w + offset[0], sinA * w + offset[1]) center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1])) detections.append((center, (w, h), -1 * angle * 180.0 / math.pi)) confidences.append(float(score)) # Return detections and confidences return [detections, confidences] if __name__ == "__main__": # Read and store arguments confThreshold = args.thr nmsThreshold = args.nms 
inpWidth = args.width inpHeight = args.height model = args.model img_list = [] # Load network net = cv2.dnn.readNet('./frozen_east_text_detection.pb') # Create a new named window kWinName = "EAST: An Efficient and Accurate Scene Text Detector" outputLayers = [] outputLayers.append("feature_fusion/Conv_7/Sigmoid") outputLayers.append("feature_fusion/concat_3") def read_directory(directory_name): path = directory_name GrandpaPath = os.listdir(path) for i in range(len(GrandpaPath)): DadPath = os.listdir(path + '/' + GrandpaPath[i]) for j in range(len(DadPath)): KidPath = os.listdir(path + '/' + GrandpaPath[i] + '/' + DadPath[j]) # print(KidPath) KidName_path = str(path + '/' + GrandpaPath[i] + '/' + DadPath[j] + '/') KidName = os.listdir(path + '/' + GrandpaPath[i] + '/' + DadPath[j]) # print(KidPath) for k in range(len(KidPath)): OldName = str(KidName_path + KidName[k]) NewName = str(KidName_path + KidName[k]) # print(OldName) # print(NewName) # print(NewName) img_list.append(NewName) # print(img_list) return img_list ''''' for root, dirs, files in os.walk(ImgPath): for file in files: if file.endswith(".jpg"): img_list.append(str(os.path.join(root, file))) return img_list ''''' # --------------------------------------------- ''''' name_of_img = [] for filename in os.listdir(r"./" + directory_name): path = directory_name + "/" + filename name_of_img.append(path) return name_of_img ''''' def detectwords(ImgPath): print("start") img_path = read_directory(ImgPath) # print(img_path) for k in range(len(img_path)): frame = cv2.imread(img_path[k]) # frame = cv2.imread('./106_C_1/0000528/V110138349/C1_0000528_V110138349_0000.jpg') # print(frame) # Read frame # frame = cv2.imread(ImgPath) # print(ImgPath) # Get frame height and width height_ = frame.shape[0] width_ = frame.shape[1] rW = width_ / float(inpWidth) rH = height_ / float(inpHeight) # Create a 4D blob from frame. 
blob = cv2.dnn.blobFromImage(frame, 1.0, (inpWidth, inpHeight), (123.68, 116.78, 103.94), True, False) # Run the model net.setInput(blob) output = net.forward(outputLayers) t, _ = net.getPerfProfile() # Get scores and geometry scores = output[0] geometry = output[1] [boxes, confidences] = decode(scores, geometry, confThreshold) # Apply NMS indices = cv2.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold) for i in indices: # get 4 corners of the rotated rect vertices = cv2.boxPoints(boxes[i[0]]) # scale the bounding box coordinates based on the respective ratios for j in range(4): vertices[j][0] *= rW vertices[j][1] *= rH for j in range(4): p1 = (vertices[j][0], vertices[j][1]) # p2 = (vertices[(j + 1) % 4][0], vertices[(j + 1) % 4][1]) # cv2.line(frame, p1, p2, (0, 255, 0), 2, cv2.LINE_AA) if j == 0: x = (int(p1[0]) - 50, int(p1[1]) + 50) if j == 2: y = (int(p1[0] + 50), int(p1[1]) - 50) cv2.rectangle(frame, x, y, (255, 0, 0), -1) # Display the frame cv2.namedWindow('result', cv2.WINDOW_NORMAL) cv2.resizeWindow("result", 640, 480) cv2.imshow("result", frame) cv2.waitKey(-1) NewName = (img_path[k]).split('/')[-1] print(NewName) # NewName = img_path[i].split('/')[-1] NewName = OutputPath + str(NewName) import os path = OutputPath if not os.path.isdir(path): os.mkdir(path) cv2.imwrite(NewName, frame) detectwords(ImgPath) print("finish") ``` ##引用package - 參考 : href="https://github.com/oyyd/frozen_east_text_detection.pb - 下載點 : https://drive.google.com/file/d/1uGoSKfWKU1h8O89s04Gr2JrI3iMB79I8/view?usp=sharing

## DEMO

SYmm 微筆記

搜尋此網誌

Python 偵測文字的方式去除白色無效資訊:以mammography為例

留言

張貼留言