import os

import cv2
import numpy as np
from paddleocr import PaddleOCR

from .detectImg import generate_detect_img
from api.component.ultralytics import YOLOv10


# Load the OCR model once at import time so repeated calls reuse it.
# use_angle_cls enables rotated-text classification; lang="ch" covers
# both Chinese and English text.
ocr_model = PaddleOCR(use_angle_cls=True, use_gpu=False, lang="ch")

def extractTextWithPositions(image):
    """Run OCR on an RGB image and return text strings with bounding boxes."""
    result = ocr_model.ocr(image, cls=True)
    textComponents = []

    # PaddleOCR returns [None] when no text is found, so guard before iterating.
    if not result or result[0] is None:
        return textComponents

    for line in result[0]:
        text_region = line[0]  # four corner points of the detected text box
        text = line[1][0]
        confidence = line[1][1]

        if isinstance(text, str) and text.strip():
            # Take the top-left and bottom-right corners as an axis-aligned bbox.
            x1, y1 = map(int, text_region[0])
            x2, y2 = map(int, text_region[2])

            textComponents.append(
                {"text": text, "confidence": confidence, "bbox": [x1, y1, x2, y2]}
            )

    return textComponents
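
# Usage sketch for the OCR step on its own (illustrative; the filename below
# is an assumption, not part of this module):
#
#   img = cv2.cvtColor(cv2.imread("screenshot.png"), cv2.COLOR_BGR2RGB)
#   for comp in extractTextWithPositions(img):
#       print(comp["text"], comp["bbox"], round(comp["confidence"], 2))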


def componentDetect(
    imgURL,
    modelPath=os.path.join("api", "component", "model.pt"),
    confidenceThreshold=0.15,
):
    """Detect UI components with YOLO and text with PaddleOCR in one image."""
    model = YOLOv10(modelPath)

    img = cv2.imread(imgURL)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {imgURL}")
    # OpenCV loads BGR; convert to RGB for the OCR and detection models.
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    textComponents = extractTextWithPositions(imgRGB)

    results = model(imgRGB, conf=confidenceThreshold)
    detections = results[0].boxes.data
    height, width, _ = img.shape
    detectionResult = []

    componentID = 1
    for detection in detections:
        x1, y1, x2, y2, conf, cls = detection[:6]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

        componentType = model.names[int(cls)]

        # Text regions are covered by the OCR pass below; skip YOLO's text class.
        if componentType.lower() == "text":
            continue

        componentROI = imgRGB[y1:y2, x1:x2]

        # Average over rows, then columns, to get one representative RGB color.
        avg_color_per_row = np.average(componentROI, axis=0)
        avg_color = np.average(avg_color_per_row, axis=0)
        color = [int(c) for c in avg_color]

        # Normalize coordinates to [0, 1] so positions are resolution-independent.
        position = {
            "x": x1 / width,
            "y": y1 / height,
            "width": (x2 - x1) / width,
            "height": (y2 - y1) / height,
        }

        componentData = {
            "id": componentID,
            "componentType": componentType,
            "position": position,
            "color": {"r": color[0], "g": color[1], "b": color[2]},
        }

        detectionResult.append(componentData)
        componentID += 1
    for textComp in textComponents:
        x1, y1, x2, y2 = textComp["bbox"]
        # text_region = img[y1:y2, x1:x2]
        # avg_color = cv2.mean(text_region)[:3]  # BGR format
        position = {
            "x": x1 / width,
            "y": y1 / height,
            "width": (x2 - x1) / width,
            "height": (y2 - y1) / height,
        }
        detectionResult.append(
            {
                "id": componentID,
                "componentType": "Text",
                "position": position,
                "text": textComp["text"],
                # "color": {"r": int(avg_color[2]), "g": int(avg_color[1]), "b": int(avg_color[0])}  # Convert to RGB
            }
        )
        componentID += 1

    # Render an annotated copy of the image with the detected components drawn on.
    detectImg = generate_detect_img(img, detectionResult)

    return detectionResult, detectImg
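

# A minimal usage sketch (illustrative only): the input and output filenames
# below are assumptions, not part of this module, and running it requires the
# YOLO weights at the default modelPath.
if __name__ == "__main__":
    components, annotated = componentDetect("example_ui.png")
    for comp in components:
        print(comp["id"], comp["componentType"], comp.get("text", ""))
    # generate_detect_img is assumed to return an image array OpenCV can write.
    cv2.imwrite("detect_output.png", annotated)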