57 lines
2.2 KiB
Python
57 lines
2.2 KiB
Python
import cv2
|
|
import base64
|
|
import os
|
|
from PIL import ImageFont, ImageDraw, Image
|
|
import numpy as np
|
|
|
|
|
|
def generate_detect_img(img, model_detections):
    """
    Draw a labelled bounding box for every detection and return the image as base64.

    Each detection is a dict read as follows:
    - detection["position"]: dict with "x", "y", "width" and "height"; the
      horizontal values are multiplied by the image width and the vertical
      values by the image height to obtain pixel coordinates.
    - detection["componentType"]: string used as the label; the value "text"
      (case-insensitive) selects the text rendering path.
    - detection["text"]: the string drawn above the box (text detections only).

    Parameters:
    - img: The original image (in BGR format).
    - model_detections: Iterable of detection dicts as described above. Text
      and non-text components are both passed through this single argument.

    Returns:
    - detectImg: The base64 encoded JPEG (as a str) with all detected
      components displayed with bounding boxes.

    Side effects:
    - Writes the annotated image to "api/component/static/detectImg.jpg"
      (relative to the current working directory) and prints the outcome.
    - Boxes are drawn in place on the array passed as `img` until the first
      text detection is processed; after that a new array is used.

    Raises:
    - OSError: if the "simsun.ttc" font cannot be loaded while a text
      detection is present.
    - KeyError: if a detection lacks one of the keys listed above.
    """
    img_height, img_width = img.shape[:2]
    text_font = None  # loaded on first use so images without text need no font file

    for detection in model_detections:
        position = detection["position"]
        x1 = int(position["x"] * img_width)
        y1 = int(position["y"] * img_height)
        x2 = x1 + int(position["width"] * img_width)
        y2 = y1 + int(position["height"] * img_height)
        component_type = detection["componentType"]

        if component_type.lower() == "text":
            border_color = (28, 28, 28)
            # Draw the box BEFORE the PIL round trip: the PIL image is a copy,
            # and `img` is replaced by it below, so anything drawn on `img`
            # after the copy is taken would be discarded.
            cv2.rectangle(img, (x1, y1), (x2, y2), border_color, 2)

            if text_font is None:
                # Font with Chinese glyph coverage; cv2.putText cannot render
                # such text, hence the detour through PIL.
                text_font = ImageFont.truetype("simsun.ttc", size=15)

            img_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            ImageDraw.Draw(img_pil).text(
                (x1, y1 - 10), detection["text"], font=text_font, fill=border_color
            )
            img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
        else:
            # Passed straight to OpenCV, so the tuple is in the image's
            # channel order (BGR per the contract above).
            border_color = (228, 53, 53)
            cv2.rectangle(img, (x1, y1), (x2, y2), border_color, 2)
            cv2.putText(
                img,
                f"{component_type}",
                (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                border_color,
                2,
            )

    save_directory = "api/component/static"
    save_file = os.path.join(save_directory, "detectImg.jpg")
    # cv2.imwrite signals failure (e.g. missing directory) through its return
    # value, so only claim success when it actually succeeded.
    if cv2.imwrite(save_file, img):
        print(f"Image saved to {save_file}")
    else:
        print(f"Failed to save image to {save_file}")

    # Encode image to base64
    img_encoded = cv2.imencode(".jpg", img)[1]
    detectImg = base64.b64encode(img_encoded).decode("ascii")

    return detectImg
|