inula/packages/inula-code-generator/inula-code-generator-web/backend/api/component/detectImg.py

57 lines
2.2 KiB
Python

import cv2
import base64
import os
from PIL import ImageFont, ImageDraw, Image
import numpy as np
def generate_detect_img(img, model_detections):
    """
    Draw bounding boxes (plus OCR text labels) for all detected components
    onto the input image, save it to disk, and return it base64-encoded.

    Parameters:
    - img: The original image as a NumPy array in BGR (OpenCV) format.
    - model_detections: List of detected components. Each item is a dict with
      a normalized "position" ({"x", "y", "width", "height"}, values in [0, 1]),
      a "componentType" string, and — for text components — a "text" field.

    Returns:
    - detectImg: Base64-encoded JPEG of the annotated image (str).
    """
    img_h, img_w = img.shape[:2]
    # CJK-capable font for OCR text labels; loaded lazily once instead of
    # per detection (truetype() is comparatively expensive).
    font = None

    for detection in model_detections:
        # Positions are normalized to [0, 1]; convert to pixel coordinates.
        x1 = int(detection["position"]["x"] * img_w)
        y1 = int(detection["position"]["y"] * img_h)
        x2 = x1 + int(detection["position"]["width"] * img_w)
        y2 = y1 + int(detection["position"]["height"] * img_h)

        if detection["componentType"].lower() == "text":
            bordColor = (28, 28, 28)
            # Draw the rectangle BEFORE converting to PIL. The original code
            # drew it on the NumPy array after the PIL copy had been taken, so
            # the rectangle was discarded when img was rebuilt from the PIL
            # image below.
            cv2.rectangle(img, (x1, y1), (x2, y2), bordColor, 2)
            # cv2.putText cannot render non-ASCII (e.g. Chinese) text, hence
            # the round-trip through PIL for the label only.
            img_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            draw = ImageDraw.Draw(img_pil)
            if font is None:
                font = ImageFont.truetype("simsun.ttc", size=15)  # CJK font
            draw.text((x1, y1 - 10), detection["text"], font=font, fill=bordColor)
            img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
        else:
            bordColor = (228, 53, 53)
            label = f"{detection['componentType']}"
            cv2.rectangle(img, (x1, y1), (x2, y2), bordColor, 2)
            cv2.putText(
                img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, bordColor, 2
            )

    save_directory = "api/component/static"
    # Ensure the target directory exists: cv2.imwrite fails silently
    # (returns False, writes nothing) when the directory is missing.
    os.makedirs(save_directory, exist_ok=True)
    save_file = os.path.join(save_directory, "detectImg.jpg")
    cv2.imwrite(save_file, img)
    print(f"Image saved to {save_file}")

    # Encode image to base64. decode("utf-8") is the correct way to get a
    # str; the original str(...)[2:-1] sliced the bytes object's repr.
    img_encoded = cv2.imencode(".jpg", img)[1]
    detectImg = base64.b64encode(img_encoded).decode("utf-8")
    return detectImg