support recursive search dirs (#38)

* support recursive search dirs

* fix pylint error
This commit is contained in:
Wang Xin 2023-12-19 15:22:47 +08:00 committed by GitHub
parent dba6184a50
commit cc4171e182
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 108 additions and 129 deletions

View File

@ -1,3 +1,4 @@
opencv-python opencv-python
Pillow Pillow
numpy numpy
tqdm

View File

@ -4,4 +4,4 @@
''' '''
about version about version
''' '''
__version__ = '0.1.3' __version__ = '0.1.4'

View File

@ -5,26 +5,30 @@ Created on Aug 18, 2021
@author: GreatV(Wang Xin) @author: GreatV(Wang Xin)
""" """
import base64 import base64
import glob
import io import io
import json import json
import math import math
import os import os
import random import random
import shutil import shutil
from collections import OrderedDict import uuid
from multiprocessing import Pool import logging
import PIL.ExifTags import PIL.ExifTags
import PIL.Image import PIL.Image
import PIL.ImageOps import PIL.ImageOps
import cv2 import cv2
import numpy as np import numpy as np
import tqdm
# set seed
random.seed(12345678) random.seed(12345678)
random.Random().seed(12345678)
np.random.seed(12345678) np.random.seed(12345678)
# number of LabelMe2YOLO multiprocessing threads logging.basicConfig(level=logging.INFO)
NUM_THREADS = max(1, os.cpu_count() - 1) logger = logging.getLogger("labelme2yolo")
def train_test_split(dataset_index, test_size=0.2): def train_test_split(dataset_index, test_size=0.2):
@ -97,21 +101,6 @@ def img_data_to_png_data(img_data):
return f_in.read() return f_in.read()
def get_label_id_map(json_dir: str):
"""Get label id map from json files in json_dir"""
label_set = set()
for file_name in os.listdir(json_dir):
if file_name.endswith("json"):
json_path = os.path.join(json_dir, file_name)
with open(json_path, encoding="utf-8") as file:
data = json.load(file)
for shape in data["shapes"]:
label_set.add(shape["label"])
return OrderedDict([(label, label_id) for label_id, label in enumerate(label_set)])
def extend_point_list(point_list, out_format="polygon"): def extend_point_list(point_list, out_format="polygon"):
"""Extend point list to polygon or bbox""" """Extend point list to polygon or bbox"""
x_min = min(float(point) for point in point_list[::2]) x_min = min(float(point) for point in point_list[::2])
@ -131,32 +120,24 @@ def extend_point_list(point_list, out_format="polygon"):
return np.array([x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max]) return np.array([x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max])
def save_yolo_label(json_name, label_dir_path, target_dir, yolo_obj_list): def save_yolo_label(obj_list, label_dir, target_dir, target_name):
"""Save yolo label to txt file""" """Save yolo label to txt file"""
txt_path = os.path.join(label_dir_path, txt_path = os.path.join(label_dir, target_dir, target_name)
target_dir,
json_name.replace(".json", ".txt"))
with open(txt_path, "w+", encoding="utf-8") as file: with open(txt_path, "w+", encoding="utf-8") as file:
for yolo_obj in yolo_obj_list: for label, points in obj_list:
label, points = yolo_obj
points = [str(item) for item in points] points = [str(item) for item in points]
yolo_obj_line = f"{label} {' '.join(points)}\n" line = f"{label} {' '.join(points)}\n"
file.write(yolo_obj_line) file.write(line)
def save_yolo_image(json_data, json_path, image_dir_path, target_dir): def save_yolo_image(json_data, json_dir, image_dir, target_dir, target_name):
"""Save yolo image to image_dir_path/target_dir""" """Save yolo image to image_dir_path/target_dir"""
json_name = os.path.basename(json_path) img_path = os.path.join(image_dir, target_dir, target_name)
img_name = json_name.replace(".json", ".png")
# make image_path and save image
img_path = os.path.join(image_dir_path, target_dir, img_name)
if json_data["imageData"] is None: if json_data["imageData"] is None:
dirname = os.path.dirname(json_path)
image_name = json_data["imagePath"] image_name = json_data["imagePath"]
src_image_name = os.path.join(dirname, image_name) src_image_name = os.path.join(json_dir, image_name)
src_image = cv2.imread(src_image_name) src_image = cv2.imread(src_image_name)
cv2.imwrite(img_path, src_image) cv2.imwrite(img_path, src_image)
else: else:
@ -170,31 +151,36 @@ class Labelme2YOLO:
"""Labelme to YOLO format converter""" """Labelme to YOLO format converter"""
def __init__(self, json_dir, output_format, label_list): def __init__(self, json_dir, output_format, label_list):
self._json_dir = json_dir self._json_dir = os.path.expanduser(json_dir)
self._output_format = output_format self._output_format = output_format
self._label_list = label_list self._label_list = []
self._label_id_map = {}
self._label_dir_path = "" self._label_dir_path = ""
self._image_dir_path = "" self._image_dir_path = ""
if label_list: if label_list:
self._label_id_map = {label: label_id self._label_list = label_list
for label_id, label in enumerate(label_list)} self._label_id_map = {
else: label: label_id for label_id, label in enumerate(label_list)
self._label_id_map = get_label_id_map(self._json_dir) }
self._label_list = list(self._label_id_map.keys())
def _update_id_map(self, label: str):
if label not in self._label_list:
self._label_list.append(label)
self._label_id_map[label] = len(self._label_id_map)
def _make_train_val_dir(self): def _make_train_val_dir(self):
self._label_dir_path = os.path.join(self._json_dir, self._label_dir_path = os.path.join(self._json_dir, "YOLODataset/labels/")
'YOLODataset/labels/') self._image_dir_path = os.path.join(self._json_dir, "YOLODataset/images/")
self._image_dir_path = os.path.join(self._json_dir,
'YOLODataset/images/')
for yolo_path in (os.path.join(self._label_dir_path + 'train/'), for yolo_path in (
os.path.join(self._label_dir_path + 'val/'), os.path.join(self._label_dir_path + "train/"),
os.path.join(self._label_dir_path + 'test/'), os.path.join(self._label_dir_path + "val/"),
os.path.join(self._image_dir_path + 'train/'), os.path.join(self._label_dir_path + "test/"),
os.path.join(self._image_dir_path + 'val/'), os.path.join(self._image_dir_path + "train/"),
os.path.join(self._image_dir_path + 'test/')): os.path.join(self._image_dir_path + "val/"),
os.path.join(self._image_dir_path + "test/"),
):
if os.path.exists(yolo_path): if os.path.exists(yolo_path):
shutil.rmtree(yolo_path) shutil.rmtree(yolo_path)
@ -207,31 +193,21 @@ class Labelme2YOLO:
for sample_name in os.listdir(set_folder): for sample_name in os.listdir(set_folder):
set_dir = os.path.join(set_folder, sample_name) set_dir = os.path.join(set_folder, sample_name)
if os.path.isdir(set_dir): if os.path.isdir(set_dir):
json_names.append(sample_name + '.json') json_names.append(sample_name + ".json")
return json_names return json_names
def _train_test_split(self, folders, json_names, val_size, test_size): def _train_test_split(self, json_names, val_size, test_size):
"""Split json names to train, val, test""" """Split json names to train, val, test"""
if (len(folders) > 0 and
'train' in folders and
'val' in folders and
'test' in folders):
train_json_names = self._get_dataset_part_json_names('train')
val_json_names = self._get_dataset_part_json_names('val')
test_json_names = self._get_dataset_part_json_names('test')
return train_json_names, val_json_names, test_json_names
total_size = len(json_names) total_size = len(json_names)
dataset_index = list(range(total_size)) dataset_index = list(range(total_size))
train_ids, val_ids = train_test_split(dataset_index, train_ids, val_ids = train_test_split(dataset_index, test_size=val_size)
test_size=val_size)
test_ids = [] test_ids = []
if test_size is None: if test_size is None:
test_size = 0.0 test_size = 0.0
if test_size > 1e-8: if test_size > 1e-8:
train_ids, test_ids = train_test_split( train_ids, test_ids = train_test_split(
train_ids, test_size=test_size / (1 - val_size)) train_ids, test_size=test_size / (1 - val_size)
)
train_json_names = [json_names[train_idx] for train_idx in train_ids] train_json_names = [json_names[train_idx] for train_idx in train_ids]
val_json_names = [json_names[val_idx] for val_idx in val_ids] val_json_names = [json_names[val_idx] for val_idx in val_ids]
test_json_names = [json_names[test_idx] for test_idx in test_ids] test_json_names = [json_names[test_idx] for test_idx in test_ids]
@ -240,49 +216,41 @@ class Labelme2YOLO:
def convert(self, val_size, test_size): def convert(self, val_size, test_size):
"""Convert labelme format to yolo format""" """Convert labelme format to yolo format"""
json_names = [file_name for file_name in os.listdir(self._json_dir) json_names = glob.glob(
if os.path.isfile(os.path.join(self._json_dir, file_name)) and os.path.join(self._json_dir, "**", "*.json"), recursive=True
file_name.endswith('.json')] )
folders = [file_name for file_name in os.listdir(self._json_dir) json_names = sorted(json_names)
if os.path.isdir(os.path.join(self._json_dir, file_name))]
train_json_names, val_json_names, test_json_names = self._train_test_split( train_json_names, val_json_names, test_json_names = self._train_test_split(
folders, json_names, val_size, test_size) json_names, val_size, test_size
)
self._make_train_val_dir() self._make_train_val_dir()
# convert labelme object to yolo format object, and save them to files # convert labelme object to yolo format object, and save them to files
# also get image from labelme json file and save them under images folder # also get image from labelme json file and save them under images folder
dirs = ('train/', 'val/', 'test/') dirs = ("train/", "val/", "test/")
names = (train_json_names, val_json_names, test_json_names) names = (train_json_names, val_json_names, test_json_names)
for target_dir, json_names in zip(dirs, names): for target_dir, json_names in zip(dirs, names):
target_part = target_dir.replace("/", "")
logger.info("Converting %s set ...", target_part)
for json_name in tqdm.tqdm(json_names):
self.covert_json_to_text(target_dir, json_name)
with Pool(NUM_THREADS) as pool:
for json_name in json_names:
pool.apply_async(self.covert_json_to_text,
args=(target_dir, json_name))
pool.close()
pool.join()
print('Generating dataset.yaml file ...')
self._save_dataset_yaml() self._save_dataset_yaml()
def covert_json_to_text(self, target_dir, json_name): def covert_json_to_text(self, target_dir, json_name):
"""Convert json file to yolo format text file and save them to files""" """Convert json file to yolo format text file and save them to files"""
json_path = os.path.join(self._json_dir, json_name) with open(json_name, encoding="utf-8") as file:
with open(json_path, encoding="utf-8") as file:
json_data = json.load(file) json_data = json.load(file)
print(f"Converting {json_name} for {target_dir.replace('/', '')} ...") filename: str = uuid.UUID(int=random.Random().getrandbits(128)).hex
image_name = f"{filename}.png"
img_path = save_yolo_image(json_data, label_name = f"{filename}.txt"
json_path, img_path = save_yolo_image(
self._image_dir_path, json_data, self._json_dir, self._image_dir_path, target_dir, image_name
target_dir) )
yolo_obj_list = self._get_yolo_object_list(json_data, img_path) yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
save_yolo_label(json_name, save_yolo_label(yolo_obj_list, self._label_dir_path, target_dir, label_name)
self._label_dir_path,
target_dir,
yolo_obj_list)
def convert_one(self, json_name): def convert_one(self, json_name):
"""Convert one json file to yolo format text file and save them to files""" """Convert one json file to yolo format text file and save them to files"""
@ -290,14 +258,14 @@ class Labelme2YOLO:
with open(json_path, encoding="utf-8") as file: with open(json_path, encoding="utf-8") as file:
json_data = json.load(file) json_data = json.load(file)
print(f'Converting {json_name} ...') image_name = json_name.replace(".json", ".png")
label_name = json_name.replace(".json", ".txt")
img_path = save_yolo_image(json_data, json_name, img_path = save_yolo_image(
self._json_dir, '') json_data, self._json_dir, self._image_dir_path, "", image_name
)
yolo_obj_list = self._get_yolo_object_list(json_data, img_path) yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
save_yolo_label(json_name, self._json_dir, save_yolo_label(yolo_obj_list, self._label_dir_path, "", label_name)
'', yolo_obj_list)
def _get_yolo_object_list(self, json_data, img_path): def _get_yolo_object_list(self, json_data, img_path):
yolo_obj_list = [] yolo_obj_list = []
@ -306,22 +274,23 @@ class Labelme2YOLO:
for shape in json_data["shapes"]: for shape in json_data["shapes"]:
# labelme circle shape is different from others # labelme circle shape is different from others
# it only has 2 points, 1st is circle center, 2nd is drag end point # it only has 2 points, 1st is circle center, 2nd is drag end point
if shape['shape_type'] == 'circle': if shape["shape_type"] == "circle":
yolo_obj = self._get_circle_shape_yolo_object( yolo_obj = self._get_circle_shape_yolo_object(shape, img_h, img_w)
shape, img_h, img_w)
else: else:
yolo_obj = self._get_other_shape_yolo_object( yolo_obj = self._get_other_shape_yolo_object(shape, img_h, img_w)
shape, img_h, img_w)
yolo_obj_list.append(yolo_obj) if yolo_obj:
yolo_obj_list.append(yolo_obj)
return yolo_obj_list return yolo_obj_list
def _get_circle_shape_yolo_object(self, shape, img_h, img_w): def _get_circle_shape_yolo_object(self, shape, img_h, img_w):
obj_center_x, obj_center_y = shape['points'][0] obj_center_x, obj_center_y = shape["points"][0]
radius = math.sqrt((obj_center_x - shape['points'][1][0]) ** 2 + radius = math.sqrt(
(obj_center_y - shape['points'][1][1]) ** 2) (obj_center_x - shape["points"][1][0]) ** 2
+ (obj_center_y - shape["points"][1][1]) ** 2
)
obj_w = 2 * radius obj_w = 2 * radius
obj_h = 2 * radius obj_h = 2 * radius
@ -330,46 +299,55 @@ class Labelme2YOLO:
yolo_w = round(float(obj_w / img_w), 6) yolo_w = round(float(obj_w / img_w), 6)
yolo_h = round(float(obj_h / img_h), 6) yolo_h = round(float(obj_h / img_h), 6)
if shape['label'] in self._label_id_map: if shape["label"]:
label_id = self._label_id_map[shape['label']] label = shape["label"]
if label not in self._label_list:
self._update_id_map(label)
label_id = self._label_id_map[shape["label"]]
return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h
raise f"label {shape['label']} not in {self._label_list}" return None
def _get_other_shape_yolo_object(self, shape, img_h, img_w): def _get_other_shape_yolo_object(self, shape, img_h, img_w):
point_list = shape["points"]
point_list = shape['points']
points = np.zeros(2 * len(point_list)) points = np.zeros(2 * len(point_list))
points[::2] = [float(point[0]) / img_w for point in point_list] points[::2] = [float(point[0]) / img_w for point in point_list]
points[1::2] = [float(point[1]) / img_h for point in point_list] points[1::2] = [float(point[1]) / img_h for point in point_list]
if len(points) == 4: if len(points) == 4:
if self._output_format == "polygon": if self._output_format == "polygon":
points = extend_point_list(points) points = extend_point_list(points)
if self._output_format == "bbox": if self._output_format == "bbox":
points = extend_point_list(points, "bbox") points = extend_point_list(points, "bbox")
if shape['label'] in self._label_id_map: if shape["label"]:
label_id = self._label_id_map[shape['label']] label = shape["label"]
if label not in self._label_list:
self._update_id_map(label)
label_id = self._label_id_map[shape["label"]]
return label_id, points.tolist() return label_id, points.tolist()
raise f"label {shape['label']} not in {self._label_list}" return None
def _save_dataset_yaml(self): def _save_dataset_yaml(self):
yaml_path = os.path.join( yaml_path = os.path.join(self._json_dir, "YOLODataset/", "dataset.yaml")
self._json_dir, 'YOLODataset/', 'dataset.yaml')
with open(yaml_path, 'w+', encoding="utf-8") as yaml_file: with open(yaml_path, "w+", encoding="utf-8") as yaml_file:
train_dir = os.path.join(self._image_dir_path, 'train/') train_dir = os.path.join(self._image_dir_path, "train/")
val_dir = os.path.join(self._image_dir_path, 'val/') val_dir = os.path.join(self._image_dir_path, "val/")
test_dir = os.path.join(self._image_dir_path, 'test/') test_dir = os.path.join(self._image_dir_path, "test/")
names_str = '' names_str = ""
for label, _ in self._label_id_map.items(): for label, _ in self._label_id_map.items():
names_str += f"\"{label}\", " names_str += f'"{label}", '
names_str = names_str.rstrip(", ") names_str = names_str.rstrip(", ")
content = (f"train: {train_dir}\nval: {val_dir}\ntest: {test_dir}\n" content = (
f"nc: {len(self._label_id_map)}\n" f"train: {train_dir}\nval: {val_dir}\ntest: {test_dir}\n"
f"names: [{names_str}]") f"nc: {len(self._label_id_map)}\n"
f"names: [{names_str}]"
)
yaml_file.write(content) yaml_file.write(content)