support recursive search dirs (#38)

* support recursive search dirs

* fix pylint error
This commit is contained in:
Wang Xin 2023-12-19 15:22:47 +08:00 committed by GitHub
parent dba6184a50
commit cc4171e182
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 108 additions and 129 deletions

View File

@ -1,3 +1,4 @@
opencv-python
Pillow
numpy
tqdm

View File

@ -4,4 +4,4 @@
'''
about version
'''
__version__ = '0.1.3'
__version__ = '0.1.4'

View File

@ -5,26 +5,30 @@ Created on Aug 18, 2021
@author: GreatV(Wang Xin)
"""
import base64
import glob
import io
import json
import math
import os
import random
import shutil
from collections import OrderedDict
from multiprocessing import Pool
import uuid
import logging
import PIL.ExifTags
import PIL.Image
import PIL.ImageOps
import cv2
import numpy as np
import tqdm
# Seed the module-level RNGs so that runs are repeatable.
# NOTE: seeding a freshly created ``random.Random()`` instance has no lasting
# effect (the instance is discarded immediately), so only the global
# generators are seeded here.
random.seed(12345678)
np.random.seed(12345678)

# number of LabelMe2YOLO multiprocessing workers; os.cpu_count() may return
# None when the CPU count cannot be determined, so fall back to one worker.
NUM_THREADS = max(1, (os.cpu_count() or 1) - 1)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("labelme2yolo")
def train_test_split(dataset_index, test_size=0.2):
@ -97,21 +101,6 @@ def img_data_to_png_data(img_data):
return f_in.read()
def get_label_id_map(json_dir: str):
    """Get label id map from the json files directly inside json_dir.

    Every regular file whose name ends with ".json" is loaded and the
    "label" of each entry in its "shapes" list is collected.

    Args:
        json_dir: directory that is scanned (not recursively).

    Returns:
        OrderedDict mapping label -> label id. Ids are assigned in sorted
        label order so the mapping is identical from run to run (iteration
        order of a set of strings is not stable between interpreter runs).
    """
    label_set = set()
    for file_name in os.listdir(json_dir):
        json_path = os.path.join(json_dir, file_name)
        # Skip non-json entries and directories that merely end in ".json".
        if not file_name.endswith(".json") or not os.path.isfile(json_path):
            continue
        with open(json_path, encoding="utf-8") as file:
            data = json.load(file)
        # A file without a "shapes" key contributes no labels.
        for shape in data.get("shapes") or []:
            label_set.add(shape["label"])

    return OrderedDict(
        (label, label_id) for label_id, label in enumerate(sorted(label_set))
    )
def extend_point_list(point_list, out_format="polygon"):
"""Extend point list to polygon or bbox"""
x_min = min(float(point) for point in point_list[::2])
@ -131,32 +120,24 @@ def extend_point_list(point_list, out_format="polygon"):
return np.array([x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max])
def save_yolo_label(obj_list, label_dir, target_dir, target_name):
    """Save yolo label to txt file.

    One line is written per object: "<label> <v1> <v2> ...".

    Args:
        obj_list: iterable of yolo objects. Each object is either
            ``(label, [v1, v2, ...])`` or the flat form
            ``(label, v1, v2, ...)`` that the circle shape handler returns.
        label_dir: root directory of the label files.
        target_dir: sub directory below label_dir (e.g. "train/").
        target_name: file name of the txt file to write.
    """
    txt_path = os.path.join(label_dir, target_dir, target_name)

    lines = []
    for obj in obj_list:
        label, *values = obj
        # Unwrap the (label, [values]) form; the flat form is used as is.
        if (
            len(values) == 1
            and hasattr(values[0], "__iter__")
            and not isinstance(values[0], (str, bytes))
        ):
            values = values[0]
        lines.append(f"{label} {' '.join(str(item) for item in values)}\n")

    # Plain write mode is enough; the file is never read back here.
    with open(txt_path, "w", encoding="utf-8") as file:
        file.writelines(lines)
def save_yolo_image(json_data, json_path, image_dir_path, target_dir):
def save_yolo_image(json_data, json_dir, image_dir, target_dir, target_name):
"""Save yolo image to image_dir_path/target_dir"""
json_name = os.path.basename(json_path)
img_name = json_name.replace(".json", ".png")
# make image_path and save image
img_path = os.path.join(image_dir_path, target_dir, img_name)
img_path = os.path.join(image_dir, target_dir, target_name)
if json_data["imageData"] is None:
dirname = os.path.dirname(json_path)
image_name = json_data["imagePath"]
src_image_name = os.path.join(dirname, image_name)
src_image_name = os.path.join(json_dir, image_name)
src_image = cv2.imread(src_image_name)
cv2.imwrite(img_path, src_image)
else:
@ -170,31 +151,36 @@ class Labelme2YOLO:
"""Labelme to YOLO format converter"""
def __init__(self, json_dir, output_format, label_list):
    """Set up the converter.

    Args:
        json_dir: directory holding the labelme json files; a leading "~"
            is expanded to the user's home directory.
        output_format: compared against "polygon" and "bbox" when shapes
            are converted.
        label_list: optional list of known labels. Their position in the
            list becomes the label id. When empty or None, the label list
            and id map start empty and are filled through _update_id_map.
    """
    self._json_dir = os.path.expanduser(json_dir)
    self._output_format = output_format
    self._label_dir_path = ""
    self._image_dir_path = ""

    # Copy the list: _update_id_map appends to it and must not modify the
    # list object owned by the caller.
    self._label_list = list(label_list) if label_list else []
    self._label_id_map = {
        label: label_id for label_id, label in enumerate(self._label_list)
    }
def _update_id_map(self, label: str):
    """Register label with the next free label id if it is not known yet.

    Calling this for an already registered label changes nothing.
    """
    if label not in self._label_id_map:
        # Use max id + 1 rather than len(map): when the initial label list
        # contained duplicates the map is shorter than the highest id, and
        # len(map) would hand out an id that is already in use.
        next_id = max(self._label_id_map.values(), default=-1) + 1
        self._label_id_map[label] = next_id
    if label not in self._label_list:
        self._label_list.append(label)
def _make_train_val_dir(self):
self._label_dir_path = os.path.join(self._json_dir,
'YOLODataset/labels/')
self._image_dir_path = os.path.join(self._json_dir,
'YOLODataset/images/')
self._label_dir_path = os.path.join(self._json_dir, "YOLODataset/labels/")
self._image_dir_path = os.path.join(self._json_dir, "YOLODataset/images/")
for yolo_path in (os.path.join(self._label_dir_path + 'train/'),
os.path.join(self._label_dir_path + 'val/'),
os.path.join(self._label_dir_path + 'test/'),
os.path.join(self._image_dir_path + 'train/'),
os.path.join(self._image_dir_path + 'val/'),
os.path.join(self._image_dir_path + 'test/')):
for yolo_path in (
os.path.join(self._label_dir_path + "train/"),
os.path.join(self._label_dir_path + "val/"),
os.path.join(self._label_dir_path + "test/"),
os.path.join(self._image_dir_path + "train/"),
os.path.join(self._image_dir_path + "val/"),
os.path.join(self._image_dir_path + "test/"),
):
if os.path.exists(yolo_path):
shutil.rmtree(yolo_path)
@ -207,31 +193,21 @@ class Labelme2YOLO:
for sample_name in os.listdir(set_folder):
set_dir = os.path.join(set_folder, sample_name)
if os.path.isdir(set_dir):
json_names.append(sample_name + '.json')
json_names.append(sample_name + ".json")
return json_names
def _train_test_split(self, folders, json_names, val_size, test_size):
def _train_test_split(self, json_names, val_size, test_size):
"""Split json names to train, val, test"""
if (len(folders) > 0 and
'train' in folders and
'val' in folders and
'test' in folders):
train_json_names = self._get_dataset_part_json_names('train')
val_json_names = self._get_dataset_part_json_names('val')
test_json_names = self._get_dataset_part_json_names('test')
return train_json_names, val_json_names, test_json_names
total_size = len(json_names)
dataset_index = list(range(total_size))
train_ids, val_ids = train_test_split(dataset_index,
test_size=val_size)
train_ids, val_ids = train_test_split(dataset_index, test_size=val_size)
test_ids = []
if test_size is None:
test_size = 0.0
if test_size > 1e-8:
train_ids, test_ids = train_test_split(
train_ids, test_size=test_size / (1 - val_size))
train_ids, test_size=test_size / (1 - val_size)
)
train_json_names = [json_names[train_idx] for train_idx in train_ids]
val_json_names = [json_names[val_idx] for val_idx in val_ids]
test_json_names = [json_names[test_idx] for test_idx in test_ids]
@ -240,49 +216,41 @@ class Labelme2YOLO:
def convert(self, val_size, test_size):
    """Convert labelme format to yolo format.

    Collects every "*.json" file below the json directory (recursively),
    splits the files into train/val/test parts, converts each file and
    finally writes dataset.yaml.

    Args:
        val_size: share of the files used for the validation part.
        test_size: share of the files used for the test part.
    """
    # Sort so the split does not depend on file system enumeration order.
    json_names = sorted(
        glob.glob(os.path.join(self._json_dir, "**", "*.json"), recursive=True)
    )

    train_json_names, val_json_names, test_json_names = self._train_test_split(
        json_names, val_size, test_size
    )

    self._make_train_val_dir()

    # convert labelme object to yolo format object, and save them to files
    # also get image from labelme json file and save them under images folder
    dirs = ("train/", "val/", "test/")
    names = (train_json_names, val_json_names, test_json_names)
    for target_dir, part_json_names in zip(dirs, names):
        target_part = target_dir.replace("/", "")
        logger.info("Converting %s set ...", target_part)
        # Converted sequentially in this process: the conversion registers
        # newly seen labels on self, and those updates would not reach this
        # process if the work ran in pool workers.
        for json_name in tqdm.tqdm(part_json_names):
            self.covert_json_to_text(target_dir, json_name)

    logger.info("Generating dataset.yaml file ...")
    self._save_dataset_yaml()
def covert_json_to_text(self, target_dir, json_name):
    """Convert json file to yolo format text file and save them to files.

    Args:
        target_dir: dataset part sub directory ("train/", "val/" or "test/").
        json_name: path of the labelme json file; it is opened as given.
    """
    with open(json_name, encoding="utf-8") as file:
        json_data = json.load(file)

    # Random file stem: json files are collected recursively, so files with
    # the same name in different sub directories must not overwrite each
    # other in the flat output directory.
    filename: str = uuid.uuid4().hex
    image_name = f"{filename}.png"
    label_name = f"{filename}.txt"

    # Pass the directory of this json file (not the root json dir) so that
    # the image path stored in the json resolves for files found in sub
    # directories as well.
    img_path = save_yolo_image(
        json_data,
        os.path.dirname(json_name),
        self._image_dir_path,
        target_dir,
        image_name,
    )
    yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
    save_yolo_label(yolo_obj_list, self._label_dir_path, target_dir, label_name)
def convert_one(self, json_name):
"""Convert one json file to yolo format text file and save them to files"""
@ -290,14 +258,14 @@ class Labelme2YOLO:
with open(json_path, encoding="utf-8") as file:
json_data = json.load(file)
print(f'Converting {json_name} ...')
img_path = save_yolo_image(json_data, json_name,
self._json_dir, '')
image_name = json_name.replace(".json", ".png")
label_name = json_name.replace(".json", ".txt")
img_path = save_yolo_image(
json_data, self._json_dir, self._image_dir_path, "", image_name
)
yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
save_yolo_label(json_name, self._json_dir,
'', yolo_obj_list)
save_yolo_label(yolo_obj_list, self._label_dir_path, "", label_name)
def _get_yolo_object_list(self, json_data, img_path):
yolo_obj_list = []
@ -306,22 +274,23 @@ class Labelme2YOLO:
for shape in json_data["shapes"]:
# labelme circle shape is different from others
# it only has 2 points, 1st is circle center, 2nd is drag end point
if shape['shape_type'] == 'circle':
yolo_obj = self._get_circle_shape_yolo_object(
shape, img_h, img_w)
if shape["shape_type"] == "circle":
yolo_obj = self._get_circle_shape_yolo_object(shape, img_h, img_w)
else:
yolo_obj = self._get_other_shape_yolo_object(
shape, img_h, img_w)
yolo_obj = self._get_other_shape_yolo_object(shape, img_h, img_w)
yolo_obj_list.append(yolo_obj)
if yolo_obj:
yolo_obj_list.append(yolo_obj)
return yolo_obj_list
def _get_circle_shape_yolo_object(self, shape, img_h, img_w):
obj_center_x, obj_center_y = shape['points'][0]
obj_center_x, obj_center_y = shape["points"][0]
radius = math.sqrt((obj_center_x - shape['points'][1][0]) ** 2 +
(obj_center_y - shape['points'][1][1]) ** 2)
radius = math.sqrt(
(obj_center_x - shape["points"][1][0]) ** 2
+ (obj_center_y - shape["points"][1][1]) ** 2
)
obj_w = 2 * radius
obj_h = 2 * radius
@ -330,46 +299,55 @@ class Labelme2YOLO:
yolo_w = round(float(obj_w / img_w), 6)
yolo_h = round(float(obj_h / img_h), 6)
if shape['label'] in self._label_id_map:
label_id = self._label_id_map[shape['label']]
if shape["label"]:
label = shape["label"]
if label not in self._label_list:
self._update_id_map(label)
label_id = self._label_id_map[shape["label"]]
return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h
raise f"label {shape['label']} not in {self._label_list}"
return None
def _get_other_shape_yolo_object(self, shape, img_h, img_w):
    """Convert a non-circle labelme shape into a yolo object.

    Args:
        shape: labelme shape dict; "label" and "points" are read.
        img_h: image height used to normalise the y coordinates.
        img_w: image width used to normalise the x coordinates.

    Returns:
        (label_id, [x1, y1, x2, y2, ...]) with normalised coordinates, or
        None when the shape has no label.
    """
    label = shape["label"]
    if not label:
        # Unlabeled shapes are skipped.
        return None

    point_list = shape["points"]
    points = np.zeros(2 * len(point_list))
    points[::2] = [float(point[0]) / img_w for point in point_list]
    points[1::2] = [float(point[1]) / img_h for point in point_list]

    # Two points (four values) are expanded by extend_point_list according
    # to the configured output format.
    if len(points) == 4:
        if self._output_format == "polygon":
            points = extend_point_list(points)
        if self._output_format == "bbox":
            points = extend_point_list(points, "bbox")

    # Register labels that were not seen before, then look up the id.
    if label not in self._label_list:
        self._update_id_map(label)
    label_id = self._label_id_map[label]

    return label_id, points.tolist()
def _save_dataset_yaml(self):
    """Write YOLODataset/dataset.yaml below the json directory.

    The file lists the train/val/test image directories, the number of
    classes and the class names in label id map order.
    """
    import json  # local import: only needed to quote the label names

    yaml_path = os.path.join(self._json_dir, "YOLODataset/", "dataset.yaml")

    train_dir = os.path.join(self._image_dir_path, "train/")
    val_dir = os.path.join(self._image_dir_path, "val/")
    test_dir = os.path.join(self._image_dir_path, "test/")

    # json.dumps yields a double-quoted string with quotes and backslashes
    # escaped, which is also a valid YAML scalar; plain labels come out
    # exactly as "label".
    names_str = ", ".join(
        json.dumps(str(label), ensure_ascii=False) for label in self._label_id_map
    )

    content = (
        f"train: {train_dir}\nval: {val_dir}\ntest: {test_dir}\n"
        f"nc: {len(self._label_id_map)}\n"
        f"names: [{names_str}]"
    )

    with open(yaml_path, "w", encoding="utf-8") as yaml_file:
        yaml_file.write(content)