From cc4171e182c65f82ef51e98f04da9e9f868bd3e2 Mon Sep 17 00:00:00 2001 From: Wang Xin Date: Tue, 19 Dec 2023 15:22:47 +0800 Subject: [PATCH] support recursive search dirs (#38) * support recursive search dirs * fix pylint error --- requirements.txt | 1 + src/labelme2yolo/__about__.py | 2 +- src/labelme2yolo/l2y.py | 234 +++++++++++++++------------------- 3 files changed, 108 insertions(+), 129 deletions(-) diff --git a/requirements.txt b/requirements.txt index 64ea0ae..c525125 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ opencv-python Pillow numpy +tqdm diff --git a/src/labelme2yolo/__about__.py b/src/labelme2yolo/__about__.py index 76a87c9..2387de3 100644 --- a/src/labelme2yolo/__about__.py +++ b/src/labelme2yolo/__about__.py @@ -4,4 +4,4 @@ ''' about version ''' -__version__ = '0.1.3' +__version__ = '0.1.4' diff --git a/src/labelme2yolo/l2y.py b/src/labelme2yolo/l2y.py index 850e177..9da24cd 100644 --- a/src/labelme2yolo/l2y.py +++ b/src/labelme2yolo/l2y.py @@ -5,26 +5,30 @@ Created on Aug 18, 2021 @author: GreatV(Wang Xin) """ import base64 +import glob import io import json import math import os import random import shutil -from collections import OrderedDict -from multiprocessing import Pool +import uuid +import logging import PIL.ExifTags import PIL.Image import PIL.ImageOps import cv2 import numpy as np +import tqdm +# set seed random.seed(12345678) +random.Random().seed(12345678) np.random.seed(12345678) -# number of LabelMe2YOLO multiprocessing threads -NUM_THREADS = max(1, os.cpu_count() - 1) +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger("labelme2yolo") def train_test_split(dataset_index, test_size=0.2): @@ -97,21 +101,6 @@ def img_data_to_png_data(img_data): return f_in.read() -def get_label_id_map(json_dir: str): - """Get label id map from json files in json_dir""" - label_set = set() - - for file_name in os.listdir(json_dir): - if file_name.endswith("json"): - json_path = os.path.join(json_dir, file_name) - with open(json_path, encoding="utf-8") as file: - data = json.load(file) - for shape in data["shapes"]: - label_set.add(shape["label"]) - - return OrderedDict([(label, label_id) for label_id, label in enumerate(label_set)]) - - def extend_point_list(point_list, out_format="polygon"): """Extend point list to polygon or bbox""" x_min = min(float(point) for point in point_list[::2]) @@ -131,32 +120,24 @@ def extend_point_list(point_list, out_format="polygon"): return np.array([x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max]) -def save_yolo_label(json_name, label_dir_path, target_dir, yolo_obj_list): +def save_yolo_label(obj_list, label_dir, target_dir, target_name): """Save yolo label to txt file""" - txt_path = os.path.join(label_dir_path, - target_dir, - json_name.replace(".json", ".txt")) + txt_path = os.path.join(label_dir, target_dir, target_name) with open(txt_path, "w+", encoding="utf-8") as file: - for yolo_obj in yolo_obj_list: - label, points = yolo_obj + for label, points in obj_list: points = [str(item) for item in points] - yolo_obj_line = f"{label} {' '.join(points)}\n" - file.write(yolo_obj_line) + line = f"{label} {' '.join(points)}\n" + file.write(line) -def save_yolo_image(json_data, json_path, image_dir_path, target_dir): +def save_yolo_image(json_data, json_dir, image_dir, target_dir, target_name): """Save yolo image to image_dir_path/target_dir""" - json_name = os.path.basename(json_path) - img_name = json_name.replace(".json", ".png") - - # make image_path and save image - img_path = os.path.join(image_dir_path, target_dir, img_name) + img_path = os.path.join(image_dir, target_dir, target_name) if json_data["imageData"] is None: - dirname = os.path.dirname(json_path) image_name = json_data["imagePath"] - src_image_name = os.path.join(dirname, image_name) + src_image_name = os.path.join(json_dir, image_name) src_image = cv2.imread(src_image_name) cv2.imwrite(img_path, src_image) else: @@ -170,31 +151,36 @@ class Labelme2YOLO: """Labelme to YOLO format converter""" def __init__(self, json_dir, output_format, label_list): - self._json_dir = json_dir + self._json_dir = os.path.expanduser(json_dir) self._output_format = output_format - self._label_list = label_list + self._label_list = [] + self._label_id_map = {} self._label_dir_path = "" self._image_dir_path = "" if label_list: - self._label_id_map = {label: label_id - for label_id, label in enumerate(label_list)} - else: - self._label_id_map = get_label_id_map(self._json_dir) - self._label_list = list(self._label_id_map.keys()) + self._label_list = label_list + self._label_id_map = { + label: label_id for label_id, label in enumerate(label_list) + } + + def _update_id_map(self, label: str): + if label not in self._label_list: + self._label_list.append(label) + self._label_id_map[label] = len(self._label_id_map) def _make_train_val_dir(self): - self._label_dir_path = os.path.join(self._json_dir, - 'YOLODataset/labels/') - self._image_dir_path = os.path.join(self._json_dir, - 'YOLODataset/images/') + self._label_dir_path = os.path.join(self._json_dir, "YOLODataset/labels/") + self._image_dir_path = os.path.join(self._json_dir, "YOLODataset/images/") - for yolo_path in (os.path.join(self._label_dir_path + 'train/'), - os.path.join(self._label_dir_path + 'val/'), - os.path.join(self._label_dir_path + 'test/'), - os.path.join(self._image_dir_path + 'train/'), - os.path.join(self._image_dir_path + 'val/'), - os.path.join(self._image_dir_path + 'test/')): + for yolo_path in ( + os.path.join(self._label_dir_path + "train/"), + os.path.join(self._label_dir_path + "val/"), + os.path.join(self._label_dir_path + "test/"), + os.path.join(self._image_dir_path + "train/"), + os.path.join(self._image_dir_path + "val/"), + os.path.join(self._image_dir_path + "test/"), + ): if os.path.exists(yolo_path): shutil.rmtree(yolo_path) @@ -207,31 +193,21 @@ class Labelme2YOLO: for sample_name in os.listdir(set_folder): set_dir = os.path.join(set_folder, sample_name) if os.path.isdir(set_dir): - json_names.append(sample_name + '.json') + json_names.append(sample_name + ".json") return json_names - def _train_test_split(self, folders, json_names, val_size, test_size): + def _train_test_split(self, json_names, val_size, test_size): """Split json names to train, val, test""" - if (len(folders) > 0 and - 'train' in folders and - 'val' in folders and - 'test' in folders): - train_json_names = self._get_dataset_part_json_names('train') - val_json_names = self._get_dataset_part_json_names('val') - test_json_names = self._get_dataset_part_json_names('test') - - return train_json_names, val_json_names, test_json_names - total_size = len(json_names) dataset_index = list(range(total_size)) - train_ids, val_ids = train_test_split(dataset_index, - test_size=val_size) + train_ids, val_ids = train_test_split(dataset_index, test_size=val_size) test_ids = [] if test_size is None: test_size = 0.0 if test_size > 1e-8: train_ids, test_ids = train_test_split( - train_ids, test_size=test_size / (1 - val_size)) + train_ids, test_size=test_size / (1 - val_size) + ) train_json_names = [json_names[train_idx] for train_idx in train_ids] val_json_names = [json_names[val_idx] for val_idx in val_ids] test_json_names = [json_names[test_idx] for test_idx in test_ids] @@ -240,49 +216,41 @@ class Labelme2YOLO: def convert(self, val_size, test_size): """Convert labelme format to yolo format""" - json_names = [file_name for file_name in os.listdir(self._json_dir) - if os.path.isfile(os.path.join(self._json_dir, file_name)) and - file_name.endswith('.json')] - folders = [file_name for file_name in os.listdir(self._json_dir) - if os.path.isdir(os.path.join(self._json_dir, file_name))] + json_names = glob.glob( + os.path.join(self._json_dir, "**", "*.json"), recursive=True + ) + json_names = sorted(json_names) train_json_names, val_json_names, test_json_names = self._train_test_split( - folders, json_names, val_size, test_size) + json_names, val_size, test_size + ) self._make_train_val_dir() # convert labelme object to yolo format object, and save them to files # also get image from labelme json file and save them under images folder - dirs = ('train/', 'val/', 'test/') + dirs = ("train/", "val/", "test/") names = (train_json_names, val_json_names, test_json_names) for target_dir, json_names in zip(dirs, names): + target_part = target_dir.replace("/", "") + logger.info("Converting %s set ...", target_part) + for json_name in tqdm.tqdm(json_names): + self.covert_json_to_text(target_dir, json_name) - with Pool(NUM_THREADS) as pool: - for json_name in json_names: - pool.apply_async(self.covert_json_to_text, - args=(target_dir, json_name)) - pool.close() - pool.join() - - print('Generating dataset.yaml file ...') self._save_dataset_yaml() def covert_json_to_text(self, target_dir, json_name): """Convert json file to yolo format text file and save them to files""" - json_path = os.path.join(self._json_dir, json_name) - with open(json_path, encoding="utf-8") as file: + with open(json_name, encoding="utf-8") as file: json_data = json.load(file) - print(f"Converting {json_name} for {target_dir.replace('/', '')} ...") - - img_path = save_yolo_image(json_data, - json_path, - self._image_dir_path, - target_dir) + filename: str = uuid.UUID(int=random.Random().getrandbits(128)).hex + image_name = f"{filename}.png" + label_name = f"{filename}.txt" + img_path = save_yolo_image( + json_data, self._json_dir, self._image_dir_path, target_dir, image_name + ) yolo_obj_list = self._get_yolo_object_list(json_data, img_path) - save_yolo_label(json_name, - self._label_dir_path, - target_dir, - yolo_obj_list) + save_yolo_label(yolo_obj_list, self._label_dir_path, target_dir, label_name) def convert_one(self, json_name): """Convert one json file to yolo format text file and save them to files""" @@ -290,14 +258,14 @@ class Labelme2YOLO: with open(json_path, encoding="utf-8") as file: json_data = json.load(file) - print(f'Converting {json_name} ...') - - img_path = save_yolo_image(json_data, json_name, - self._json_dir, '') + image_name = json_name.replace(".json", ".png") + label_name = json_name.replace(".json", ".txt") + img_path = save_yolo_image( + json_data, self._json_dir, self._image_dir_path, "", image_name + ) yolo_obj_list = self._get_yolo_object_list(json_data, img_path) - save_yolo_label(json_name, self._json_dir, - '', yolo_obj_list) + save_yolo_label(yolo_obj_list, self._label_dir_path, "", label_name) def _get_yolo_object_list(self, json_data, img_path): yolo_obj_list = [] @@ -306,22 +274,23 @@ class Labelme2YOLO: for shape in json_data["shapes"]: # labelme circle shape is different from others # it only has 2 points, 1st is circle center, 2nd is drag end point - if shape['shape_type'] == 'circle': - yolo_obj = self._get_circle_shape_yolo_object( - shape, img_h, img_w) + if shape["shape_type"] == "circle": + yolo_obj = self._get_circle_shape_yolo_object(shape, img_h, img_w) else: - yolo_obj = self._get_other_shape_yolo_object( - shape, img_h, img_w) + yolo_obj = self._get_other_shape_yolo_object(shape, img_h, img_w) - yolo_obj_list.append(yolo_obj) + if yolo_obj: + yolo_obj_list.append(yolo_obj) return yolo_obj_list def _get_circle_shape_yolo_object(self, shape, img_h, img_w): - obj_center_x, obj_center_y = shape['points'][0] + obj_center_x, obj_center_y = shape["points"][0] - radius = math.sqrt((obj_center_x - shape['points'][1][0]) ** 2 + - (obj_center_y - shape['points'][1][1]) ** 2) + radius = math.sqrt( + (obj_center_x - shape["points"][1][0]) ** 2 + + (obj_center_y - shape["points"][1][1]) ** 2 + ) obj_w = 2 * radius obj_h = 2 * radius @@ -330,46 +299,55 @@ class Labelme2YOLO: yolo_w = round(float(obj_w / img_w), 6) yolo_h = round(float(obj_h / img_h), 6) - if shape['label'] in self._label_id_map: - label_id = self._label_id_map[shape['label']] + if shape["label"]: + label = shape["label"] + if label not in self._label_list: + self._update_id_map(label) + label_id = self._label_id_map[shape["label"]] + return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h - raise f"label {shape['label']} not in {self._label_list}" + return None def _get_other_shape_yolo_object(self, shape, img_h, img_w): - - point_list = shape['points'] + point_list = shape["points"] points = np.zeros(2 * len(point_list)) points[::2] = [float(point[0]) / img_w for point in point_list] points[1::2] = [float(point[1]) / img_h for point in point_list] + if len(points) == 4: if self._output_format == "polygon": points = extend_point_list(points) if self._output_format == "bbox": points = extend_point_list(points, "bbox") - if shape['label'] in self._label_id_map: - label_id = self._label_id_map[shape['label']] + if shape["label"]: + label = shape["label"] + if label not in self._label_list: + self._update_id_map(label) + label_id = self._label_id_map[shape["label"]] + return label_id, points.tolist() - raise f"label {shape['label']} not in {self._label_list}" + return None def _save_dataset_yaml(self): - yaml_path = os.path.join( - self._json_dir, 'YOLODataset/', 'dataset.yaml') + yaml_path = os.path.join(self._json_dir, "YOLODataset/", "dataset.yaml") - with open(yaml_path, 'w+', encoding="utf-8") as yaml_file: - train_dir = os.path.join(self._image_dir_path, 'train/') - val_dir = os.path.join(self._image_dir_path, 'val/') - test_dir = os.path.join(self._image_dir_path, 'test/') + with open(yaml_path, "w+", encoding="utf-8") as yaml_file: + train_dir = os.path.join(self._image_dir_path, "train/") + val_dir = os.path.join(self._image_dir_path, "val/") + test_dir = os.path.join(self._image_dir_path, "test/") - names_str = '' + names_str = "" for label, _ in self._label_id_map.items(): - names_str += f"\"{label}\", " + names_str += f'"{label}", ' names_str = names_str.rstrip(", ") - content = (f"train: {train_dir}\nval: {val_dir}\ntest: {test_dir}\n" - f"nc: {len(self._label_id_map)}\n" - f"names: [{names_str}]") + content = ( + f"train: {train_dir}\nval: {val_dir}\ntest: {test_dir}\n" + f"nc: {len(self._label_id_map)}\n" + f"names: [{names_str}]" + ) yaml_file.write(content)