diff --git a/README.md b/README.md index 5e283bb..6df8de5 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,12 @@ Help converting LabelMe Annotation Tool JSON format to YOLO text file format. If you've already marked your segmentation dataset by LabelMe, it's easy to use this tool to help converting to YOLO format dataset. +--------- + +## New + +- export data as yolo polygon annotation (for YOLOv5 v7.0 segmentation) + ## Installation ```console diff --git a/src/labelme2yolo/__about__.py b/src/labelme2yolo/__about__.py index 118c1e3..2fc3af0 100644 --- a/src/labelme2yolo/__about__.py +++ b/src/labelme2yolo/__about__.py @@ -1,4 +1,5 @@ # SPDX-FileCopyrightText: 2022-present Wang Xin # # SPDX-License-Identifier: MIT -__version__ = "0.0.2" + +__version__ = '0.0.5' diff --git a/src/labelme2yolo/l2y.py b/src/labelme2yolo/l2y.py index a93ee33..6075c01 100644 --- a/src/labelme2yolo/l2y.py +++ b/src/labelme2yolo/l2y.py @@ -119,122 +119,131 @@ def save_yolo_image(json_data, json_name, image_dir_path, target_dir): return img_path + class Labelme2YOLO(object): + def __init__(self, json_dir): self._json_dir = json_dir - self._label_id_map = get_label_id_map(self._json_dir) + self._label_id_map = self._get_label_id_map(self._json_dir) def _make_train_val_dir(self): - self._label_dir_path = os.path.join(self._json_dir, "YOLODataset/labels/") - self._image_dir_path = os.path.join(self._json_dir, "YOLODataset/images/") + self._label_dir_path = os.path.join(self._json_dir, + 'YOLODataset/labels/') + self._image_dir_path = os.path.join(self._json_dir, + 'YOLODataset/images/') - for yolo_path in ( - os.path.join(self._label_dir_path + "train/"), - os.path.join(self._label_dir_path + "val/"), - os.path.join(self._label_dir_path + "test/"), - os.path.join(self._image_dir_path + "train/"), - os.path.join(self._image_dir_path + "val/"), - os.path.join(self._image_dir_path + "test/"), - ): + for yolo_path in (os.path.join(self._label_dir_path + 'train/'), + os.path.join(self._label_dir_path + 'val/'), + os.path.join(self._label_dir_path + 'test/'), + os.path.join(self._image_dir_path + 'train/'), + os.path.join(self._image_dir_path + 'val/'), + os.path.join(self._image_dir_path + 'test/')): if os.path.exists(yolo_path): shutil.rmtree(yolo_path) os.makedirs(yolo_path) + def _get_label_id_map(self, json_dir): + label_set = set() + + for file_name in os.listdir(json_dir): + if file_name.endswith('json'): + json_path = os.path.join(json_dir, file_name) + data = json.load(open(json_path)) + for shape in data['shapes']: + label_set.add(shape['label']) + + return OrderedDict([(label, label_id) + for label_id, label in enumerate(label_set)]) + def _train_test_split(self, folders, json_names, val_size, test_size): - if ( - len(folders) > 0 - and "train" in folders - and "val" in folders - and "test" in folders - ): - train_json_names = self.get_json_names("train/") - val_json_names = self.get_json_names("val/") - test_json_names = self.get_json_names("test/") + if len(folders) > 0 and 'train' in folders and 'val' in folders and 'test' in folders: + train_folder = os.path.join(self._json_dir, 'train/') + train_json_names = [train_sample_name + '.json' + for train_sample_name in os.listdir(train_folder) + if os.path.isdir(os.path.join(train_folder, train_sample_name))] + + val_folder = os.path.join(self._json_dir, 'val/') + val_json_names = [val_sample_name + '.json' + for val_sample_name in os.listdir(val_folder) + if os.path.isdir(os.path.join(val_folder, val_sample_name))] + + test_folder = os.path.join(self._json_dir, 'test/') + test_json_names = [test_sample_name + '.json' + for test_sample_name in os.listdir(test_folder) + if os.path.isdir(os.path.join(test_folder, test_sample_name))] return train_json_names, val_json_names, test_json_names - train_indexes, val_indexes = train_test_split( - range(len(json_names)), test_size=val_size - ) - tmp_train_len = len(train_indexes) - test_indexes = [] - if test_size: - train_indexes, test_indexes = train_test_split( - range(tmp_train_len), test_size=test_size / (1 - val_size) - ) - train_json_names = [json_names[train_idx] for train_idx in train_indexes] - val_json_names = [json_names[val_idx] for val_idx in val_indexes] - test_json_names = [json_names[test_idx] for test_idx in test_indexes] + train_idxs, val_idxs = train_test_split(range(len(json_names)), + test_size=val_size) + tmp_train_len = len(train_idxs) + test_idxs = [] + if test_size > 1e-8: + train_idxs, test_idxs = train_test_split( + range(tmp_train_len), test_size=test_size / (1 - val_size)) + train_json_names = [json_names[train_idx] for train_idx in train_idxs] + val_json_names = [json_names[val_idx] for val_idx in val_idxs] + test_json_names = [json_names[test_idx] for test_idx in test_idxs] return train_json_names, val_json_names, test_json_names - def get_json_names(self, data_type: str): - data_folder = os.path.join(self._json_dir, data_type) - data_json_names = [ - data_sample_name + ".json" - for data_sample_name in os.listdir(data_folder) - if os.path.isdir(os.path.join(data_folder, data_sample_name)) - ] - return data_json_names - def convert(self, val_size, test_size): - json_names = [ - file_name - for file_name in os.listdir(self._json_dir) - if os.path.isfile(os.path.join(self._json_dir, file_name)) - and file_name.endswith(".json") - ] - folders = [ - file_name - for file_name in os.listdir(self._json_dir) - if os.path.isdir(os.path.join(self._json_dir, file_name)) - ] + json_names = [file_name for file_name in os.listdir(self._json_dir) + if os.path.isfile(os.path.join(self._json_dir, file_name)) and + file_name.endswith('.json')] + folders = [file_name for file_name in os.listdir(self._json_dir) + if os.path.isdir(os.path.join(self._json_dir, file_name))] train_json_names, val_json_names, test_json_names = self._train_test_split( - folders, json_names, val_size, test_size - ) + folders, json_names, val_size, test_size) self._make_train_val_dir() # convert labelme object to yolo format object, and save them to files # also get image from labelme json file and save them under images folder - for target_dir, json_names in zip( - ("train/", "val/", "test/"), - (train_json_names, val_json_names, test_json_names), - ): - pool = Pool(NUM_THREADS) + for target_dir, json_names in zip(('train/', 'val/', 'test/'), + (train_json_names, val_json_names, test_json_names)): + pool = Pool(os.cpu_count() - 1) + for json_name in json_names: - pool.apply_async(self.covert_json_to_text, args=(target_dir, json_name)) + pool.apply_async(self.covert_json_to_text, + args=(target_dir, json_name)) pool.close() pool.join() - print("Generating dataset.yaml file ...") + print('Generating dataset.yaml file ...') self._save_dataset_yaml() def covert_json_to_text(self, target_dir, json_name): json_path = os.path.join(self._json_dir, json_name) json_data = json.load(open(json_path)) - print("Converting %s for %s ..." % (json_name, target_dir.replace("/", ""))) - - img_path = save_yolo_image( - json_data, json_name, self._image_dir_path, target_dir - ) + print('Converting %s for %s ...' % + (json_name, target_dir.replace('/', ''))) + img_path = self._save_yolo_image(json_data, + json_name, + self._image_dir_path, + target_dir) yolo_obj_list = self._get_yolo_object_list(json_data, img_path) - save_yolo_label(json_name, self._label_dir_path, target_dir, yolo_obj_list) + self._save_yolo_label(json_name, + self._label_dir_path, + target_dir, + yolo_obj_list) def convert_one(self, json_name): json_path = os.path.join(self._json_dir, json_name) json_data = json.load(open(json_path)) - print("Converting %s ..." % json_name) + print('Converting %s ...' % json_name) - img_path = save_yolo_image(json_data, json_name, self._json_dir, "") + img_path = self._save_yolo_image(json_data, json_name, + self._json_dir, '') yolo_obj_list = self._get_yolo_object_list(json_data, img_path) - save_yolo_label(json_name, self._json_dir, "", yolo_obj_list) + self._save_yolo_label(json_name, self._json_dir, + '', yolo_obj_list) def _get_yolo_object_list(self, json_data, img_path): yolo_obj_list = [] @@ -243,22 +252,22 @@ class Labelme2YOLO(object): for shape in json_data["shapes"]: # labelme circle shape is different from others # it only has 2 points, 1st is circle center, 2nd is drag end point - if shape["shape_type"] == "circle": - yolo_obj = self._get_circle_shape_yolo_object(shape, img_h, img_w) + if shape['shape_type'] == 'circle': + yolo_obj = self._get_circle_shape_yolo_object( + shape, img_h, img_w) else: - yolo_obj = self._get_other_shape_yolo_object(shape, img_h, img_w) + yolo_obj = self._get_other_shape_yolo_object( + shape, img_h, img_w) yolo_obj_list.append(yolo_obj) return yolo_obj_list def _get_circle_shape_yolo_object(self, shape, img_h, img_w): - obj_center_x, obj_center_y = shape["points"][0] + obj_center_x, obj_center_y = shape['points'][0] - radius = math.sqrt( - (obj_center_x - shape["points"][1][0]) ** 2 - + (obj_center_y - shape["points"][1][1]) ** 2 - ) + radius = math.sqrt((obj_center_x - shape['points'][1][0]) ** 2 + + (obj_center_y - shape['points'][1][1]) ** 2) obj_w = 2 * radius obj_h = 2 * radius @@ -267,41 +276,57 @@ class Labelme2YOLO(object): yolo_w = round(float(obj_w / img_w), 6) yolo_h = round(float(obj_h / img_h), 6) - label_id = self._label_id_map[shape["label"]] + label_id = self._label_id_map[shape['label']] return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h def _get_other_shape_yolo_object(self, shape, img_h, img_w): - def __get_object_desc(obj_port_list): - def __get_dist(int_list): - return max(int_list) - min(int_list) - x_lists = [port[0] for port in obj_port_list] - y_lists = [port[1] for port in obj_port_list] + point_list = shape['points'] + points = np.zeros(2 * len(point_list)) + points[::2] = [float(point[0]) / img_w for point in point_list] + points[1::2] = [float(point[1]) / img_h for point in point_list] + label_id = self._label_id_map[shape['label']] - return min(x_lists), __get_dist(x_lists), min(y_lists), __get_dist(y_lists) + return (label_id, points.tolist()) - obj_x_min, obj_w, obj_y_min, obj_h = __get_object_desc(shape["points"]) + def _save_yolo_label(self, json_name, label_dir_path, target_dir, yolo_obj_list): + txt_path = os.path.join(label_dir_path, + target_dir, + json_name.replace('.json', '.txt')) - yolo_center_x = round(float((obj_x_min + obj_w / 2.0) / img_w), 6) - yolo_center_y = round(float((obj_y_min + obj_h / 2.0) / img_h), 6) - yolo_w = round(float(obj_w / img_w), 6) - yolo_h = round(float(obj_h / img_h), 6) + with open(txt_path, 'w+') as f: + for yolo_obj in yolo_obj_list: + label, points = yolo_obj + points = [str(item) for item in points] + yolo_obj_line = f"{label} {' '.join(points)}\n" + f.write(yolo_obj_line) - label_id = self._label_id_map[shape["label"]] + def _save_yolo_image(self, json_data, json_name, image_dir_path, target_dir): + img_name = json_name.replace('.json', '.png') + img_path = os.path.join(image_dir_path, target_dir, img_name) - return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h + if not os.path.exists(img_path): + img = img_b64_to_arr(json_data['imageData']) + PIL.Image.fromarray(img).save(img_path) + + return img_path def _save_dataset_yaml(self): - yaml_path = os.path.join(self._json_dir, "YOLODataset", "dataset.yaml") + yaml_path = os.path.join( + self._json_dir, 'YOLODataset/', 'dataset.yaml') - with open(yaml_path, "w+") as yaml_file: - yaml_file.write("train: %s\n" % os.path.join(self._image_dir_path, "train")) - yaml_file.write("val: %s\n\n" % os.path.join(self._image_dir_path, "val")) - yaml_file.write("test: %s\n\n" % os.path.join(self._image_dir_path, "test")) - yaml_file.write("nc: %i\n\n" % len(self._label_id_map)) + with open(yaml_path, 'w+') as yaml_file: + yaml_file.write('train: %s\n' % + os.path.join(self._image_dir_path, 'train/')) + yaml_file.write('val: %s\n' % + os.path.join(self._image_dir_path, 'val/')) + yaml_file.write('test: %s\n' % + os.path.join(self._image_dir_path, 'test/')) + yaml_file.write('nc: %i\n' % len(self._label_id_map)) - names_str = "" + names_str = '' + for label, _ in self._label_id_map.items(): names_str += "'%s', " % label names_str = names_str.rstrip(", ")