update export data format: yolo polygon format

This commit is contained in:
greatx 2022-11-25 10:14:06 +08:00
parent ee76d5da04
commit 639cbcad0d
3 changed files with 111 additions and 109 deletions

View File

@ -8,6 +8,11 @@
Help converting LabelMe Annotation Tool JSON format to YOLO text file format. Help converting LabelMe Annotation Tool JSON format to YOLO text file format.
If you've already marked your segmentation dataset by LabelMe, it's easy to use this tool to help converting to YOLO format dataset. If you've already marked your segmentation dataset by LabelMe, it's easy to use this tool to help converting to YOLO format dataset.
---------
## New
- Export annotations in YOLO polygon format (for YOLOv5 v7.0 segmentation).
## Parameters Explain ## Parameters Explain
**--json_dir** LabelMe JSON files folder path. **--json_dir** LabelMe JSON files folder path.

View File

@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2022-present Wang Xin <xinwang614@gmail.com> # SPDX-FileCopyrightText: 2022-present Wang Xin <xinwang614@gmail.com>
# #
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
__version__ = '0.0.1' __version__ = '0.0.5'

View File

@ -123,214 +123,211 @@ def apply_exif_orientation(image):
else: else:
return image return image
class Labelme2YOLO(object): class Labelme2YOLO(object):
def __init__(self, json_dir):
    """Remember the LabelMe JSON folder and build its label-to-id table.

    Args:
        json_dir: Path of the folder that holds the LabelMe JSON files.
    """
    self._json_dir = json_dir
    # The table is built once, by scanning the folder that was just stored.
    label_id_map = self._get_label_id_map(json_dir)
    self._label_id_map = label_id_map
def _make_train_val_dir(self):
    """Create empty train/val/test folders for both labels and images.

    Sets ``self._label_dir_path`` and ``self._image_dir_path`` to the
    ``YOLODataset/labels/`` and ``YOLODataset/images/`` folders below the
    JSON directory. A split folder that already exists is removed together
    with its content before it is recreated.
    """
    dataset_root = self._json_dir
    self._label_dir_path = os.path.join(dataset_root, 'YOLODataset/labels/')
    self._image_dir_path = os.path.join(dataset_root, 'YOLODataset/images/')

    # Labels first, then images, each in train/val/test order.
    for base_dir in (self._label_dir_path, self._image_dir_path):
        for split in ('train/', 'val/', 'test/'):
            split_dir = base_dir + split
            if os.path.exists(split_dir):
                shutil.rmtree(split_dir)
            os.makedirs(split_dir)
def _get_label_id_map(self, json_dir):
    """Collect every shape label found in ``json_dir`` and number them.

    Args:
        json_dir: Folder whose ``.json`` files are scanned.

    Returns:
        OrderedDict mapping each label to an integer id. Labels are numbered
        in sorted order so that the ids are the same on every run; numbering
        them in set-iteration order would make the ids vary between runs.
    """
    labels = set()
    for file_name in os.listdir(json_dir):
        # Same suffix test as convert(), so both agree on which files count.
        if not file_name.endswith('.json'):
            continue
        # Context manager so the handle is closed after each file.
        with open(os.path.join(json_dir, file_name)) as json_file:
            data = json.load(json_file)
        labels.update(shape['label'] for shape in data['shapes'])

    return OrderedDict((label, label_id)
                       for label_id, label in enumerate(sorted(labels)))
def _train_test_split(self, folders, json_names, val_size, test_size):
    """Split the JSON file names into train, val and test lists.

    When ``folders`` contains ``train``, ``val`` and ``test``, each list is
    built from the sub-directories found inside the matching folder of the
    JSON directory (sub-directory name + ``.json``). Otherwise the names are
    split randomly with ``train_test_split``.

    Args:
        folders: Names of the sub-folders found in the JSON directory.
        json_names: JSON file names to split.
        val_size: Share of all samples that goes to the val list.
        test_size: Share of all samples that goes to the test list; values
            at or below 1e-8 leave the test list empty.

    Returns:
        Tuple ``(train_json_names, val_json_names, test_json_names)``.
    """
    def sample_json_names(split):
        # One JSON name per sub-directory of the given split folder.
        split_dir = os.path.join(self._json_dir, split + '/')
        return [sample_name + '.json'
                for sample_name in os.listdir(split_dir)
                if os.path.isdir(os.path.join(split_dir, sample_name))]

    if all(split in folders for split in ('train', 'val', 'test')):
        return (sample_json_names('train'),
                sample_json_names('val'),
                sample_json_names('test'))

    train_idxs, val_idxs = train_test_split(range(len(json_names)),
                                            test_size=val_size)
    test_idxs = []
    if test_size > 1e-8:
        # Split the remaining train indices themselves. Splitting
        # range(len(train_idxs)) would hand out positions 0..k-1 as sample
        # indices, which can overlap the val indices and drop other samples.
        # test_size is rescaled because it is expressed as a share of the
        # whole dataset, not of the remaining train part.
        train_idxs, test_idxs = train_test_split(
            train_idxs, test_size=test_size / (1 - val_size))

    train_json_names = [json_names[idx] for idx in train_idxs]
    val_json_names = [json_names[idx] for idx in val_idxs]
    test_json_names = [json_names[idx] for idx in test_idxs]

    return train_json_names, val_json_names, test_json_names
def convert(self, val_size, test_size):
    """Convert every LabelMe JSON file of the folder into a YOLO dataset.

    The JSON files are split into train/val/test, the output folders are
    (re)created, each file is converted in a worker process, and finally the
    dataset YAML file is written.

    Args:
        val_size: Share of the samples used for validation.
        test_size: Share of the samples used for testing.
    """
    entries = os.listdir(self._json_dir)
    json_names = [name for name in entries
                  if os.path.isfile(os.path.join(self._json_dir, name)) and
                  name.endswith('.json')]
    folders = [name for name in entries
               if os.path.isdir(os.path.join(self._json_dir, name))]
    train_json_names, val_json_names, test_json_names = self._train_test_split(
        folders, json_names, val_size, test_size)

    self._make_train_val_dir()

    # os.cpu_count() may return None, and "count - 1" is 0 on a single-core
    # machine; Pool needs at least one worker process.
    worker_count = max(1, (os.cpu_count() or 1) - 1)

    def report_failure(error):
        # Without an error callback a failing worker is dropped silently.
        print('Conversion failed: %s' % error)

    # Convert LabelMe objects to YOLO objects and save them to label files;
    # the image stored in each JSON file is saved under the images folder.
    for target_dir, split_json_names in zip(
            ('train/', 'val/', 'test/'),
            (train_json_names, val_json_names, test_json_names)):
        # The context manager tears the pool down even if scheduling fails.
        with Pool(worker_count) as pool:
            for json_name in split_json_names:
                pool.apply_async(self.covert_json_to_text,
                                 args=(target_dir, json_name),
                                 error_callback=report_failure)
            pool.close()
            pool.join()

    print('Generating dataset.yaml file ...')
    self._save_dataset_yaml()
def covert_json_to_text(self, target_dir, json_name):
    """Convert one LabelMe JSON file into an image and a label file.

    Args:
        target_dir: Split sub-folder the outputs go to (``convert`` passes
            ``'train/'``, ``'val/'`` or ``'test/'``).
        json_name: File name of the LabelMe JSON inside the JSON folder.
    """
    json_path = os.path.join(self._json_dir, json_name)
    # Context manager so the handle is closed once the JSON is parsed.
    with open(json_path) as json_file:
        json_data = json.load(json_file)

    print('Converting %s for %s ...' %
          (json_name, target_dir.replace('/', '')))

    # The image is saved first because its path is needed to build the
    # YOLO objects.
    img_path = self._save_yolo_image(json_data,
                                     json_name,
                                     self._image_dir_path,
                                     target_dir)
    yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
    self._save_yolo_label(json_name,
                          self._label_dir_path,
                          target_dir,
                          yolo_obj_list)
def convert_one(self, json_name):
    """Convert a single LabelMe JSON file, writing next to the JSON file.

    The image and the label file are both saved directly in the JSON
    folder; no split sub-folder is used.

    Args:
        json_name: File name of the LabelMe JSON inside the JSON folder.
    """
    json_path = os.path.join(self._json_dir, json_name)
    # Context manager so the handle is closed once the JSON is parsed.
    with open(json_path) as json_file:
        json_data = json.load(json_file)

    print('Converting %s ...' % json_name)

    img_path = self._save_yolo_image(json_data, json_name,
                                     self._json_dir, '')
    yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
    self._save_yolo_label(json_name, self._json_dir,
                          '', yolo_obj_list)
def _get_yolo_object_list(self, json_data, img_path):
    """Build one YOLO object per shape of a LabelMe JSON document.

    Args:
        json_data: Parsed LabelMe JSON; its ``'shapes'`` list is converted.
        img_path: Path of the saved image; it is read with ``cv2.imread``
            to obtain the height and width used for normalisation.

    Returns:
        List with the converted object of every shape, in file order.
    """
    img_h, img_w, _ = cv2.imread(img_path).shape

    def to_yolo_object(shape):
        # A LabelMe circle has only two points (centre, then the drag end
        # point), so it gets its own conversion.
        if shape['shape_type'] == 'circle':
            return self._get_circle_shape_yolo_object(shape, img_h, img_w)
        return self._get_other_shape_yolo_object(shape, img_h, img_w)

    return [to_yolo_object(shape) for shape in json_data['shapes']]
def _get_circle_shape_yolo_object(self, shape, img_h, img_w):
    """Turn a LabelMe circle into a normalised bounding box.

    Args:
        shape: Shape dict; ``'points'`` holds the centre followed by a
            second point whose distance to the centre is the radius.
        img_h: Image height used to normalise vertical values.
        img_w: Image width used to normalise horizontal values.

    Returns:
        Tuple ``(label_id, center_x, center_y, width, height)`` with the
        four box values divided by the image size and rounded to 6 decimals.
    """
    def normalise(value, size):
        return round(float(value / size), 6)

    center_x, center_y = shape['points'][0]
    edge_x = shape['points'][1][0]
    edge_y = shape['points'][1][1]

    radius = math.sqrt((center_x - edge_x) ** 2 + (center_y - edge_y) ** 2)
    # The enclosing box is a square whose side equals the diameter.
    diameter = 2 * radius

    box = (normalise(center_x, img_w),
           normalise(center_y, img_h),
           normalise(diameter, img_w),
           normalise(diameter, img_h))
    label_id = self._label_id_map[shape['label']]

    return (label_id,) + box
def _get_other_shape_yolo_object(self, shape, img_h, img_w):
    """Turn a non-circle LabelMe shape into a normalised polygon.

    Args:
        shape: Shape dict with ``'points'`` (list of ``[x, y]`` pairs) and
            ``'label'``.
        img_h: Image height; every y value is divided by it.
        img_w: Image width; every x value is divided by it.

    Returns:
        Tuple ``(label_id, coords)`` where ``coords`` is the flat list
        ``[x1, y1, x2, y2, ...]`` of normalised vertex coordinates.
    """
    point_list = shape['points']

    # LabelMe stores a rectangle as two opposite corners. Two vertices do
    # not enclose an area, so expand them to the four corners.
    if shape.get('shape_type') == 'rectangle' and len(point_list) == 2:
        x1, y1 = point_list[0][0], point_list[0][1]
        x2, y2 = point_list[1][0], point_list[1][1]
        point_list = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]

    coords = [value
              for point in point_list
              for value in (float(point[0]) / img_w,
                            float(point[1]) / img_h)]

    label_id = self._label_id_map[shape['label']]

    return (label_id, coords)
def _save_yolo_label(self, json_name, label_dir_path, target_dir, yolo_obj_list):
    """Write the YOLO label file that belongs to one JSON file.

    Each object becomes one line: the label id followed by its values,
    separated by single spaces.

    Args:
        json_name: JSON file name; ``.json`` is replaced by ``.txt`` to get
            the label file name.
        label_dir_path: Base folder for label files.
        target_dir: Sub-folder below ``label_dir_path`` (may be ``''``).
        yolo_obj_list: Objects to write. Both layouts produced in this class
            are accepted: ``(label_id, [v1, v2, ...])`` from the polygon
            conversion and the flat ``(label_id, v1, v2, v3, v4)`` from the
            circle conversion.
    """
    txt_path = os.path.join(label_dir_path,
                            target_dir,
                            json_name.replace('.json', '.txt'))

    lines = []
    for yolo_obj in yolo_obj_list:
        label, *values = yolo_obj
        # Polygon objects carry their values in one nested list; circle
        # objects are flat. Unpacking into exactly two names would raise
        # ValueError on the flat form.
        if len(values) == 1 and isinstance(values[0], (list, tuple)):
            values = values[0]
        lines.append('%s %s\n' % (label, ' '.join(str(item) for item in values)))

    with open(txt_path, 'w+') as f:
        f.writelines(lines)
def _save_yolo_image(self, json_data, json_name, image_dir_path, target_dir):
    """Save the image embedded in a LabelMe JSON as a PNG, if not present.

    Args:
        json_data: Parsed LabelMe JSON; ``'imageData'`` is decoded with
            ``img_b64_to_arr`` when the image has to be written.
        json_name: JSON file name; ``.json`` is replaced by ``.png``.
        image_dir_path: Base folder for images.
        target_dir: Sub-folder below ``image_dir_path`` (may be ``''``).

    Returns:
        Path of the image file, whether it was just written or already
        existed.
    """
    png_name = json_name.replace('.json', '.png')
    img_path = os.path.join(image_dir_path, target_dir, png_name)

    # An existing file is left untouched.
    if os.path.exists(img_path):
        return img_path

    pixels = img_b64_to_arr(json_data['imageData'])
    PIL.Image.fromarray(pixels).save(img_path)
    return img_path
def _save_dataset_yaml(self): def _save_dataset_yaml(self):
yaml_path = os.path.join(self._json_dir, 'YOLODataset/', 'dataset.yaml') yaml_path = os.path.join(
self._json_dir, 'YOLODataset/', 'dataset.yaml')
with open(yaml_path, 'w+') as yaml_file: with open(yaml_path, 'w+') as yaml_file:
yaml_file.write('train: %s\n' % \ yaml_file.write('train: %s\n' %
os.path.join(self._image_dir_path, 'train/')) os.path.join(self._image_dir_path, 'train/'))
yaml_file.write('val: %s\n\n' % \ yaml_file.write('val: %s\n' %
os.path.join(self._image_dir_path, 'val/')) os.path.join(self._image_dir_path, 'val/'))
yaml_file.write('test: %s\n\n' % \ yaml_file.write('test: %s\n' %
os.path.join(self._image_dir_path, 'test/')) os.path.join(self._image_dir_path, 'test/'))
yaml_file.write('nc: %i\n\n' % len(self._label_id_map)) yaml_file.write('nc: %i\n' % len(self._label_id_map))
names_str = '' names_str = ''
for label, _ in self._label_id_map.items(): for label, _ in self._label_id_map.items():
names_str += "'%s', " % label names_str += "'%s', " % label