support recursive search dirs (#38)
* support recursive search dirs * fix pylint error
This commit is contained in:
parent
dba6184a50
commit
cc4171e182
|
@ -1,3 +1,4 @@
|
||||||
opencv-python
|
opencv-python
|
||||||
Pillow
|
Pillow
|
||||||
numpy
|
numpy
|
||||||
|
tqdm
|
||||||
|
|
|
@ -4,4 +4,4 @@
|
||||||
'''
|
'''
|
||||||
about version
|
about version
|
||||||
'''
|
'''
|
||||||
__version__ = '0.1.3'
|
__version__ = '0.1.4'
|
||||||
|
|
|
@ -5,26 +5,30 @@ Created on Aug 18, 2021
|
||||||
@author: GreatV(Wang Xin)
|
@author: GreatV(Wang Xin)
|
||||||
"""
|
"""
|
||||||
import base64
|
import base64
|
||||||
|
import glob
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import shutil
|
import shutil
|
||||||
from collections import OrderedDict
|
import uuid
|
||||||
from multiprocessing import Pool
|
import logging
|
||||||
|
|
||||||
import PIL.ExifTags
|
import PIL.ExifTags
|
||||||
import PIL.Image
|
import PIL.Image
|
||||||
import PIL.ImageOps
|
import PIL.ImageOps
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import tqdm
|
||||||
|
|
||||||
|
# set seed
|
||||||
random.seed(12345678)
|
random.seed(12345678)
|
||||||
|
random.Random().seed(12345678)
|
||||||
np.random.seed(12345678)
|
np.random.seed(12345678)
|
||||||
|
|
||||||
# number of LabelMe2YOLO multiprocessing threads
|
logging.basicConfig(level=logging.INFO)
|
||||||
NUM_THREADS = max(1, os.cpu_count() - 1)
|
logger = logging.getLogger("labelme2yolo")
|
||||||
|
|
||||||
|
|
||||||
def train_test_split(dataset_index, test_size=0.2):
|
def train_test_split(dataset_index, test_size=0.2):
|
||||||
|
@ -97,21 +101,6 @@ def img_data_to_png_data(img_data):
|
||||||
return f_in.read()
|
return f_in.read()
|
||||||
|
|
||||||
|
|
||||||
def get_label_id_map(json_dir: str):
|
|
||||||
"""Get label id map from json files in json_dir"""
|
|
||||||
label_set = set()
|
|
||||||
|
|
||||||
for file_name in os.listdir(json_dir):
|
|
||||||
if file_name.endswith("json"):
|
|
||||||
json_path = os.path.join(json_dir, file_name)
|
|
||||||
with open(json_path, encoding="utf-8") as file:
|
|
||||||
data = json.load(file)
|
|
||||||
for shape in data["shapes"]:
|
|
||||||
label_set.add(shape["label"])
|
|
||||||
|
|
||||||
return OrderedDict([(label, label_id) for label_id, label in enumerate(label_set)])
|
|
||||||
|
|
||||||
|
|
||||||
def extend_point_list(point_list, out_format="polygon"):
|
def extend_point_list(point_list, out_format="polygon"):
|
||||||
"""Extend point list to polygon or bbox"""
|
"""Extend point list to polygon or bbox"""
|
||||||
x_min = min(float(point) for point in point_list[::2])
|
x_min = min(float(point) for point in point_list[::2])
|
||||||
|
@ -131,32 +120,24 @@ def extend_point_list(point_list, out_format="polygon"):
|
||||||
return np.array([x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max])
|
return np.array([x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max])
|
||||||
|
|
||||||
|
|
||||||
def save_yolo_label(json_name, label_dir_path, target_dir, yolo_obj_list):
|
def save_yolo_label(obj_list, label_dir, target_dir, target_name):
|
||||||
"""Save yolo label to txt file"""
|
"""Save yolo label to txt file"""
|
||||||
txt_path = os.path.join(label_dir_path,
|
txt_path = os.path.join(label_dir, target_dir, target_name)
|
||||||
target_dir,
|
|
||||||
json_name.replace(".json", ".txt"))
|
|
||||||
|
|
||||||
with open(txt_path, "w+", encoding="utf-8") as file:
|
with open(txt_path, "w+", encoding="utf-8") as file:
|
||||||
for yolo_obj in yolo_obj_list:
|
for label, points in obj_list:
|
||||||
label, points = yolo_obj
|
|
||||||
points = [str(item) for item in points]
|
points = [str(item) for item in points]
|
||||||
yolo_obj_line = f"{label} {' '.join(points)}\n"
|
line = f"{label} {' '.join(points)}\n"
|
||||||
file.write(yolo_obj_line)
|
file.write(line)
|
||||||
|
|
||||||
|
|
||||||
def save_yolo_image(json_data, json_path, image_dir_path, target_dir):
|
def save_yolo_image(json_data, json_dir, image_dir, target_dir, target_name):
|
||||||
"""Save yolo image to image_dir_path/target_dir"""
|
"""Save yolo image to image_dir_path/target_dir"""
|
||||||
json_name = os.path.basename(json_path)
|
img_path = os.path.join(image_dir, target_dir, target_name)
|
||||||
img_name = json_name.replace(".json", ".png")
|
|
||||||
|
|
||||||
# make image_path and save image
|
|
||||||
img_path = os.path.join(image_dir_path, target_dir, img_name)
|
|
||||||
|
|
||||||
if json_data["imageData"] is None:
|
if json_data["imageData"] is None:
|
||||||
dirname = os.path.dirname(json_path)
|
|
||||||
image_name = json_data["imagePath"]
|
image_name = json_data["imagePath"]
|
||||||
src_image_name = os.path.join(dirname, image_name)
|
src_image_name = os.path.join(json_dir, image_name)
|
||||||
src_image = cv2.imread(src_image_name)
|
src_image = cv2.imread(src_image_name)
|
||||||
cv2.imwrite(img_path, src_image)
|
cv2.imwrite(img_path, src_image)
|
||||||
else:
|
else:
|
||||||
|
@ -170,31 +151,36 @@ class Labelme2YOLO:
|
||||||
"""Labelme to YOLO format converter"""
|
"""Labelme to YOLO format converter"""
|
||||||
|
|
||||||
def __init__(self, json_dir, output_format, label_list):
|
def __init__(self, json_dir, output_format, label_list):
|
||||||
self._json_dir = json_dir
|
self._json_dir = os.path.expanduser(json_dir)
|
||||||
self._output_format = output_format
|
self._output_format = output_format
|
||||||
self._label_list = label_list
|
self._label_list = []
|
||||||
|
self._label_id_map = {}
|
||||||
self._label_dir_path = ""
|
self._label_dir_path = ""
|
||||||
self._image_dir_path = ""
|
self._image_dir_path = ""
|
||||||
|
|
||||||
if label_list:
|
if label_list:
|
||||||
self._label_id_map = {label: label_id
|
self._label_list = label_list
|
||||||
for label_id, label in enumerate(label_list)}
|
self._label_id_map = {
|
||||||
else:
|
label: label_id for label_id, label in enumerate(label_list)
|
||||||
self._label_id_map = get_label_id_map(self._json_dir)
|
}
|
||||||
self._label_list = list(self._label_id_map.keys())
|
|
||||||
|
def _update_id_map(self, label: str):
|
||||||
|
if label not in self._label_list:
|
||||||
|
self._label_list.append(label)
|
||||||
|
self._label_id_map[label] = len(self._label_id_map)
|
||||||
|
|
||||||
def _make_train_val_dir(self):
|
def _make_train_val_dir(self):
|
||||||
self._label_dir_path = os.path.join(self._json_dir,
|
self._label_dir_path = os.path.join(self._json_dir, "YOLODataset/labels/")
|
||||||
'YOLODataset/labels/')
|
self._image_dir_path = os.path.join(self._json_dir, "YOLODataset/images/")
|
||||||
self._image_dir_path = os.path.join(self._json_dir,
|
|
||||||
'YOLODataset/images/')
|
|
||||||
|
|
||||||
for yolo_path in (os.path.join(self._label_dir_path + 'train/'),
|
for yolo_path in (
|
||||||
os.path.join(self._label_dir_path + 'val/'),
|
os.path.join(self._label_dir_path + "train/"),
|
||||||
os.path.join(self._label_dir_path + 'test/'),
|
os.path.join(self._label_dir_path + "val/"),
|
||||||
os.path.join(self._image_dir_path + 'train/'),
|
os.path.join(self._label_dir_path + "test/"),
|
||||||
os.path.join(self._image_dir_path + 'val/'),
|
os.path.join(self._image_dir_path + "train/"),
|
||||||
os.path.join(self._image_dir_path + 'test/')):
|
os.path.join(self._image_dir_path + "val/"),
|
||||||
|
os.path.join(self._image_dir_path + "test/"),
|
||||||
|
):
|
||||||
if os.path.exists(yolo_path):
|
if os.path.exists(yolo_path):
|
||||||
shutil.rmtree(yolo_path)
|
shutil.rmtree(yolo_path)
|
||||||
|
|
||||||
|
@ -207,31 +193,21 @@ class Labelme2YOLO:
|
||||||
for sample_name in os.listdir(set_folder):
|
for sample_name in os.listdir(set_folder):
|
||||||
set_dir = os.path.join(set_folder, sample_name)
|
set_dir = os.path.join(set_folder, sample_name)
|
||||||
if os.path.isdir(set_dir):
|
if os.path.isdir(set_dir):
|
||||||
json_names.append(sample_name + '.json')
|
json_names.append(sample_name + ".json")
|
||||||
return json_names
|
return json_names
|
||||||
|
|
||||||
def _train_test_split(self, folders, json_names, val_size, test_size):
|
def _train_test_split(self, json_names, val_size, test_size):
|
||||||
"""Split json names to train, val, test"""
|
"""Split json names to train, val, test"""
|
||||||
if (len(folders) > 0 and
|
|
||||||
'train' in folders and
|
|
||||||
'val' in folders and
|
|
||||||
'test' in folders):
|
|
||||||
train_json_names = self._get_dataset_part_json_names('train')
|
|
||||||
val_json_names = self._get_dataset_part_json_names('val')
|
|
||||||
test_json_names = self._get_dataset_part_json_names('test')
|
|
||||||
|
|
||||||
return train_json_names, val_json_names, test_json_names
|
|
||||||
|
|
||||||
total_size = len(json_names)
|
total_size = len(json_names)
|
||||||
dataset_index = list(range(total_size))
|
dataset_index = list(range(total_size))
|
||||||
train_ids, val_ids = train_test_split(dataset_index,
|
train_ids, val_ids = train_test_split(dataset_index, test_size=val_size)
|
||||||
test_size=val_size)
|
|
||||||
test_ids = []
|
test_ids = []
|
||||||
if test_size is None:
|
if test_size is None:
|
||||||
test_size = 0.0
|
test_size = 0.0
|
||||||
if test_size > 1e-8:
|
if test_size > 1e-8:
|
||||||
train_ids, test_ids = train_test_split(
|
train_ids, test_ids = train_test_split(
|
||||||
train_ids, test_size=test_size / (1 - val_size))
|
train_ids, test_size=test_size / (1 - val_size)
|
||||||
|
)
|
||||||
train_json_names = [json_names[train_idx] for train_idx in train_ids]
|
train_json_names = [json_names[train_idx] for train_idx in train_ids]
|
||||||
val_json_names = [json_names[val_idx] for val_idx in val_ids]
|
val_json_names = [json_names[val_idx] for val_idx in val_ids]
|
||||||
test_json_names = [json_names[test_idx] for test_idx in test_ids]
|
test_json_names = [json_names[test_idx] for test_idx in test_ids]
|
||||||
|
@ -240,49 +216,41 @@ class Labelme2YOLO:
|
||||||
|
|
||||||
def convert(self, val_size, test_size):
|
def convert(self, val_size, test_size):
|
||||||
"""Convert labelme format to yolo format"""
|
"""Convert labelme format to yolo format"""
|
||||||
json_names = [file_name for file_name in os.listdir(self._json_dir)
|
json_names = glob.glob(
|
||||||
if os.path.isfile(os.path.join(self._json_dir, file_name)) and
|
os.path.join(self._json_dir, "**", "*.json"), recursive=True
|
||||||
file_name.endswith('.json')]
|
)
|
||||||
folders = [file_name for file_name in os.listdir(self._json_dir)
|
json_names = sorted(json_names)
|
||||||
if os.path.isdir(os.path.join(self._json_dir, file_name))]
|
|
||||||
train_json_names, val_json_names, test_json_names = self._train_test_split(
|
train_json_names, val_json_names, test_json_names = self._train_test_split(
|
||||||
folders, json_names, val_size, test_size)
|
json_names, val_size, test_size
|
||||||
|
)
|
||||||
|
|
||||||
self._make_train_val_dir()
|
self._make_train_val_dir()
|
||||||
|
|
||||||
# convert labelme object to yolo format object, and save them to files
|
# convert labelme object to yolo format object, and save them to files
|
||||||
# also get image from labelme json file and save them under images folder
|
# also get image from labelme json file and save them under images folder
|
||||||
dirs = ('train/', 'val/', 'test/')
|
dirs = ("train/", "val/", "test/")
|
||||||
names = (train_json_names, val_json_names, test_json_names)
|
names = (train_json_names, val_json_names, test_json_names)
|
||||||
for target_dir, json_names in zip(dirs, names):
|
for target_dir, json_names in zip(dirs, names):
|
||||||
|
target_part = target_dir.replace("/", "")
|
||||||
|
logger.info("Converting %s set ...", target_part)
|
||||||
|
for json_name in tqdm.tqdm(json_names):
|
||||||
|
self.covert_json_to_text(target_dir, json_name)
|
||||||
|
|
||||||
with Pool(NUM_THREADS) as pool:
|
|
||||||
for json_name in json_names:
|
|
||||||
pool.apply_async(self.covert_json_to_text,
|
|
||||||
args=(target_dir, json_name))
|
|
||||||
pool.close()
|
|
||||||
pool.join()
|
|
||||||
|
|
||||||
print('Generating dataset.yaml file ...')
|
|
||||||
self._save_dataset_yaml()
|
self._save_dataset_yaml()
|
||||||
|
|
||||||
def covert_json_to_text(self, target_dir, json_name):
|
def covert_json_to_text(self, target_dir, json_name):
|
||||||
"""Convert json file to yolo format text file and save them to files"""
|
"""Convert json file to yolo format text file and save them to files"""
|
||||||
json_path = os.path.join(self._json_dir, json_name)
|
with open(json_name, encoding="utf-8") as file:
|
||||||
with open(json_path, encoding="utf-8") as file:
|
|
||||||
json_data = json.load(file)
|
json_data = json.load(file)
|
||||||
|
|
||||||
print(f"Converting {json_name} for {target_dir.replace('/', '')} ...")
|
filename: str = uuid.UUID(int=random.Random().getrandbits(128)).hex
|
||||||
|
image_name = f"{filename}.png"
|
||||||
img_path = save_yolo_image(json_data,
|
label_name = f"{filename}.txt"
|
||||||
json_path,
|
img_path = save_yolo_image(
|
||||||
self._image_dir_path,
|
json_data, self._json_dir, self._image_dir_path, target_dir, image_name
|
||||||
target_dir)
|
)
|
||||||
yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
|
yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
|
||||||
save_yolo_label(json_name,
|
save_yolo_label(yolo_obj_list, self._label_dir_path, target_dir, label_name)
|
||||||
self._label_dir_path,
|
|
||||||
target_dir,
|
|
||||||
yolo_obj_list)
|
|
||||||
|
|
||||||
def convert_one(self, json_name):
|
def convert_one(self, json_name):
|
||||||
"""Convert one json file to yolo format text file and save them to files"""
|
"""Convert one json file to yolo format text file and save them to files"""
|
||||||
|
@ -290,14 +258,14 @@ class Labelme2YOLO:
|
||||||
with open(json_path, encoding="utf-8") as file:
|
with open(json_path, encoding="utf-8") as file:
|
||||||
json_data = json.load(file)
|
json_data = json.load(file)
|
||||||
|
|
||||||
print(f'Converting {json_name} ...')
|
image_name = json_name.replace(".json", ".png")
|
||||||
|
label_name = json_name.replace(".json", ".txt")
|
||||||
img_path = save_yolo_image(json_data, json_name,
|
img_path = save_yolo_image(
|
||||||
self._json_dir, '')
|
json_data, self._json_dir, self._image_dir_path, "", image_name
|
||||||
|
)
|
||||||
|
|
||||||
yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
|
yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
|
||||||
save_yolo_label(json_name, self._json_dir,
|
save_yolo_label(yolo_obj_list, self._label_dir_path, "", label_name)
|
||||||
'', yolo_obj_list)
|
|
||||||
|
|
||||||
def _get_yolo_object_list(self, json_data, img_path):
|
def _get_yolo_object_list(self, json_data, img_path):
|
||||||
yolo_obj_list = []
|
yolo_obj_list = []
|
||||||
|
@ -306,22 +274,23 @@ class Labelme2YOLO:
|
||||||
for shape in json_data["shapes"]:
|
for shape in json_data["shapes"]:
|
||||||
# labelme circle shape is different from others
|
# labelme circle shape is different from others
|
||||||
# it only has 2 points, 1st is circle center, 2nd is drag end point
|
# it only has 2 points, 1st is circle center, 2nd is drag end point
|
||||||
if shape['shape_type'] == 'circle':
|
if shape["shape_type"] == "circle":
|
||||||
yolo_obj = self._get_circle_shape_yolo_object(
|
yolo_obj = self._get_circle_shape_yolo_object(shape, img_h, img_w)
|
||||||
shape, img_h, img_w)
|
|
||||||
else:
|
else:
|
||||||
yolo_obj = self._get_other_shape_yolo_object(
|
yolo_obj = self._get_other_shape_yolo_object(shape, img_h, img_w)
|
||||||
shape, img_h, img_w)
|
|
||||||
|
|
||||||
yolo_obj_list.append(yolo_obj)
|
if yolo_obj:
|
||||||
|
yolo_obj_list.append(yolo_obj)
|
||||||
|
|
||||||
return yolo_obj_list
|
return yolo_obj_list
|
||||||
|
|
||||||
def _get_circle_shape_yolo_object(self, shape, img_h, img_w):
|
def _get_circle_shape_yolo_object(self, shape, img_h, img_w):
|
||||||
obj_center_x, obj_center_y = shape['points'][0]
|
obj_center_x, obj_center_y = shape["points"][0]
|
||||||
|
|
||||||
radius = math.sqrt((obj_center_x - shape['points'][1][0]) ** 2 +
|
radius = math.sqrt(
|
||||||
(obj_center_y - shape['points'][1][1]) ** 2)
|
(obj_center_x - shape["points"][1][0]) ** 2
|
||||||
|
+ (obj_center_y - shape["points"][1][1]) ** 2
|
||||||
|
)
|
||||||
obj_w = 2 * radius
|
obj_w = 2 * radius
|
||||||
obj_h = 2 * radius
|
obj_h = 2 * radius
|
||||||
|
|
||||||
|
@ -330,46 +299,55 @@ class Labelme2YOLO:
|
||||||
yolo_w = round(float(obj_w / img_w), 6)
|
yolo_w = round(float(obj_w / img_w), 6)
|
||||||
yolo_h = round(float(obj_h / img_h), 6)
|
yolo_h = round(float(obj_h / img_h), 6)
|
||||||
|
|
||||||
if shape['label'] in self._label_id_map:
|
if shape["label"]:
|
||||||
label_id = self._label_id_map[shape['label']]
|
label = shape["label"]
|
||||||
|
if label not in self._label_list:
|
||||||
|
self._update_id_map(label)
|
||||||
|
label_id = self._label_id_map[shape["label"]]
|
||||||
|
|
||||||
return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h
|
return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h
|
||||||
|
|
||||||
raise f"label {shape['label']} not in {self._label_list}"
|
return None
|
||||||
|
|
||||||
def _get_other_shape_yolo_object(self, shape, img_h, img_w):
|
def _get_other_shape_yolo_object(self, shape, img_h, img_w):
|
||||||
|
point_list = shape["points"]
|
||||||
point_list = shape['points']
|
|
||||||
points = np.zeros(2 * len(point_list))
|
points = np.zeros(2 * len(point_list))
|
||||||
points[::2] = [float(point[0]) / img_w for point in point_list]
|
points[::2] = [float(point[0]) / img_w for point in point_list]
|
||||||
points[1::2] = [float(point[1]) / img_h for point in point_list]
|
points[1::2] = [float(point[1]) / img_h for point in point_list]
|
||||||
|
|
||||||
if len(points) == 4:
|
if len(points) == 4:
|
||||||
if self._output_format == "polygon":
|
if self._output_format == "polygon":
|
||||||
points = extend_point_list(points)
|
points = extend_point_list(points)
|
||||||
if self._output_format == "bbox":
|
if self._output_format == "bbox":
|
||||||
points = extend_point_list(points, "bbox")
|
points = extend_point_list(points, "bbox")
|
||||||
|
|
||||||
if shape['label'] in self._label_id_map:
|
if shape["label"]:
|
||||||
label_id = self._label_id_map[shape['label']]
|
label = shape["label"]
|
||||||
|
if label not in self._label_list:
|
||||||
|
self._update_id_map(label)
|
||||||
|
label_id = self._label_id_map[shape["label"]]
|
||||||
|
|
||||||
return label_id, points.tolist()
|
return label_id, points.tolist()
|
||||||
|
|
||||||
raise f"label {shape['label']} not in {self._label_list}"
|
return None
|
||||||
|
|
||||||
def _save_dataset_yaml(self):
|
def _save_dataset_yaml(self):
|
||||||
yaml_path = os.path.join(
|
yaml_path = os.path.join(self._json_dir, "YOLODataset/", "dataset.yaml")
|
||||||
self._json_dir, 'YOLODataset/', 'dataset.yaml')
|
|
||||||
|
|
||||||
with open(yaml_path, 'w+', encoding="utf-8") as yaml_file:
|
with open(yaml_path, "w+", encoding="utf-8") as yaml_file:
|
||||||
train_dir = os.path.join(self._image_dir_path, 'train/')
|
train_dir = os.path.join(self._image_dir_path, "train/")
|
||||||
val_dir = os.path.join(self._image_dir_path, 'val/')
|
val_dir = os.path.join(self._image_dir_path, "val/")
|
||||||
test_dir = os.path.join(self._image_dir_path, 'test/')
|
test_dir = os.path.join(self._image_dir_path, "test/")
|
||||||
|
|
||||||
names_str = ''
|
names_str = ""
|
||||||
for label, _ in self._label_id_map.items():
|
for label, _ in self._label_id_map.items():
|
||||||
names_str += f"\"{label}\", "
|
names_str += f'"{label}", '
|
||||||
names_str = names_str.rstrip(", ")
|
names_str = names_str.rstrip(", ")
|
||||||
|
|
||||||
content = (f"train: {train_dir}\nval: {val_dir}\ntest: {test_dir}\n"
|
content = (
|
||||||
f"nc: {len(self._label_id_map)}\n"
|
f"train: {train_dir}\nval: {val_dir}\ntest: {test_dir}\n"
|
||||||
f"names: [{names_str}]")
|
f"nc: {len(self._label_id_map)}\n"
|
||||||
|
f"names: [{names_str}]"
|
||||||
|
)
|
||||||
|
|
||||||
yaml_file.write(content)
|
yaml_file.write(content)
|
||||||
|
|
Loading…
Reference in New Issue