Remove scikit-learn dependency

This commit is contained in:
Wang Xin 2023-10-04 16:28:20 +08:00
parent 10dacce294
commit 9f2f443dab
5 changed files with 60 additions and 52 deletions

View File

@ -6,7 +6,7 @@ build-backend = "hatchling.build"
name = "labelme2yolo" name = "labelme2yolo"
description = "This script converts the JSON format output by LabelMe to the text format required by YOLO series." description = "This script converts the JSON format output by LabelMe to the text format required by YOLO series."
readme = "README.md" readme = "README.md"
requires-python = ">=3.7" requires-python = ">=3.8"
license = "MIT" license = "MIT"
keywords = [] keywords = []
authors = [ authors = [
@ -15,7 +15,6 @@ authors = [
classifiers = [ classifiers = [
"Development Status :: 4 - Beta", "Development Status :: 4 - Beta",
"Programming Language :: Python", "Programming Language :: Python",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.10",
@ -25,7 +24,6 @@ classifiers = [
dependencies = [ dependencies = [
"opencv-python>=4.1.2", "opencv-python>=4.1.2",
"Pillow>=9.2,<10.1", "Pillow>=9.2,<10.1",
"scikit-learn>=1.1.1,<1.4.0",
"numpy>=1.23.1,<1.27.0" "numpy>=1.23.1,<1.27.0"
] ]
dynamic = ["version"] dynamic = ["version"]
@ -54,7 +52,7 @@ cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=labelm
no-cov = "cov --no-cov" no-cov = "cov --no-cov"
[[tool.hatch.envs.test.matrix]] [[tool.hatch.envs.test.matrix]]
python = ["37", "38", "39", "310"] python = ["38", "39", "310"]
[tool.coverage.run] [tool.coverage.run]
branch = true branch = true

View File

@ -1,4 +1,3 @@
opencv-python opencv-python
Pillow Pillow
scikit-learn
numpy numpy

View File

@ -4,4 +4,4 @@
''' '''
about version about version
''' '''
__version__ = '0.1.2' __version__ = '0.1.3'

View File

@ -21,14 +21,14 @@ def run():
"--val_size", "--val_size",
type=float, type=float,
nargs="?", nargs="?",
default=None, default=0.2,
help="Please input the validation dataset size, for example 0.1.", help="Please input the validation dataset size, for example 0.1.",
) )
parser.add_argument( parser.add_argument(
"--test_size", "--test_size",
type=float, type=float,
nargs="?", nargs="?",
default=None, default=0.0,
help="Please input the test dataset size, for example 0.1.", help="Please input the test dataset size, for example 0.1.",
) )
parser.add_argument( parser.add_argument(

View File

@ -9,25 +9,38 @@ import io
import json import json
import math import math
import os import os
import random
import shutil import shutil
from collections import OrderedDict from collections import OrderedDict
from multiprocessing import Pool from multiprocessing import Pool
import cv2
import numpy as np
import PIL.ExifTags import PIL.ExifTags
import PIL.Image import PIL.Image
import PIL.ImageOps import PIL.ImageOps
from sklearn.model_selection import train_test_split import cv2
import numpy as np
# Seed both generators with a fixed value so that random draws made through
# `random` and `np.random` are reproducible from one run to the next.
random.seed(12345678)
np.random.seed(12345678)

# number of LabelMe2YOLO multiprocessing workers: leave one CPU free, but
# always keep at least one worker. os.cpu_count() returns None when the CPU
# count cannot be determined, so fall back to 1 instead of failing on
# `None - 1` at import time.
NUM_THREADS = max(1, (os.cpu_count() or 1) - 1)
def train_test_split(dataset_index, test_size=0.2):
    """Randomly split a sequence of items into a train part and a test part.

    Args:
        dataset_index: Sequence of items (typically sample indices) to split.
            It is copied before shuffling, so the caller's object is never
            modified and immutable sequences such as ``range`` or tuples are
            accepted as well.
        test_size: Fraction of the items assigned to the test part. Values
            outside ``[0.0, 1.0]`` are clamped into that interval.

    Returns:
        A ``(train_index, test_index)`` tuple of lists; together they hold
        every item of ``dataset_index``.
    """
    test_size = min(max(0.0, test_size), 1.0)

    # random.shuffle works in place: shuffle a private copy so the caller's
    # sequence keeps its order and non-list inputs do not raise.
    shuffled = list(dataset_index)
    random.shuffle(shuffled)

    # The train part is rounded up, so a fractional item goes to train.
    train_size = math.ceil(len(shuffled) * (1.0 - test_size))
    return shuffled[:train_size], shuffled[train_size:]
# copied from https://github.com/wkentaro/labelme/blob/main/labelme/utils/image.py # copied from https://github.com/wkentaro/labelme/blob/main/labelme/utils/image.py
def img_data_to_pil(img_data): def img_data_to_pil(img_data):
'''Convert img_data(byte) to PIL.Image''' """Convert img_data(byte) to PIL.Image"""
file = io.BytesIO() file = io.BytesIO()
file.write(img_data) file.write(img_data)
img_pil = PIL.Image.open(file) img_pil = PIL.Image.open(file)
@ -36,7 +49,7 @@ def img_data_to_pil(img_data):
# copied from https://github.com/wkentaro/labelme/blob/main/labelme/utils/image.py # copied from https://github.com/wkentaro/labelme/blob/main/labelme/utils/image.py
def img_data_to_arr(img_data): def img_data_to_arr(img_data):
'''Convert img_data(byte) to numpy.ndarray''' """Convert img_data(byte) to numpy.ndarray"""
img_pil = img_data_to_pil(img_data) img_pil = img_data_to_pil(img_data)
img_arr = np.array(img_pil) img_arr = np.array(img_pil)
return img_arr return img_arr
@ -44,7 +57,7 @@ def img_data_to_arr(img_data):
# copied from https://github.com/wkentaro/labelme/blob/main/labelme/utils/image.py # copied from https://github.com/wkentaro/labelme/blob/main/labelme/utils/image.py
def img_b64_to_arr(img_b64): def img_b64_to_arr(img_b64):
'''Convert img_b64(str) to numpy.ndarray''' """Convert img_b64(str) to numpy.ndarray"""
img_data = base64.b64decode(img_b64) img_data = base64.b64decode(img_b64)
img_arr = img_data_to_arr(img_data) img_arr = img_data_to_arr(img_data)
return img_arr return img_arr
@ -52,7 +65,7 @@ def img_b64_to_arr(img_b64):
# copied from https://github.com/wkentaro/labelme/blob/main/labelme/utils/image.py # copied from https://github.com/wkentaro/labelme/blob/main/labelme/utils/image.py
def img_pil_to_data(img_pil): def img_pil_to_data(img_pil):
'''Convert PIL.Image to img_data(byte)''' """Convert PIL.Image to img_data(byte)"""
file = io.BytesIO() file = io.BytesIO()
img_pil.save(file, format="PNG") img_pil.save(file, format="PNG")
img_data = file.getvalue() img_data = file.getvalue()
@ -61,7 +74,7 @@ def img_pil_to_data(img_pil):
# copied from https://github.com/wkentaro/labelme/blob/main/labelme/utils/image.py # copied from https://github.com/wkentaro/labelme/blob/main/labelme/utils/image.py
def img_arr_to_b64(img_arr): def img_arr_to_b64(img_arr):
'''Convert numpy.ndarray to img_b64(str)''' """Convert numpy.ndarray to img_b64(str)"""
img_pil = PIL.Image.fromarray(img_arr) img_pil = PIL.Image.fromarray(img_arr)
file = io.BytesIO() file = io.BytesIO()
img_pil.save(file, format="PNG") img_pil.save(file, format="PNG")
@ -72,7 +85,7 @@ def img_arr_to_b64(img_arr):
# copied from https://github.com/wkentaro/labelme/blob/main/labelme/utils/image.py # copied from https://github.com/wkentaro/labelme/blob/main/labelme/utils/image.py
def img_data_to_png_data(img_data): def img_data_to_png_data(img_data):
'''Convert img_data(byte) to png_data(byte)''' """Convert img_data(byte) to png_data(byte)"""
with io.BytesIO() as f_out: with io.BytesIO() as f_out:
f_out.write(img_data) f_out.write(img_data)
img = PIL.Image.open(f_out) img = PIL.Image.open(f_out)
@ -84,7 +97,7 @@ def img_data_to_png_data(img_data):
def get_label_id_map(json_dir: str): def get_label_id_map(json_dir: str):
'''Get label id map from json files in json_dir''' """Get label id map from json files in json_dir"""
label_set = set() label_set = set()
for file_name in os.listdir(json_dir): for file_name in os.listdir(json_dir):
@ -99,26 +112,26 @@ def get_label_id_map(json_dir: str):
def extend_point_list(point_list, out_format="polygon"):
    """Extend a flat point list to its axis-aligned bounding polygon or bbox.

    Args:
        point_list: Flat sequence ``[x0, y0, x1, y1, ...]``; every entry must
            be convertible with ``float()``.
        out_format: ``"bbox"`` returns ``[center_x, center_y, width, height]``;
            any other value returns the four corners of the bounding
            rectangle as ``[x_min, y_min, x_max, y_min, x_max, y_max,
            x_min, y_max]``.

    Returns:
        A one-dimensional ``numpy.ndarray`` in the requested layout.

    Raises:
        ValueError: If ``point_list`` does not contain at least one x and one
            y coordinate.
    """
    # Convert each coordinate once instead of re-parsing it for every
    # min/max pass.
    xs = [float(value) for value in point_list[::2]]
    ys = [float(value) for value in point_list[1::2]]
    if not xs or not ys:
        # min()/max() would raise ValueError here anyway; raise the same
        # exception type with a message that names the actual problem.
        raise ValueError(
            "point_list must contain at least one (x, y) coordinate pair")

    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    if out_format == "bbox":
        w_i = x_max - x_min
        h_i = y_max - y_min
        # The bbox is anchored on its center rather than its top-left corner.
        x_i = x_min + w_i / 2
        y_i = y_min + h_i / 2
        return np.array([x_i, y_i, w_i, h_i])

    return np.array([x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max])
def save_yolo_label(json_name, label_dir_path, target_dir, yolo_obj_list): def save_yolo_label(json_name, label_dir_path, target_dir, yolo_obj_list):
'''Save yolo label to txt file''' """Save yolo label to txt file"""
txt_path = os.path.join(label_dir_path, txt_path = os.path.join(label_dir_path,
target_dir, target_dir,
json_name.replace(".json", ".txt")) json_name.replace(".json", ".txt"))
@ -132,7 +145,7 @@ def save_yolo_label(json_name, label_dir_path, target_dir, yolo_obj_list):
def save_yolo_image(json_data, json_path, image_dir_path, target_dir): def save_yolo_image(json_data, json_path, image_dir_path, target_dir):
'''Save yolo image to image_dir_path/target_dir''' """Save yolo image to image_dir_path/target_dir"""
json_name = os.path.basename(json_path) json_name = os.path.basename(json_path)
img_name = json_name.replace(".json", ".png") img_name = json_name.replace(".json", ".png")
@ -153,7 +166,7 @@ def save_yolo_image(json_data, json_path, image_dir_path, target_dir):
class Labelme2YOLO: class Labelme2YOLO:
'''Labelme to YOLO format converter''' """Labelme to YOLO format converter"""
def __init__(self, json_dir, output_format, label_list): def __init__(self, json_dir, output_format, label_list):
self._json_dir = json_dir self._json_dir = json_dir
@ -187,7 +200,7 @@ class Labelme2YOLO:
os.makedirs(yolo_path) os.makedirs(yolo_path)
def _get_dataset_part_json_names(self, dataset_part: str): def _get_dataset_part_json_names(self, dataset_part: str):
'''Get json names in dataset_part folder''' """Get json names in dataset_part folder"""
set_folder = os.path.join(self._json_dir, dataset_part) set_folder = os.path.join(self._json_dir, dataset_part)
json_names = [] json_names = []
for sample_name in os.listdir(set_folder): for sample_name in os.listdir(set_folder):
@ -197,34 +210,35 @@ class Labelme2YOLO:
return json_names return json_names
def _train_test_split(self, folders, json_names, val_size, test_size):
    """Split json names into train, val and test lists.

    When ``folders`` already contains 'train', 'val' and 'test', the json
    names are taken from those sub-folders through
    ``_get_dataset_part_json_names`` and no random split is made.
    Otherwise ``json_names`` is split randomly: ``val_size`` of the samples
    go to val, then ``test_size`` (expressed as a fraction of the whole
    dataset) is carved out of what is left.

    Args:
        folders: Collection of sub-folder names found in the json directory.
        json_names: List of json file names to split.
        val_size: Fraction of the samples used for validation.
        test_size: Fraction of the samples used for testing; ``None`` is
            treated as 0.0.

    Returns:
        A ``(train_json_names, val_json_names, test_json_names)`` tuple.
    """
    if all(part in folders for part in ('train', 'val', 'test')):
        train_json_names = self._get_dataset_part_json_names('train')
        val_json_names = self._get_dataset_part_json_names('val')
        test_json_names = self._get_dataset_part_json_names('test')
        return train_json_names, val_json_names, test_json_names

    dataset_index = list(range(len(json_names)))
    train_ids, val_ids = train_test_split(dataset_index,
                                          test_size=val_size)
    test_ids = []
    if test_size is None:
        test_size = 0.0

    # test_size is relative to the whole dataset, so rescale it to the part
    # left after the val split. When val already takes everything there is
    # nothing left to split (and dividing by zero must be avoided).
    remaining = 1 - val_size
    if test_size > 1e-8 and remaining > 1e-8:
        train_ids, test_ids = train_test_split(
            train_ids, test_size=test_size / remaining)

    train_json_names = [json_names[train_idx] for train_idx in train_ids]
    val_json_names = [json_names[val_idx] for val_idx in val_ids]
    test_json_names = [json_names[test_idx] for test_idx in test_ids]

    return train_json_names, val_json_names, test_json_names
def convert(self, val_size, test_size): def convert(self, val_size, test_size):
'''Convert labelme format to yolo format''' """Convert labelme format to yolo format"""
json_names = [file_name for file_name in os.listdir(self._json_dir) json_names = [file_name for file_name in os.listdir(self._json_dir)
if os.path.isfile(os.path.join(self._json_dir, file_name)) and if os.path.isfile(os.path.join(self._json_dir, file_name)) and
file_name.endswith('.json')] file_name.endswith('.json')]
@ -317,10 +331,9 @@ class Labelme2YOLO:
if shape['label'] in self._label_id_map: if shape['label'] in self._label_id_map:
label_id = self._label_id_map[shape['label']] label_id = self._label_id_map[shape['label']]
return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h
else: else:
print(f"label {shape['label']} not in {self._label_list}") raise f"label {shape['label']} not in {self._label_list}"
return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h
def _get_other_shape_yolo_object(self, shape, img_h, img_w): def _get_other_shape_yolo_object(self, shape, img_h, img_w):
@ -336,17 +349,15 @@ class Labelme2YOLO:
if shape['label'] in self._label_id_map: if shape['label'] in self._label_id_map:
label_id = self._label_id_map[shape['label']] label_id = self._label_id_map[shape['label']]
return label_id, points.tolist()
else: else:
print(f"label {shape['label']} not in {self._label_list}") raise f"label {shape['label']} not in {self._label_list}"
return label_id, points.tolist()
def _save_dataset_yaml(self): def _save_dataset_yaml(self):
yaml_path = os.path.join( yaml_path = os.path.join(
self._json_dir, 'YOLODataset/', 'dataset.yaml') self._json_dir, 'YOLODataset/', 'dataset.yaml')
with open(yaml_path, 'w+', encoding="utf-8") as yaml_file: with open(yaml_path, 'w+', encoding="utf-8") as yaml_file:
train_dir = os.path.join(self._image_dir_path, 'train/') train_dir = os.path.join(self._image_dir_path, 'train/')
val_dir = os.path.join(self._image_dir_path, 'val/') val_dir = os.path.join(self._image_dir_path, 'val/')
test_dir = os.path.join(self._image_dir_path, 'test/') test_dir = os.path.join(self._image_dir_path, 'test/')