refactor: 精简代码,标注以文件为单位存储

This commit is contained in:
maxmon 2023-04-12 00:42:03 +08:00
parent 7fc844b27c
commit 366cff7d19
5 changed files with 35 additions and 168 deletions

View File

@ -1,4 +1,4 @@
from app.config.setting import PROJECT_NAME, FILE_NAME, PROJECTS, ANNO_OUTPUT_PATH
from app.config.setting import PROJECT_NAME, FILE_NAME, PROJECT_PATH, ANNO_OUTPUT_PATH
from app.entities.entities import ReturnInfo, AnnoContents, OutputAnno, QueryAnno
from app.libs.redprint import RedPrint
import time
@ -13,49 +13,14 @@ api = RedPrint('anno')
waiting_list_dic = {}
def create_anno_2_file(project_name, anno_cont, all_anno=None):
    """Persist an annotation and then drain the project's waiting queue.

    The annotation is appended to the project's annotation JSON file
    (the file is created when it does not exist yet).  Afterwards the
    head of ``waiting_list_dic[project_name]`` is dropped and, while the
    queue is not empty, the new head is persisted the same way.

    The queue is drained with a loop instead of one recursive call per
    queued item, so a long queue cannot hit the recursion limit.

    Args:
        project_name: Name used to format ``ANNO_OUTPUT_PATH`` and to
            index ``waiting_list_dic``.
        anno_cont: The annotation entry to write first.
        all_anno: Unused; kept so existing callers keep working.

    Returns:
        Always ``True``.
    """
    anno_output_path = ANNO_OUTPUT_PATH.format(project_name)
    while True:
        # Check whether the output file already exists.
        if not os.path.exists(anno_output_path):
            write_json(anno_output_path, [anno_cont])
        else:
            output_anno = OutputAnno()
            output_anno.all_anno = read_json_file(anno_output_path)
            output_anno.add_anno(anno_cont)
            write_json(anno_output_path, output_anno.all_anno)

        # Drop the head of the queue, then continue with the next entry.
        waiting_list_dic[project_name] = waiting_list_dic[project_name][1:]
        pending = waiting_list_dic[project_name]
        if not pending:
            return True
        anno_cont = pending[0]
@api.route('/create', methods=['POST'])
def create_anno():
# time.sleep(0.02)
ret_info = ReturnInfo()
try:
param = request.get_json()
project_name = param.get(PROJECT_NAME)
file_name = param.get(FILE_NAME)
anno_details = param.get('annoDetails')
anno_cont = AnnoContents()
anno_cont.fileName = file_name
anno_cont.annoDetails = anno_details
anno_cont.isAnno = True
if project_name not in waiting_list_dic:
waiting_list_dic[project_name] = []
waiting_list_dic[project_name].append(anno_cont)
if (len(waiting_list_dic[project_name]) == 1): # 自己位于第一个才处理,其他在第一个结束时处理
create_anno_2_file(project_name, waiting_list_dic[project_name][0])
j = request.get_json()
anno_output_path = ANNO_OUTPUT_PATH.format(j["projectName"], j["fileName"])
with open(anno_output_path, 'w', encoding='utf-8') as fh:
fh.write(json.dumps(request.get_json(), ensure_ascii=False))
except Exception as e:
print(e)
@ -72,16 +37,17 @@ def query_anno():
project_name = request.args.get("projectName").strip()
file_name = request.args.get("fileName").strip()
file_path = PROJECTS + '/' + project_name + '/' + file_name
file_path = PROJECT_PATH.format(project_name) + '/' + file_name
file_content = read_file(file_path)
anno_output_path = ANNO_OUTPUT_PATH.format(project_name)
output_anno = OutputAnno()
output_anno.all_anno = read_json_file(anno_output_path)
anno_output_path = ANNO_OUTPUT_PATH.format(project_name, file_name)
anno_details = []
if os.path.exists(anno_output_path):
anno_details = json.loads(read_file(anno_output_path))['annoDetails']
query_anno = QueryAnno()
query_anno.fileContent = file_content
query_anno.annoDetails = output_anno.get_anno(file_name)
query_anno.annoDetails = anno_details
ret_info.info = query_anno
ret_info.errCode = 0

View File

@ -23,35 +23,21 @@ def query_file():
page_number = int(request.args.get("pageNumber"))
page_size = int(request.args.get("pageSize"))
file_names = os.listdir(PROJECT_PATH.format(project_name))
project_path = PROJECT_PATH.format(project_name)
if not os.path.exists(project_path):
os.makedirs(project_path)
file_names = os.listdir(project_path)
if 'config.json' in file_names:
file_names.remove('config.json')
if 'anno.json' in file_names:
file_names.remove('anno.json')
anno_output_path = ANNO_OUTPUT_PATH.format(project_name)
if not os.path.exists(anno_output_path):
open(anno_output_path, 'w', encoding='utf-8').write('[]').close()
# 判断路径是否存在
if not os.path.exists(anno_output_path):
for file_name in file_names[(page_number - 1) * page_size: (page_number - 1) * page_size + page_size]:
file_info = FileInfo()
file_info.fileName = file_name
file_info.isAnno = False
ret_info.info.append(file_info)
print(2)
else:
for file_name in sorted(file_names)[
(page_number - 1) * page_size: (page_number - 1) * page_size + page_size]:
file_info = FileInfo()
file_info.fileName = file_name
for anno_info in read_json_file(anno_output_path):
if anno_info.get('fileName') == file_name:
file_info.isAnno = anno_info.get('isAnno')
ret_info.info.append(file_info.__dict__)
for file_name in file_names[(page_number - 1) * page_size: (page_number - 1) * page_size + page_size]:
anno_output_path = ANNO_OUTPUT_PATH.format(project_name, file_name)
file_info = FileInfo()
file_info.fileName = file_name
file_info.isAnno = os.path.exists(anno_output_path)
ret_info.info.append(file_info)
ret_info.errCode = 0
@ -99,86 +85,19 @@ def get_lables():
return json.dumps(download_json, default=lambda o: o.__dict__)
# NOTE(review): this view is registered under both routes below.
@api.route('/get_anno_json', methods=['GET'])
@api.route('/get_json', methods=['GET'])
def get_anno_json():
    """Bundle a project's per-file annotation JSON files into one download.

    Reads every ``*.json`` file in the project's annotation directory,
    joins their raw contents with newlines, writes the result to the
    project's ``anno.json`` download location and returns it as an
    attachment named ``result.json``.

    Query parameters:
        projectName: name of the project to export.

    Returns:
        The response carrying the bundled annotation file.
    """
    # NOTE(review): projectName comes straight from the query string and is
    # used to build file-system paths -- consider validating it.
    project_name = request.args.get("projectName")

    # Formatting the per-file template with an empty file name and removing
    # the '.json' suffix leaves the directory part of the path.
    anno_output_dir = ANNO_OUTPUT_PATH.format(project_name, '').replace('.json', '')

    js = [
        read_txt_file(anno_output_dir + '/' + fn)
        for fn in os.listdir(anno_output_dir)
        if fn.endswith('.json')
    ]

    anno_json_path = DOWNLOAD_FILE_LOCATION.format(project_name).replace('result.json', 'anno.json')
    print(anno_json_path)

    # Use a context manager so the bundle is flushed and closed before it
    # is handed to send_from_directory.
    with open('app/' + anno_json_path, 'w', encoding='utf-8') as fh:
        fh.write('\n'.join(js))

    response = make_response(send_from_directory(directory='', path=anno_json_path, as_attachment=True))
    response.headers["Content-disposition"] = 'attachment; filename=result.json'
    return response
@api.route('/get_json', methods=['GET'])
def get_json():
    """Build ``result.json`` for a project and send it as a download.

    Annotated files listed in the project's ``anno.json`` are exported with
    their text and entities; project files without an entry are exported
    with an empty entity list.  The generated ``result.json`` is removed by
    a background thread three seconds after the response is created.

    Query parameters:
        projectName: name of the project to export.

    Returns:
        The response carrying ``result.json`` as an attachment.
    """
    ret_info = ReturnInfo()
    try:
        project_name = request.args.get("projectName")
        project_path = PROJECT_PATH.format(project_name)
        project_file_list = get_project_file(project_path)
        anno_data_set = set()
        download_json = []
        anno_file_path = PROJECT_PATH.format(project_name) + '/anno.json'
        if not os.path.exists(anno_file_path):
            # Create an empty annotation list.  The previous
            # open(...).write('[]').close() called .close() on the int
            # returned by write() and raised AttributeError.
            with open(anno_file_path, 'w', encoding='utf-8') as fh:
                fh.write('[]')
        anno_data = read_json_file(anno_file_path)
        for item in anno_data:
            item_dict = {}
            item_dict['file'] = item['fileName']
            item_dict['text'] = read_txt_file(PROJECT_PATH.format(project_name) + '/' + item['fileName'])
            # Strip the isSmall key from the entries taken from anno.json.
            for entity in item['annoDetails']:
                if 'isSmall' in entity:
                    entity.pop('isSmall')
            item_dict['entity'] = item['annoDetails']
            anno_data_set.add(item['fileName'])
            download_json.append(item_dict)
        print(anno_data_set)
        # Add the files that have no annotation yet.
        # NOTE(review): annotated items use the key 'text' while these use
        # 'txt' -- looks like a typo; confirm with consumers of result.json.
        for filename in set(project_file_list).difference(anno_data_set):
            item_dict = {'file': filename,
                         'txt': read_txt_file(PROJECT_PATH.format(project_name) + '/' + filename),
                         'entity': []
                         }
            download_json.append(item_dict)
        write_json(PROJECT_PATH.format(project_name) + '/result.json', download_json)
    except Exception as e:
        ret_info.errCode = 404
        ret_info.errMsg = str(e)

    # Returning the data:
    # With send_from_directory / send_file the path matters -- a wrong path
    # yields a 404.  The default directory is app/, so the path must not be
    # prefixed with /app again, which is why PROJECT_PATH cannot be used here.
    # A response object is created so that result.json can be deleted
    # afterwards.
    response = make_response(send_from_directory(directory='', path=DOWNLOAD_FILE_LOCATION.format(project_name),
                                                 as_attachment=True))
    response.headers["Content-disposition"] = 'attachment; filename=result.json'

    def delete():
        # Give the download a moment before removing the temporary file.
        time.sleep(3)
        os.remove(PROJECT_PATH.format(project_name) + '/result.json')
        print('result.json has been deleted')

    thread = threading.Thread(target=delete)
    thread.start()
    return response

View File

@ -79,9 +79,6 @@ def create_project():
make_dir(PROJECT_PATH.format(project_name))
write_json(PROJECT_CONFIG_PATH.format(project_name), param)
anno_path = ANNO_OUTPUT_PATH.format(project_name)
if not os.path.exists(anno_path):
write_json(anno_path, [])
ret_info.errCode = 0
@ -104,8 +101,6 @@ def update_entity_types():
for entity_type in entity_types:
new_entity_types.append(entity_type.get('type'))
anno_output_path = ANNO_OUTPUT_PATH.format(project_name)
if os.path.exists(PROJECT_CONFIG_PATH.format(project_name)): # False
project_config_info = read_json_file(PROJECT_CONFIG_PATH.format(project_name))
project = Project()
@ -114,16 +109,6 @@ def update_entity_types():
project.entityTypes = entity_types
print(project.entityTypes)
if os.path.exists(anno_output_path):
anno_details = read_json_file(anno_output_path)
for anno_info in anno_details:
anno_detail = json.loads(anno_info.get('annoDetails'))
for ind, anno in enumerate(anno_detail):
entity_type = anno.get('type')
if entity_type not in new_entity_types:
anno_detail.pop(ind)
write_json(anno_output_path, anno_details)
write_json(PROJECT_CONFIG_PATH.format(project_name), project)
ret_info.errMsg = 'update ok'

View File

@ -13,11 +13,11 @@ FILE_NAME = 'fileName'
PROJECTS = 'app/projects'
# Project path
# NOTE(review): PROJECT_PATH is assigned twice; the second assignment is the one in effect.
PROJECT_PATH = PROJECTS + '/' + "{}"
PROJECT_PATH = PROJECTS + '/{}/data/'
PROJECT_CONFIG_PATH = PROJECTS + '/' + "{}" + '/config.json'
# Location of the downloadable annotation result
DOWNLOAD_FILE_LOCATION = 'projects/{}/result.json'
# Storage path for annotation data
# NOTE(review): ANNO_OUTPUT_PATH is assigned twice; the second assignment (two placeholders) is the one in effect.
ANNO_OUTPUT_PATH = PROJECTS + '/' + '{}' + '/' + 'anno.json'
ANNO_OUTPUT_PATH = PROJECTS + '/{}/anno/{}.json'

View File

@ -22,13 +22,10 @@ def make_dir(path):
if not folder:
os.mkdir(path)
json_cache_dic = {}


# Write JSON data to a .json file
def write_json(json_file_path, data):
    """Serialize ``data`` as JSON into ``json_file_path`` (UTF-8).

    The file is written once.  Objects that ``json`` cannot serialize
    natively are encoded through their ``__dict__``, so plain attribute
    containers can be passed as well as dicts and lists.

    Args:
        json_file_path: Destination path; the file is overwritten.
        data: JSON-serializable value, or an object exposing ``__dict__``.

    Raises:
        OSError: If the file cannot be opened for writing.
        AttributeError: If a nested value is neither JSON-serializable nor
            has a ``__dict__``.
    """
    # The cache keeps a reference to the same object that was passed in.
    json_cache_dic[json_file_path] = data
    with open(json_file_path, 'w', encoding='utf-8') as fh:
        json.dump(data, fh, ensure_ascii=False, default=lambda o: o.__dict__)
# 加载json文件中的json数据
@ -42,7 +39,7 @@ def read_file(file_path):
ext = file_path.split('.')[-1]
if ext in ['jpg', 'png']:
return read_img_file(file_path)
elif ext in ['txt']:
elif ext in ['txt', 'json']:
return read_txt_file(file_path)
return ''