From 13091c886fa26b2eaba15c7971701e183c87da77 Mon Sep 17 00:00:00 2001 From: xgdyp Date: Fri, 2 Jul 2021 00:19:42 +0800 Subject: [PATCH] =?UTF-8?q?7.1=E6=89=B9=E9=87=8F=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E9=9B=86=E3=80=81=E5=AF=BC=E5=87=BA=E7=BB=93?= =?UTF-8?q?=E6=9E=9C=E7=9A=84=E5=90=8E=E7=AB=AF=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .idea/.gitignore | 8 +++ .idea/deployment.xml | 21 +++++++ .idea/inspectionProfiles/Project_Default.xml | 20 +++++++ .../inspectionProfiles/profiles_settings.xml | 6 ++ .idea/misc.xml | 4 ++ .idea/modules.xml | 8 +++ .idea/vcs.xml | 6 ++ .idea/whale-anno.iml | 15 +++++ be/app/api/v1/anno.py | 7 ++- be/app/api/v1/files.py | 56 +++++++++++++++++-- be/app/api/v1/project.py | 38 +++++++++++-- be/app/config/setting.py | 2 + be/app/libs/tools.py | 19 ++++++- be/app/projects/demo5/anno.json | 2 +- be/run.py | 2 +- be/test.py | 31 +++++++--- doc/api文档.md | 7 +++ 17 files changed, 232 insertions(+), 20 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/deployment.xml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/whale-anno.iml create mode 100644 doc/api文档.md diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..f7be68c --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/deployment.xml b/.idea/deployment.xml new file mode 100644 index 0000000..84d45fc --- /dev/null +++ b/.idea/deployment.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..a8df899 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,20 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..b534b89 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..746c876 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..9661ac7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/whale-anno.iml b/.idea/whale-anno.iml new file mode 100644 index 0000000..fed4048 --- /dev/null +++ b/.idea/whale-anno.iml @@ -0,0 +1,15 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/be/app/api/v1/anno.py b/be/app/api/v1/anno.py index 9749893..76601d9 100644 --- a/be/app/api/v1/anno.py +++ b/be/app/api/v1/anno.py @@ -1,18 +1,22 @@ from app.config.setting import PROJECT_NAME, FILE_NAME, PROJECTS, ANNO_OUTPUT_PATH from app.entities.entities import ReturnInfo, AnnoContents, OutputAnno, QueryAnno from app.libs.redprint import RedPrint - +import time from flask import request import json import os +import zipfile from app.libs.tools import read_txt_file, write_json, read_json_file, make_dir api = RedPrint('anno') + + @api.route('/create', methods=['POST']) def create_anno(): + # time.sleep(0.02) ret_info = ReturnInfo() try: param = request.get_json() @@ -40,6 +44,7 @@ def create_anno(): ret_info.errCode = 0 except Exception as e: + print(e) ret_info.errCode = 404 ret_info.errMsg = str(e) diff --git a/be/app/api/v1/files.py b/be/app/api/v1/files.py index ff1b9b3..b363a95 100644 --- a/be/app/api/v1/files.py +++ b/be/app/api/v1/files.py @@ -1,12 +1,13 @@ -from app.config.setting import PROJECT_PATH, ANNO_OUTPUT_PATH -from app.libs.redprint import RedPrint +from ...config.setting import PROJECT_PATH, ANNO_OUTPUT_PATH,DOWNLOAD_FILE_LOCATION -from flask import request +from ...libs.redprint import RedPrint + +from flask import request, send_file, Response, send_from_directory,make_response import json import os -from app.entities.entities import ReturnInfo, FileInfo -from app.libs.tools import read_json_file +from ...entities.entities import ReturnInfo, FileInfo +from ...libs.tools import read_json_file,read_txt_file,write_json api = RedPrint('files') @@ -56,3 +57,48 @@ def query_file(): ret_info.errCode = 404 ret_info.errMsg = str(e) return json.dumps(ret_info, default=lambda o: o.__dict__) + + +@api.route('/get_json', methods=['GET']) +def get_json(): + ret_info = ReturnInfo() + try: + project_name = request.args.get("projectName").strip() + download_json = [] + anno_data = read_json_file(PROJECT_PATH.format(project_name)+'/anno.json') + for item in anno_data: + item_dict = {} + item_dict['file'] = item['fileName'] + item_dict['text'] = read_txt_file(PROJECT_PATH.format(project_name)+'/'+item['fileName']) + # 这一步来去掉anno.json中的isSmall + for entity in item['annoDetails']: + if 'isSmall' in entity: + entity.pop('isSmall') + # 也可以用这种方式来实现 + # item['annoDetails'].pop('isSmall','0') + item_dict['entity'] = item['annoDetails'] + download_json.append(item_dict) + write_json(PROJECT_PATH.format(project_name)+'/result.json', download_json) + + except Exception as e: + ret_info.errCode = 404 + ret_info.errMsg = str(e) + + # 返回数据 + # 1.返回json格式的数据 + # ret_info.info = download_json + # ret_info.errCode = 0 + # return json.dumps(ret_info, default=lambda o: o.__dict__) + + # 使用send_from_directory 或者使用send_file时要特别注意文件的路径,路径不对的话会报404 + # 本线默认目录时app下,所以不需要再加/app了,所以不能用PROJECT_PATH + # 2.创建response对象返回数据 + # response = make_response(send_from_directory(DOWNLOAD_FILE_LOCATION.format(project_name),filename='result.json', as_attachment=True)) + # response.headers["Content-disposition"] = 'attachment; filename=result.json' + # return response + + # 3. 直接使用send from directory 返回json文件 + # return send_from_directory('', filename=DOWNLOAD_FILE_LOCATION.format(project_name),as_attachment=True) + + # 4. 使用send file 返回json文件 + return send_file(DOWNLOAD_FILE_LOCATION.format(project_name), as_attachment=True, attachment_filename=project_name+'_result.json') diff --git a/be/app/api/v1/project.py b/be/app/api/v1/project.py index 236977c..96b72b1 100644 --- a/be/app/api/v1/project.py +++ b/be/app/api/v1/project.py @@ -1,21 +1,51 @@ -from app.config.setting import PROJECT_NAME, PROJECT_TYPE, ENTITY_TYPES, PROJECTS, PROJECT_PATH, PROJECT_CONFIG_PATH, \ +from ...config.setting import PROJECT_NAME, PROJECT_TYPE, ENTITY_TYPES, PROJECTS, PROJECT_PATH, PROJECT_CONFIG_PATH, \ ANNO_OUTPUT_PATH -from app.libs.redprint import RedPrint +from ...libs.redprint import RedPrint from flask import request import json import os -from app.libs.tools import make_dir, write_json, read_json_file -from app.entities.entities import Project, ReturnInfo +from ...libs.tools import make_dir, write_json, read_json_file,unzip_file +from ...entities.entities import Project, ReturnInfo + + +from ...config.setting import FILE_NAME +from ...entities.entities import AnnoContents api = RedPrint('project') +@api.route('/get_zipped_data', methods=['POST']) +def get_zipped_data(): + # time.sleep(0.02) + ret_info = ReturnInfo() + try: + project_name = request.form.get('projectName') + print(project_name) + + upload_file = request.files['file'] + file_path = os.path.join(PROJECT_PATH.format(project_name), upload_file.filename) + target_path = PROJECT_PATH.format(project_name) + # print(file_path) + upload_file.save(file_path) + print(target_path) + unzip_file(file_path,target_path) + os.remove(file_path) + + except Exception as e: + print(e) + ret_info.errCode = 404 + ret_info.errMsg = str(e) + + return json.dumps(ret_info, default=lambda o: o.__dict__) + @api.route('/create', methods=['POST']) def create_project(): ret_info = ReturnInfo() try: + param = request.get_json() + print(param) project_name = param.get(PROJECT_NAME) make_dir(PROJECT_PATH.format(project_name)) diff --git a/be/app/config/setting.py b/be/app/config/setting.py index f2c24c9..1b3ee76 100644 --- a/be/app/config/setting.py +++ b/be/app/config/setting.py @@ -16,5 +16,7 @@ PROJECTS = 'app/projects' PROJECT_PATH = PROJECTS + '/' + "{}" PROJECT_CONFIG_PATH = PROJECTS + '/' + "{}" + '/config.json' +#下载标注结果所在的位置 +DOWNLOAD_FILE_LOCATION = 'projects/{}/result.json' # 标注信息存储路径 ANNO_OUTPUT_PATH = PROJECTS + '/' + '{}' + '/' + 'anno.json' diff --git a/be/app/libs/tools.py b/be/app/libs/tools.py index d000ea7..ce878b0 100644 --- a/be/app/libs/tools.py +++ b/be/app/libs/tools.py @@ -1,6 +1,6 @@ import os import json - +import zipfile def make_dir(path): folder = os.path.exists(path) @@ -27,3 +27,20 @@ def read_txt_file(file_path): with open(file_path, 'r', encoding='utf-8') as f: data = f.read() return data + + +def unzip_file(zip_src, dst_dir): + """ + 解压zip文件 + :param zip_src: zip文件的全路径 + :param dst_dir: 要解压到的目的文件夹 + :return: + """ + r = zipfile.is_zipfile(zip_src) + if r: + fz = zipfile.ZipFile(zip_src, "r") + for file in fz.namelist(): + fz.extract(file, dst_dir) + else: + return "请上传zip类型压缩文件" + diff --git a/be/app/projects/demo5/anno.json b/be/app/projects/demo5/anno.json index 713e8e7..6747564 100644 --- a/be/app/projects/demo5/anno.json +++ b/be/app/projects/demo5/anno.json @@ -1 +1 @@ -[{"fileName": "test1.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test10.txt", "annoDetails": [{"name": "小明", "type": "person1", "start": 0, "end": 2, "isSmall": false}, {"name": "北京工业大学", "type": "organiztion", "start": 5, "end": 11}, {"name": "北京", "type": "location", "start": 5, "end": 7, "isSmall": false}], "isAnno": true}] \ No newline at end of file +[{"fileName": "test10.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test12.txt", "annoDetails": [{"name": "业于北京工业大", "type": "person1", "start": 3, "end": 10}], "isAnno": true}, {"fileName": "test6.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test4.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test2.txt", "annoDetails": [{"name": "小明毕业于北京工业", "type": "person1", "start": 0, "end": 9, "isSmall": false}, {"name": "小明毕业于北京", "type": "person1", "start": 0, "end": 7, "isSmall": true}, {"name": "小明毕业于北", "type": "person1", "start": 0, "end": 6, "isSmall": true}, {"name": "小明毕业于", "type": "person1", "start": 0, "end": 5, "isSmall": true}, {"name": "明毕业于北京工业", "type": "person1", "start": 1, "end": 9, "isSmall": true}, {"name": "明毕业于北京工", "type": "person1", "start": 1, "end": 8, "isSmall": true}, {"name": "明毕业于北京", "type": "person1", "start": 1, "end": 7, "isSmall": true}, {"name": "毕业于北京工业", "type": "person1", "start": 2, "end": 9, "isSmall": true}, {"name": "毕业于北京", "type": "person1", "start": 2, "end": 7, "isSmall": true}, {"name": "毕业于北", "type": "person1", "start": 2, "end": 6, "isSmall": true}, {"name": "毕业", "type": "person1", "start": 2, "end": 4}, {"name": "毕", "type": "person1", "start": 2, "end": 3, "isSmall": true}, {"name": "业于北京工业大", "type": "person1", "start": 3, "end": 10, "isSmall": true}, {"name": "业于北京", "type": "person1", "start": 3, "end": 7, "isSmall": true}, {"name": "于北京工", "type": "person1", "start": 4, "end": 8, "isSmall": true}, {"name": "于北京", "type": "person1", "start": 4, "end": 7, "isSmall": true}, {"name": "北京工", "type": "person1", "start": 5, "end": 8, "isSmall": true}, {"name": "京工", "type": "person1", "start": 6, "end": 8, "isSmall": true}, {"name": "工业大", "type": "person1", "start": 7, "end": 10, "isSmall": true}], "isAnno": true}, {"fileName": "test3.txt", "annoDetails": [{"name": "小明", "type": "person1", "start": 0, "end": 2}, {"name": "明毕业于北京", "type": "organiztion", "start": 1, "end": 7, "isSmall": false}, {"name": "京工业", "type": "123", "start": 6, "end": 9, "isSmall": true}, {"name": "业大学", "type": "location", "start": 8, "end": 11, "isSmall": true}], "isAnno": true}, {"fileName": "test11.txt", "annoDetails": [{"name": "北", "type": "person1", "start": 5, "end": 6, "isSmall": false}], "isAnno": true}, {"fileName": "test5.txt", "annoDetails": [{"name": "工业大学", "type": "person1", "start": 7, "end": 11}], "isAnno": true}, {"fileName": "test1.txt", "annoDetails": [{"name": "弟们 雄起", "type": "person1", "start": 11, "end": 16}], "isAnno": true}] \ No newline at end of file diff --git a/be/run.py b/be/run.py index bf1329c..570aa72 100644 --- a/be/run.py +++ b/be/run.py @@ -14,4 +14,4 @@ app = create_app() # v1是版本 # index是首页 if __name__ == '__main__': - app.run(host='0.0.0.0', port=9060, debug=True) \ No newline at end of file + app.run(host='127.0.0.1', port=9060, debug=True) \ No newline at end of file diff --git a/be/test.py b/be/test.py index 08c6257..aa68563 100644 --- a/be/test.py +++ b/be/test.py @@ -1,18 +1,29 @@ import requests import json import threading - -headers = {'Content-Type': 'application/json'} -data = {"projectName":"demo5","fileName":"test11.txt","annoDetails":[{"name":"北","type":"person1","start":5,"end":6,"isSmall":False}]} -test_times = 20 +import time +import random +headers = {'Content-Type': 'application/json', 'Connection': 'close'} +data = {"projectName": "demo5", "fileName": "test11.txt", + "annoDetails": [{"name": "北", "type": "person1", "start": 5, "end": 6, "isSmall": False}]} +test_times = 1000 test_cnt = 0 err_cnt = 0 +requests.DEFAULT_RETRIES = 1000 + +requests.adapters.DEFAULT_RETRIES = 1000 +s = requests.session() +s.keep_alive = False + def create_load(): global test_times global test_cnt global err_cnt - result = requests.post('http://127.0.0.1:9060/v1/anno/create', data=json.dumps(data), headers=headers) + global s + result = requests.post('http://127.0.0.1:9060/v1/anno/create', data=json.dumps(data), headers=headers, timeout=30) + + print(result.text) if result.json()['errMsg']: err_cnt += 1 print('Catch Err:', result.json()) @@ -22,11 +33,17 @@ def create_load(): print('Test success!') else: print('Test failed!') - + + print('Start', test_times, 'test') for i in range(test_times): # 单线程测试 # create_load() # 多线程测试 - t = threading.Thread(target=create_load,args=()) + time.sleep(random.randint(100,200)*0.0001) + t = threading.Thread(target=create_load, args=()) + # t.setDaemon(True) t.start() +time.sleep(1) +print("ERROR COUNT"+str(err_cnt)) +# print("OK") diff --git a/doc/api文档.md b/doc/api文档.md new file mode 100644 index 0000000..99e5e48 --- /dev/null +++ b/doc/api文档.md @@ -0,0 +1,7 @@ +# API文档 + +| url | methods | params | return | +| -------------------------- | ------- | -------------------------------------------------------- | -------------------------------- | +| v1/project/get_zipped_data | POST | projectName:项目名 file:数据集文件(目前只支持.zip格式) | json :errcode表示是否成功导入 | +| v1/files/get_json | GET | projectName:项目名 | file(json格式的数据集标注结果) | +