7.1批量添加数据集、导出结果的后端实现

This commit is contained in:
xgdyp 2021-07-02 00:19:42 +08:00
parent 3b84a4f720
commit 13091c886f
17 changed files with 232 additions and 20 deletions

8
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

21
.idea/deployment.xml Normal file
View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
<serverData>
<paths name="root@113.31.110.160:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@139.198.180.75:22 agent">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
</component>
</project>

View File

@ -0,0 +1,20 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="false" level="WARNING" enabled_by_default="false">
<option name="ignoredPackages">
<value>
<list size="0" />
</value>
</option>
</inspection_tool>
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="app.flask_login" />
<option value="app.flask_bootstrap" />
</list>
</option>
</inspection_tool>
</profile>
</component>

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

4
.idea/misc.xml Normal file
View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/whale-anno.iml" filepath="$PROJECT_DIR$/.idea/whale-anno.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

15
.idea/whale-anno.iml Normal file
View File

@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TemplatesService">
<option name="TEMPLATE_FOLDERS">
<list>
<option value="$MODULE_DIR$/fe/node_modules/istanbul-reports/lib/html/templates" />
</list>
</option>
</component>
</module>

View File

@ -1,18 +1,22 @@
from app.config.setting import PROJECT_NAME, FILE_NAME, PROJECTS, ANNO_OUTPUT_PATH from app.config.setting import PROJECT_NAME, FILE_NAME, PROJECTS, ANNO_OUTPUT_PATH
from app.entities.entities import ReturnInfo, AnnoContents, OutputAnno, QueryAnno from app.entities.entities import ReturnInfo, AnnoContents, OutputAnno, QueryAnno
from app.libs.redprint import RedPrint from app.libs.redprint import RedPrint
import time
from flask import request from flask import request
import json import json
import os import os
import zipfile
from app.libs.tools import read_txt_file, write_json, read_json_file, make_dir from app.libs.tools import read_txt_file, write_json, read_json_file, make_dir
api = RedPrint('anno') api = RedPrint('anno')
@api.route('/create', methods=['POST']) @api.route('/create', methods=['POST'])
def create_anno(): def create_anno():
# time.sleep(0.02)
ret_info = ReturnInfo() ret_info = ReturnInfo()
try: try:
param = request.get_json() param = request.get_json()
@ -40,6 +44,7 @@ def create_anno():
ret_info.errCode = 0 ret_info.errCode = 0
except Exception as e: except Exception as e:
print(e)
ret_info.errCode = 404 ret_info.errCode = 404
ret_info.errMsg = str(e) ret_info.errMsg = str(e)

View File

@ -1,12 +1,13 @@
from app.config.setting import PROJECT_PATH, ANNO_OUTPUT_PATH from ...config.setting import PROJECT_PATH, ANNO_OUTPUT_PATH,DOWNLOAD_FILE_LOCATION
from app.libs.redprint import RedPrint
from flask import request from ...libs.redprint import RedPrint
from flask import request, send_file, Response, send_from_directory,make_response
import json import json
import os import os
from app.entities.entities import ReturnInfo, FileInfo from ...entities.entities import ReturnInfo, FileInfo
from app.libs.tools import read_json_file from ...libs.tools import read_json_file,read_txt_file,write_json
api = RedPrint('files') api = RedPrint('files')
@ -56,3 +57,48 @@ def query_file():
ret_info.errCode = 404 ret_info.errCode = 404
ret_info.errMsg = str(e) ret_info.errMsg = str(e)
return json.dumps(ret_info, default=lambda o: o.__dict__) return json.dumps(ret_info, default=lambda o: o.__dict__)
@api.route('/get_json', methods=['GET'])
def get_json():
    """
    Export a project's annotation results as a downloadable JSON file.

    Reads the ``projectName`` query argument, builds one record per entry of
    the project's ``anno.json`` (keys ``file``, ``text`` and ``entity``),
    writes the records to ``result.json`` inside the project folder and sends
    that file back as an attachment.

    :return: the ``result.json`` attachment on success; otherwise a
             JSON-serialised ReturnInfo with errCode 404 and the error text
    """
    ret_info = ReturnInfo()
    try:
        project_name = (request.args.get("projectName") or "").strip()
        # projectName comes from the client and is formatted into filesystem
        # paths below, so refuse anything that is not a plain folder name.
        if (not project_name or project_name in ('.', '..')
                or os.path.basename(project_name) != project_name):
            raise ValueError('invalid projectName')

        project_dir = PROJECT_PATH.format(project_name)
        download_json = []
        for item in read_json_file(project_dir + '/anno.json'):
            # The exported entities must not carry the 'isSmall' flag that
            # anno.json stores on some of them.
            entities = [
                {key: value for key, value in entity.items() if key != 'isSmall'}
                for entity in item['annoDetails']
            ]
            download_json.append({
                'file': item['fileName'],
                'text': read_txt_file(project_dir + '/' + item['fileName']),
                'entity': entities,
            })
        write_json(project_dir + '/result.json', download_json)
    except Exception as e:
        ret_info.errCode = 404
        ret_info.errMsg = str(e)
        # Report the failure instead of falling through to send_file, which
        # would otherwise serve a stale file or fail on an unbound name.
        return json.dumps(ret_info, default=lambda o: o.__dict__)

    # A wrong path makes send_file answer 404. The path is resolved from the
    # app directory rather than the working directory, which is why
    # DOWNLOAD_FILE_LOCATION is used here instead of PROJECT_PATH.
    # NOTE(review): `attachment_filename` was renamed `download_name` in
    # Flask 2.0 - confirm against the Flask version this project pins.
    return send_file(DOWNLOAD_FILE_LOCATION.format(project_name), as_attachment=True,
                     attachment_filename=project_name + '_result.json')

View File

@ -1,21 +1,51 @@
from app.config.setting import PROJECT_NAME, PROJECT_TYPE, ENTITY_TYPES, PROJECTS, PROJECT_PATH, PROJECT_CONFIG_PATH, \ from ...config.setting import PROJECT_NAME, PROJECT_TYPE, ENTITY_TYPES, PROJECTS, PROJECT_PATH, PROJECT_CONFIG_PATH, \
ANNO_OUTPUT_PATH ANNO_OUTPUT_PATH
from app.libs.redprint import RedPrint from ...libs.redprint import RedPrint
from flask import request from flask import request
import json import json
import os import os
from app.libs.tools import make_dir, write_json, read_json_file from ...libs.tools import make_dir, write_json, read_json_file,unzip_file
from app.entities.entities import Project, ReturnInfo from ...entities.entities import Project, ReturnInfo
from ...config.setting import FILE_NAME
from ...entities.entities import AnnoContents
api = RedPrint('project') api = RedPrint('project')
@api.route('/get_zipped_data', methods=['POST'])
def get_zipped_data():
    """
    Import a zipped dataset into an existing project folder.

    Reads the ``projectName`` form field and the uploaded ``file``, stores the
    upload temporarily inside the project folder, extracts it there and
    removes the temporary archive again.

    :return: JSON-serialised ReturnInfo; errCode 0 on success, errCode 404
             with the error text in errMsg on failure
    """
    import tempfile

    ret_info = ReturnInfo()
    try:
        project_name = request.form.get('projectName')
        # projectName comes from the client and is formatted into a
        # filesystem path, so refuse anything that is not a plain folder name.
        if (not project_name or project_name in ('.', '..')
                or os.path.basename(project_name) != project_name):
            raise ValueError('invalid projectName')

        upload_file = request.files['file']
        target_path = PROJECT_PATH.format(project_name)

        # Save under a server-chosen temporary name: the client-supplied
        # filename could point outside the project folder, or overwrite (and
        # then delete) an existing project file such as anno.json.
        fd, file_path = tempfile.mkstemp(suffix='.zip', dir=target_path)
        os.close(fd)
        try:
            upload_file.save(file_path)
            err_msg = unzip_file(file_path, target_path)
        finally:
            # Never leave the uploaded archive behind, even if extraction fails.
            os.remove(file_path)

        # unzip_file reports a non-zip upload through its return value.
        if err_msg:
            raise ValueError(err_msg)
        ret_info.errCode = 0
    except Exception as e:
        print(e)
        ret_info.errCode = 404
        ret_info.errMsg = str(e)
    return json.dumps(ret_info, default=lambda o: o.__dict__)
@api.route('/create', methods=['POST']) @api.route('/create', methods=['POST'])
def create_project(): def create_project():
ret_info = ReturnInfo() ret_info = ReturnInfo()
try: try:
param = request.get_json() param = request.get_json()
print(param)
project_name = param.get(PROJECT_NAME) project_name = param.get(PROJECT_NAME)
make_dir(PROJECT_PATH.format(project_name)) make_dir(PROJECT_PATH.format(project_name))

View File

@ -16,5 +16,7 @@ PROJECTS = 'app/projects'
PROJECT_PATH = PROJECTS + '/' + "{}" PROJECT_PATH = PROJECTS + '/' + "{}"
PROJECT_CONFIG_PATH = PROJECTS + '/' + "{}" + '/config.json' PROJECT_CONFIG_PATH = PROJECTS + '/' + "{}" + '/config.json'
# Location of the exported annotation result (result.json); format with the project name.
# NOTE(review): unlike PROJECT_PATH this has no 'app/' prefix - presumably because
# it is resolved relative to the Flask app directory when the file is sent; confirm.
DOWNLOAD_FILE_LOCATION = 'projects/{}/result.json'
# 标注信息存储路径 # 标注信息存储路径
ANNO_OUTPUT_PATH = PROJECTS + '/' + '{}' + '/' + 'anno.json' ANNO_OUTPUT_PATH = PROJECTS + '/' + '{}' + '/' + 'anno.json'

View File

@ -1,6 +1,6 @@
import os import os
import json import json
import zipfile
def make_dir(path): def make_dir(path):
folder = os.path.exists(path) folder = os.path.exists(path)
@ -27,3 +27,20 @@ def read_txt_file(file_path):
with open(file_path, 'r', encoding='utf-8') as f: with open(file_path, 'r', encoding='utf-8') as f:
data = f.read() data = f.read()
return data return data
def unzip_file(zip_src, dst_dir):
    """
    Extract every member of a zip archive into a directory.

    :param zip_src: full path of the zip file
    :param dst_dir: directory to extract into
    :return: None when the archive was extracted; the message
             "请上传zip类型压缩文件" when zip_src is not a zip archive
    """
    if not zipfile.is_zipfile(zip_src):
        return "请上传zip类型压缩文件"
    # The context manager closes the archive handle even when extraction
    # raises, so the caller can delete zip_src afterwards.
    with zipfile.ZipFile(zip_src, "r") as archive:
        archive.extractall(dst_dir)
    return None

View File

@ -1 +1 @@
[{"fileName": "test1.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test10.txt", "annoDetails": [{"name": "小明", "type": "person1", "start": 0, "end": 2, "isSmall": false}, {"name": "北京工业大学", "type": "organiztion", "start": 5, "end": 11}, {"name": "北京", "type": "location", "start": 5, "end": 7, "isSmall": false}], "isAnno": true}] [{"fileName": "test10.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test12.txt", "annoDetails": [{"name": "业于北京工业大", "type": "person1", "start": 3, "end": 10}], "isAnno": true}, {"fileName": "test6.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test4.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test2.txt", "annoDetails": [{"name": "小明毕业于北京工业", "type": "person1", "start": 0, "end": 9, "isSmall": false}, {"name": "小明毕业于北京", "type": "person1", "start": 0, "end": 7, "isSmall": true}, {"name": "小明毕业于北", "type": "person1", "start": 0, "end": 6, "isSmall": true}, {"name": "小明毕业于", "type": "person1", "start": 0, "end": 5, "isSmall": true}, {"name": "明毕业于北京工业", "type": "person1", "start": 1, "end": 9, "isSmall": true}, {"name": "明毕业于北京工", "type": "person1", "start": 1, "end": 8, "isSmall": true}, {"name": "明毕业于北京", "type": "person1", "start": 1, "end": 7, "isSmall": true}, {"name": "毕业于北京工业", "type": "person1", "start": 2, "end": 9, "isSmall": true}, {"name": "毕业于北京", "type": "person1", "start": 2, "end": 7, "isSmall": true}, {"name": "毕业于北", "type": "person1", "start": 2, "end": 6, "isSmall": true}, {"name": "毕业", "type": "person1", "start": 2, "end": 4}, {"name": "毕", "type": "person1", "start": 2, "end": 3, "isSmall": true}, {"name": "业于北京工业大", "type": "person1", "start": 3, "end": 10, "isSmall": true}, {"name": "业于北京", "type": "person1", "start": 3, "end": 7, "isSmall": true}, {"name": "于北京工", "type": "person1", "start": 4, "end": 8, "isSmall": true}, {"name": "于北京", "type": "person1", "start": 4, "end": 7, "isSmall": true}, {"name": "北京工", "type": "person1", "start": 5, "end": 8, "isSmall": true}, 
{"name": "京工", "type": "person1", "start": 6, "end": 8, "isSmall": true}, {"name": "工业大", "type": "person1", "start": 7, "end": 10, "isSmall": true}], "isAnno": true}, {"fileName": "test3.txt", "annoDetails": [{"name": "小明", "type": "person1", "start": 0, "end": 2}, {"name": "明毕业于北京", "type": "organiztion", "start": 1, "end": 7, "isSmall": false}, {"name": "京工业", "type": "123", "start": 6, "end": 9, "isSmall": true}, {"name": "业大学", "type": "location", "start": 8, "end": 11, "isSmall": true}], "isAnno": true}, {"fileName": "test11.txt", "annoDetails": [{"name": "北", "type": "person1", "start": 5, "end": 6, "isSmall": false}], "isAnno": true}, {"fileName": "test5.txt", "annoDetails": [{"name": "工业大学", "type": "person1", "start": 7, "end": 11}], "isAnno": true}, {"fileName": "test1.txt", "annoDetails": [{"name": "弟们 雄起", "type": "person1", "start": 11, "end": 16}], "isAnno": true}]

View File

@ -14,4 +14,4 @@ app = create_app()
# v1是版本 # v1是版本
# index是首页 # index是首页
if __name__ == '__main__': if __name__ == '__main__':
app.run(host='0.0.0.0', port=9060, debug=True) app.run(host='127.0.0.1', port=9060, debug=True)

View File

@ -1,18 +1,29 @@
import requests import requests
import json import json
import threading import threading
import time
headers = {'Content-Type': 'application/json'} import random
data = {"projectName":"demo5","fileName":"test11.txt","annoDetails":[{"name":"","type":"person1","start":5,"end":6,"isSmall":False}]} headers = {'Content-Type': 'application/json', 'Connection': 'close'}
test_times = 20 data = {"projectName": "demo5", "fileName": "test11.txt",
"annoDetails": [{"name": "", "type": "person1", "start": 5, "end": 6, "isSmall": False}]}
test_times = 1000
test_cnt = 0 test_cnt = 0
err_cnt = 0 err_cnt = 0
requests.DEFAULT_RETRIES = 1000
requests.adapters.DEFAULT_RETRIES = 1000
s = requests.session()
s.keep_alive = False
def create_load(): def create_load():
global test_times global test_times
global test_cnt global test_cnt
global err_cnt global err_cnt
result = requests.post('http://127.0.0.1:9060/v1/anno/create', data=json.dumps(data), headers=headers) global s
result = requests.post('http://127.0.0.1:9060/v1/anno/create', data=json.dumps(data), headers=headers, timeout=30)
print(result.text)
if result.json()['errMsg']: if result.json()['errMsg']:
err_cnt += 1 err_cnt += 1
print('Catch Err:', result.json()) print('Catch Err:', result.json())
@ -22,11 +33,17 @@ def create_load():
print('Test success!') print('Test success!')
else: else:
print('Test failed!') print('Test failed!')
print('Start', test_times, 'test') print('Start', test_times, 'test')
for i in range(test_times): for i in range(test_times):
# 单线程测试 # 单线程测试
# create_load() # create_load()
# 多线程测试 # 多线程测试
t = threading.Thread(target=create_load,args=()) time.sleep(random.randint(100,200)*0.0001)
t = threading.Thread(target=create_load, args=())
# t.setDaemon(True)
t.start() t.start()
time.sleep(1)
print("ERROR COUNT"+str(err_cnt))
# print("OK")

7
doc/api文档.md Normal file
View File

@ -0,0 +1,7 @@
# API文档
| url | methods | params | return |
| -------------------------- | ------- | -------------------------------------------------------- | -------------------------------- |
| v1/project/get_zipped_data | POST    | projectName:项目名 file:数据集文件(目前只支持.zip格式) | json,errCode表示是否成功导入    |
| v1/files/get_json          | GET     | projectName:项目名                                       | file:json格式的数据集标注结果   |