7.1批量添加数据集、导出结果的后端实现

This commit is contained in:
xgdyp 2021-07-02 00:19:42 +08:00
parent 3b84a4f720
commit 13091c886f
17 changed files with 232 additions and 20 deletions

8
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

21
.idea/deployment.xml Normal file
View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
<serverData>
<paths name="root@113.31.110.160:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@139.198.180.75:22 agent">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
</component>
</project>

View File

@ -0,0 +1,20 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="false" level="WARNING" enabled_by_default="false">
<option name="ignoredPackages">
<value>
<list size="0" />
</value>
</option>
</inspection_tool>
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="app.flask_login" />
<option value="app.flask_bootstrap" />
</list>
</option>
</inspection_tool>
</profile>
</component>

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

4
.idea/misc.xml Normal file
View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/whale-anno.iml" filepath="$PROJECT_DIR$/.idea/whale-anno.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

15
.idea/whale-anno.iml Normal file
View File

@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TemplatesService">
<option name="TEMPLATE_FOLDERS">
<list>
<option value="$MODULE_DIR$/fe/node_modules/istanbul-reports/lib/html/templates" />
</list>
</option>
</component>
</module>

View File

@ -1,18 +1,22 @@
from app.config.setting import PROJECT_NAME, FILE_NAME, PROJECTS, ANNO_OUTPUT_PATH
from app.entities.entities import ReturnInfo, AnnoContents, OutputAnno, QueryAnno
from app.libs.redprint import RedPrint
import time
from flask import request
import json
import os
import zipfile
from app.libs.tools import read_txt_file, write_json, read_json_file, make_dir
api = RedPrint('anno')
@api.route('/create', methods=['POST'])
def create_anno():
# time.sleep(0.02)
ret_info = ReturnInfo()
try:
param = request.get_json()
@ -40,6 +44,7 @@ def create_anno():
ret_info.errCode = 0
except Exception as e:
print(e)
ret_info.errCode = 404
ret_info.errMsg = str(e)

View File

@ -1,12 +1,13 @@
from app.config.setting import PROJECT_PATH, ANNO_OUTPUT_PATH
from app.libs.redprint import RedPrint
from ...config.setting import PROJECT_PATH, ANNO_OUTPUT_PATH,DOWNLOAD_FILE_LOCATION
from flask import request
from ...libs.redprint import RedPrint
from flask import request, send_file, Response, send_from_directory,make_response
import json
import os
from app.entities.entities import ReturnInfo, FileInfo
from app.libs.tools import read_json_file
from ...entities.entities import ReturnInfo, FileInfo
from ...libs.tools import read_json_file,read_txt_file,write_json
api = RedPrint('files')
@ -56,3 +57,48 @@ def query_file():
ret_info.errCode = 404
ret_info.errMsg = str(e)
return json.dumps(ret_info, default=lambda o: o.__dict__)
@api.route('/get_json', methods=['GET'])
def get_json():
    """
    Export the annotation result of a project as a downloadable JSON file.

    Reads ``anno.json`` of the project named by the ``projectName`` query
    argument, pairs every annotated file with its text and its entities
    (with the ``isSmall`` key dropped), writes the result to ``result.json``
    in the project directory and sends that file as an attachment.

    :return: the ``result.json`` attachment on success; otherwise the
             JSON-serialized ReturnInfo carrying errCode 404 and the error message
    """
    ret_info = ReturnInfo()
    try:
        project_name = (request.args.get("projectName") or "").strip()
        if not project_name:
            raise ValueError("projectName is required")
        project_dir = PROJECT_PATH.format(project_name)
        download_json = []
        for item in read_json_file(project_dir + '/anno.json'):
            download_json.append({
                'file': item['fileName'],
                'text': read_txt_file(project_dir + '/' + item['fileName']),
                # Drop the 'isSmall' key stored in anno.json from every entity.
                'entity': [
                    {key: value for key, value in entity.items() if key != 'isSmall'}
                    for entity in item['annoDetails']
                ],
            })
        write_json(project_dir + '/result.json', download_json)
    except Exception as e:
        ret_info.errCode = 404
        ret_info.errMsg = str(e)
        # Report the failure instead of falling through to send_file, which
        # would otherwise run with an unbound project_name or a stale file.
        return json.dumps(ret_info, default=lambda o: o.__dict__)
    # send_file / send_from_directory answer 404 when the path is wrong. Per the
    # original author's note the default directory here is already app/, which
    # is why DOWNLOAD_FILE_LOCATION is used rather than PROJECT_PATH.
    # NOTE(review): Flask 2.0 renamed attachment_filename to download_name;
    # confirm against the Flask version this project pins.
    return send_file(DOWNLOAD_FILE_LOCATION.format(project_name), as_attachment=True, attachment_filename=project_name+'_result.json')

View File

@ -1,21 +1,51 @@
from app.config.setting import PROJECT_NAME, PROJECT_TYPE, ENTITY_TYPES, PROJECTS, PROJECT_PATH, PROJECT_CONFIG_PATH, \
from ...config.setting import PROJECT_NAME, PROJECT_TYPE, ENTITY_TYPES, PROJECTS, PROJECT_PATH, PROJECT_CONFIG_PATH, \
ANNO_OUTPUT_PATH
from app.libs.redprint import RedPrint
from ...libs.redprint import RedPrint
from flask import request
import json
import os
from app.libs.tools import make_dir, write_json, read_json_file
from app.entities.entities import Project, ReturnInfo
from ...libs.tools import make_dir, write_json, read_json_file,unzip_file
from ...entities.entities import Project, ReturnInfo
from ...config.setting import FILE_NAME
from ...entities.entities import AnnoContents
api = RedPrint('project')
@api.route('/get_zipped_data', methods=['POST'])
def get_zipped_data():
    """
    Import a zipped data set into an existing project directory.

    Expects a multipart form with ``projectName`` and an uploaded ``file``.
    The upload is saved into the project directory, extracted there and then
    deleted again.

    :return: JSON-serialized ReturnInfo; errCode is 0 on success and 404
             (with errMsg set) on any failure
    """
    ret_info = ReturnInfo()
    file_path = None
    try:
        project_name = request.form.get('projectName')
        if not project_name:
            raise ValueError('projectName is required')
        upload_file = request.files['file']
        # Keep only the last path component of the client-supplied name so a
        # crafted filename (e.g. "../../x.zip") cannot be saved outside the project.
        file_name = os.path.basename((upload_file.filename or '').replace('\\', '/'))
        if not file_name:
            raise ValueError('uploaded file has no file name')
        target_path = PROJECT_PATH.format(project_name)
        file_path = os.path.join(target_path, file_name)
        upload_file.save(file_path)
        # unzip_file reports a non-zip upload through its return value.
        unzip_error = unzip_file(file_path, target_path)
        if unzip_error:
            raise ValueError(unzip_error)
        # Same success code that create_anno sets.
        ret_info.errCode = 0
    except Exception as e:
        print(e)
        ret_info.errCode = 404
        ret_info.errMsg = str(e)
    finally:
        # Remove the uploaded archive whether or not extraction succeeded.
        if file_path and os.path.exists(file_path):
            try:
                os.remove(file_path)
            except OSError as cleanup_error:
                print(cleanup_error)
    return json.dumps(ret_info, default=lambda o: o.__dict__)
@api.route('/create', methods=['POST'])
def create_project():
ret_info = ReturnInfo()
try:
param = request.get_json()
print(param)
project_name = param.get(PROJECT_NAME)
make_dir(PROJECT_PATH.format(project_name))

View File

@ -16,5 +16,7 @@ PROJECTS = 'app/projects'
PROJECT_PATH = PROJECTS + '/' + "{}"
PROJECT_CONFIG_PATH = PROJECTS + '/' + "{}" + '/config.json'
# Location of the downloadable annotation result; '{}' is filled with the project name.
# Unlike the other path templates here, this one is not built from PROJECTS.
DOWNLOAD_FILE_LOCATION = 'projects/{}/result.json'
# Path template of the stored annotation data
ANNO_OUTPUT_PATH = PROJECTS + '/' + '{}' + '/' + 'anno.json'

View File

@ -1,6 +1,6 @@
import os
import json
import zipfile
def make_dir(path):
folder = os.path.exists(path)
@ -27,3 +27,20 @@ def read_txt_file(file_path):
with open(file_path, 'r', encoding='utf-8') as f:
data = f.read()
return data
def unzip_file(zip_src, dst_dir):
    """
    Extract a zip archive into a directory.

    :param zip_src: full path of the zip file
    :param dst_dir: directory the archive members are extracted into
    :return: None after a successful extraction; the message
             "请上传zip类型压缩文件" when zip_src is not a zip file
    """
    if not zipfile.is_zipfile(zip_src):
        return "请上传zip类型压缩文件"
    # The context manager closes the archive even when extraction fails, so the
    # caller can delete zip_src right away (an open handle blocks that on Windows).
    with zipfile.ZipFile(zip_src, "r") as archive:
        archive.extractall(dst_dir)

View File

@ -1 +1 @@
[{"fileName": "test1.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test10.txt", "annoDetails": [{"name": "小明", "type": "person1", "start": 0, "end": 2, "isSmall": false}, {"name": "北京工业大学", "type": "organiztion", "start": 5, "end": 11}, {"name": "北京", "type": "location", "start": 5, "end": 7, "isSmall": false}], "isAnno": true}]
[{"fileName": "test10.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test12.txt", "annoDetails": [{"name": "业于北京工业大", "type": "person1", "start": 3, "end": 10}], "isAnno": true}, {"fileName": "test6.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test4.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test2.txt", "annoDetails": [{"name": "小明毕业于北京工业", "type": "person1", "start": 0, "end": 9, "isSmall": false}, {"name": "小明毕业于北京", "type": "person1", "start": 0, "end": 7, "isSmall": true}, {"name": "小明毕业于北", "type": "person1", "start": 0, "end": 6, "isSmall": true}, {"name": "小明毕业于", "type": "person1", "start": 0, "end": 5, "isSmall": true}, {"name": "明毕业于北京工业", "type": "person1", "start": 1, "end": 9, "isSmall": true}, {"name": "明毕业于北京工", "type": "person1", "start": 1, "end": 8, "isSmall": true}, {"name": "明毕业于北京", "type": "person1", "start": 1, "end": 7, "isSmall": true}, {"name": "毕业于北京工业", "type": "person1", "start": 2, "end": 9, "isSmall": true}, {"name": "毕业于北京", "type": "person1", "start": 2, "end": 7, "isSmall": true}, {"name": "毕业于北", "type": "person1", "start": 2, "end": 6, "isSmall": true}, {"name": "毕业", "type": "person1", "start": 2, "end": 4}, {"name": "毕", "type": "person1", "start": 2, "end": 3, "isSmall": true}, {"name": "业于北京工业大", "type": "person1", "start": 3, "end": 10, "isSmall": true}, {"name": "业于北京", "type": "person1", "start": 3, "end": 7, "isSmall": true}, {"name": "于北京工", "type": "person1", "start": 4, "end": 8, "isSmall": true}, {"name": "于北京", "type": "person1", "start": 4, "end": 7, "isSmall": true}, {"name": "北京工", "type": "person1", "start": 5, "end": 8, "isSmall": true}, {"name": "京工", "type": "person1", "start": 6, "end": 8, "isSmall": true}, {"name": "工业大", "type": "person1", "start": 7, "end": 10, "isSmall": true}], "isAnno": true}, {"fileName": "test3.txt", "annoDetails": [{"name": "小明", "type": "person1", "start": 0, "end": 2}, {"name": "明毕业于北京", "type": "organiztion", "start": 1, "end": 7, "isSmall": 
false}, {"name": "京工业", "type": "123", "start": 6, "end": 9, "isSmall": true}, {"name": "业大学", "type": "location", "start": 8, "end": 11, "isSmall": true}], "isAnno": true}, {"fileName": "test11.txt", "annoDetails": [{"name": "北", "type": "person1", "start": 5, "end": 6, "isSmall": false}], "isAnno": true}, {"fileName": "test5.txt", "annoDetails": [{"name": "工业大学", "type": "person1", "start": 7, "end": 11}], "isAnno": true}, {"fileName": "test1.txt", "annoDetails": [{"name": "弟们 雄起", "type": "person1", "start": 11, "end": 16}], "isAnno": true}]

View File

@ -14,4 +14,4 @@ app = create_app()
# v1是版本
# index是首页
if __name__ == '__main__':
app.run(host='0.0.0.0', port=9060, debug=True)
app.run(host='127.0.0.1', port=9060, debug=True)

View File

@ -1,18 +1,29 @@
import requests
import json
import threading
headers = {'Content-Type': 'application/json'}
data = {"projectName":"demo5","fileName":"test11.txt","annoDetails":[{"name":"","type":"person1","start":5,"end":6,"isSmall":False}]}
test_times = 20
import time
import random
headers = {'Content-Type': 'application/json', 'Connection': 'close'}
data = {"projectName": "demo5", "fileName": "test11.txt",
"annoDetails": [{"name": "", "type": "person1", "start": 5, "end": 6, "isSmall": False}]}
test_times = 1000
test_cnt = 0
err_cnt = 0
requests.DEFAULT_RETRIES = 1000
requests.adapters.DEFAULT_RETRIES = 1000
s = requests.session()
s.keep_alive = False
def create_load():
global test_times
global test_cnt
global err_cnt
result = requests.post('http://127.0.0.1:9060/v1/anno/create', data=json.dumps(data), headers=headers)
global s
result = requests.post('http://127.0.0.1:9060/v1/anno/create', data=json.dumps(data), headers=headers, timeout=30)
print(result.text)
if result.json()['errMsg']:
err_cnt += 1
print('Catch Err:', result.json())
@ -22,11 +33,17 @@ def create_load():
print('Test success!')
else:
print('Test failed!')
print('Start', test_times, 'test')
for i in range(test_times):
# 单线程测试
# create_load()
# 多线程测试
t = threading.Thread(target=create_load,args=())
time.sleep(random.randint(100,200)*0.0001)
t = threading.Thread(target=create_load, args=())
# t.setDaemon(True)
t.start()
time.sleep(1)
print("ERROR COUNT"+str(err_cnt))
# print("OK")

7
doc/api文档.md Normal file
View File

@ -0,0 +1,7 @@
# API文档
| url | methods | params | return |
| -------------------------- | ------- | -------------------------------------------------------- | -------------------------------- |
| v1/project/get_zipped_data | POST    | projectName:项目名 file:数据集文件(目前只支持.zip格式) | json,errCode表示是否成功导入 |
| v1/files/get_json | GET | projectName:项目名 | file:json格式的数据集标注结果 |