7.6 support more file types and fix some problems

This commit is contained in:
xgdyp 2021-07-06 21:52:35 +08:00
parent 3b0936c6ac
commit 0111361c72
23 changed files with 80 additions and 36 deletions

View File

@ -1,7 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" /> <content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/be/app/projects" />
</content>
<orderEntry type="inheritedJdk" /> <orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>

5
be/.gitignore vendored
View File

@ -1,3 +1,6 @@
.idea .idea
__pycache__ __pycache__
app/projects/* app/projects/*
../.idea

View File

@ -1,5 +1,5 @@
from flask import Blueprint from flask import Blueprint
from app.api.v1 import index, project, files, anno from ...api.v1 import index, project, files, anno
def create_blueprint_v1(): def create_blueprint_v1():

View File

@ -70,6 +70,7 @@ def get_json():
for item in anno_data: for item in anno_data:
item_dict = {} item_dict = {}
item_dict['file'] = item['fileName'] item_dict['file'] = item['fileName']
item_dict['text'] = read_txt_file(PROJECT_PATH.format(project_name)+'/'+item['fileName']) item_dict['text'] = read_txt_file(PROJECT_PATH.format(project_name)+'/'+item['fileName'])
# 这一步来去掉anno.json中的isSmall # 这一步来去掉anno.json中的isSmall
for entity in item['annoDetails']: for entity in item['annoDetails']:
@ -94,12 +95,16 @@ def get_json():
# 使用send_from_directory 或者使用send_file时要特别注意文件的路径路径不对的话会报404 # 使用send_from_directory 或者使用send_file时要特别注意文件的路径路径不对的话会报404
# 本线默认目录时app下所以不需要再加/app了所以不能用PROJECT_PATH # 本线默认目录时app下所以不需要再加/app了所以不能用PROJECT_PATH
# 2.创建response对象返回数据 # 2.创建response对象返回数据
# response = make_response(send_from_directory(DOWNLOAD_FILE_LOCATION.format(project_name),filename='result.json', as_attachment=True)) #使用response可以将result.json再删除掉
# response.headers["Content-disposition"] = 'attachment; filename=result.json' response = make_response(send_from_directory('', filename=DOWNLOAD_FILE_LOCATION.format(project_name),
# return response as_attachment=True))
response.headers["Content-disposition"] = 'attachment; filename={}_result.json'.format(project_name)
# print(PROJECT_PATH.format(project_name)+'/result.json')
os.remove(PROJECT_PATH.format(project_name)+'/result.json')
return response
# 3. 直接使用send from directory 返回json文件 # 3. 直接使用send from directory 返回json文件
# return send_from_directory('', filename=DOWNLOAD_FILE_LOCATION.format(project_name),as_attachment=True) # return send_from_directory('', filename=DOWNLOAD_FILE_LOCATION.format(project_name),as_attachment=True)
# 4. 使用send file 返回json文件 # 4. 使用send file 返回json文件
return send_file(DOWNLOAD_FILE_LOCATION.format(project_name), as_attachment=True, attachment_filename=project_name+'_result.json') # return send_file(DOWNLOAD_FILE_LOCATION.format(project_name), as_attachment=True, attachment_filename=project_name+'_result.json')

View File

@ -1,3 +1,5 @@
import shutil
from ...config.setting import PROJECT_NAME, PROJECT_TYPE, ENTITY_TYPES, PROJECTS, PROJECT_PATH, PROJECT_CONFIG_PATH, \ from ...config.setting import PROJECT_NAME, PROJECT_TYPE, ENTITY_TYPES, PROJECTS, PROJECT_PATH, PROJECT_CONFIG_PATH, \
ANNO_OUTPUT_PATH ANNO_OUTPUT_PATH
from ...libs.redprint import RedPrint from ...libs.redprint import RedPrint
@ -5,7 +7,7 @@ from ...libs.redprint import RedPrint
from flask import request from flask import request
import json import json
import os import os
from ...libs.tools import make_dir, write_json, read_json_file,unzip_file from ...libs.tools import make_dir, write_json, read_json_file, unzip_file
from ...entities.entities import Project, ReturnInfo from ...entities.entities import Project, ReturnInfo
@ -14,23 +16,34 @@ from ...entities.entities import AnnoContents
api = RedPrint('project') api = RedPrint('project')
@api.route('/get_zipped_data', methods=['POST']) @api.route('/get_zipped_data', methods=['POST'])
def get_zipped_data(): def get_zipped_data():
# time.sleep(0.02)
ret_info = ReturnInfo() ret_info = ReturnInfo()
try: try:
project_name = request.form.get('projectName') project_name = request.form.get('projectName')
print(project_name)
upload_file = request.files['file'] upload_file = request.files['file']
file_path = os.path.join(PROJECT_PATH.format(project_name), upload_file.filename) file_path = os.path.join(PROJECT_PATH.format(project_name), upload_file.filename)
target_path = PROJECT_PATH.format(project_name) target_path = PROJECT_PATH.format(project_name)
# print(file_path)
upload_file.save(file_path) upload_file.save(file_path)
print(target_path) file_type = file_path.split('.')[-1]
unzip_file(file_path,target_path) # print("zipped data received: .{} form".format(file_type))
unzip_file(file_path, target_path)
os.remove(file_path) os.remove(file_path)
# Put files in the folder directly under the project directory
for item in os.listdir(target_path):
# If item is a folder,copy files in item to project folder,then remove this folder
folder_path = target_path + '/' + item
if os.path.isdir(folder_path):
for file in os.listdir(folder_path):
shutil.copy(folder_path + '/' + file, folder_path + '/../')
# Here to deal with chinese encode in module zipfile and rarfile
if file_type == 'zip':
os.rename(folder_path + '/../' + file,
folder_path + '/../' + file.encode('cp437').decode('GBK'))
shutil.rmtree(folder_path)
except Exception as e: except Exception as e:
print(e) print(e)
ret_info.errCode = 404 ret_info.errCode = 404

View File

@ -2,7 +2,7 @@ from flask import Flask
def create_app(): def create_app():
app = Flask(__name__, static_folder='../../fe/dist/', static_url_path='/') app = Flask(__name__, static_folder='../../fe/dist/',template_folder='../../fe/dist/', static_url_path='/')
app.config.from_object('app.config.setting') app.config.from_object('app.config.setting')
app.config.from_object('app.config.secure') app.config.from_object('app.config.secure')

View File

@ -1,6 +1,14 @@
import os import os
import json import json
import zipfile import zipfile
import rarfile
import py7zr
import tarfile
import gzip
from os import rename
from os import listdir
from shutil import move
def make_dir(path): def make_dir(path):
folder = os.path.exists(path) folder = os.path.exists(path)
@ -31,16 +39,33 @@ def read_txt_file(file_path):
def unzip_file(zip_src, dst_dir):
    """Extract an uploaded archive into ``dst_dir``.

    The archive format is detected from the file content (not from its
    extension), trying zip, rar, 7z and tar in that order.

    :param zip_src: full path of the archive file
    :param dst_dir: directory the archive members are extracted into
    :return: a status message; every supported format reports success with
        a string, and an unsupported file yields the "please upload ..."
        message
    """
    if zipfile.is_zipfile(zip_src):
        # Context managers guarantee the archive handle is released, so the
        # caller can delete the uploaded file right after extraction.
        with zipfile.ZipFile(zip_src, "r") as fz:
            fz.extractall(dst_dir)
        return "unzip .zip file success"

    if rarfile.is_rarfile(zip_src):
        with rarfile.RarFile(zip_src, "r") as fr:
            fr.extractall(dst_dir)
        return "unzip .rar file success"

    if py7zr.is_7zfile(zip_src):
        with py7zr.SevenZipFile(zip_src, "r") as f7z:
            f7z.extractall(path=dst_dir)
        return "unzip .7z file success"

    if tarfile.is_tarfile(zip_src):
        # tarfile.open() (unlike the bare TarFile constructor) transparently
        # handles the compressed tarballs that is_tarfile() accepts.
        with tarfile.open(zip_src, "r") as ft:
            # The archive comes from a user upload: when the running Python
            # supports extraction filters, use the "data" filter so members
            # cannot escape dst_dir (absolute paths, "..", unsafe links).
            if hasattr(tarfile, "data_filter"):
                ft.extractall(dst_dir, filter="data")
            else:
                ft.extractall(dst_dir)
        return "unzip .tar file success"

    return "请上传.zip .rar .tar .7z格式的文件"

View File

@ -1 +0,0 @@
[{"fileName": "test10.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test12.txt", "annoDetails": [{"name": "业于北京工业大", "type": "person1", "start": 3, "end": 10}], "isAnno": true}, {"fileName": "test6.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test4.txt", "annoDetails": [], "isAnno": true}, {"fileName": "test2.txt", "annoDetails": [{"name": "小明毕业于北京工业", "type": "person1", "start": 0, "end": 9, "isSmall": false}, {"name": "小明毕业于北京", "type": "person1", "start": 0, "end": 7, "isSmall": true}, {"name": "小明毕业于北", "type": "person1", "start": 0, "end": 6, "isSmall": true}, {"name": "小明毕业于", "type": "person1", "start": 0, "end": 5, "isSmall": true}, {"name": "明毕业于北京工业", "type": "person1", "start": 1, "end": 9, "isSmall": true}, {"name": "明毕业于北京工", "type": "person1", "start": 1, "end": 8, "isSmall": true}, {"name": "明毕业于北京", "type": "person1", "start": 1, "end": 7, "isSmall": true}, {"name": "毕业于北京工业", "type": "person1", "start": 2, "end": 9, "isSmall": true}, {"name": "毕业于北京", "type": "person1", "start": 2, "end": 7, "isSmall": true}, {"name": "毕业于北", "type": "person1", "start": 2, "end": 6, "isSmall": true}, {"name": "毕业", "type": "person1", "start": 2, "end": 4}, {"name": "毕", "type": "person1", "start": 2, "end": 3, "isSmall": true}, {"name": "业于北京工业大", "type": "person1", "start": 3, "end": 10, "isSmall": true}, {"name": "业于北京", "type": "person1", "start": 3, "end": 7, "isSmall": true}, {"name": "于北京工", "type": "person1", "start": 4, "end": 8, "isSmall": true}, {"name": "于北京", "type": "person1", "start": 4, "end": 7, "isSmall": true}, {"name": "北京工", "type": "person1", "start": 5, "end": 8, "isSmall": true}, {"name": "京工", "type": "person1", "start": 6, "end": 8, "isSmall": true}, {"name": "工业大", "type": "person1", "start": 7, "end": 10, "isSmall": true}], "isAnno": true}, {"fileName": "test3.txt", "annoDetails": [{"name": "小明", "type": "person1", "start": 0, "end": 2}, {"name": "明毕业于北京", "type": "organiztion", "start": 1, "end": 7, "isSmall": 
false}, {"name": "京工业", "type": "123", "start": 6, "end": 9, "isSmall": true}, {"name": "业大学", "type": "location", "start": 8, "end": 11, "isSmall": true}], "isAnno": true}, {"fileName": "test11.txt", "annoDetails": [{"name": "北", "type": "person1", "start": 5, "end": 6, "isSmall": false}], "isAnno": true}, {"fileName": "test5.txt", "annoDetails": [{"name": "工业大学", "type": "person1", "start": 7, "end": 11}], "isAnno": true}, {"fileName": "test1.txt", "annoDetails": [{"name": "弟们 雄起", "type": "person1", "start": 11, "end": 16}], "isAnno": true}]

View File

@ -1 +0,0 @@
{"projectName": "demo5", "projectType": "命名实体识别", "entityTypes": "[{\"type\":\"person1\",\"color\":\"#e61490\"},{\"type\":\"location\",\"color\":\"#0aab8a\"},{\"type\":\"organiztion\",\"color\":\"#2770cd\"},{\"type\":\"123\",\"color\":\"#1c7a82\"}]"}

View File

@ -1 +0,0 @@
hello 你好啊 兄弟们 雄起

View File

@ -1 +0,0 @@
小明毕业于北京工业大学

View File

@ -1 +0,0 @@
小明毕业于北京工业大学

View File

@ -1 +0,0 @@
小明毕业于北京工业大学

View File

@ -1 +0,0 @@
小明毕业于北京工业大学

View File

@ -1 +0,0 @@
小明毕业于北京工业大学

View File

@ -1 +0,0 @@
小明毕业于北京工业大学

View File

@ -1 +0,0 @@
小明毕业于北京工业大学

View File

@ -1 +0,0 @@
小明毕业于北京工业大学

View File

@ -1 +0,0 @@
小明毕业于北京工业大学

View File

@ -1 +0,0 @@
小明毕业于北京工业大学

View File

@ -1 +0,0 @@
小明毕业于北京工业大学

View File

@ -1,6 +1,6 @@
# 入口程序文件 # 入口程序文件
from app.app import create_app from app.app import create_app
from flask import render_template
app = create_app() app = create_app()
# host='0.0.0.0' # host='0.0.0.0'
@ -13,5 +13,15 @@ app = create_app()
# 9060是端口 # 9060是端口
# v1是版本 # v1是版本
# index是首页 # index是首页
@app.route('/')
def index():
    """Render and return the index page template for the root URL."""
    page = '/index.html'
    return render_template(page)
if __name__ == '__main__': if __name__ == '__main__':
app.run(host='127.0.0.1', port=9060, debug=True) app.run(host='127.0.0.1', port=9060, debug=True)