feat: 支持上传jsonl文件,并支持将值按特定标记分割

This commit is contained in:
maxmon 2023-04-14 00:50:09 +08:00
parent 50743b4225
commit ca2fb49112
4 changed files with 48 additions and 4 deletions

View File

@ -67,6 +67,41 @@ def get_zipped_data():
return json.dumps(ret_info, default=lambda o: o.__dict__)
def _write_jsonl_records(jsonl_path, target_dir, base_name):
    """Split a JSONL file into one text file per record.

    Each non-blank line of ``jsonl_path`` is parsed as a JSON object. The
    object's values are joined with the ``<whale_n>`` marker and written to
    ``<target_dir>/<base_name>_<NNNNNN>``, where ``NNNNNN`` is the 1-based
    record number zero-padded to at least six digits.

    Args:
        jsonl_path: Path of the JSONL file to read (UTF-8).
        target_dir: Existing directory that receives the per-record files.
        base_name: Prefix used for every generated file name.

    Returns:
        The number of records written.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the input or an output file cannot be opened.
    """
    count = 0
    with open(jsonl_path, 'r', encoding='utf-8') as src:
        for raw_line in src:
            line = raw_line.strip()
            if not line:
                # A blank line must not end the import: skip it and keep
                # reading so later records are not silently dropped.
                continue
            count += 1
            record = json.loads(line)
            # str.join only accepts strings; serialise numbers, booleans,
            # nulls and nested structures as JSON text instead of crashing.
            parts = [
                value if isinstance(value, str)
                else json.dumps(value, ensure_ascii=False)
                for value in record.values()
            ]
            out_path = os.path.join(target_dir, f'{base_name}_{count:06d}')
            # Context manager guarantees the handle is closed per record.
            with open(out_path, 'w', encoding='utf-8') as dst:
                dst.write('<whale_n>'.join(parts))
    return count


@api.route('/get_jsonl_data', methods=['POST'])
def get_jsonl_data():
    """Import an uploaded ``.jsonl`` file into a project's data directory.

    Reads ``projectName`` from the form and the upload from the ``file``
    field, stores the upload temporarily inside the project directory,
    splits it into one file per record under ``<project>/data`` and then
    removes the temporary upload.

    Returns:
        A JSON string built from the ``ReturnInfo`` instance's ``__dict__``.
        On any failure ``errCode`` is set to 404 and ``errMsg`` to the
        exception text.
    """
    ret_info = ReturnInfo()
    file_path = None
    saved = False
    try:
        project_name = request.form.get('projectName')
        upload_file = request.files['file']
        # The client controls the file name: keep only its final component
        # so it cannot escape the project directory (e.g. "../../x.jsonl").
        # NOTE(review): projectName is also client-supplied and is formatted
        # into PROJECT_PATH unchecked — confirm it is validated elsewhere.
        safe_name = os.path.basename(upload_file.filename)
        project_dir = PROJECT_PATH.format(project_name)
        file_path = os.path.join(project_dir, safe_name)
        file_name = safe_name.replace('.jsonl', '')
        target_path = project_dir + '/data'
        upload_file.save(file_path)
        saved = True
        _write_jsonl_records(file_path, target_path, file_name)
    except Exception as e:
        print(e)
        ret_info.errCode = 404
        ret_info.errMsg = str(e)
    finally:
        # Remove the temporary upload on failure too, but only if this
        # request actually created it.
        if saved and os.path.isfile(file_path):
            try:
                os.remove(file_path)
            except OSError as e:
                print(e)
    return json.dumps(ret_info, default=lambda o: o.__dict__)
@api.route('/create', methods=['POST'])
def create_project():

View File

@ -41,6 +41,8 @@ def read_file(file_path):
return read_img_file(file_path)
elif ext in ['txt', 'json']:
return read_txt_file(file_path)
elif '.' not in file_path:
return read_txt_file(file_path)
return ''
# 读取img文件

View File

@ -1,6 +1,6 @@
<template>
<div class="rlhf-box">
<div v-for="(line, idx) in fileContent.split('\n')" :key="idx" class="line" @click="clickLine(idx)">
<div v-for="(line, idx) in fileContent.split('<whale_n>')" :key="idx" class="line" @click="clickLine(idx)">
<template v-for="(word, idx) in line">
<span class="word" v-if="word !== '\n'" :key="idx">{{ word }}</span>
<br v-if="word === '\n'" :key="idx"/>

View File

@ -53,7 +53,7 @@
</div>
<p>上传文本</p>
<p style="font-size:10px">请选择包含文本文件的ziptar文件</p>
<input type="file" id="file-input" accept=".zip,.tar"/>
<input type="file" id="file-input" accept=".zip,.tar,.jsonl"/>
<p class="edit-box-btn-area">
<button class="button danger" @click="del" v-if="page==='edit'">删除</button>
<button class="button" @click="submit">提交</button>
@ -169,9 +169,16 @@ export default {
const fileInputElement = document.getElementById('file-input')
if (fileInputElement.files[0]) {
let formData = new FormData()
let file = fileInputElement.files[0]
formData.append('projectName', projectName)
formData.append('file', fileInputElement.files[0])
form('/v1/project/get_zipped_data', formData)
formData.append('file', file)
if (file.name.endsWith('.zip')) {
form('/v1/project/get_zipped_data', formData)
} else if (file.name.endsWith('.tar')) {
form('/v1/project/get_zipped_data', formData)
} else if (file.name.endsWith('.jsonl')) {
form('/v1/project/get_jsonl_data', formData)
}
}
})
},