feat: 支持上传jsonl文件，并支持将值按特定标记分割
This commit is contained in:
parent
50743b4225
commit
ca2fb49112
|
@ -67,6 +67,41 @@ def get_zipped_data():
|
||||||
|
|
||||||
return json.dumps(ret_info, default=lambda o: o.__dict__)
|
return json.dumps(ret_info, default=lambda o: o.__dict__)
|
||||||
|
|
||||||
|
def _split_jsonl_to_files(src_path, target_dir, base_name):
    """Write every JSON record of a .jsonl file to its own text file.

    Each non-blank line of ``src_path`` is parsed as a JSON object. Its values
    are joined with the ``<whale_n>`` marker and written to
    ``{target_dir}/{base_name}_{NNNNNN}``, where ``NNNNNN`` is the 1-based
    record number zero-padded to at least six digits.

    Args:
        src_path: Path of the UTF-8 encoded .jsonl file to read.
        target_dir: Directory that receives the per-record files.
        base_name: Prefix used for the generated file names.

    Returns:
        The number of records written.

    Raises:
        ValueError: If a line is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError``) or does not decode to a JSON object.
        OSError: If a file cannot be read or written.
    """
    record_no = 0
    with open(src_path, 'r', encoding='utf-8') as fh:
        for raw_line in fh:
            line = raw_line.strip()
            if not line:
                # A blank line is not a record; skip it instead of treating it
                # as end-of-file, which would drop all records after it.
                continue
            record_no += 1
            record = json.loads(line)
            if not isinstance(record, dict):
                raise ValueError(f'record {record_no} is not a JSON object')
            # str.join only accepts strings, so numbers, booleans, nulls and
            # nested structures are serialized back to JSON text.
            texts = [
                value if isinstance(value, str) else json.dumps(value, ensure_ascii=False)
                for value in record.values()
            ]
            # zfill pads to six digits and never truncates longer numbers.
            _idx = str(record_no).zfill(6)
            with open(f'{target_dir}/{base_name}_{_idx}', 'w', encoding='utf-8') as out:
                out.write('<whale_n>'.join(texts))
    return record_no


@api.route('/get_jsonl_data', methods=['POST'])
def get_jsonl_data():
    """Accept an uploaded .jsonl file and split it into per-record files.

    Reads ``projectName`` from the form and the upload from the ``file`` field,
    saves the upload into the project directory, writes one file per record
    into the project's ``data`` sub-directory, and removes the saved upload
    afterwards.

    Returns:
        The JSON-serialized ``ReturnInfo``. On any failure ``errCode`` is set
        to 404 and ``errMsg`` to the exception text.
    """
    ret_info = ReturnInfo()
    try:
        project_name = request.form.get('projectName')

        upload_file = request.files['file']
        # The file name is supplied by the client: keep only its last path
        # component so it cannot point outside the project directory.
        # NOTE(review): project_name is also client-supplied and is formatted
        # into PROJECT_PATH unchecked - confirm it is validated elsewhere.
        safe_name = os.path.basename((upload_file.filename or '').replace('\\', '/'))
        if safe_name in ('', '.', '..'):
            raise ValueError('uploaded file has no usable file name')

        project_dir = PROJECT_PATH.format(project_name)
        file_path = os.path.join(project_dir, safe_name)
        # Strip only the trailing extension, not every '.jsonl' occurrence.
        suffix = '.jsonl'
        file_name = safe_name[:-len(suffix)] if safe_name.endswith(suffix) else safe_name
        target_path = project_dir + '/data'
        upload_file.save(file_path)

        try:
            _split_jsonl_to_files(file_path, target_path, file_name)
        finally:
            # Remove the saved upload even when parsing or writing fails.
            os.remove(file_path)

    except Exception as e:
        print(e)
        ret_info.errCode = 404
        ret_info.errMsg = str(e)

    return json.dumps(ret_info, default=lambda o: o.__dict__)
|
||||||
|
|
||||||
|
|
||||||
@api.route('/create', methods=['POST'])
|
@api.route('/create', methods=['POST'])
|
||||||
def create_project():
|
def create_project():
|
||||||
|
|
|
@ -41,6 +41,8 @@ def read_file(file_path):
|
||||||
return read_img_file(file_path)
|
return read_img_file(file_path)
|
||||||
elif ext in ['txt', 'json']:
|
elif ext in ['txt', 'json']:
|
||||||
return read_txt_file(file_path)
|
return read_txt_file(file_path)
|
||||||
|
elif '.' not in file_path:
|
||||||
|
return read_txt_file(file_path)
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
# 读取img文件
|
# 读取img文件
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
<template>
|
<template>
|
||||||
<div class="rlhf-box">
|
<div class="rlhf-box">
|
||||||
<div v-for="(line, idx) in fileContent.split('\n')" :key="idx" class="line" @click="clickLine(idx)">
|
<div v-for="(line, idx) in fileContent.split('<whale_n>')" :key="idx" class="line" @click="clickLine(idx)">
|
||||||
<template v-for="(word, idx) in line">
|
<template v-for="(word, idx) in line">
|
||||||
<span class="word" v-if="word !== '\n'" :key="idx">{{ word }}</span>
|
<span class="word" v-if="word !== '\n'" :key="idx">{{ word }}</span>
|
||||||
<br v-if="word === '\n'" :key="idx"/>
|
<br v-if="word === '\n'" :key="idx"/>
|
||||||
|
|
|
@ -53,7 +53,7 @@
|
||||||
</div>
|
</div>
|
||||||
<p>上传文本:</p>
|
<p>上传文本:</p>
|
||||||
<p style="font-size:10px">(请选择包含文本文件的zip、tar文件)</p>
|
<p style="font-size:10px">(请选择包含文本文件的zip、tar文件)</p>
|
||||||
<input type="file" id="file-input" accept=".zip,.tar"/>
|
<input type="file" id="file-input" accept=".zip,.tar,.jsonl"/>
|
||||||
<p class="edit-box-btn-area">
|
<p class="edit-box-btn-area">
|
||||||
<button class="button danger" @click="del" v-if="page==='edit'">删除</button>
|
<button class="button danger" @click="del" v-if="page==='edit'">删除</button>
|
||||||
<button class="button" @click="submit">提交</button>
|
<button class="button" @click="submit">提交</button>
|
||||||
|
@ -169,9 +169,16 @@ export default {
|
||||||
const fileInputElement = document.getElementById('file-input')
|
const fileInputElement = document.getElementById('file-input')
|
||||||
if (fileInputElement.files[0]) {
|
if (fileInputElement.files[0]) {
|
||||||
let formData = new FormData()
|
let formData = new FormData()
|
||||||
|
let file = fileInputElement.files[0]
|
||||||
formData.append('projectName', projectName)
|
formData.append('projectName', projectName)
|
||||||
formData.append('file', fileInputElement.files[0])
|
formData.append('file', file)
|
||||||
form('/v1/project/get_zipped_data', formData)
|
if (file.name.endsWith('.zip')) {
|
||||||
|
form('/v1/project/get_zipped_data', formData)
|
||||||
|
} else if (file.name.endsWith('.tar')) {
|
||||||
|
form('/v1/project/get_zipped_data', formData)
|
||||||
|
} else if (file.name.endsWith('.jsonl')) {
|
||||||
|
form('/v1/project/get_jsonl_data', formData)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
|
|
Loading…
Reference in New Issue