"""Flask service that exposes TimeMoE-200M time-series forecasting via POST /ds_predict."""

import torch
from flask import Flask, request, jsonify
from transformers import AutoModelForCausalLM

app = Flask(__name__)
device = 'cpu'

# Load the TimeMoE-200M checkpoint once at startup.
model = AutoModelForCausalLM.from_pretrained(
    'Maple728/TimeMoE-200M',
    device_map=device,  # use "cpu" for CPU inference, and "cuda" for GPU inference
    trust_remote_code=True,
)

@app.route('/ds_predict', methods=['POST'])
def time_moe():
    """Forecast a univariate series: expects JSON with "input" (list of floats)
    and "next_len" (number of future points to predict)."""
    try:
        data = request.get_json()
        if not data or 'input' not in data or 'next_len' not in data:
            return jsonify({
                'status': 'error',
                'error': 'Invalid input, please provide "input" and "next_len" fields in JSON'
            }), 400

        input_data = data['input']
        prediction_length = data['next_len']

        # Normalize the input sequence so the model sees zero-mean, unit-variance data.
        seqs = torch.tensor(input_data).unsqueeze(0).float().to(device)
        mean, std = seqs.mean(dim=-1, keepdim=True), seqs.std(dim=-1, keepdim=True)
        normed_seqs = (seqs - mean) / std
        seqs = normed_seqs

        # Autoregressively generate the requested number of future points.
        pred_y = model.generate(seqs, max_new_tokens=prediction_length)

        # Keep only the newly generated points and undo the normalization.
        normed_predictions = pred_y[:, -prediction_length:]
        predictions = normed_predictions * std + mean
        print(predictions)
        pred_y = predictions[0].cpu().numpy().tolist()  # .cpu() keeps this working if device is "cuda"

        response = {
            'status': 'success',
            'output': pred_y
        }
        return jsonify(response), 200
    except Exception as e:
        return jsonify({
            'error': f'Prediction failed: {str(e)}'
        }), 500

def main():
    app.run(
        host='0.0.0.0',
        port=5001,
        threaded=True,
        debug=False
    )


if __name__ == "__main__":
    main()
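
# ---------------------------------------------------------------------------
# Example client call (illustrative sketch, not part of the service itself).
# It assumes the server above is running locally on port 5001 and that the
# third-party `requests` package is installed; the input values are made up.
#
#   import requests
#
#   resp = requests.post(
#       'http://127.0.0.1:5001/ds_predict',
#       json={'input': [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 'next_len': 3},
#   )
#   print(resp.status_code)   # 200 on success, 400 on malformed input
#   print(resp.json())        # {'status': 'success', 'output': [...]}
# ---------------------------------------------------------------------------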