I was hoping to write an api for embedding models, where the model is loaded only once and every work can call this model, but it doesn’t seem to work. Can anyone tell me what I should do?
async def server_loop(q):
model_path = "./jinaai/jina-embeddings-v2-base-zh/"
model = SentenceTransformer(
model_path, # switch to en/zh for English or Chinese
trust_remote_code=True
)
model.max_seq_length = 8192 # control your input sequence length up to 8192
while True:
(string, response_q) = await q.get()
print(string)
out = model.encode(string).tolist()
print(out)
await asyncio.sleep(0.1)
await response_q.put(out)
@app.main_process_start
async def main_process_start(app):
q=asyncio.Queue()
app.shared_ctx.model_queue =q
#asyncio.create_task(server_loop(app ))
app.add_task(server_loop(app.shared_ctx.model_queue))
@app.route('/api/getem', methods=['POST'])
async def getem(request):
print(request)
getdata = request.json
messages= getdata.get('input')
print(messages)
message=messages[0]
response_q = asyncio.Queue()
await request.app.shared_ctx.model_queue.put((message, response_q))
outputem = await response_q.get()
data={"data":[{"embedding":outputem}]}
return response.json(data)