khalednabawi11 committed on
Commit
4d6e31c
·
verified ·
1 Parent(s): c0fe2cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -8
app.py CHANGED
@@ -226,8 +226,11 @@ from pydantic import BaseModel, Field
226
  import asyncio
227
  from concurrent.futures import ThreadPoolExecutor
228
 
 
 
 
229
  # Load model and tokenizer
230
- model_name = "FreedomIntelligence/Apollo-7B"
231
  # model_name = "emilyalsentzer/Bio_ClinicalBERT"
232
  # model_name = "FreedomIntelligence/Apollo-2B"
233
 
@@ -294,16 +297,45 @@ def read_root():
294
  return {"message": "Apollo Medical Chatbot API is running"}
295
 
296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
@app.post("/ask")
async def chat_fn(query: Query):
    """Handle a chat request.

    Builds a prompt from the incoming message, runs the (blocking)
    transformers pipeline in the thread-pool executor so the event loop
    stays responsive, and returns the parsed answer.
    """
    user_message = query.message
    prompt = generate_prompt(user_message)

    # The HF pipeline call is synchronous/blocking; hand it off to the
    # executor instead of stalling the asyncio event loop.
    generated = await asyncio.get_event_loop().run_in_executor(
        executor,
        lambda: pipe(
            prompt,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
        )[0]['generated_text'],
    )

    # Prefer the English answer marker; fall back to the Arabic one.
    if "Answer:" in generated:
        answer = generated.split("Answer:")[-1].strip()
    else:
        answer = generated.split("الإجابة:")[-1].strip()
    return {"Answer": answer}
 
226
  import asyncio
227
  from concurrent.futures import ThreadPoolExecutor
228
 
229
+ logging.basicConfig(level=logging.INFO)
230
+ logger = logging.getLogger(__name__)
231
+
232
  # Load model and tokenizer
233
+ model_name = "FreedomIntelligence/Apollo-2B"
234
  # model_name = "emilyalsentzer/Bio_ClinicalBERT"
235
  # model_name = "FreedomIntelligence/Apollo-2B"
236
 
 
297
  return {"message": "Apollo Medical Chatbot API is running"}
298
 
299
 
300
+ # @app.post("/ask")
301
+ # async def chat_fn(query: Query):
302
+
303
+ # message = query.message
304
+ # logger.info(f"Received message: {message}")
305
+
306
+ # prompt = generate_prompt(message)
307
+
308
+ # # Run blocking inference in thread
309
+ # loop = asyncio.get_event_loop()
310
+ # response = await loop.run_in_executor(executor,
311
+ # lambda: pipe(prompt, max_new_tokens=512, temperature=0.7, do_sample=True, top_p=0.9)[0]['generated_text'])
312
+
313
+ # # Parse answer
314
+ # answer = response.split("Answer:")[-1].strip() if "Answer:" in response else response.split("الإجابة:")[-1].strip()
315
+ # return {"Answer": answer}
316
+
317
@app.post("/ask")
async def chat_fn(query: Query):
    """Answer a medical question with the Apollo model.

    Runs the blocking transformers pipeline in the thread-pool executor so
    the asyncio event loop stays responsive, logs timing, and returns the
    parsed answer as {"Answer": ...}.

    Raises:
        HTTPException: 500 if model inference fails for any reason.
    """
    message = query.message
    logger.info(f"Received message: {message}")

    prompt = generate_prompt(message)

    try:
        # NOTE(review): `time` and `HTTPException` are used here but their
        # imports are not visible in this diff — confirm they are imported
        # at the top of app.py.
        start_time = time.time()

        # The HF pipeline is blocking; run it off the event loop.
        loop = asyncio.get_event_loop()
        response = await loop.run_in_executor(
            executor,
            lambda: pipe(prompt, max_new_tokens=150, temperature=0.7,
                         do_sample=True, top_p=0.9)[0]['generated_text']
        )

        duration = time.time() - start_time
        logger.info(f"Model inference completed in {duration:.2f} seconds")

        # BUG FIX: `answer` was logged and returned without ever being
        # assigned (the parsing step was dropped in this revision), so every
        # request raised NameError and came back as a 500. Restore the parse:
        # take the text after the English "Answer:" marker, falling back to
        # the Arabic "الإجابة:" marker.
        answer = (response.split("Answer:")[-1].strip()
                  if "Answer:" in response
                  else response.split("الإجابة:")[-1].strip())

        logger.info(f"Generated answer: {answer}")
        return {"Answer": answer}

    except Exception as e:
        logger.error(f"Inference failed: {str(e)}")
        raise HTTPException(status_code=500, detail="Model inference failed.")