khalednabawi11 committed
Commit 9b4a539 · verified · 1 Parent(s): 99639ba

Update app.py

Files changed (1):
  1. app.py +264 -113

app.py CHANGED
@@ -1,52 +1,253 @@
import torch
import asyncio
import logging
import signal
import uvicorn
- import os

from fastapi import FastAPI, Request, HTTPException, status
from pydantic import BaseModel, Field
from langdetect import detect

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, GenerationConfig
- from langchain.vectorstores import Qdrant
from langchain.embeddings import HuggingFaceEmbeddings
- from langchain.chains import RetrievalQA
- from langchain.llms import HuggingFacePipeline
from qdrant_client import QdrantClient
- from langchain.callbacks.base import BaseCallbackHandler
from huggingface_hub import hf_hub_download
- from contextlib import asynccontextmanager

- # Get environment variables
- QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
- QDRANT_URL = os.getenv("QDRANT_URL")
COLLECTION_NAME = "arabic_rag_collection"
- QDRANT_URL = os.getenv("QDRANT_URL", "https://12efeef2-9f10-4402-9deb-f070977ddfc8.eu-central-1-0.aws.cloud.qdrant.io:6333")
- QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.Jb39rYQW2rSE9RdXrjdzKY6T1RF44XjdQzCvzFkjat4")

# === LOGGING === #
- logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

- # Load model and tokenizer
model_name = "FreedomIntelligence/Apollo-7B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

- # Connect to Qdrant + embedding
- embedding = HuggingFaceEmbeddings(model_name="Omartificial-Intelligence-Space/GATE-AraBert-v1")
- qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
-
- vector_store = Qdrant(
-     client=qdrant_client,
-     collection_name=COLLECTION_NAME,
-     embeddings=embedding
- )
-
- # Generation settings
generation_config = GenerationConfig(
    max_new_tokens=150,
    temperature=0.2,
@@ -56,7 +257,6 @@ generation_config = GenerationConfig(
    repetition_penalty=1.3,
)

- # Text generation pipeline
llm_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
@@ -64,53 +264,20 @@ llm_pipeline = pipeline(
    generation_config=generation_config,
    device=model.device.index if model.device.type == "cuda" else -1
)
-
llm = HuggingFacePipeline(pipeline=llm_pipeline)

- retriever = vector_store.as_retriever(search_kwargs={"k": 3})

- # Set up RAG QA chain
- qa_chain = RetrievalQA.from_chain_type(
-     llm=llm,
-     retriever=retriever,
-     chain_type="stuff"
)

- # FastAPI setup
- app = FastAPI(title="Apollo RAG Medical Chatbot")
-
- class Query(BaseModel):
-     question: str = Field(..., example="ما هي اسباب تساقط الشعر ؟", min_length=3)
-
- class TimeoutCallback(BaseCallbackHandler):
-     def __init__(self, timeout_seconds: int = 60):
-         self.timeout_seconds = timeout_seconds
-         self.start_time = None
-
-     async def on_llm_start(self, *args, **kwargs):
-         self.start_time = asyncio.get_event_loop().time()
-
-     async def on_llm_new_token(self, *args, **kwargs):
-         if asyncio.get_event_loop().time() - self.start_time > self.timeout_seconds:
-             raise TimeoutError("LLM processing timeout")
-
- # Prompt template
- # def generate_prompt(question: str) -> str:
- #     lang = detect(question)
- #     if lang == "ar":
- #         return f"""أجب على السؤال الطبي التالي بلغة عربية فصحى، بإجابة دقيقة ومفصلة. إذا لم تجد معلومات كافية في السياق، استخدم معرفتك الطبية السابقة.
- # وتأكد من ان:
- # - عدم تكرار أي نقطة أو عبارة أو كلمة
- # - وضوح وسلاسة كل نقطة
- # - تجنب الحشو والعبارات الزائدة
- # السؤال: {question}
- # الإجابة:"""
- #     else:
- #         return f"""Answer the following medical question in clear English with a detailed, non-redundant response. Do not repeat ideas or restate the question. If the context lacks information, rely on prior medical knowledge.
- # Question: {question}
- # Answer:"""
-
-

def generate_prompt(question: str) -> str:
    lang = detect(question)
    if lang == "ar":
@@ -124,23 +291,28 @@ def generate_prompt(question: str) -> str:
    else:
        return (
            "Answer the following medical question in clear English with a detailed, non-redundant response. "
-             "Do not repeat ideas, phrases, or restate the question in the answer. If the context lacks relevant "
-             "information, rely on your prior medical knowledge. If the answer involves multiple points, list them "
-             "in concise and distinct bullet points:\n"
            f"Question: {question}\nAnswer:"
        )
-
- # Input schema
- # class ChatRequest(BaseModel):
- #     message: str

- # # Output endpoint
- # @app.post("/chat")
- # def chat_rag(req: ChatRequest):
- #     prompt = generate_prompt(req.message)
- #     response = qa_chain.run(prompt)
- #     return {"response": response}


# === ROUTES === #
@app.get("/")
@@ -150,55 +322,34 @@ async def root():
@app.post("/ask")
async def ask(query: Query):
    try:
-         logger.debug(f"Received question: {query.question}")
-         prompt = generate_prompt(query.question)
-         timeout_callback = TimeoutCallback(timeout_seconds=60)
-

-         # docs = retriever.get_relevant_documents(query.question)
-         # if not docs:
-         #     logger.warning("No documents retrieved from Qdrant for the question.")
-         # else:
-         #     logger.debug(f"Retrieved documents: {[doc.page_content for doc in docs[:1]]}")
-
-         loop = asyncio.get_event_loop()
-
-         answer = await asyncio.wait_for(
-             # qa_chain.run(prompt, callbacks=[timeout_callback]),
-             loop.run_in_executor(None, qa_chain.run, query.question),
-             timeout=360
-         )
-
-         if not answer:
-             raise ValueError("Empty answer returned from model")
-
-         if 'Answer:' in answer:
-             response_text = answer.split('Answer:')[-1].strip()
-         elif 'الإجابة:' in answer:
-             response_text = answer.split('الإجابة:')[-1].strip()
        else:
-             response_text = answer.strip()

-
        return {
            "status": "success",
-             "answer": answer,
            "response": response_text,
            "language": detect(query.question)
        }

-     except TimeoutError as te:
-         logger.error("Request timed out", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_504_GATEWAY_TIMEOUT,
-             detail={"status": "error", "message": "Request timed out", "error": str(te)}
        )

    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-             detail={"status": "error", "message": "Internal server error", "error": str(e)}
        )

# === ENTRYPOINT === #
@@ -208,6 +359,6 @@ if __name__ == "__main__":
        exit(0)

    signal.signal(signal.SIGINT, handle_exit)
-     import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

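Note on the import changes in this commit: LangChain split its third-party integrations out of the core langchain package into langchain_community, which is what the new code migrates toward. A minimal sketch of the post-split import locations for the pieces app.py uses (assuming the langchain-community package is installed; the embeddings import also has a community home, although the new code below still uses the legacy path):

# Post-split import locations for the integrations used in app.py.
# Assumes langchain-community is installed alongside langchain.
from langchain_community.vectorstores import Qdrant
from langchain_community.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings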
+ # import torch
+ # import asyncio
+ # import logging
+ # import signal
+ # import uvicorn
+ # import os
+
+ # from fastapi import FastAPI, Request, HTTPException, status
+ # from pydantic import BaseModel, Field
+ # from langdetect import detect
+
+ # from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, GenerationConfig
+ # from langchain.vectorstores import Qdrant
+ # from langchain.embeddings import HuggingFaceEmbeddings
+ # from langchain.chains import RetrievalQA
+ # from langchain.llms import HuggingFacePipeline
+ # from qdrant_client import QdrantClient
+ # from langchain.callbacks.base import BaseCallbackHandler
+ # from huggingface_hub import hf_hub_download
+ # from contextlib import asynccontextmanager
+
+ # # Get environment variables
+ # QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
+ # QDRANT_URL = os.getenv("QDRANT_URL")
+ # COLLECTION_NAME = "arabic_rag_collection"
+ # QDRANT_URL = os.getenv("QDRANT_URL", "https://12efeef2-9f10-4402-9deb-f070977ddfc8.eu-central-1-0.aws.cloud.qdrant.io:6333")
+ # QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.Jb39rYQW2rSE9RdXrjdzKY6T1RF44XjdQzCvzFkjat4")
+
+ # # === LOGGING === #
+ # logging.basicConfig(level=logging.DEBUG)
+ # logger = logging.getLogger(__name__)
+
+ # # Load model and tokenizer
+ # model_name = "FreedomIntelligence/Apollo-7B"
+ # tokenizer = AutoTokenizer.from_pretrained(model_name)
+ # model = AutoModelForCausalLM.from_pretrained(model_name)
+ # tokenizer.pad_token = tokenizer.eos_token
+
+ # # Connect to Qdrant + embedding
+ # embedding = HuggingFaceEmbeddings(model_name="Omartificial-Intelligence-Space/GATE-AraBert-v1")
+ # qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
+
+ # vector_store = Qdrant(
+ #     client=qdrant_client,
+ #     collection_name=COLLECTION_NAME,
+ #     embeddings=embedding
+ # )
+
+ # # Generation settings
+ # generation_config = GenerationConfig(
+ #     max_new_tokens=150,
+ #     temperature=0.2,
+ #     top_k=20,
+ #     do_sample=True,
+ #     top_p=0.7,
+ #     repetition_penalty=1.3,
+ # )
+
+ # # Text generation pipeline
+ # llm_pipeline = pipeline(
+ #     model=model,
+ #     tokenizer=tokenizer,
+ #     task="text-generation",
+ #     generation_config=generation_config,
+ #     device=model.device.index if model.device.type == "cuda" else -1
+ # )
+
+ # llm = HuggingFacePipeline(pipeline=llm_pipeline)
+
+ # retriever = vector_store.as_retriever(search_kwargs={"k": 3})
+
+ # # Set up RAG QA chain
+ # qa_chain = RetrievalQA.from_chain_type(
+ #     llm=llm,
+ #     retriever=retriever,
+ #     chain_type="stuff"
+ # )
+
+ # # FastAPI setup
+ # app = FastAPI(title="Apollo RAG Medical Chatbot")
+
+ # class Query(BaseModel):
+ #     question: str = Field(..., example="ما هي اسباب تساقط الشعر ؟", min_length=3)
+
+ # class TimeoutCallback(BaseCallbackHandler):
+ #     def __init__(self, timeout_seconds: int = 60):
+ #         self.timeout_seconds = timeout_seconds
+ #         self.start_time = None
+
+ #     async def on_llm_start(self, *args, **kwargs):
+ #         self.start_time = asyncio.get_event_loop().time()
+
+ #     async def on_llm_new_token(self, *args, **kwargs):
+ #         if asyncio.get_event_loop().time() - self.start_time > self.timeout_seconds:
+ #             raise TimeoutError("LLM processing timeout")
+
+ # # Prompt template
+ # # def generate_prompt(question: str) -> str:
+ # #     lang = detect(question)
+ # #     if lang == "ar":
+ # #         return f"""أجب على السؤال الطبي التالي بلغة عربية فصحى، بإجابة دقيقة ومفصلة. إذا لم تجد معلومات كافية في السياق، استخدم معرفتك الطبية السابقة.
+ # # وتأكد من ان:
+ # # - عدم تكرار أي نقطة أو عبارة أو كلمة
+ # # - وضوح وسلاسة كل نقطة
+ # # - تجنب الحشو والعبارات الزائدة
+ # # السؤال: {question}
+ # # الإجابة:"""
+ # #     else:
+ # #         return f"""Answer the following medical question in clear English with a detailed, non-redundant response. Do not repeat ideas or restate the question. If the context lacks information, rely on prior medical knowledge.
+ # # Question: {question}
+ # # Answer:"""
+
+
+ # def generate_prompt(question: str) -> str:
+ #     lang = detect(question)
+ #     if lang == "ar":
+ #         return (
+ #             "أجب على السؤال الطبي التالي بلغة عربية فصحى، بإجابة دقيقة ومفصلة. إذا لم تجد معلومات كافية في السياق، استخدم معرفتك الطبية السابقة. \n"
+ #             "- عدم تكرار أي نقطة أو عبارة أو كلمة\n"
+ #             "- وضوح وسلاسة كل نقطة\n"
+ #             "- تجنب الحشو والعبارات الزائدة\n"
+ #             f"\nالسؤال: {question}\nالإجابة:"
+ #         )
+ #     else:
+ #         return (
+ #             "Answer the following medical question in clear English with a detailed, non-redundant response. "
+ #             "Do not repeat ideas, phrases, or restate the question in the answer. If the context lacks relevant "
+ #             "information, rely on your prior medical knowledge. If the answer involves multiple points, list them "
+ #             "in concise and distinct bullet points:\n"
+ #             f"Question: {question}\nAnswer:"
+ #         )
+
+ # # Input schema
+ # # class ChatRequest(BaseModel):
+ # #     message: str
+
+ # # # Output endpoint
+ # # @app.post("/chat")
+ # # def chat_rag(req: ChatRequest):
+ # #     prompt = generate_prompt(req.message)
+ # #     response = qa_chain.run(prompt)
+ # #     return {"response": response}
+
+
+ # # === ROUTES === #
+ # @app.get("/")
+ # async def root():
+ #     return {"message": "Medical QA API is running!"}
+
+ # @app.post("/ask")
+ # async def ask(query: Query):
+ #     try:
+ #         logger.debug(f"Received question: {query.question}")
+ #         prompt = generate_prompt(query.question)
+ #         timeout_callback = TimeoutCallback(timeout_seconds=60)
+
+
+ #         # docs = retriever.get_relevant_documents(query.question)
+ #         # if not docs:
+ #         #     logger.warning("No documents retrieved from Qdrant for the question.")
+ #         # else:
+ #         #     logger.debug(f"Retrieved documents: {[doc.page_content for doc in docs[:1]]}")
+
+ #         loop = asyncio.get_event_loop()
+
+ #         answer = await asyncio.wait_for(
+ #             # qa_chain.run(prompt, callbacks=[timeout_callback]),
+ #             loop.run_in_executor(None, qa_chain.run, query.question),
+ #             timeout=360
+ #         )
+
+ #         if not answer:
+ #             raise ValueError("Empty answer returned from model")
+
+ #         if 'Answer:' in answer:
+ #             response_text = answer.split('Answer:')[-1].strip()
+ #         elif 'الإجابة:' in answer:
+ #             response_text = answer.split('الإجابة:')[-1].strip()
+ #         else:
+ #             response_text = answer.strip()
+
+
+ #         return {
+ #             "status": "success",
+ #             "answer": answer,
+ #             "response": response_text,
+ #             "language": detect(query.question)
+ #         }
+
+ #     except TimeoutError as te:
+ #         logger.error("Request timed out", exc_info=True)
+ #         raise HTTPException(
+ #             status_code=status.HTTP_504_GATEWAY_TIMEOUT,
+ #             detail={"status": "error", "message": "Request timed out", "error": str(te)}
+ #         )
+
+ #     except Exception as e:
+ #         logger.error(f"Unexpected error: {e}", exc_info=True)
+ #         raise HTTPException(
+ #             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+ #             detail={"status": "error", "message": "Internal server error", "error": str(e)}
+ #         )
+
+ # # === ENTRYPOINT === #
+ # if __name__ == "__main__":
+ #     def handle_exit(signum, frame):
+ #         print("Shutting down gracefully...")
+ #         exit(0)
+
+ #     signal.signal(signal.SIGINT, handle_exit)
+ #     import uvicorn
+ #     uvicorn.run(app, host="0.0.0.0", port=8000)
+
+
+
import torch
import asyncio
import logging
import signal
import uvicorn
+ import os

from fastapi import FastAPI, Request, HTTPException, status
from pydantic import BaseModel, Field
from langdetect import detect

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, GenerationConfig
+ from langchain_community.vectorstores import Qdrant
from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain_community.llms import HuggingFacePipeline
from qdrant_client import QdrantClient
+ from langchain_core.runnables import RunnableMap
from huggingface_hub import hf_hub_download

+ # === ENVIRONMENT SETUP === #
+ QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "your_fallback_api_key")
+ QDRANT_URL = os.getenv("QDRANT_URL", "your_fallback_qdrant_url")
COLLECTION_NAME = "arabic_rag_collection"

# === LOGGING === #
+ logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

+ # === MODEL SETUP === #
model_name = "FreedomIntelligence/Apollo-7B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

+ # === GENERATION CONFIG === #
generation_config = GenerationConfig(
    max_new_tokens=150,
    temperature=0.2,
    top_k=20,
    do_sample=True,
    top_p=0.7,
    repetition_penalty=1.3,
)

llm_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    generation_config=generation_config,
    device=model.device.index if model.device.type == "cuda" else -1
)
llm = HuggingFacePipeline(pipeline=llm_pipeline)

+ # === EMBEDDING + VECTOR STORE === #
+ embedding = HuggingFaceEmbeddings(model_name="Omartificial-Intelligence-Space/GATE-AraBert-v1")
+ qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

+ vector_store = Qdrant(
+     client=qdrant_client,
+     collection_name=COLLECTION_NAME,
+     embeddings=embedding
)
+ retriever = vector_store.as_retriever(search_kwargs={"k": 3})

+ # === PROMPT FUNCTION === #
def generate_prompt(question: str) -> str:
    lang = detect(question)
    if lang == "ar":
        return (
            "أجب على السؤال الطبي التالي بلغة عربية فصحى، بإجابة دقيقة ومفصلة. إذا لم تجد معلومات كافية في السياق، استخدم معرفتك الطبية السابقة. \n"
            "- عدم تكرار أي نقطة أو عبارة أو كلمة\n"
            "- وضوح وسلاسة كل نقطة\n"
            "- تجنب الحشو والعبارات الزائدة\n"
            f"\nالسؤال: {question}\nالإجابة:"
        )
    else:
        return (
            "Answer the following medical question in clear English with a detailed, non-redundant response. "
+             "Do not repeat ideas, phrases, or restate the question. If the context lacks relevant "
+             "information, rely on prior medical knowledge.\n"
            f"Question: {question}\nAnswer:"
        )

+ # === FASTAPI SETUP === #
+ app = FastAPI(title="Apollo RAG Medical Chatbot")

+ class Query(BaseModel):
+     question: str = Field(..., example="ما هي اسباب تساقط الشعر ؟", min_length=3)
+
+ # === RAG PIPELINE === #
+ async def async_chain(question: str):
+     prompt = generate_prompt(question)
+     docs = await retriever.aget_relevant_documents(question)
+     if not docs:
+         logger.warning("No relevant documents found in Qdrant.")
+     context = "\n".join([doc.page_content for doc in docs])
+     full_prompt = f"{context}\n\n{prompt}"
+     logger.debug(f"Prompt: {full_prompt}")
+     response = llm.invoke(full_prompt)
+     return response

# === ROUTES === #
@app.get("/")
async def root():
    return {"message": "Medical QA API is running!"}

@app.post("/ask")
async def ask(query: Query):
    try:
+         response = await asyncio.wait_for(async_chain(query.question), timeout=60)

+         if 'Answer:' in response:
+             response_text = response.split('Answer:')[-1].strip()
+         elif 'الإجابة:' in response:
+             response_text = response.split('الإجابة:')[-1].strip()
        else:
+             response_text = response.strip()

        return {
            "status": "success",
+             "answer": response,
            "response": response_text,
            "language": detect(query.question)
        }

+     except asyncio.TimeoutError:
+         logger.error("Request timed out")
        raise HTTPException(
            status_code=status.HTTP_504_GATEWAY_TIMEOUT,
+             detail="Request timed out"
        )

    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+             detail=f"Internal server error: {e}"
        )

# === ENTRYPOINT === #
if __name__ == "__main__":
    def handle_exit(signum, frame):
        print("Shutting down gracefully...")
        exit(0)

    signal.signal(signal.SIGINT, handle_exit)
    uvicorn.run(app, host="0.0.0.0", port=8000)

+
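To exercise the updated endpoint, a minimal client sketch (assumes the server is running locally on port 8000 as in the entrypoint; the requests dependency and the sample question are illustrative):

import requests

# POST a question to /ask; the JSON key must match the Query model's
# "question" field. The response carries both the raw model output
# ("answer") and the text after the Answer:/الإجابة: marker ("response").
resp = requests.post(
    "http://localhost:8000/ask",
    json={"question": "ما هي اسباب تساقط الشعر ؟"},
    timeout=90,  # a little above the server's own 60 s generation timeout
)
resp.raise_for_status()
data = resp.json()
print(data["language"], data["response"])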
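One caveat in the new async_chain: llm.invoke is a blocking call inside a coroutine, so the asyncio.wait_for timeout on /ask cannot fire while the event loop is stuck in generation. A sketch of one way to offload it, reusing the run_in_executor pattern the old handler used (hypothetical helper, not part of this commit):

import asyncio

async def invoke_llm_nonblocking(llm, full_prompt: str) -> str:
    # Run the blocking HuggingFacePipeline call in the default thread
    # pool so the FastAPI event loop stays responsive and the outer
    # asyncio.wait_for timeout can actually take effect.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, llm.invoke, full_prompt)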