afouda commited on
Commit
ea1e6bd
·
verified ·
1 Parent(s): 9397025

Upload 7 files

Browse files
Files changed (7) hide show
  1. Old_Document.py +175 -0
  2. RAG.py +123 -0
  3. RAG_Domain_know_doc.py +165 -0
  4. User_Specific_Documents.py +167 -0
  5. prompt_template.py +519 -0
  6. query_utils.py +314 -0
  7. web_search.py +83 -0
Old_Document.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import asyncio
from dotenv import load_dotenv
import gradio as gr
from query_utils import process_query_for_rewrite, get_non_autism_response

# ─── Configuration ──────────────────────────────────────────────────────────
# Secrets are loaded from the environment (.env) instead of being hard-coded
# in source control. Any keys that were previously committed must be rotated.
load_dotenv()

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_URL = os.getenv("QDRANT_URL")
WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")
DEEPINFRA_BASE_URL = os.getenv("DEEPINFRA_BASE_URL", "https://api.deepinfra.com/v1/openai")

# Fail fast if the keys this module actually uses are missing.
if not (DEEPINFRA_API_KEY and WEAVIATE_URL and WEAVIATE_API_KEY):
    raise ValueError("Please set all required keys in .env")

# DeepInfra exposes an OpenAI-compatible API, so the OpenAI SDK is reused
# with a custom base URL.
from openai import OpenAI
openai = OpenAI(
    api_key=DEEPINFRA_API_KEY,
    base_url=DEEPINFRA_BASE_URL,
)
38
+
39
+ # Weaviate client
40
+ import weaviate
41
+ from weaviate.classes.init import Auth
42
+ from contextlib import contextmanager
43
+
44
@contextmanager
def weaviate_client():
    """Yield a Weaviate Cloud client and guarantee it is closed on exit."""
    connection = weaviate.connect_to_weaviate_cloud(
        cluster_url=WEAVIATE_URL,
        auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
        skip_init_checks=True,  # disables the gRPC startup health check
    )
    try:
        yield connection
    finally:
        connection.close()
56
+
57
# Global path tracker: the most recently ingested document path.
# Written by ingest_file(); read by the Gradio handler below.
last_uploaded_path = None
59
+
60
+ # Embed function
61
def embed_texts(texts: list[str], batch_size: int = 50) -> list[list[float]]:
    """Embed *texts* in batches of *batch_size*; a failed batch yields empty vectors."""
    vectors: list[list[float]] = []
    for start in range(0, len(texts), batch_size):
        chunk = texts[start : start + batch_size]
        try:
            response = openai.embeddings.create(
                model="Qwen/Qwen3-Embedding-8B",
                input=chunk,
                encoding_format="float",
            )
            vectors.extend(item.embedding for item in response.data)
        except Exception as err:
            # Best-effort: keep positions aligned by padding with empty lists.
            print(f"Embedding error: {err}")
            vectors.extend([] for _ in chunk)
    return vectors
77
+
78
def encode_query(query: str) -> list[float] | None:
    """Return the embedding vector for *query*, or None when embedding failed."""
    vectors = embed_texts([query], batch_size=1)
    first = vectors[0] if vectors else None
    # An empty vector (failed batch) is treated the same as no vector at all.
    return first or None
83
+
84
async def old_Document(query: str, top_k: int = 1) -> dict:
    """Retrieve up to *top_k* text chunks from the Weaviate 'user' collection.

    Returns {"answer": [...]}; the list is empty when embedding or retrieval fails.
    """
    query_vector = encode_query(query)
    if not query_vector:
        return {"answer": []}

    try:
        with weaviate_client() as client:
            collection = client.collections.get("user")
            result = collection.query.near_vector(
                near_vector=query_vector,
                limit=top_k,
                return_properties=["text"],
            )
            objects = getattr(result, "objects", None)
            if not objects:
                return {"answer": []}
            texts = [obj.properties.get("text", "[No Text]") for obj in objects]
            return {"answer": texts}
    except Exception as err:
        print("RAG Error:", err)
        return {"answer": []}
105
+
106
+ # New functions to support Gradio app
107
def ingest_file(path: str) -> str:
    """Record *path* as the active old document and report its file name."""
    global last_uploaded_path
    last_uploaded_path = path
    filename = os.path.basename(path)
    return f"Old document ingested: {filename}"
111
+
112
def answer_question(query: str) -> str:
    """Answer *query* from the ingested old document, with two autism-relevance gates."""
    try:
        corrected_query, is_autism_related, rewritten_query = process_query_for_rewrite(query)

        # Gate 1: the question itself must be about autism.
        if not is_autism_related:
            return get_non_autism_response()

        # Retrieve with the corrected (translated/cleaned) query.
        retrieval = asyncio.run(old_Document(corrected_query))
        passages = retrieval.get("answer", [])
        if not passages:
            return "Sorry, I couldn't find relevant content in the old document."

        combined_answer = "\n".join(f"- {c}" for c in passages)

        # Gate 2: the retrieved content must also score as autism-related.
        from query_utils import check_answer_autism_relevance, get_non_autism_answer_response
        if check_answer_autism_relevance(combined_answer) < 50:
            return get_non_autism_answer_response()

        return combined_answer
    except Exception as e:
        return f"Error processing your request: {e}"
143
+
144
+ # Gradio interface for Old Documents
145
# Gradio interface for Old Documents: one handler both ingests an optional
# upload and answers the question against the stored document.
with gr.Blocks(title="Old Documents RAG") as demo:
    gr.Markdown("## Old Documents RAG")
    query = gr.Textbox(placeholder="Your question...", lines=2, label="Ask about Old Documents")
    doc_file = gr.File(label="Upload Old Document (PDF, DOCX, TXT)")
    btn = gr.Button("Submit")
    out = gr.Textbox(label="Answer from Old Documents", lines=8, interactive=False)

    def process_old_doc(query, doc_file):
        # Combined handler: if a file is attached, persist + ingest it first;
        # otherwise fall back to the last ingested document.
        if doc_file:
            # Save and ingest the uploaded file under ./uploaded_docs
            upload_dir = os.path.join(os.path.dirname(__file__), "uploaded_docs")
            os.makedirs(upload_dir, exist_ok=True)
            safe_filename = os.path.basename(doc_file.name)
            save_path = os.path.join(upload_dir, safe_filename)
            with open(save_path, "wb") as f:
                # NOTE(review): assumes gr.File yields a readable file object;
                # newer Gradio versions pass a path string — confirm version.
                f.write(doc_file.read())
            status = ingest_file(save_path)
            answer = answer_question(query)
            return f"{status}\n\n{answer}"
        else:
            # Use last uploaded file or return an error if none exists
            if last_uploaded_path:
                answer = answer_question(query)
                return f"[Using previously uploaded document: {os.path.basename(last_uploaded_path)}]\n\n{answer}"
            else:
                return "No document uploaded. Please upload an old document to proceed."

    btn.click(fn=process_old_doc, inputs=[query, doc_file], outputs=out)

if __name__ == "__main__":
    demo.launch(debug=True)
RAG.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import asyncio
from dotenv import load_dotenv

# ─── Configuration ──────────────────────────────────────────────────────────
# Secrets come from the environment (.env); never commit keys to source
# control. Any previously committed keys must be rotated.
load_dotenv()

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_URL = os.getenv("QDRANT_URL")
WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")
DEEPINFRA_BASE_URL = os.getenv("DEEPINFRA_BASE_URL", "https://api.deepinfra.com/v1/openai")

# Fail fast if the keys this module actually uses are missing.
if not (DEEPINFRA_API_KEY and WEAVIATE_URL and WEAVIATE_API_KEY):
    raise ValueError("Please set all required keys in .env")

# Initialize DeepInfra-compatible OpenAI client
from openai import OpenAI
openai = OpenAI(
    api_key=DEEPINFRA_API_KEY,
    base_url=DEEPINFRA_BASE_URL,
)
32
+
33
+ # Weaviate imports
34
+ import weaviate
35
+ from weaviate.classes.init import Auth
36
+ from contextlib import contextmanager
37
+
38
@contextmanager
def weaviate_client():
    """
    Context manager that yields a Weaviate Cloud client and
    guarantees client.close() on exit, even when retrieval raises.
    """
    client = weaviate.connect_to_weaviate_cloud(
        cluster_url=WEAVIATE_URL,
        auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
        skip_init_checks=True,  # <-- This disables gRPC check

    )
    try:
        yield client
    finally:
        client.close()
54
+
55
def embed_texts(texts: list[str], batch_size: int = 50) -> list[list[float]]:
    """Embed texts in batches to avoid API limits.

    Returns one vector per input text, in order; texts whose batch failed
    get an empty list so positions stay aligned with the input.
    """
    all_embeddings: list[list[float]] = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i : i + batch_size]
        try:
            resp = openai.embeddings.create(
                model="Qwen/Qwen3-Embedding-8B",
                input=batch,
                encoding_format="float"
            )
            batch_embs = [item.embedding for item in resp.data]
            all_embeddings.extend(batch_embs)
        except Exception as e:
            # Best-effort: log and pad with empty vectors instead of aborting.
            print(f"Embedding batch error (items {i}–{i+len(batch)-1}): {e}")
            all_embeddings.extend([[] for _ in batch])
    return all_embeddings
72
+
73
def encode_query(query: str) -> list[float] | None:
    """Generate a single embedding vector for a query string.

    Returns None when the embedding call failed (empty vector from embed_texts).
    """
    embs = embed_texts([query], batch_size=1)
    if embs and embs[0]:
        # Debug aid: print a short prefix of the vector, not the whole thing.
        print("Query embedding (first 5 dims):", embs[0][:5])
        return embs[0]
    print("Failed to generate query embedding.")
    return None
81
+
82
async def rag_autism(query: str, top_k: int = 3) -> dict:
    """
    Run a RAG retrieval on the 'Books' collection in Weaviate.
    Returns up to `top_k` matching text chunks as {"answer": [...]};
    the list is empty on embedding failure, no matches, or any error.
    """
    qe = encode_query(query)
    if not qe:
        return {"answer": []}

    try:
        with weaviate_client() as client:
            coll = client.collections.get("Books")
            res = coll.query.near_vector(
                near_vector=qe,
                limit=top_k,
                return_properties=["text"]
            )
            # Defensive: treat a missing/empty objects attribute as "no hits".
            if not getattr(res, "objects", None):
                return {"answer": []}
            return {
                "answer": [
                    obj.properties.get("text", "[No Text]")
                    for obj in res.objects
                ]
            }
    except Exception as e:
        # Retrieval errors degrade to an empty answer rather than raising.
        print("RAG Error:", e)
        return {"answer": []}
110
+
111
+ # Example test harness
112
+ # if __name__ == "__main__":
113
+ # test_queries = [
114
+ # "What are the common early signs of autism in young children?",
115
+ # "What diagnostic criteria are used for autism spectrum disorder?",
116
+ # "What support strategies help improve communication skills in autistic individuals?"
117
+ # ]
118
+ # for q in test_queries:
119
+ # print(f"\nQuery: {q}")
120
+ # out = asyncio.run(rag_autism(q, top_k=3))
121
+ # print("Retrieved contexts:")
122
+ # for idx, ctx in enumerate(out["answer"], 1):
123
+ # print(f"{idx}. {ctx}")
RAG_Domain_know_doc.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import gradio as gr
from openai import OpenAI
import weaviate
from weaviate.classes.init import Auth
import pypdf  # Replaced PyPDF2
import docx
from langchain.text_splitter import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
from prompt_template import (
    Prompt_template_translation,
    Prompt_template_LLM_Generation,
    Prompt_template_Reranker,
    Prompt_template_Wisal,
    Prompt_template_Halluciations,
    Prompt_template_paraphrasing,
    Prompt_template_Translate_to_original,
    Prompt_template_relevance
)
from query_utils import process_query_for_rewrite, get_non_autism_response
# ─── Configuration ─────────────────────────────────────────────────────────────
# Secrets come from the environment (.env); never commit keys to source
# control. Any previously committed keys must be rotated.
load_dotenv()

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_URL = os.getenv("QDRANT_URL")
WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")
DEEPINFRA_BASE_URL = os.getenv("DEEPINFRA_BASE_URL", "https://api.deepinfra.com/v1/openai")

# Fail fast if the keys this module actually uses are missing.
if not (DEEPINFRA_API_KEY and WEAVIATE_URL and WEAVIATE_API_KEY):
    raise ValueError("Please set all required keys in .env")

# DeepInfra-compatible OpenAI client (custom base URL, OpenAI SDK).
openai = OpenAI(
    api_key=DEEPINFRA_API_KEY,
    base_url=DEEPINFRA_BASE_URL,
)
# Initialize Weaviate client (module-level; shared by ingest and query paths)
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
    skip_init_checks=True,  # <-- This disables gRPC check
)
54
+ # ─── Utility: Extract raw text ──────────────────────────────────────────────────
55
def extract_text(file_path: str) -> str:
    """Return the raw text of a PDF, DOCX, or TXT file.

    Raises ValueError for any other extension.
    """
    suffix = os.path.splitext(file_path)[1].lower()
    if suffix == ".pdf":
        with open(file_path, "rb") as fh:
            reader = pypdf.PdfReader(fh)
            # Pages with no extractable text contribute just a newline.
            return "".join((page.extract_text() or "") + "\n" for page in reader.pages)
    if suffix == ".docx":
        document = docx.Document(file_path)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)
    if suffix == ".txt":
        with open(file_path, "r", encoding="utf-8") as fh:
            return fh.read()
    raise ValueError("Unsupported file format. Use PDF, DOCX, or TXT.")
73
+ # ─── Chunker & Embed ──────────────────────────────────────────────────────────
74
# Shared chunker: ~1000-char chunks with 200-char overlap, splitting on
# paragraph, line, then word boundaries.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\n\n", "\n", " "],
)
def embed_texts(texts: list[str], batch_size: int = 50) -> list[list[float]]:
    """Embed texts in batches to avoid API limits.

    NOTE(review): unlike the sibling embed_texts in RAG.py, this variant has
    no try/except — one failed batch aborts the whole ingest. Confirm intended.
    """
    all_embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        resp = openai.embeddings.create(
            model="Qwen/Qwen3-Embedding-8B",
            input=batch,
            encoding_format="float"
        )
        all_embeddings.extend([item.embedding for item in resp.data])
    return all_embeddings
91
+ # ─── Ingest & Index ───────────────────────────────────────────────────────────
92
def ingest_file(file_path: str) -> str:
    """Extract, chunk, embed, and index *file_path* into the 'Books' collection."""
    raw = extract_text(file_path)
    docs = splitter.split_text(raw)
    texts = [chunk for chunk in docs]
    vectors = embed_texts(texts)
    # Get the collection
    documents = client.collections.get("Books")
    # Batch insert with new API (client-side dynamic batching)
    with client.batch.dynamic() as batch:
        for txt, vec in zip(texts, vectors):
            batch.add_object(
                collection="Books",
                properties={"text": txt},
                vector=vec
            )
    return f"Ingested {len(texts)} chunks from {os.path.basename(file_path)}"
108
+ # ─── Query & Answer ───────────────────────────────────────────────────────────
109
def answer_question(question: str) -> str:
    """Answer *question* via RAG over 'Books', with two autism-relevance gates."""
    # Process query for rewriting and relevance checking
    corrected_query, is_autism_related, rewritten_query = process_query_for_rewrite(question)

    # Gate 1: if not autism-related, show direct rejection message
    if not is_autism_related:
        return get_non_autism_response()

    # Use the corrected query for retrieval (top-5 nearest chunks)
    q_vec = embed_texts([corrected_query])[0]
    documents = client.collections.get("Books")
    response = documents.query.near_vector(
        near_vector=q_vec,
        limit=5,
        return_metadata=["distance"]
    )
    hits = response.objects
    context = "\n\n".join(hit.properties["text"] for hit in hits)
    print(context)
    # Generate the grounded answer from the Wisal prompt + retrieved context.
    wisal_prompt = Prompt_template_Wisal.format(new_query=corrected_query, document=context)
    chat = openai.chat.completions.create(
        model="Qwen/Qwen3-32B",
        messages=[
            {"role": "user", "content": wisal_prompt
            }
        ],
        temperature=0,
        # NOTE(review): reasoning_effort="none" is a DeepInfra/Qwen extension,
        # not a standard OpenAI chat parameter — confirm SDK tolerance.
        reasoning_effort="none"
    )
    initial_answer = chat.choices[0].message.content

    # Gate 2: check if the generated answer is sufficiently related to autism
    from query_utils import check_answer_autism_relevance, get_non_autism_answer_response

    answer_relevance_score = check_answer_autism_relevance(initial_answer)

    # If answer relevance is below 50, refuse the answer (enhanced-scoring threshold)
    if answer_relevance_score < 50:
        return get_non_autism_answer_response()

    # If sufficiently autism-related, return the answer
    return initial_answer
151
+ # ─── Gradio Interface ─────────────────────────────────────────────────────────
152
# ─── Gradio Interface: ingest row + ask row sharing the module-level client ───
with gr.Blocks(title="Document Q&A with Qwen & Weaviate") as demo:
    gr.Markdown("## Upload a PDF, DOCX, or TXT and then ask away!")
    with gr.Row():
        up = gr.File(label="Select document")
        btn = gr.Button("Ingest")
        out = gr.Textbox(label="Status", interactive=False)
        btn.click(fn=lambda f: ingest_file(f.name), inputs=up, outputs=out)
    with gr.Row():
        q = gr.Textbox(placeholder="Your question...", lines=2)
        ask = gr.Button("Ask")
        ans = gr.Textbox(label="Answer", lines=6, interactive=False)
        ask.click(fn=answer_question, inputs=q, outputs=ans)
if __name__ == "__main__":
    demo.launch(debug=True)
User_Specific_Documents.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import gradio as gr
from openai import OpenAI
import weaviate
from weaviate.classes.init import Auth
import pypdf  # Replaced PyPDF2
import docx
from langchain.text_splitter import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
from prompt_template import (
    Prompt_template_translation,
    Prompt_template_LLM_Generation,
    Prompt_template_Reranker,
    Prompt_template_Wisal,
    Prompt_template_Halluciations,
    Prompt_template_paraphrasing,
    Prompt_template_Translate_to_original,
    Prompt_template_relevance,
    Prompt_template_User_document_prompt
)
from query_utils import process_query_for_rewrite, get_non_autism_response
# ─── Configuration ─────────────────────────────────────────────────────────────
# Secrets come from the environment (.env); never commit keys to source
# control. Any previously committed keys must be rotated.
load_dotenv()

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_URL = os.getenv("QDRANT_URL")
WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")
DEEPINFRA_BASE_URL = os.getenv("DEEPINFRA_BASE_URL", "https://api.deepinfra.com/v1/openai")

# Fail fast if the keys this module actually uses are missing.
if not (DEEPINFRA_API_KEY and WEAVIATE_URL and WEAVIATE_API_KEY):
    raise ValueError("Please set all required keys in .env")

# DeepInfra-compatible OpenAI client (custom base URL, OpenAI SDK).
openai = OpenAI(
    api_key=DEEPINFRA_API_KEY,
    base_url=DEEPINFRA_BASE_URL,
)
# Initialize Weaviate client (module-level; shared by ingest and query paths)
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
    skip_init_checks=True,  # <-- This disables gRPC check
)
55
+ # ─── Utility: Extract raw text ──────────────────────────────────────────────────
56
def extract_text(file_path: str) -> str:
    """Return the raw text of a PDF, DOCX, or TXT file; raise ValueError otherwise."""
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        text = ""
        with open(file_path, "rb") as f:
            reader = pypdf.PdfReader(f)
            for page in reader.pages:
                # Pages with no extractable text contribute just a newline.
                page_text = page.extract_text() or ""
                text += page_text + "\n"
    elif ext == ".docx":
        doc = docx.Document(file_path)
        text = "\n".join(p.text for p in doc.paragraphs)
    elif ext == ".txt":
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
    else:
        raise ValueError("Unsupported file format. Use PDF, DOCX, or TXT.")
    return text
74
+ # ─── Chunker & Embed ──────────────────────────────────────────────────────────
75
# Shared chunker: ~1000-char chunks with 200-char overlap, splitting on
# paragraph, line, then word boundaries.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\n\n", "\n", " "],
)
def embed_texts(texts: list[str], batch_size: int = 70) -> list[list[float]]:
    """Embed texts in batches to avoid API limits.

    NOTE(review): batch_size here is 70 while the sibling modules use 50;
    also no try/except, so one failed batch aborts ingest. Confirm intended.
    """
    all_embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        resp = openai.embeddings.create(
            model="Qwen/Qwen3-Embedding-8B",
            input=batch,
            encoding_format="float"
        )
        all_embeddings.extend([item.embedding for item in resp.data])
    return all_embeddings
92
+ # ─── Ingest & Index ───────────────────────────────────────────────────────────
93
def ingest_file(file_path: str) -> str:
    """Extract, chunk, embed, and index *file_path* into the 'user' collection."""
    raw = extract_text(file_path)
    docs = splitter.split_text(raw)
    texts = [chunk for chunk in docs]
    vectors = embed_texts(texts)
    # Get the collection
    documents = client.collections.get("user")
    # Batch insert with new API (client-side dynamic batching)
    with client.batch.dynamic() as batch:
        for txt, vec in zip(texts, vectors):
            batch.add_object(
                collection="user",
                properties={"text": txt},
                vector=vec
            )
    return f"Ingested {len(texts)} chunks from {os.path.basename(file_path)}"
109
+ # ───────────────────────────────────────────── Query & Answer ───────────────────────────────────────────────────────────
110
def answer_question(question: str) -> str:
    """Answer *question* via RAG over the user's documents, with two autism gates."""
    # Process query for rewriting and relevance checking
    corrected_query, is_autism_related, rewritten_query = process_query_for_rewrite(question)

    # Gate 1: if not autism-related, show direct rejection message
    if not is_autism_related:
        return get_non_autism_response()

    # Use the corrected query for retrieval (top-5 nearest chunks from 'user')
    q_vec = embed_texts([corrected_query])[0]
    documents = client.collections.get("user")
    response = documents.query.near_vector(
        near_vector=q_vec,
        limit=5,
        return_metadata=["distance"]
    )
    hits = response.objects
    context = "\n\n".join(hit.properties["text"] for hit in hits)
    print(context)

    # Generate the grounded answer from the user-document prompt + context.
    UserSpecificDocument_prompt = Prompt_template_User_document_prompt.format(new_query=corrected_query, document=context)
    chat = openai.chat.completions.create(
        model="Qwen/Qwen3-32B",
        messages=[
            {"role": "user", "content": UserSpecificDocument_prompt
            }
        ],
        temperature=0,
        # NOTE(review): reasoning_effort="none" is a DeepInfra/Qwen extension,
        # not a standard OpenAI chat parameter — confirm SDK tolerance.
        reasoning_effort="none"
    )
    initial_answer = chat.choices[0].message.content

    # Gate 2: check if the generated answer is sufficiently related to autism
    from query_utils import check_answer_autism_relevance, get_non_autism_answer_response

    answer_relevance_score = check_answer_autism_relevance(initial_answer)

    # If answer relevance is below 50, refuse the answer (enhanced-scoring threshold)
    if answer_relevance_score < 50:
        return get_non_autism_answer_response()

    # If sufficiently autism-related, return the answer
    return initial_answer
153
+ # ─── Gradio Interface ─────────────────────────────────────────────────────────
154
# ─── Gradio Interface: ingest row + ask row sharing the module-level client ───
with gr.Blocks(title="Document Q&A with Qwen & Weaviate") as demo:
    gr.Markdown("## Upload a PDF, DOCX, or TXT and then ask away!")
    with gr.Row():
        up = gr.File(label="Select document")
        btn = gr.Button("Ingest")
        out = gr.Textbox(label="Status", interactive=False)
        btn.click(fn=lambda f: ingest_file(f.name), inputs=up, outputs=out)
    with gr.Row():
        q = gr.Textbox(placeholder="Your question...", lines=2)
        ask = gr.Button("Ask")
        ans = gr.Textbox(label="Answer", lines=6, interactive=False)
        ask.click(fn=answer_question, inputs=q, outputs=ans)
if __name__ == "__main__":
    demo.launch(debug=True)
prompt_template.py ADDED
@@ -0,0 +1,519 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import asyncio
3
+ import nest_asyncio
4
+ from dotenv import load_dotenv
5
# A prompt used to divide the part of the specified user experience document.
# Fills {pdf_chunk_text}; output is the cleaned/chunked text only.
Prompt_template_Chunking = """
You are a specialized document processing agent tasked with meticulously cleaning, structuring, and chunking raw text content originating from structured book sources (e.g., medical or diagnostic manuals). Adhere to the following strict guidelines to prepare this content for downstream applications such as training data, search indexing, or diagnostic referencing, ensuring absolute preservation of original semantic meaning and formatting.

INSTRUCTIONS:
1. CONTENT CLEANING:
* REMOVE: All headers, footers, and page numbers, code like F84.0 and References.
* PRESERVE: All original content, including all section titles, sub-titles, bullet points, numbered lists, and tables. Do not omit or alter any part of the original text.
* DO NOT: Summarize, rephrase, paraphrase, or alter any part of the content. Maintain the exact original wording.

2. CONTENT STRUCTURING:
* IDENTIFY HEADERS: Recognize and utilize natural section headers (e.g., "Diagnostic Criteria", "Level 1", "Level 2", "Symptoms", "Treatment", "Prognosis", "Introduction", "Summary", "Methodology") as primary paragraph separators or markers for new logical blocks.
* LOGICAL BREAKS: If explicit headers are not present, use logical breaks between distinct topics or complete ideas to segment the content.

3. CONTENT CHUNKING:
* PARAGRAPH LENGTH: Divide the cleaned and structured content into paragraphs, aiming for each paragraph to be approximately 300 to 500 words.
* SENTENCE INTEGRITY: Absolutely do not split sentences or separate parts of the same complete idea across different paragraphs. A paragraph must contain whole, coherent ideas.
* SHORTER SECTIONS: If a logical section (identified by a header or a complete idea) is naturally shorter than 300 words but represents a complete and standalone piece of information, retain it as-is without trying to pad it or merge it with unrelated content.

4. TABLE FORMATTING:
* PRESERVE EXACTLY: All tables must be preserved in their entirety, including all rows and columns.
* MARKDOWN SYNTAX: Format all tables using standard Markdown table syntax.
Example:
| Column Header A | Column Header B |
|-----------------|-----------------|
| Row 1 Value A | Row 1 Value B |
| Row 2 Value A | Row 2 Value B |

5. NO INTERPRETATION OR EXTERNAL INFORMATION:
* STRICTLY CONTENT-BASED: Do not interpret, rephrase, summarize, infer, rewrite, or add any external information, comments, or your own insights.
* OBJECTIVE PROCESSING: Base all decisions and transformations purely on the content provided to you.

Your response should be the cleaned, structured, and chunked content. Do not include any conversational filler, introductions, or conclusions; just the processed text.

{pdf_chunk_text}
"""
41
+ ######################################################################################################
42
+
43
# Step-1 prompt: detect language, translate to English, and clean up the
# user query. Fills {query}; output is the corrected English query only.
Prompt_template_translation = """

You are a friendly AI assistant. For each incoming user query, do **only** this:



1. Detect the query’s language.

2. If it isn’t English, translate it into English.

3. If it *is* English (or once translated), check for clarity & grammar. If the phrasing is unclear or ungrammatical, rephrase it into a precise, professional English sentence that preserves the original meaning.



**Output**: the final, corrected English query—nothing else.

Query: {query}

"""
62
+
63
+ #############################################################################################
64
+
65
# Step-2 prompt: binary ASD-relevance gate. Fills {corrected_query};
# output is either the literal token RELATED or a fixed rejection message.
Prompt_template_relevance = """

You are Wisal, an AI assistant specialized in Autism Spectrum Disorders (ASD).

Given the **corrected English query** from step 1, decide if it’s about ASD (e.g. symptoms, diagnosis, therapy, behavior in ASD).



- If **yes**, respond with: `RELATED`

- If **no**, respond with exactly:

“Hello I’m Wisal, an AI assistant developed by Compumacy AI, and a knowledgeable Autism specialist.

If you have any question related to autism please submit a question specifically about autism.”


**Do not** include any other text.

Query: {corrected_query}

"""
87
+
88
+ #############################################################################################
89
+ # Prompt_template_relevance = """
90
+ # You are Wisal, an AI assistant specialized in Autism Spectrum Disorders (ASD).
91
+
92
+ # Given a **corrected English query**, your task is to determine if it is specifically related to ASD — such as symptoms, diagnosis, therapies, behaviors, or other autism-related topics.
93
+
94
+ # Follow these steps:
95
+
96
+ # 1. If the query is clearly about Autism, respond with: `RELATED`
97
+
98
+ # 2. If the query is general or unclear, try to rephrase it to be Autism-specific.
99
+ # Example:
100
+ # - Original: “What are some ways that parents can reduce their stress?”
101
+ # - Rephrased: “What are some ways that parents of children with Autism can reduce their stress?”
102
+
103
+ # 3. If the query cannot be meaningfully rephrased in the context of Autism, return the polite redirection:
104
+ # **“Hello I’m Wisal, an AI assistant developed by Compumacy AI, and a knowledgeable Autism specialist.
105
+ # If you have any question related to autism please submit a question specifically about autism.”**
106
+
107
+ # **Do not add or include any other text.**
108
+
109
+ # Query: {corrected_query}
110
+ # """
111
+
112
+ #############################################################################################
113
+ # LLM Generation
114
# Direct-generation prompt: answers an autism question with no retrieved context.
# Placeholder: {new_query}.
# Fix: the previous template text was garbled ("a knowledgeable Autism .And
# Question-Answering assistant specializing in Autism.When I ask…", trailing
# "your Answer here"), which leaks broken instructions into the model input.
Prompt_template_LLM_Generation = """
You are Wisal, an AI assistant developed by Compumacy AI and a knowledgeable Autism specialist — a question-answering assistant specializing in Autism. When asked a question related to Autism, respond with a clear, concise, and accurate answer.
Question: {new_query}
Answer:
"""
119
+ ######################################################################################################
120
+
121
# Listwise reranking prompt: asks the model to sort exactly 5 numbered passages
# by semantic relevance to the query and emit only the sorted list.
# Placeholders: {new_query}, {answers_list}.
Prompt_template_Reranker= """
You are an impartial evaluator tasked with sorting and outputting text passages based on their semantic relevance to a given query. Your goal is to determine which passages most directly address the core meaning of the query.

Instructions:
You will be given a query and a list of 5 passages, each with a number identifier.
Sort and output the passages from most relevant [1] to least relevant [5].
Only provide the sorted output using the number identifiers and corresponding passage text.
Do not include explanations, rewritten content, or extra commentary.
Focus solely on semantic relevance — how directly the passage answers or relates to the query.

Input Format:
Query: {new_query}
Passages:
{answers_list}

Output Format:
[1] <passage number> <passage text>
[2] <passage number> <passage text>
[3] <passage number> <passage text>
[4] <passage number> <passage text>
[5] <passage number> <passage text>
"""
143
+
144
+ #####################################################################################################
145
+
146
# Context-grounded answering prompt (RAG): Wisal answers {new_query} using only
# the retrieved {document} text.
# Fix: the persona sentence was truncated ("a knowledgeable Autism .");
# completed as "a knowledgeable Autism specialist" to match the phrasing used
# by the sibling relevance template.
Prompt_template_Wisal= """
You are Wisal, an AI assistant developed by Compumacy AI, and a knowledgeable Autism specialist.
Your sole purpose is to provide helpful, respectful, and easy-to-understand answers about Autism Spectrum Disorder (ASD).
Always be clear, non-judgmental, and supportive.
Question: {new_query}
Answer the question based only on the provided context:
{document}

"""
155
+ ######################################################################################################################
156
# Paraphrasing prompt: requests exactly one rephrased version of {document},
# meaning-preserving, with no alternatives or commentary.
Prompt_template_paraphrasing= """
Rephrase the following passage using different words but keep the original meaning. Focus on directness and vary the phrasing for the cause.
Only give one single rephrased version — no explanations, no options.
Text : {document}

"""
162
+
163
+ #########################################################################################################
164
# Hallucination/confidence grader: rates on a 1-5 scale how well {answer}
# addresses {new_query} given {document}; the model must end its output with a
# 'Score: <rating>' line, which the caller can parse.
# NOTE(review): the identifier misspells "Hallucinations" — kept as-is since
# other modules may import this exact name; verify all importers before renaming.
Prompt_template_Halluciations= """
Evaluate how confident you are that the given Answer is a good and accurate response to the Question.
Please assign a Score using the following 5-point scale:
1: You are not confident that the Answer addresses the Question at all, the Answer may be entirely off-topic or irrelevant to the Question.
2: You have low confidence that the Answer addresses the Question, there are doubts and uncertainties about the accuracy of the Answer.
3: You have moderate confidence that the Answer addresses the Question, the Answer seems reasonably accurate and on-topic, but with room for improvement.
4: You have high confidence that the Answer addresses the Question, the Answer provides accurate information that addresses most of the Question.
5: You are extremely confident that the Answer addresses the Question, the Answer is highly accurate, relevant, and effectively addresses the Question in its entirety.
The output should strictly use the following template: Explanation: [provide a brief reasoning you used to derive the rating Score] and then write 'Score: <rating>' on the last line.
Question: {new_query}
Context:{document}
Answer: {answer}
"""
177
+ ############################################################################################################
178
+
179
# Back-translation prompt: translates the produced Answer ({document}) into the
# language of the original user Question ({query}); output is the translated
# text only, with tone and content preserved.
Prompt_template_Translate_to_original= """
You are a translation assistant. Whenever you receive a user Question, determine its language. Then take your Answer (which is currently in English or any other language) and:
If the Question is in Arabic, translate the Answer into Arabic.
Otherwise, translate the Answer into the same language as the Question.
Requirements:
Preserve the original tone and style exactly.
Don’t add, remove, or change any content beyond translating.
Do not include any extra commentary or explanations—output only the translated text.
Question: {query}
Answer : {document}
"""
190
+
191
+ ############################################################################################################
192
# Strict document-grounded QA prompt: the model may only quote verbatim from the
# user-supplied {document}; when no answer exists it must emit the fixed sentence
# “Answer not found in the document.” — keep that sentence exact.
# Placeholders: {new_query}, {document}.
Prompt_template_User_document_prompt = """

You are Wisal, an AI assistant developed by Compumacy AI, specialized in autism. When a user asks a question, you must respond only by quoting verbatim from the provided document(s). Do not add any of your own words, summaries, explanations, or interpretations. If the answer cannot be found in the documents, reply with exactly:
“Answer not found in the document.”
Question: {new_query}
Answer the question based only on the provided context:
{document}


"""
202
+ # Prompt_template_Reranker= """
203
+ # You are an expert evaluator tasked with rating how well a given document matches a user query. Assess the document across three specific dimensions and provide a total relevance score out of 10.
204
+
205
+ # Please consider the following criteria:
206
+
207
+ # 1. Direct Answer Relevance (0–5 points):
208
+ # - Does the document directly address the core of the query?
209
+ # - Higher scores reflect more focused and pertinent content.
210
+ # - A score of 5 means the answer is highly aligned with the query.
211
+
212
+ # 2. Information Completeness (0–3 points):
213
+ # - Does the document provide sufficient detail or context to fully answer the question?
214
+ # - Is the response thorough and informative, rather than partial or vague?
215
+
216
+ # 3. Factual Accuracy (0–2 points):
217
+ # - Are the statements in the document factually correct and reliable?
218
+ # - Deduct points if any part of the document contains inaccuracies, outdated info, or misleading claims.
219
+ # Query:{query}
220
+
221
+ # Document:{document}
222
+
223
+ # """
224
+
225
+ # Prompt_template_relevant= """
226
+ # You are a grader assessing relevance of a retrieved document to a user question.
227
+ # Here is the retrieved document: {document}
228
+ # Here is the user question: {new_query}
229
+ # If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant.
230
+ # Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question.
231
+ # """
232
+
233
+ # Prompt_template_Reranker_relevant = """
234
+ # You are given a user question and two responses from two AI assistants. Your task is to act as an impartial judge
235
+ # and evaluate which response better follows the user's instructions and provides a higher-quality answer.
236
+ # First, provide your reasoning within <think> and </think> tags. This should include your evaluation criteria for
237
+ # a high-quality response, a detailed comparison of the two responses, and when helpful, a reference answer as
238
+ # part of your evaluation. Be explicit in your thought process, referencing your criteria and explaining how each
239
+ # response aligns with or deviates from them.
240
+ # Avoid any position biases and ensure that the order in which the responses were presented does not influence your
241
+ # decision. Do not allow the length of the responses to influence your evaluation. Do not favor certain names of
242
+ # the assistants. Be as objective as possible.
243
+ # Finally, assign the assistant's response a score from 0 to 10, using either an integer or a decimal with up
244
+ # to 0.1 precision, with a higher score indicating a higher-quality response that better satisfies the criteria.
245
+ # Enclose the scores within the tags <score_A> </score_A>, and <score_B> </score_B>.
246
+ # Format your output like this:
247
+ # <think> your_thinking_process </think>
248
+ # <score_A> your_score_a </score_A> <score_B> your_score_b </score_B>
249
+ # Below are the user's question and the two responses:
250
+ # [User Question]
251
+ # {instruction}
252
+ # {new_query}
253
+ # [The Start of Assistant A's Answer]
254
+ # {web_answer}
255
+ # [The End of Assistant A's Answer]
256
+ # [The Start of Assistant B's Answer]
257
+ # {generated_answer}
258
+ # [The End of Assistant B's Answer]
259
+ # """
260
+
261
+
262
+
263
+ # Prompt_template_Evaluation= """
264
+ # SYSTEM: You are a mental health concept knowledge evaluator. Your task is to assess how accurately, completely, and clearly the candidate's response defines the concept provided in the "Answer" field, taking into account the clinical context in the "History."
265
+ # USER:
266
+ # INSTRUCTIONS:
267
+
268
+ # 1. Read the "Answer" — this is the clinical concept or term to define (e.g., "Loss of interest or pleasure in activities…").
269
+ # 2. Read the "Candidate Response" — the model's definition/explanation of that concept.
270
+ # 3. Evaluate the response on:
271
+ # Definition Accuracy & Completeness: Are all core features of the concept present and correctly described?
272
+ # Clarity & Precision: Is the explanation clear, unambiguous, and clinically precise?
273
+ # Depth of Explanation: Does it include relevant examples or elaborations that demonstrate understanding?
274
+ # Relevance & Focus: Does it avoid irrelevant details and stick to the concept at hand?
275
+ # 4. Provide a single numeric score between 0 and 100:
276
+ # 0:No meaningful overlap—incorrect or missing core elements.
277
+ # 50:Some correct elements but major omissions or inaccuracies.
278
+ # 75: Mostly correct with only minor gaps or imprecisions.
279
+ # 90:Very close to a perfect definition; only small details missing.
280
+ # 100:Perfectly accurate, complete, and clear.
281
+
282
+ # Do not justify or explain—output **only** the numeric score.
283
+
284
+ # Now, evaluate the following:
285
+ # Concept to Define (Correct_Answer):
286
+ # {answer}
287
+ # Candidate Response (Response_Answer):
288
+ # {final_answer}
289
+ # """
290
+
291
+ ############################################################################################################
292
+ # ENHANCED PROMPTS FOR AUTISM CONFIDENCE SCORING AND AUTOMATIC REWRITING
293
+ ############################################################################################################
294
+
295
# Autism-relevance scorer: instructs the model to output a single 0-100 integer
# for {query}. The bucket thresholds documented here (85/70/55/40/25) are the
# same cutoffs used by the routing logic in query_utils — keep them in sync.
Prompt_template_autism_confidence = """
You are an autism specialist AI evaluating how related a query is to autism or Autism Spectrum Disorders (ASD).

Analyze the following query and provide a confidence score from 0 to 100 indicating how related it is to autism, ASD, or autism-related topics.

ENHANCED SCORING GUIDELINES:

**90-100: DIRECTLY AUTISM-RELATED**
- Explicitly mentions autism, ASD, Asperger's, autistic individuals
- Autism-specific therapies (ABA, TEACCH, social skills training)
- Autism diagnostic criteria or screening tools
- Autism-specific accommodations or support strategies

**75-89: HIGHLY AUTISM-RELEVANT (Core Symptoms & Characteristics)**
- Social communication difficulties, pragmatic language issues
- Sensory processing disorders, sensory seeking/avoiding behaviors
- Repetitive behaviors, stimming, self-regulation strategies
- Special interests, restricted interests, hyperfocus
- Executive functioning challenges in developmental context
- Theory of mind, perspective-taking difficulties

**60-74: SIGNIFICANTLY AUTISM-RELEVANT (Common Comorbidities & Related Issues)**
- Depression in children/adolescents/adults (very common in autism)
- Anxiety disorders, social anxiety, specific phobias
- ADHD symptoms, attention and hyperactivity issues
- Sleep disorders, sleep difficulties in neurodevelopmental context
- Mood regulation, emotional dysregulation, meltdowns
- Self-harm behaviors, aggression in developmental context
- Eating difficulties, food selectivity, feeding issues
- Gastrointestinal problems in neurodevelopmental context
- Toileting issues, developmental delays in self-care

**45-59: MODERATELY AUTISM-RELEVANT (Broader Developmental & Family Concerns)**
- General child development questions (when could apply to autism)
- Parent stress, family coping with special needs
- School accommodations, IEP/504 plans
- Transition planning, life skills development
- Communication aids, assistive technology
- Behavioral interventions, positive behavior support
- Inclusion strategies, peer relationships

**30-44: SOMEWHAT AUTISM-RELEVANT (General Topics with Potential Autism Applications)**
- General behavioral challenges in children
- Learning differences, cognitive development
- Social skills development (general)
- Mental health in children/adolescents
- Developmental milestones, early childhood development
- Family therapy, counseling approaches

**0-29: NOT AUTISM-RELEVANT**
- Unrelated medical conditions (unless neurological/developmental)
- General adult topics without developmental context
- Physical health unrelated to common autism comorbidities
- Non-developmental behavioral issues
- Completely unrelated topics (weather, cooking, sports, etc.)

**SPECIAL CONSIDERATIONS:**
- Questions about depression, anxiety, ADHD should score 60+ due to high comorbidity rates
- Sensory issues, sleep problems, mood regulation should score 65+
- Parent/caregiver stress and coping should score 50+
- School and educational topics should score 45+
- If query mentions children, adolescents, or developmental context, add 10-15 points
- Behavioral questions in pediatric context should score 45+

**EXAMPLES OF INDIRECT BUT HIGHLY RELEVANT QUERIES:**
- "My child has frequent meltdowns" → 70-80 (common autism behavior)
- "How to help with depression in teenagers" → 65-75 (very common in autism)
- "Sleep problems in children" → 65 (extremely common autism comorbidity)
- "ADHD and focus issues" → 65 (high comorbidity with autism)
- "Anxiety in social situations" → 70 (core autism challenge)
- "Eating problems in kids" → 60 (common autism issue)
- "Parent stress with special needs child" → 55 (autism family context)

Query: {query}

Consider the context, age implications, and potential autism connections before scoring.
Output only the numeric confidence score (0-100):
"""
373
+
374
+ ############################################################################################################
375
+
376
# Query rewriter: turns a moderately autism-relevant {query} into an explicitly
# autism-framed question while preserving the specific concern (comorbidity,
# behavior, family issue). The model is expected to output only the rewritten
# query text.
Prompt_template_autism_rewriter = """
You are an autism specialist AI assistant. Your task is to rewrite queries to make them specifically about autism or Autism Spectrum Disorders (ASD) while preserving the original intent and recognizing common comorbidities.

ENHANCED REWRITING GUIDELINES:

**For Direct Autism Topics (85-100% relevance):**
- Keep as-is, just ensure clarity and proper terminology

**For Core Autism Symptoms (70-84% relevance):**
- Frame within autism context while preserving specificity
- Examples: "sensory issues" → "sensory processing challenges in autism"

**For Comorbid Conditions (55-69% relevance):**
- Explicitly connect to autism while maintaining the specific condition focus
- Recognize high comorbidity rates and autism-specific aspects

**For Developmental/Family Concerns (40-54% relevance):**
- Frame within autism family/developmental context
- Emphasize autism-specific challenges and considerations

**COMORBIDITY-AWARE REWRITING EXAMPLES:**

Depression/Mental Health:
- "How to help with depression in teenagers?" → "How to support teenagers with autism who are experiencing depression?"
- "Managing anxiety" → "Managing anxiety in individuals with autism spectrum disorders"

ADHD/Attention Issues:
- "ADHD symptoms in children" → "Understanding ADHD symptoms in children with autism (dual diagnosis)"
- "Focus and attention problems" → "Addressing attention and focus challenges in autism"

Sleep & Behavioral Issues:
- "Sleep problems in kids" → "Managing sleep difficulties in children with autism"
- "Child having meltdowns" → "Understanding and managing meltdowns in autism"
- "Aggressive behavior" → "Addressing aggressive behaviors in individuals with autism"

Sensory & Regulatory Issues:
- "Sensory processing problems" → "Sensory processing disorders in autism spectrum conditions"
- "Emotional regulation" → "Supporting emotional regulation in autism"

Family & Educational:
- "Parent stress with special needs child" → "Supporting parents of children with autism: managing stress and building resilience"
- "School accommodations" → "Educational accommodations and supports for students with autism"
- "Social skills development" → "Social skills training and development for individuals with autism"

Feeding & Development:
- "Eating problems in children" → "Addressing feeding difficulties and food selectivity in autism"
- "Developmental delays" → "Understanding developmental patterns and delays in autism spectrum disorders"

**REWRITING PRINCIPLES:**
1. Always maintain the specific concern (depression, sleep, behavior, etc.)
2. Explicitly connect to autism context
3. Use person-first or identity-first language appropriately
4. Preserve the question type and intent
5. Add autism-specific considerations when relevant
6. For comorbid conditions, acknowledge the dual nature

**AVOID:**
- Generic "autism-related" phrases
- Losing the specific concern in overly broad rewriting
- Ignoring the comorbidity aspect
- Making assumptions about causation

Original Query: {query}

Rewritten autism-specific query:
"""
442
+
443
+ ############################################################################################################
444
+
445
# Answer-relevance grader: scores {answer} 0-100 for autism focus; per the
# prompt's own threshold, scores >= 50 count as adequately autism-relevant.
# The model must output only the bare numeric score, which the caller parses.
Prompt_template_answer_autism_relevance = """
You are an autism specialist evaluating whether an answer is sufficiently related to autism or Autism Spectrum Disorders (ASD).

Analyze the following answer and determine if it is adequately focused on autism-related content, including common comorbidities and associated conditions.

ENHANCED AUTISM-RELEVANCE CRITERIA:

**85-100: HIGHLY AUTISM-RELEVANT**
- Directly mentions autism, ASD, autistic individuals, or autism-specific terms
- Discusses autism-specific interventions, therapies, or strategies
- Addresses autism diagnostic criteria or assessment
- Covers autism-specific accommodations or support systems

**70-84: STRONGLY AUTISM-RELEVANT**
- Discusses core autism characteristics (social communication, sensory processing, repetitive behaviors)
- Addresses autism-related developmental patterns
- Covers autism-specific educational or therapeutic approaches
- Discusses autism family dynamics or support strategies

**55-69: SIGNIFICANTLY AUTISM-RELEVANT**
- Addresses common autism comorbidities IN DEVELOPMENTAL CONTEXT:
* Depression, anxiety, or mood disorders in children/adolescents
* ADHD symptoms or attention challenges in developmental context
* Sleep disorders with sensory or behavioral components
* Feeding/eating difficulties with sensory aspects
* Self-regulation or emotional dysregulation issues
- Discusses developmental delays or milestones with autism implications
- Addresses sensory processing issues (even without explicit autism mention)
- Covers behavioral challenges common in autism (meltdowns, aggression, self-harm)

**40-54: MODERATELY AUTISM-RELEVANT**
- Discusses general developmental topics that frequently apply to autism
- Addresses parent/caregiver stress in special needs context
- Covers educational accommodations or special needs support
- Discusses social skills development or peer relationships in developmental context
- Addresses communication challenges or assistive technology

**25-39: SOMEWHAT AUTISM-RELEVANT**
- General child development information that could apply to autism
- Basic behavioral strategies that might be relevant
- General mental health information in pediatric context
- Family therapy or support approaches

**0-24: NOT SUFFICIENTLY AUTISM-RELEVANT**
- Purely general medical information without developmental context
- Adult-focused content without autism or developmental relevance
- Generic advice without special needs consideration
- Completely unrelated topics

**SPECIAL AUTISM-RELEVANCE INDICATORS:**
✓ Mentions sensory processing, regulation, or sensory-seeking/avoiding
✓ Discusses meltdowns, stimming, or repetitive behaviors
✓ Addresses social communication or pragmatic language
✓ Covers executive functioning in developmental context
✓ Mentions special interests or restricted interests
✓ Discusses transition planning or life skills development
✓ Addresses inclusion, accommodations, or accessibility
✓ Covers co-occurring conditions with autism-specific considerations

**COMORBIDITY CONTEXT CLUES:**
- Depression/anxiety discussed with developmental, social, or sensory factors
- ADHD mentioned with autism-like symptoms or dual diagnosis considerations
- Sleep issues connected to sensory processing or routine needs
- Feeding problems involving texture, sensory, or routine aspects
- Behavioral issues described with developmental or environmental triggers

**THRESHOLD FOR ACCEPTANCE:**
- 50-100: Adequately autism-relevant, should be provided
- 0-49: Not sufficiently autism-focused, should be refused

Answer to evaluate: {answer}

Consider the developmental context, autism comorbidities, and indirect autism connections.
Output only the numeric relevance score (0-100):
"""
query_utils.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for query processing and rewriting.
3
+ """
4
import logging
import os
import re
import time

from openai import OpenAI

from prompt_template import (
    Prompt_template_translation,
    Prompt_template_relevance,
    Prompt_template_autism_confidence,
    Prompt_template_autism_rewriter,
    Prompt_template_answer_autism_relevance,
)
14
+
15
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize the OpenAI-compatible DeepInfra client.
# SECURITY: the embedded key below is kept only as a backward-compatible
# fallback — prefer setting the DEEPINFRA_API_KEY environment variable, and
# rotate/remove the hard-coded credential as soon as possible.
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "285LUJulGIprqT6hcPhiXtcrphU04FG4")
openai = OpenAI(
    api_key=DEEPINFRA_API_KEY,
    base_url="https://api.deepinfra.com/v1/openai",
)
25
+
26
def call_llm(model: str, messages: list[dict], temperature: float = 0.0, timeout: int = 30, **kwargs) -> str:
    """Send a chat-completion request and return the stripped response text.

    Args:
        model: Provider model identifier (e.g. "Qwen/Qwen3-32B").
        messages: Chat messages in OpenAI format ({"role": ..., "content": ...}).
        temperature: Sampling temperature (0.0 = deterministic).
        timeout: Per-request timeout in seconds.
        **kwargs: Extra provider options passed straight through
            (e.g. reasoning_effort).

    Returns:
        The assistant message content, or a degraded fallback when the API
        call fails: the user's original query text for translation prompts
        ("Error" if it cannot be recovered), otherwise "0" (interpreted
        downstream as "not autism-related").
    """
    try:
        logger.info("Making API call to %s with timeout %ss", model, timeout)
        start_time = time.time()

        resp = openai.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            timeout=timeout,
            **kwargs,
        )

        logger.info("API call completed in %.2fs", time.time() - start_time)
        return resp.choices[0].message.content.strip()

    except Exception as e:  # Network/provider failure: degrade gracefully.
        logger.error("API call failed: %s", e)
        first_content = messages[0]["content"] if messages else ""
        if "translation" in str(messages).lower():
            # Translation prompts embed the user text after "Query: "; echo that
            # text back, stripped of the trailing template whitespace the old
            # code used to leak into the fallback.
            if "Query: " in first_content:
                return first_content.split("Query: ")[-1].strip()
            return "Error"
        # For relevance/confidence prompts, assume "not related".
        return "0"
54
+
55
def _extract_score(response: str) -> int:
    """Return the first integer found in *response*, clamped to 0-100 (0 if none)."""
    match = re.search(r"\d+", response)
    if match is None:
        logger.warning("No numeric score found in response: %s", response)
        return 0
    return max(0, min(100, int(match.group())))


# (threshold, category, action, reasoning) — highest threshold wins; mirrors the
# buckets documented in Prompt_template_autism_confidence.
_RELEVANCE_BUCKETS = (
    (85, "directly_autism_related", "accept_as_is",
     "Directly mentions autism or autism-specific topics"),
    (70, "highly_autism_relevant", "accept_as_is",
     "Core autism symptoms or characteristics"),
    (55, "significantly_autism_relevant", "rewrite_for_autism",
     "Common comorbidity or autism-related issue"),
    (40, "moderately_autism_relevant", "rewrite_for_autism",
     "Broader developmental or family concern related to autism"),
    (25, "somewhat_autism_relevant", "conditional_rewrite",
     "General topic with potential autism applications"),
    (0, "not_autism_relevant", "reject",
     "Not related to autism or autism care"),
)


def enhanced_autism_relevance_check(query: str) -> dict:
    """Score *query*'s autism relevance and map the score to a routing decision.

    Returns:
        dict with keys:
            score: int 0-100 confidence from the LLM.
            category: coarse relevance bucket name.
            action: one of "accept_as_is", "rewrite_for_autism",
                "conditional_rewrite", "reject".
            reasoning: short human-readable justification.
        On any failure, a safe {"score": 0, ..., "action": "reject"} result.
    """
    try:
        logger.info("Enhanced autism relevance check for: '%s...'", query[:50])

        confidence_prompt = Prompt_template_autism_confidence.format(query=query)
        response = call_llm(
            model="Qwen/Qwen3-32B",
            messages=[{"role": "user", "content": confidence_prompt}],
            reasoning_effort="none",
            timeout=15,
        )

        confidence_score = _extract_score(response)

        # Table-driven bucket lookup replaces the old if/elif ladder; the last
        # bucket (threshold 0) always matches.
        for threshold, category, action, reasoning in _RELEVANCE_BUCKETS:
            if confidence_score >= threshold:
                break

        result = {
            "score": confidence_score,
            "category": category,
            "action": action,
            "reasoning": reasoning,
        }

        logger.info("Enhanced relevance result: %s", result)
        return result

    except Exception as e:
        logger.error("Error in enhanced_autism_relevance_check: %s", e)
        return {
            "score": 0,
            "category": "error",
            "action": "reject",
            "reasoning": "Error during processing",
        }
127
+
128
def check_autism_confidence(query: str) -> int:
    """Return a 0-100 autism-relevance confidence score for *query*.

    Asks the LLM for a bare numeric score via
    Prompt_template_autism_confidence and parses the first integer out of the
    reply, clamped to [0, 100]. Returns 0 when the call fails or no number can
    be parsed.
    """
    try:
        logger.info("Checking autism confidence for query: '%s...'", query[:50])

        confidence_prompt = Prompt_template_autism_confidence.format(query=query)
        response = call_llm(
            model="Qwen/Qwen3-32B",
            messages=[{"role": "user", "content": confidence_prompt}],
            reasoning_effort="none",
            timeout=15,
        )

        # Parse the first integer in the reply; previously this used a bare
        # `except:` and a function-local `import re`.
        match = re.search(r"\d+", response)
        if match is None:
            logger.warning("No numeric score found in response: %s", response)
            confidence_score = 0
        else:
            confidence_score = max(0, min(100, int(match.group())))

        logger.info("Autism confidence score: %s", confidence_score)
        return confidence_score

    except Exception as e:
        logger.error("Error in check_autism_confidence: %s", e)
        return 0
167
+
168
def rewrite_query_for_autism(query: str) -> str:
    """Rewrite *query* so it is explicitly framed around autism/ASD.

    Returns the LLM rewrite, or a simple templated fallback question when the
    call fails or yields an empty/"Error" response.
    """
    # Single fallback literal (the old code duplicated it in two branches).
    fallback = f"How does autism relate to {query.lower()}?"
    try:
        logger.info("Rewriting query for autism: '%s...'", query[:50])

        rewrite_prompt = Prompt_template_autism_rewriter.format(query=query)
        rewritten_query = call_llm(
            model="Qwen/Qwen3-32B",
            messages=[{"role": "user", "content": rewrite_prompt}],
            reasoning_effort="none",
            timeout=15,
        ).strip()

        if rewritten_query == "Error" or not rewritten_query:
            logger.warning("Rewriting failed, using fallback")
            rewritten_query = fallback

        logger.info("Query rewritten to: '%s...'", rewritten_query[:50])
        return rewritten_query

    except Exception as e:
        logger.error("Error in rewrite_query_for_autism: %s", e)
        return fallback
195
+
196
def check_answer_autism_relevance(answer: str) -> int:
    """Return a 0-100 score for how autism-focused *answer* is.

    Used for document-based queries to filter out non-autism answers (the
    grading prompt treats >= 50 as acceptable). Parses the first integer in
    the LLM reply, clamped to [0, 100]; returns 0 on failure.
    """
    try:
        logger.info("Checking answer autism relevance for: '%s...'", answer[:50])

        relevance_prompt = Prompt_template_answer_autism_relevance.format(answer=answer)
        response = call_llm(
            model="Qwen/Qwen3-32B",
            messages=[{"role": "user", "content": relevance_prompt}],
            reasoning_effort="none",
            timeout=15,
        )

        # Parse the first integer in the reply; previously this used a bare
        # `except:` and a function-local `import re`.
        match = re.search(r"\d+", response)
        if match is None:
            logger.warning("No numeric score found in response: %s", response)
            relevance_score = 0
        else:
            relevance_score = max(0, min(100, int(match.group())))

        logger.info("Answer autism relevance score: %s", relevance_score)
        return relevance_score

    except Exception as e:
        logger.error("Error in check_answer_autism_relevance: %s", e)
        return 0
233
+
234
def process_query_for_rewrite(query: str) -> tuple[str, bool, str]:
    """
    Enhanced query processing with sophisticated autism relevance detection.

    ENHANCED LOGIC (confidence score -> action):
        85-100 -> directly autism-related, use as-is
        70-84  -> highly autism-relevant (core symptoms), use as-is
        55-69  -> significantly autism-relevant (comorbidities), rewrite for autism
        40-54  -> moderately autism-relevant, rewrite for autism
        25-39  -> somewhat relevant, conditional rewrite (currently auto-rewrite)
        0-24   -> not autism-related, reject

    Returns:
        (processed_query, is_autism_related, rewritten_query_if_needed)
    """
    try:
        logger.info(f"Processing query with enhanced confidence logic: '{query[:50]}...'")
        start_time = time.time()

        # Step 1: Translate and correct the query
        logger.info("Step 1: Translating/correcting query")
        corrected_query = call_llm(
            model="Qwen/Qwen3-32B",
            messages=[{"role": "user", "content": Prompt_template_translation.format(query=query)}],
            reasoning_effort="none",
            timeout=15
        )

        if corrected_query == "Error":
            logger.warning("Translation failed, using original query")
            corrected_query = query

        # Step 2: Get enhanced autism relevance analysis
        logger.info("Step 2: Enhanced autism relevance checking")
        relevance_result = enhanced_autism_relevance_check(corrected_query)

        confidence_score = relevance_result["score"]
        action = relevance_result["action"]
        reasoning = relevance_result["reasoning"]

        logger.info(f"Relevance analysis: {confidence_score}% - {reasoning}")

        # Step 3: Take action based on the enhanced analysis.
        # BUG FIX: the elapsed-time log below previously sat *after* the
        # return statements of every branch and was unreachable; we now build
        # the result first, log the timing, then return through a single exit.
        if action == "accept_as_is":
            logger.info(f"High relevance ({confidence_score}%) - accepting as-is: {reasoning}")
            result = (corrected_query, True, "")
        elif action == "rewrite_for_autism":
            logger.info(f"Moderate relevance ({confidence_score}%) - rewriting for autism: {reasoning}")
            result = (rewrite_query_for_autism(corrected_query), True, "")
        elif action == "conditional_rewrite":
            # For somewhat relevant queries, automatically rewrite
            # (could be enhanced with user confirmation).
            logger.info(f"Low-moderate relevance ({confidence_score}%) - conditionally rewriting: {reasoning}")
            result = (rewrite_query_for_autism(corrected_query), True, "")
        else:  # action == "reject"
            logger.info(f"Low relevance ({confidence_score}%) - rejecting: {reasoning}")
            result = (corrected_query, False, "")

        elapsed = time.time() - start_time
        logger.info(f"Enhanced query processing completed in {elapsed:.2f}s")
        return result

    except Exception as e:
        logger.error(f"Error in process_query_for_rewrite: {e}")
        # Fallback: return original query as not autism-related
        return query, False, ""
302
+
303
def get_non_autism_response() -> str:
    """Friendly redirection message for queries that are not autism-related.

    The wording is fixed; callers display the returned text verbatim.
    """
    segments = [
        "Hi there! I appreciate you reaching out to me. I'm Wisal, and I specialize specifically in autism and Autism Spectrum Disorders. ",
        "I noticed your question isn't quite related to autism topics. I'd love to help you, but I'm most effective when answering ",
        "questions about autism, ASD, autism support strategies, therapies, or related concerns.\n\n",
        "Could you try asking me something about autism instead? I'm here and ready to help with any autism-related questions you might have! 😊",
    ]
    return "".join(segments)
309
+
310
def get_non_autism_answer_response() -> str:
    """Apology message used when a document-derived answer is off-topic.

    Returned verbatim when retrieved content fails the autism-relevance check.
    """
    segments = [
        "I'm sorry, but the information I found in the document doesn't seem to be related to autism or Autism Spectrum Disorders. ",
        "Since I'm Wisal, your autism specialist, I want to make sure I'm providing you with relevant, autism-focused information. ",
        "Could you try asking a question that's more specifically about autism? I'm here to help with any autism-related topics! 😊",
    ]
    return "".join(segments)
web_search.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import asyncio
3
+ import httpx
4
+ import nest_asyncio
5
+ from dotenv import load_dotenv
6
+
7
+ # Apply nested asyncio patch
8
+ nest_asyncio.apply()
9
+
10
# helper functions
#
# SECURITY NOTE(review): these credentials were previously committed to the
# repository as plain-text literals. They are now read from the environment
# first, with the legacy literals kept only as a backward-compatible fallback.
# The leaked keys should be rotated and the fallbacks deleted as soon as
# possible. (The duplicate OPENAI_API_KEY assignment was also removed.)
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "AIzaSyCUCivstFpC9pq_jMHMYdlPrmh9Bx97dFo")

TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "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")

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "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")

QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIiwiZXhwIjoxNzUxMDUxNzg4fQ.I9J-K7OM0BtcNKgj2d4uVM8QYAHYfFCVAyP4rlZkK2E")

QDRANT_URL = os.getenv("QDRANT_URL", "https://6a3aade6-e8ad-4a6c-a579-21f5af90b7e8.us-east4-0.gcp.cloud.qdrant.io")

WEAVIATE_URL = os.getenv("WEAVIATE_URL", "yorcqe2sqswhcaivxvt9a.c0.us-west3.gcp.weaviate.cloud")

WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "d2d0VGdZQTBmdTFlOWdDZl9tT2h3WDVWd1NpT1dQWHdGK0xjR1hYeWxicUxHVnFRazRUSjY2VlRUVlkwPV92MjAw")

DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "285LUJulGIprqT6hcPhiXtcrphU04FG4")

DEEPINFRA_BASE_URL = os.getenv("DEEPINFRA_BASE_URL", "https://api.deepinfra.com/v1/openai")

# Try to import tavily with fallback so the application still starts when
# the optional dependency is absent (web search then degrades gracefully).
try:
    from tavily import TavilyClient
    tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
    TAVILY_AVAILABLE = True
except ImportError:
    print("Warning: Tavily package not found. Web search will use fallback mode.")
    tavily_client = None
    TAVILY_AVAILABLE = False
40
+
41
async def search_autism(query: str) -> dict:
    """Query the Tavily web-search API for autism-related information.

    Returns a dict with two keys:
        "results" - list of raw search-result entries (possibly empty)
        "answer"  - Tavily's synthesized answer, or a diagnostic message
                    when search is unavailable or fails.
    """
    # Graceful degradation when the optional tavily dependency is missing.
    if not TAVILY_AVAILABLE:
        print("Web search unavailable - tavily package not installed")
        return {
            "results": [],
            "answer": "Web search functionality is currently unavailable. Please ensure all dependencies are installed.",
        }

    try:
        # NOTE(review): tavily_client.search appears to be a blocking call
        # executed on the event-loop thread despite this being a coroutine —
        # confirm and consider run_in_executor if latency matters.
        payload = tavily_client.search(
            query=query,
            max_results=5,
            search_depth="advanced",
            topic="general",
            include_answer=True,
        )
        return {
            "results": payload.get("results", []),
            "answer": payload.get("answer", ""),
        }
    except Exception as exc:
        print(f"Search error: {str(exc)}")
        return {
            "results": [],
            "answer": f"Unable to perform web search: {str(exc)}",
        }
70
+
71
+ # Test function for development
72
# Manual smoke test for development use.
async def main():
    outcome = await search_autism("autism symptoms and treatments")
    print("Search Results:")
    for hit in outcome.get("results", []):
        print(f"- {hit.get('title')} ({hit.get('url')})")
    print("\nAnswer:")
    print(outcome.get("answer", "No answer provided."))

# Script entry point.
if __name__ == "__main__":
    asyncio.run(main())