Spaces:
Runtime error
Runtime error
Upload 7 files
Browse files- Old_Document.py +175 -0
- RAG.py +123 -0
- RAG_Domain_know_doc.py +165 -0
- User_Specific_Documents.py +167 -0
- prompt_template.py +519 -0
- query_utils.py +314 -0
- web_search.py +83 -0
Old_Document.py
ADDED
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import asyncio
from dotenv import load_dotenv
import gradio as gr
from query_utils import process_query_for_rewrite, get_non_autism_response

# ─── Configuration ─────────────────────────────────────────────────────────
# SECURITY: the original file committed live API keys (OpenAI, Tavily, Gemini,
# Qdrant, Weaviate, DeepInfra) directly in source. Those credentials must be
# rotated immediately; configuration now comes from the environment / .env.
load_dotenv()

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_URL = os.getenv("QDRANT_URL")
WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")
DEEPINFRA_BASE_URL = os.getenv("DEEPINFRA_BASE_URL", "https://api.deepinfra.com/v1/openai")

if not (DEEPINFRA_API_KEY and WEAVIATE_URL and WEAVIATE_API_KEY):
    raise ValueError("Please set all required keys in .env")

# DeepInfra exposes an OpenAI-compatible API, so the OpenAI SDK is reused.
from openai import OpenAI
openai = OpenAI(
    api_key=DEEPINFRA_API_KEY,
    base_url=DEEPINFRA_BASE_URL,
)
# Weaviate client
import weaviate
from weaviate.classes.init import Auth
from contextlib import contextmanager

@contextmanager
def weaviate_client():
    """Yield a Weaviate cloud client, guaranteeing close() on exit."""
    conn = weaviate.connect_to_weaviate_cloud(
        cluster_url=WEAVIATE_URL,
        auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
        skip_init_checks=True,  # <-- This disables gRPC check
    )
    try:
        yield conn
    finally:
        conn.close()
# Global path tracker
last_uploaded_path = None

# Embed function
def embed_texts(texts: list[str], batch_size: int = 50) -> list[list[float]]:
    """Embed `texts` in batches; a failed batch yields empty placeholder vectors."""
    vectors: list[list[float]] = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        try:
            resp = openai.embeddings.create(
                model="Qwen/Qwen3-Embedding-8B",
                input=batch,
                encoding_format="float",
            )
        except Exception as e:
            # Best-effort: log and keep positional alignment with empty vectors.
            print(f"Embedding error: {e}")
            vectors.extend([] for _ in batch)
        else:
            vectors.extend(item.embedding for item in resp.data)
    return vectors
def encode_query(query: str) -> list[float] | None:
    """Return the embedding vector for a single query, or None on failure."""
    result = embed_texts([query], batch_size=1)
    return result[0] if result and result[0] else None
async def old_Document(query: str, top_k: int = 1) -> dict:
    """Retrieve up to `top_k` matching text chunks from the 'user' collection."""
    query_vec = encode_query(query)
    if not query_vec:
        return {"answer": []}

    try:
        with weaviate_client() as client:
            collection = client.collections.get("user")
            result = collection.query.near_vector(
                near_vector=query_vec,
                limit=top_k,
                return_properties=["text"],
            )
            found = getattr(result, "objects", None)
            if not found:
                return {"answer": []}
            texts = [obj.properties.get("text", "[No Text]") for obj in found]
            return {"answer": texts}
    except Exception as e:
        print("RAG Error:", e)
        return {"answer": []}
# New functions to support Gradio app
def ingest_file(path: str) -> str:
    """Remember `path` as the active old document (no indexing is performed here)."""
    global last_uploaded_path
    last_uploaded_path = path
    return f"Old document ingested: {os.path.basename(path)}"
def answer_question(query: str) -> str:
    """Answer `query` from the old document, gated by two autism-relevance checks."""
    try:
        # Rewrite / spell-correct the query and classify its topic.
        corrected_query, is_autism_related, rewritten_query = process_query_for_rewrite(query)

        # Gate 1: off-topic questions are rejected outright.
        if not is_autism_related:
            return get_non_autism_response()

        # Retrieve document chunks using the corrected query.
        retrieval = asyncio.run(old_Document(corrected_query))
        chunks = retrieval.get("answer", [])
        if not chunks:
            return "Sorry, I couldn't find relevant content in the old document."

        combined_answer = "\n".join(f"- {c}" for c in chunks)

        # Gate 2: the retrieved content itself must score as autism-related.
        from query_utils import check_answer_autism_relevance, get_non_autism_answer_response

        # Threshold of 50 matches the enhanced scoring scale used project-wide.
        if check_answer_autism_relevance(combined_answer) < 50:
            return get_non_autism_answer_response()

        return combined_answer
    except Exception as e:
        return f"Error processing your request: {e}"
# Gradio interface for Old Documents
with gr.Blocks(title="Old Documents RAG") as demo:
    gr.Markdown("## Old Documents RAG")
    query = gr.Textbox(placeholder="Your question...", lines=2, label="Ask about Old Documents")
    doc_file = gr.File(label="Upload Old Document (PDF, DOCX, TXT)")
    btn = gr.Button("Submit")
    out = gr.Textbox(label="Answer from Old Documents", lines=8, interactive=False)

    def process_old_doc(query, doc_file):
        # Saves a fresh upload (if any), records it via ingest_file, then answers.
        # NOTE(review): ingest_file only records the path — nothing here embeds
        # the uploaded file into the Weaviate 'user' collection; confirm ingestion
        # happens elsewhere or answers will come only from previously indexed data.
        if doc_file:
            # Save and ingest the uploaded file
            upload_dir = os.path.join(os.path.dirname(__file__), "uploaded_docs")
            os.makedirs(upload_dir, exist_ok=True)
            # basename() strips any client-supplied directory components.
            safe_filename = os.path.basename(doc_file.name)
            save_path = os.path.join(upload_dir, safe_filename)
            with open(save_path, "wb") as f:
                # NOTE(review): assumes `doc_file` is a file-like object exposing
                # .read(); newer Gradio versions hand back a filepath instead — verify.
                f.write(doc_file.read())
            status = ingest_file(save_path)
            answer = answer_question(query)
            return f"{status}\n\n{answer}"
        else:
            # Use last uploaded file or return error if none exists
            if last_uploaded_path:
                answer = answer_question(query)
                return f"[Using previously uploaded document: {os.path.basename(last_uploaded_path)}]\n\n{answer}"
            else:
                return "No document uploaded. Please upload an old document to proceed."

    btn.click(fn=process_old_doc, inputs=[query, doc_file], outputs=out)

if __name__ == "__main__":
    demo.launch(debug=True)
RAG.py
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import asyncio
from dotenv import load_dotenv

# ─── Configuration ─────────────────────────────────────────────────────────
# SECURITY: the original file committed live API keys in source control.
# Those credentials must be rotated; all secrets now come from the environment.
load_dotenv()

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_URL = os.getenv("QDRANT_URL")
WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")
DEEPINFRA_BASE_URL = os.getenv("DEEPINFRA_BASE_URL", "https://api.deepinfra.com/v1/openai")

if not (DEEPINFRA_API_KEY and WEAVIATE_URL and WEAVIATE_API_KEY):
    raise ValueError("Please set all required keys in .env")

# Initialize DeepInfra-compatible OpenAI client
from openai import OpenAI
openai = OpenAI(
    api_key=DEEPINFRA_API_KEY,
    base_url=DEEPINFRA_BASE_URL,
)
# Weaviate imports
import weaviate
from weaviate.classes.init import Auth
from contextlib import contextmanager

@contextmanager
def weaviate_client():
    """
    Context manager that yields a Weaviate client and
    guarantees client.close() on exit.
    """
    conn = weaviate.connect_to_weaviate_cloud(
        cluster_url=WEAVIATE_URL,
        auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
        skip_init_checks=True,  # <-- This disables gRPC check
    )
    try:
        yield conn
    finally:
        conn.close()
def embed_texts(texts: list[str], batch_size: int = 50) -> list[list[float]]:
    """Embed texts in batches to avoid API limits."""
    all_embeddings: list[list[float]] = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i : i + batch_size]
        try:
            resp = openai.embeddings.create(
                model="Qwen/Qwen3-Embedding-8B",
                input=batch,
                encoding_format="float",
            )
        except Exception as e:
            # Keep positional alignment: one empty vector per failed item.
            print(f"Embedding batch error (items {i}–{i+len(batch)-1}): {e}")
            all_embeddings.extend([] for _ in batch)
        else:
            all_embeddings.extend(item.embedding for item in resp.data)
    return all_embeddings
def encode_query(query: str) -> list[float] | None:
    """Generate a single embedding vector for a query string."""
    embs = embed_texts([query], batch_size=1)
    if not (embs and embs[0]):
        print("Failed to generate query embedding.")
        return None
    print("Query embedding (first 5 dims):", embs[0][:5])
    return embs[0]
async def rag_autism(query: str, top_k: int = 3) -> dict:
    """
    Run a RAG retrieval on the 'Books' collection in Weaviate.
    Returns up to `top_k` matching text chunks.
    """
    query_vec = encode_query(query)
    if not query_vec:
        return {"answer": []}

    try:
        with weaviate_client() as client:
            books = client.collections.get("Books")
            result = books.query.near_vector(
                near_vector=query_vec,
                limit=top_k,
                return_properties=["text"],
            )
            found = getattr(result, "objects", None)
            if not found:
                return {"answer": []}
            texts = [obj.properties.get("text", "[No Text]") for obj in found]
            return {"answer": texts}
    except Exception as e:
        print("RAG Error:", e)
        return {"answer": []}
110 |
+
|
111 |
+
# Example test harness
|
112 |
+
# if __name__ == "__main__":
|
113 |
+
# test_queries = [
|
114 |
+
# "What are the common early signs of autism in young children?",
|
115 |
+
# "What diagnostic criteria are used for autism spectrum disorder?",
|
116 |
+
# "What support strategies help improve communication skills in autistic individuals?"
|
117 |
+
# ]
|
118 |
+
# for q in test_queries:
|
119 |
+
# print(f"\nQuery: {q}")
|
120 |
+
# out = asyncio.run(rag_autism(q, top_k=3))
|
121 |
+
# print("Retrieved contexts:")
|
122 |
+
# for idx, ctx in enumerate(out["answer"], 1):
|
123 |
+
# print(f"{idx}. {ctx}")
|
RAG_Domain_know_doc.py
ADDED
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import gradio as gr
from openai import OpenAI
import weaviate
from weaviate.classes.init import Auth
import pypdf  # Replaced PyPDF2
import docx
from langchain.text_splitter import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
from prompt_template import (
    Prompt_template_translation,
    Prompt_template_LLM_Generation,
    Prompt_template_Reranker,
    Prompt_template_Wisal,
    Prompt_template_Halluciations,
    Prompt_template_paraphrasing,
    Prompt_template_Translate_to_original,
    Prompt_template_relevance
)
from query_utils import process_query_for_rewrite, get_non_autism_response

# ─── Configuration ─────────────────────────────────────────────────────────────
# SECURITY: the original file committed live API keys in source control.
# Rotate those credentials; all secrets now come from the environment / .env.
load_dotenv()

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_URL = os.getenv("QDRANT_URL")
WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")
DEEPINFRA_BASE_URL = os.getenv("DEEPINFRA_BASE_URL", "https://api.deepinfra.com/v1/openai")

if not (DEEPINFRA_API_KEY and WEAVIATE_URL and WEAVIATE_API_KEY):
    raise ValueError("Please set all required keys in .env")

# DeepInfra exposes an OpenAI-compatible API; reuse the OpenAI SDK client.
openai = OpenAI(
    api_key=DEEPINFRA_API_KEY,
    base_url=DEEPINFRA_BASE_URL,
)

# Initialize Weaviate client (module-level, shared by ingest/query below).
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
    skip_init_checks=True,  # <-- This disables gRPC check
)
# ─── Utility: Extract raw text ──────────────────────────────────────────────────
def extract_text(file_path: str) -> str:
    """Return the plain text of a PDF, DOCX, or TXT file; raise ValueError otherwise."""
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        pages = []
        with open(file_path, "rb") as f:
            reader = pypdf.PdfReader(f)
            for page in reader.pages:
                # extract_text() may return None for image-only pages.
                pages.append((page.extract_text() or "") + "\n")
        return "".join(pages)
    if ext == ".docx":
        doc = docx.Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs)
    if ext == ".txt":
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()
    raise ValueError("Unsupported file format. Use PDF, DOCX, or TXT.")
# ─── Chunker & Embed ──────────────────────────────────────────────────────────
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\n\n", "\n", " "],
)

def embed_texts(texts: list[str], batch_size: int = 50) -> list[list[float]]:
    """Embed texts in batches to avoid API limits."""
    vectors: list[list[float]] = []
    for start in range(0, len(texts), batch_size):
        resp = openai.embeddings.create(
            model="Qwen/Qwen3-Embedding-8B",
            input=texts[start:start + batch_size],
            encoding_format="float",
        )
        vectors.extend(item.embedding for item in resp.data)
    return vectors
# ─── Ingest & Index ───────────────────────────────────────────────────────────
def ingest_file(file_path: str) -> str:
    """Extract, chunk, embed, and batch-insert a document into 'Books'."""
    raw = extract_text(file_path)
    texts = list(splitter.split_text(raw))
    vectors = embed_texts(texts)
    # Get the collection (lookup kept from original; batch API below addresses
    # the collection by name).
    documents = client.collections.get("Books")
    # Batch insert with new API
    with client.batch.dynamic() as batch:
        for txt, vec in zip(texts, vectors):
            batch.add_object(
                collection="Books",
                properties={"text": txt},
                vector=vec,
            )
    return f"Ingested {len(texts)} chunks from {os.path.basename(file_path)}"
# ─── Query & Answer ───────────────────────────────────────────────────────────
def answer_question(question: str) -> str:
    """Answer a question from the 'Books' collection, gated by autism-relevance checks."""
    # Process query for rewriting and relevance checking
    corrected_query, is_autism_related, rewritten_query = process_query_for_rewrite(question)

    # If not autism-related, show direct rejection message
    if not is_autism_related:
        return get_non_autism_response()

    # Use the corrected query for retrieval
    q_vec = embed_texts([corrected_query])[0]
    documents = client.collections.get("Books")
    response = documents.query.near_vector(
        near_vector=q_vec,
        limit=5,
        return_metadata=["distance"]
    )
    hits = response.objects
    context = "\n\n".join(hit.properties["text"] for hit in hits)
    print(context)
    wisal_prompt = Prompt_template_Wisal.format(new_query=corrected_query, document=context)
    # NOTE(review): `reasoning_effort` is not a standard chat.completions
    # argument — confirm the DeepInfra endpoint accepts it for Qwen3-32B.
    chat = openai.chat.completions.create(
        model="Qwen/Qwen3-32B",
        messages=[
            {"role": "user", "content": wisal_prompt
            }
        ],
        temperature=0,
        reasoning_effort="none"
    )
    initial_answer = chat.choices[0].message.content

    # NEW: Check if the generated answer is sufficiently related to autism
    from query_utils import check_answer_autism_relevance, get_non_autism_answer_response

    answer_relevance_score = check_answer_autism_relevance(initial_answer)

    # If answer relevance is below 50%, refuse the answer (updated threshold for enhanced scoring)
    if answer_relevance_score < 50:
        return get_non_autism_answer_response()

    # If sufficiently autism-related, return the answer
    return initial_answer
# ─── Gradio Interface ─────────────────────────────────────────────────────────
with gr.Blocks(title="Document Q&A with Qwen & Weaviate") as demo:
    gr.Markdown("## Upload a PDF, DOCX, or TXT and then ask away!")
    with gr.Row():
        up = gr.File(label="Select document")
        btn = gr.Button("Ingest")
        out = gr.Textbox(label="Status", interactive=False)
    # NOTE(review): assumes the uploaded object exposes `.name` as a local
    # filesystem path — confirm against the installed Gradio version.
    btn.click(fn=lambda f: ingest_file(f.name), inputs=up, outputs=out)
    with gr.Row():
        q = gr.Textbox(placeholder="Your question...", lines=2)
        ask = gr.Button("Ask")
        ans = gr.Textbox(label="Answer", lines=6, interactive=False)
    ask.click(fn=answer_question, inputs=q, outputs=ans)
if __name__ == "__main__":
    demo.launch(debug=True)
User_Specific_Documents.py
ADDED
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import gradio as gr
from openai import OpenAI
import weaviate
from weaviate.classes.init import Auth
import pypdf  # Replaced PyPDF2
import docx
from langchain.text_splitter import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
from prompt_template import (
    Prompt_template_translation,
    Prompt_template_LLM_Generation,
    Prompt_template_Reranker,
    Prompt_template_Wisal,
    Prompt_template_Halluciations,
    Prompt_template_paraphrasing,
    Prompt_template_Translate_to_original,
    Prompt_template_relevance,
    Prompt_template_User_document_prompt
)
from query_utils import process_query_for_rewrite, get_non_autism_response

# ─── Configuration ─────────────────────────────────────────────────────────────
# SECURITY: the original file committed live API keys in source control.
# Rotate those credentials; all secrets now come from the environment / .env.
load_dotenv()

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_URL = os.getenv("QDRANT_URL")
WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")
DEEPINFRA_BASE_URL = os.getenv("DEEPINFRA_BASE_URL", "https://api.deepinfra.com/v1/openai")

if not (DEEPINFRA_API_KEY and WEAVIATE_URL and WEAVIATE_API_KEY):
    raise ValueError("Please set all required keys in .env")

# DeepInfra exposes an OpenAI-compatible API; reuse the OpenAI SDK client.
openai = OpenAI(
    api_key=DEEPINFRA_API_KEY,
    base_url=DEEPINFRA_BASE_URL,
)

# Initialize Weaviate client (module-level, shared by ingest/query below).
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
    skip_init_checks=True,  # <-- This disables gRPC check
)
# ─── Utility: Extract raw text ──────────────────────────────────────────────────
def extract_text(file_path: str) -> str:
    """Return the plain text of a PDF, DOCX, or TXT file; raise ValueError otherwise."""
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        parts = []
        with open(file_path, "rb") as f:
            for page in pypdf.PdfReader(f).pages:
                # Image-only pages can yield None; substitute an empty string.
                parts.append((page.extract_text() or "") + "\n")
        return "".join(parts)
    if ext == ".docx":
        return "\n".join(p.text for p in docx.Document(file_path).paragraphs)
    if ext == ".txt":
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()
    raise ValueError("Unsupported file format. Use PDF, DOCX, or TXT.")
# ─── Chunker & Embed ──────────────────────────────────────────────────────────
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\n\n", "\n", " "],
)

def embed_texts(texts: list[str], batch_size: int = 70) -> list[list[float]]:
    """Embed texts in batches to avoid API limits."""
    vectors: list[list[float]] = []
    for start in range(0, len(texts), batch_size):
        resp = openai.embeddings.create(
            model="Qwen/Qwen3-Embedding-8B",
            input=texts[start:start + batch_size],
            encoding_format="float",
        )
        vectors.extend(item.embedding for item in resp.data)
    return vectors
# ─── Ingest & Index ───────────────────────────────────────────────────────────
def ingest_file(file_path: str) -> str:
    """Extract, chunk, embed, and batch-insert a document into 'user'."""
    raw = extract_text(file_path)
    texts = list(splitter.split_text(raw))
    vectors = embed_texts(texts)
    # Get the collection (lookup kept from original; batch API below addresses
    # the collection by name).
    documents = client.collections.get("user")
    # Batch insert with new API
    with client.batch.dynamic() as batch:
        for txt, vec in zip(texts, vectors):
            batch.add_object(
                collection="user",
                properties={"text": txt},
                vector=vec,
            )
    return f"Ingested {len(texts)} chunks from {os.path.basename(file_path)}"
# ───────────────────────────────────────────── Query & Answer ───────────────────────────────────────────────────────────
def answer_question(question: str) -> str:
    """Answer a question from the user-specific 'user' collection, with autism-relevance gating."""
    # Process query for rewriting and relevance checking
    corrected_query, is_autism_related, rewritten_query = process_query_for_rewrite(question)

    # If not autism-related, show direct rejection message
    if not is_autism_related:
        return get_non_autism_response()

    # Use the corrected query for retrieval
    q_vec = embed_texts([corrected_query])[0]
    documents = client.collections.get("user")
    response = documents.query.near_vector(
        near_vector=q_vec,
        limit=5,
        return_metadata=["distance"]
    )
    hits = response.objects
    context = "\n\n".join(hit.properties["text"] for hit in hits)
    print(context)

    UserSpecificDocument_prompt = Prompt_template_User_document_prompt.format(new_query=corrected_query, document=context)
    # NOTE(review): `reasoning_effort` is not a standard chat.completions
    # argument — confirm the DeepInfra endpoint accepts it for Qwen3-32B.
    chat = openai.chat.completions.create(
        model="Qwen/Qwen3-32B",
        messages=[
            {"role": "user", "content": UserSpecificDocument_prompt
            }
        ],
        temperature=0,
        reasoning_effort="none"
    )
    initial_answer = chat.choices[0].message.content

    # NEW: Check if the generated answer is sufficiently related to autism
    from query_utils import check_answer_autism_relevance, get_non_autism_answer_response

    answer_relevance_score = check_answer_autism_relevance(initial_answer)

    # If answer relevance is below 50%, refuse the answer (updated threshold for enhanced scoring)
    if answer_relevance_score < 50:
        return get_non_autism_answer_response()

    # If sufficiently autism-related, return the answer
    return initial_answer
# ─── Gradio Interface ─────────────────────────────────────────────────────────
with gr.Blocks(title="Document Q&A with Qwen & Weaviate") as demo:
    gr.Markdown("## Upload a PDF, DOCX, or TXT and then ask away!")
    with gr.Row():
        up = gr.File(label="Select document")
        btn = gr.Button("Ingest")
        out = gr.Textbox(label="Status", interactive=False)
    # NOTE(review): assumes the uploaded object exposes `.name` as a local
    # filesystem path — confirm against the installed Gradio version.
    btn.click(fn=lambda f: ingest_file(f.name), inputs=up, outputs=out)
    with gr.Row():
        q = gr.Textbox(placeholder="Your question...", lines=2)
        ask = gr.Button("Ask")
        ans = gr.Textbox(label="Answer", lines=6, interactive=False)
    ask.click(fn=answer_question, inputs=q, outputs=ans)
if __name__ == "__main__":
    demo.launch(debug=True)
prompt_template.py
ADDED
@@ -0,0 +1,519 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import asyncio
|
3 |
+
import nest_asyncio
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
# Prompt used to clean, structure, and chunk raw text extracted from
# structured book sources (e.g. diagnostic manuals) before indexing.
# Placeholder: {pdf_chunk_text} — the raw extracted text to process.
Prompt_template_Chunking = """
You are a specialized document processing agent tasked with meticulously cleaning, structuring, and chunking raw text content originating from structured book sources (e.g., medical or diagnostic manuals). Adhere to the following strict guidelines to prepare this content for downstream applications such as training data, search indexing, or diagnostic referencing, ensuring absolute preservation of original semantic meaning and formatting.

INSTRUCTIONS:
1. CONTENT CLEANING:
   * REMOVE: All headers, footers, and page numbers, code like F84.0 and References.
   * PRESERVE: All original content, including all section titles, sub-titles, bullet points, numbered lists, and tables. Do not omit or alter any part of the original text.
   * DO NOT: Summarize, rephrase, paraphrase, or alter any part of the content. Maintain the exact original wording.

2. CONTENT STRUCTURING:
   * IDENTIFY HEADERS: Recognize and utilize natural section headers (e.g., "Diagnostic Criteria", "Level 1", "Level 2", "Symptoms", "Treatment", "Prognosis", "Introduction", "Summary", "Methodology") as primary paragraph separators or markers for new logical blocks.
   * LOGICAL BREAKS: If explicit headers are not present, use logical breaks between distinct topics or complete ideas to segment the content.

3. CONTENT CHUNKING:
   * PARAGRAPH LENGTH: Divide the cleaned and structured content into paragraphs, aiming for each paragraph to be approximately 300 to 500 words.
   * SENTENCE INTEGRITY: Absolutely do not split sentences or separate parts of the same complete idea across different paragraphs. A paragraph must contain whole, coherent ideas.
   * SHORTER SECTIONS: If a logical section (identified by a header or a complete idea) is naturally shorter than 300 words but represents a complete and standalone piece of information, retain it as-is without trying to pad it or merge it with unrelated content.

4. TABLE FORMATTING:
   * PRESERVE EXACTLY: All tables must be preserved in their entirety, including all rows and columns.
   * MARKDOWN SYNTAX: Format all tables using standard Markdown table syntax.
   Example:
   | Column Header A | Column Header B |
   |-----------------|-----------------|
   | Row 1 Value A   | Row 1 Value B   |
   | Row 2 Value A   | Row 2 Value B   |

5. NO INTERPRETATION OR EXTERNAL INFORMATION:
   * STRICTLY CONTENT-BASED: Do not interpret, rephrase, summarize, infer, rewrite, or add any external information, comments, or your own insights.
   * OBJECTIVE PROCESSING: Base all decisions and transformations purely on the content provided to you.

Your response should be the cleaned, structured, and chunked content. Do not include any conversational filler, introductions, or conclusions; just the processed text.

{pdf_chunk_text}
"""
|
41 |
+
######################################################################################################
# Step 1 of the query pipeline: detect the query language, translate to
# English if needed, and clean up grammar. Placeholder: {query}.
Prompt_template_translation = """
You are a friendly AI assistant. For each incoming user query, do **only** this:

1. Detect the query’s language.
2. If it isn’t English, translate it into English.
3. If it *is* English (or once translated), check for clarity & grammar. If the phrasing is unclear or ungrammatical, rephrase it into a precise, professional English sentence that preserves the original meaning.

**Output**: the final, corrected English query—nothing else.

Query: {query}
"""
|
62 |
+
|
63 |
+
#############################################################################################
# Step 2: binary relevance gate. The model must answer exactly `RELATED`
# or the fixed refusal message. Placeholder: {corrected_query} — the
# English query produced by Prompt_template_translation.
Prompt_template_relevance = """
You are Wisal, an AI assistant specialized in Autism Spectrum Disorders (ASD).

Given the **corrected English query** from step 1, decide if it’s about ASD (e.g. symptoms, diagnosis, therapy, behavior in ASD).

- If **yes**, respond with: `RELATED`
- If **no**, respond with exactly:

“Hello I’m Wisal, an AI assistant developed by Compumacy AI, and a knowledgeable Autism specialist.
If you have any question related to autism please submit a question specifically about autism.”

**Do not** include any other text.

Query: {corrected_query}
"""
|
87 |
+
|
88 |
+
#############################################################################################
|
89 |
+
# Prompt_template_relevance = """
|
90 |
+
# You are Wisal, an AI assistant specialized in Autism Spectrum Disorders (ASD).
|
91 |
+
|
92 |
+
# Given a **corrected English query**, your task is to determine if it is specifically related to ASD — such as symptoms, diagnosis, therapies, behaviors, or other autism-related topics.
|
93 |
+
|
94 |
+
# Follow these steps:
|
95 |
+
|
96 |
+
# 1. If the query is clearly about Autism, respond with: `RELATED`
|
97 |
+
|
98 |
+
# 2. If the query is general or unclear, try to rephrase it to be Autism-specific.
|
99 |
+
# Example:
|
100 |
+
# - Original: “What are some ways that parents can reduce their stress?”
|
101 |
+
# - Rephrased: “What are some ways that parents of children with Autism can reduce their stress?”
|
102 |
+
|
103 |
+
# 3. If the query cannot be meaningfully rephrased in the context of Autism, return the polite redirection:
|
104 |
+
# **“Hello I’m Wisal, an AI assistant developed by Compumacy AI, and a knowledgeable Autism specialist.
|
105 |
+
# If you have any question related to autism please submit a question specifically about autism.”**
|
106 |
+
|
107 |
+
# **Do not add or include any other text.**
|
108 |
+
|
109 |
+
# Query: {corrected_query}
|
110 |
+
# """
|
111 |
+
|
112 |
+
#############################################################################################
# LLM Generation: direct (context-free) answer generation prompt.
# Placeholder: {new_query}. The original prompt text was garbled
# ("a knowledgeable Autism .And Question-Answering assistant...");
# fixed here since the prompt wording is what the model actually sees.
Prompt_template_LLM_Generation = """
You are Wisal, an AI assistant developed by Compumacy AI, and a knowledgeable Autism specialist and question-answering assistant specializing in Autism. When I ask a question related to Autism, respond with a clear, concise, and accurate answer.
Question: {new_query}
your Answer here
"""
|
119 |
+
######################################################################################################
# Reranker prompt: sorts exactly 5 candidate passages by semantic relevance
# to the query. Placeholders: {new_query}, {answers_list}.
Prompt_template_Reranker = """
You are an impartial evaluator tasked with sorting and outputting text passages based on their semantic relevance to a given query. Your goal is to determine which passages most directly address the core meaning of the query.

Instructions:
You will be given a query and a list of 5 passages, each with a number identifier.
Sort and output the passages from most relevant [1] to least relevant [5].
Only provide the sorted output using the number identifiers and corresponding passage text.
Do not include explanations, rewritten content, or extra commentary.
Focus solely on semantic relevance — how directly the passage answers or relates to the query.

Input Format:
Query: {new_query}
Passages:
{answers_list}

Output Format:
[1] <passage number> <passage text>
[2] <passage number> <passage text>
[3] <passage number> <passage text>
[4] <passage number> <passage text>
[5] <passage number> <passage text>
"""
|
143 |
+
|
144 |
+
#####################################################################################################
# RAG answer prompt: answer strictly from the retrieved context.
# Placeholders: {new_query}, {document}. The original text was garbled
# ("a knowledgeable Autism ."); fixed since the prompt wording is behavior.
Prompt_template_Wisal = """
You are Wisal, an AI assistant developed by Compumacy AI, and a knowledgeable Autism specialist.
Your sole purpose is to provide helpful, respectful, and easy-to-understand answers about Autism Spectrum Disorder (ASD).
Always be clear, non-judgmental, and supportive.
Question: {new_query}
Answer the question based only on the provided context:
{document}
"""
|
155 |
+
######################################################################################################################
# Paraphrasing prompt: produce exactly one rephrased version of the text.
# Placeholder: {document}.
Prompt_template_paraphrasing = """
Rephrase the following passage using different words but keep the original meaning. Focus on directness and vary the phrasing for the cause.
Only give one single rephrased version — no explanations, no options.
Text : {document}
"""
|
162 |
+
|
163 |
+
#########################################################################################################
# Hallucination / confidence check: rate an answer 1-5 against the question
# and context; output ends with 'Score: <rating>'.
# Placeholders: {new_query}, {document}, {answer}.
# NOTE(review): the name is misspelled ("Halluciations") but is kept as-is
# because other modules import it by this name.
Prompt_template_Halluciations = """
Evaluate how confident you are that the given Answer is a good and accurate response to the Question.
Please assign a Score using the following 5-point scale:
1: You are not confident that the Answer addresses the Question at all, the Answer may be entirely off-topic or irrelevant to the Question.
2: You have low confidence that the Answer addresses the Question, there are doubts and uncertainties about the accuracy of the Answer.
3: You have moderate confidence that the Answer addresses the Question, the Answer seems reasonably accurate and on-topic, but with room for improvement.
4: You have high confidence that the Answer addresses the Question, the Answer provides accurate information that addresses most of the Question.
5: You are extremely confident that the Answer addresses the Question, the Answer is highly accurate, relevant, and effectively addresses the Question in its entirety.
The output should strictly use the following template: Explanation: [provide a brief reasoning you used to derive the rating Score] and then write 'Score: <rating>' on the last line.
Question: {new_query}
Context:{document}
Answer: {answer}
"""
|
177 |
+
############################################################################################################
# Back-translation prompt: translate the final answer into the language of
# the user's original question. Placeholders: {query}, {document}.
Prompt_template_Translate_to_original = """
You are a translation assistant. Whenever you receive a user Question, determine its language. Then take your Answer (which is currently in English or any other language) and:
If the Question is in Arabic, translate the Answer into Arabic.
Otherwise, translate the Answer into the same language as the Question.
Requirements:
Preserve the original tone and style exactly.
Don’t add, remove, or change any content beyond translating.
Do not include any extra commentary or explanations—output only the translated text.
Question: {query}
Answer : {document}
"""
|
190 |
+
|
191 |
+
############################################################################################################
# User-document QA prompt: answer only with verbatim quotes from the
# provided document(s), or the fixed "not found" message.
# Placeholders: {new_query}, {document}.
Prompt_template_User_document_prompt = """
You are Wisal, an AI assistant developed by Compumacy AI, specialized in autism. When a user asks a question, you must respond only by quoting verbatim from the provided document(s). Do not add any of your own words, summaries, explanations, or interpretations. If the answer cannot be found in the documents, reply with exactly:
“Answer not found in the document.”
Question: {new_query}
Answer the question based only on the provided context:
{document}
"""
|
202 |
+
# Prompt_template_Reranker= """
|
203 |
+
# You are an expert evaluator tasked with rating how well a given document matches a user query. Assess the document across three specific dimensions and provide a total relevance score out of 10.
|
204 |
+
|
205 |
+
# Please consider the following criteria:
|
206 |
+
|
207 |
+
# 1. Direct Answer Relevance (0–5 points):
|
208 |
+
# - Does the document directly address the core of the query?
|
209 |
+
# - Higher scores reflect more focused and pertinent content.
|
210 |
+
# - A score of 5 means the answer is highly aligned with the query.
|
211 |
+
|
212 |
+
# 2. Information Completeness (0–3 points):
|
213 |
+
# - Does the document provide sufficient detail or context to fully answer the question?
|
214 |
+
# - Is the response thorough and informative, rather than partial or vague?
|
215 |
+
|
216 |
+
# 3. Factual Accuracy (0–2 points):
|
217 |
+
# - Are the statements in the document factually correct and reliable?
|
218 |
+
# - Deduct points if any part of the document contains inaccuracies, outdated info, or misleading claims.
|
219 |
+
# Query:{query}
|
220 |
+
|
221 |
+
# Document:{document}
|
222 |
+
|
223 |
+
# """
|
224 |
+
|
225 |
+
# Prompt_template_relevant= """
|
226 |
+
# You are a grader assessing relevance of a retrieved document to a user question.
|
227 |
+
# Here is the retrieved document: {document}
|
228 |
+
# Here is the user question: {new_query}
|
229 |
+
# If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant.
|
230 |
+
# Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question.
|
231 |
+
# """
|
232 |
+
|
233 |
+
# Prompt_template_Reranker_relevant = """
|
234 |
+
# You are given a user question and two responses from two AI assistants. Your task is to act as an impartial judge
|
235 |
+
# and evaluate which response better follows the user's instructions and provides a higher-quality answer.
|
236 |
+
# First, provide your reasoning within <think> and </think> tags. This should include your evaluation criteria for
|
237 |
+
# a high-quality response, a detailed comparison of the two responses, and when helpful, a reference answer as
|
238 |
+
# part of your evaluation. Be explicit in your thought process, referencing your criteria and explaining how each
|
239 |
+
# response aligns with or deviates from them.
|
240 |
+
# Avoid any position biases and ensure that the order in which the responses were presented does not influence your
|
241 |
+
# decision. Do not allow the length of the responses to influence your evaluation. Do not favor certain names of
|
242 |
+
# the assistants. Be as objective as possible.
|
243 |
+
# Finally, assign the assistant's response a score from 0 to 10, using either an integer or a decimal with up
|
244 |
+
# to 0.1 precision, with a higher score indicating a higher-quality response that better satisfies the criteria.
|
245 |
+
# Enclose the scores within the tags <score_A> </score_A>, and <score_B> </score_B>.
|
246 |
+
# Format your output like this:
|
247 |
+
# <think> your_thinking_process </think>
|
248 |
+
# <score_A> your_score_a </score_A> <score_B> your_score_b </score_B>
|
249 |
+
# Below are the user's question and the two responses:
|
250 |
+
# [User Question]
|
251 |
+
# {instruction}
|
252 |
+
# {new_query}
|
253 |
+
# [The Start of Assistant A's Answer]
|
254 |
+
# {web_answer}
|
255 |
+
# [The End of Assistant A's Answer]
|
256 |
+
# [The Start of Assistant B's Answer]
|
257 |
+
# {generated_answer}
|
258 |
+
# [The End of Assistant B's Answer]
|
259 |
+
# """
|
260 |
+
|
261 |
+
|
262 |
+
|
263 |
+
# Prompt_template_Evaluation= """
|
264 |
+
# SYSTEM: You are a mental health concept knowledge evaluator. Your task is to assess how accurately, completely, and clearly the candidate's response defines the concept provided in the "Answer" field, taking into account the clinical context in the "History."
|
265 |
+
# USER:
|
266 |
+
# INSTRUCTIONS:
|
267 |
+
|
268 |
+
# 1. Read the "Answer" — this is the clinical concept or term to define (e.g., "Loss of interest or pleasure in activities…").
|
269 |
+
# 2. Read the "Candidate Response" — the model's definition/explanation of that concept.
|
270 |
+
# 3. Evaluate the response on:
|
271 |
+
# Definition Accuracy & Completeness: Are all core features of the concept present and correctly described?
|
272 |
+
# Clarity & Precision: Is the explanation clear, unambiguous, and clinically precise?
|
273 |
+
# Depth of Explanation: Does it include relevant examples or elaborations that demonstrate understanding?
|
274 |
+
# Relevance & Focus: Does it avoid irrelevant details and stick to the concept at hand?
|
275 |
+
# 4. Provide a single numeric score between 0 and 100:
|
276 |
+
# 0:No meaningful overlap—incorrect or missing core elements.
|
277 |
+
# 50:Some correct elements but major omissions or inaccuracies.
|
278 |
+
# 75: Mostly correct with only minor gaps or imprecisions.
|
279 |
+
# 90:Very close to a perfect definition; only small details missing.
|
280 |
+
# 100:Perfectly accurate, complete, and clear.
|
281 |
+
|
282 |
+
# Do not justify or explain—output **only** the numeric score.
|
283 |
+
|
284 |
+
# Now, evaluate the following:
|
285 |
+
# Concept to Define (Correct_Answer):
|
286 |
+
# {answer}
|
287 |
+
# Candidate Response (Response_Answer):
|
288 |
+
# {final_answer}
|
289 |
+
# """
|
290 |
+
|
291 |
+
############################################################################################################
# ENHANCED PROMPTS FOR AUTISM CONFIDENCE SCORING AND AUTOMATIC REWRITING
############################################################################################################

# Scores a query 0-100 for autism-relatedness; the model must output only
# the numeric score. Placeholder: {query}.
Prompt_template_autism_confidence = """
You are an autism specialist AI evaluating how related a query is to autism or Autism Spectrum Disorders (ASD).

Analyze the following query and provide a confidence score from 0 to 100 indicating how related it is to autism, ASD, or autism-related topics.

ENHANCED SCORING GUIDELINES:

**90-100: DIRECTLY AUTISM-RELATED**
- Explicitly mentions autism, ASD, Asperger's, autistic individuals
- Autism-specific therapies (ABA, TEACCH, social skills training)
- Autism diagnostic criteria or screening tools
- Autism-specific accommodations or support strategies

**75-89: HIGHLY AUTISM-RELEVANT (Core Symptoms & Characteristics)**
- Social communication difficulties, pragmatic language issues
- Sensory processing disorders, sensory seeking/avoiding behaviors
- Repetitive behaviors, stimming, self-regulation strategies
- Special interests, restricted interests, hyperfocus
- Executive functioning challenges in developmental context
- Theory of mind, perspective-taking difficulties

**60-74: SIGNIFICANTLY AUTISM-RELEVANT (Common Comorbidities & Related Issues)**
- Depression in children/adolescents/adults (very common in autism)
- Anxiety disorders, social anxiety, specific phobias
- ADHD symptoms, attention and hyperactivity issues
- Sleep disorders, sleep difficulties in neurodevelopmental context
- Mood regulation, emotional dysregulation, meltdowns
- Self-harm behaviors, aggression in developmental context
- Eating difficulties, food selectivity, feeding issues
- Gastrointestinal problems in neurodevelopmental context
- Toileting issues, developmental delays in self-care

**45-59: MODERATELY AUTISM-RELEVANT (Broader Developmental & Family Concerns)**
- General child development questions (when could apply to autism)
- Parent stress, family coping with special needs
- School accommodations, IEP/504 plans
- Transition planning, life skills development
- Communication aids, assistive technology
- Behavioral interventions, positive behavior support
- Inclusion strategies, peer relationships

**30-44: SOMEWHAT AUTISM-RELEVANT (General Topics with Potential Autism Applications)**
- General behavioral challenges in children
- Learning differences, cognitive development
- Social skills development (general)
- Mental health in children/adolescents
- Developmental milestones, early childhood development
- Family therapy, counseling approaches

**0-29: NOT AUTISM-RELEVANT**
- Unrelated medical conditions (unless neurological/developmental)
- General adult topics without developmental context
- Physical health unrelated to common autism comorbidities
- Non-developmental behavioral issues
- Completely unrelated topics (weather, cooking, sports, etc.)

**SPECIAL CONSIDERATIONS:**
- Questions about depression, anxiety, ADHD should score 60+ due to high comorbidity rates
- Sensory issues, sleep problems, mood regulation should score 65+
- Parent/caregiver stress and coping should score 50+
- School and educational topics should score 45+
- If query mentions children, adolescents, or developmental context, add 10-15 points
- Behavioral questions in pediatric context should score 45+

**EXAMPLES OF INDIRECT BUT HIGHLY RELEVANT QUERIES:**
- "My child has frequent meltdowns" → 70-80 (common autism behavior)
- "How to help with depression in teenagers" → 65-75 (very common in autism)
- "Sleep problems in children" → 65 (extremely common autism comorbidity)
- "ADHD and focus issues" → 65 (high comorbidity with autism)
- "Anxiety in social situations" → 70 (core autism challenge)
- "Eating problems in kids" → 60 (common autism issue)
- "Parent stress with special needs child" → 55 (autism family context)

Query: {query}

Consider the context, age implications, and potential autism connections before scoring.
Output only the numeric confidence score (0-100):
"""
|
373 |
+
|
374 |
+
############################################################################################################
# Rewrites a borderline query into an explicitly autism-focused one while
# preserving the original intent. Placeholder: {query}.
Prompt_template_autism_rewriter = """
You are an autism specialist AI assistant. Your task is to rewrite queries to make them specifically about autism or Autism Spectrum Disorders (ASD) while preserving the original intent and recognizing common comorbidities.

ENHANCED REWRITING GUIDELINES:

**For Direct Autism Topics (85-100% relevance):**
- Keep as-is, just ensure clarity and proper terminology

**For Core Autism Symptoms (70-84% relevance):**
- Frame within autism context while preserving specificity
- Examples: "sensory issues" → "sensory processing challenges in autism"

**For Comorbid Conditions (55-69% relevance):**
- Explicitly connect to autism while maintaining the specific condition focus
- Recognize high comorbidity rates and autism-specific aspects

**For Developmental/Family Concerns (40-54% relevance):**
- Frame within autism family/developmental context
- Emphasize autism-specific challenges and considerations

**COMORBIDITY-AWARE REWRITING EXAMPLES:**

Depression/Mental Health:
- "How to help with depression in teenagers?" → "How to support teenagers with autism who are experiencing depression?"
- "Managing anxiety" → "Managing anxiety in individuals with autism spectrum disorders"

ADHD/Attention Issues:
- "ADHD symptoms in children" → "Understanding ADHD symptoms in children with autism (dual diagnosis)"
- "Focus and attention problems" → "Addressing attention and focus challenges in autism"

Sleep & Behavioral Issues:
- "Sleep problems in kids" → "Managing sleep difficulties in children with autism"
- "Child having meltdowns" → "Understanding and managing meltdowns in autism"
- "Aggressive behavior" → "Addressing aggressive behaviors in individuals with autism"

Sensory & Regulatory Issues:
- "Sensory processing problems" → "Sensory processing disorders in autism spectrum conditions"
- "Emotional regulation" → "Supporting emotional regulation in autism"

Family & Educational:
- "Parent stress with special needs child" → "Supporting parents of children with autism: managing stress and building resilience"
- "School accommodations" → "Educational accommodations and supports for students with autism"
- "Social skills development" → "Social skills training and development for individuals with autism"

Feeding & Development:
- "Eating problems in children" → "Addressing feeding difficulties and food selectivity in autism"
- "Developmental delays" → "Understanding developmental patterns and delays in autism spectrum disorders"

**REWRITING PRINCIPLES:**
1. Always maintain the specific concern (depression, sleep, behavior, etc.)
2. Explicitly connect to autism context
3. Use person-first or identity-first language appropriately
4. Preserve the question type and intent
5. Add autism-specific considerations when relevant
6. For comorbid conditions, acknowledge the dual nature

**AVOID:**
- Generic "autism-related" phrases
- Losing the specific concern in overly broad rewriting
- Ignoring the comorbidity aspect
- Making assumptions about causation

Original Query: {query}

Rewritten autism-specific query:
"""
|
442 |
+
|
443 |
+
############################################################################################################
# Scores a generated ANSWER 0-100 for autism relevance (50+ = acceptable);
# the model must output only the numeric score. Placeholder: {answer}.
Prompt_template_answer_autism_relevance = """
You are an autism specialist evaluating whether an answer is sufficiently related to autism or Autism Spectrum Disorders (ASD).

Analyze the following answer and determine if it is adequately focused on autism-related content, including common comorbidities and associated conditions.

ENHANCED AUTISM-RELEVANCE CRITERIA:

**85-100: HIGHLY AUTISM-RELEVANT**
- Directly mentions autism, ASD, autistic individuals, or autism-specific terms
- Discusses autism-specific interventions, therapies, or strategies
- Addresses autism diagnostic criteria or assessment
- Covers autism-specific accommodations or support systems

**70-84: STRONGLY AUTISM-RELEVANT**
- Discusses core autism characteristics (social communication, sensory processing, repetitive behaviors)
- Addresses autism-related developmental patterns
- Covers autism-specific educational or therapeutic approaches
- Discusses autism family dynamics or support strategies

**55-69: SIGNIFICANTLY AUTISM-RELEVANT**
- Addresses common autism comorbidities IN DEVELOPMENTAL CONTEXT:
  * Depression, anxiety, or mood disorders in children/adolescents
  * ADHD symptoms or attention challenges in developmental context
  * Sleep disorders with sensory or behavioral components
  * Feeding/eating difficulties with sensory aspects
  * Self-regulation or emotional dysregulation issues
- Discusses developmental delays or milestones with autism implications
- Addresses sensory processing issues (even without explicit autism mention)
- Covers behavioral challenges common in autism (meltdowns, aggression, self-harm)

**40-54: MODERATELY AUTISM-RELEVANT**
- Discusses general developmental topics that frequently apply to autism
- Addresses parent/caregiver stress in special needs context
- Covers educational accommodations or special needs support
- Discusses social skills development or peer relationships in developmental context
- Addresses communication challenges or assistive technology

**25-39: SOMEWHAT AUTISM-RELEVANT**
- General child development information that could apply to autism
- Basic behavioral strategies that might be relevant
- General mental health information in pediatric context
- Family therapy or support approaches

**0-24: NOT SUFFICIENTLY AUTISM-RELEVANT**
- Purely general medical information without developmental context
- Adult-focused content without autism or developmental relevance
- Generic advice without special needs consideration
- Completely unrelated topics

**SPECIAL AUTISM-RELEVANCE INDICATORS:**
✓ Mentions sensory processing, regulation, or sensory-seeking/avoiding
✓ Discusses meltdowns, stimming, or repetitive behaviors
✓ Addresses social communication or pragmatic language
✓ Covers executive functioning in developmental context
✓ Mentions special interests or restricted interests
✓ Discusses transition planning or life skills development
✓ Addresses inclusion, accommodations, or accessibility
✓ Covers co-occurring conditions with autism-specific considerations

**COMORBIDITY CONTEXT CLUES:**
- Depression/anxiety discussed with developmental, social, or sensory factors
- ADHD mentioned with autism-like symptoms or dual diagnosis considerations
- Sleep issues connected to sensory processing or routine needs
- Feeding problems involving texture, sensory, or routine aspects
- Behavioral issues described with developmental or environmental triggers

**THRESHOLD FOR ACCEPTANCE:**
- 50-100: Adequately autism-relevant, should be provided
- 0-49: Not sufficiently autism-focused, should be refused

Answer to evaluate: {answer}

Consider the developmental context, autism comorbidities, and indirect autism connections.
Output only the numeric relevance score (0-100):
"""
|
query_utils.py
ADDED
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Utility functions for query processing and rewriting.
"""
import os
import time
import logging

from openai import OpenAI

from prompt_template import (
    Prompt_template_translation,
    Prompt_template_relevance,
    Prompt_template_autism_confidence,
    Prompt_template_autism_rewriter,
    Prompt_template_answer_autism_relevance,
)

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# SECURITY: API keys must not be committed to source control. Read the key
# from the environment; the hard-coded legacy value is kept only so existing
# deployments keep working — it should be rotated and then removed.
DEEPINFRA_API_KEY = os.environ.get(
    "DEEPINFRA_API_KEY", "285LUJulGIprqT6hcPhiXtcrphU04FG4"
)

# DeepInfra exposes an OpenAI-compatible API, so the standard OpenAI SDK
# client is pointed at DeepInfra's base URL.
openai = OpenAI(
    api_key=DEEPINFRA_API_KEY,
    base_url="https://api.deepinfra.com/v1/openai",
)
26 |
+
def call_llm(model: str, messages: list[dict], temperature: float = 0.0, timeout: int = 30, **kwargs) -> str:
    """Call the LLM with the given messages and return the stripped reply text.

    On any failure the function degrades gracefully instead of raising: for
    translation-style prompts it echoes back the original query text, while
    for relevance-style prompts it returns "0" (interpreted as "not related").
    """
    try:
        logger.info(f"Making API call to {model} with timeout {timeout}s")
        started = time.time()

        completion = openai.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            timeout=timeout,
            **kwargs,
        )

        elapsed = time.time() - started
        logger.info(f"API call completed in {elapsed:.2f}s")

        return completion.choices[0].message.content.strip()

    except Exception as e:
        logger.error(f"API call failed: {e}")
        # Fallback heuristics keep callers functional when the API is down.
        if "translation" not in str(messages).lower():
            # Relevance-style prompt: assume "not related".
            return "0"
        # Translation-style prompt: echo the original query text back.
        first_content = messages[0]["content"]
        return first_content.split("Query: ")[-1] if "Query: " in first_content else "Error"
|
55 |
+
def enhanced_autism_relevance_check(query: str) -> dict:
    """
    Enhanced autism relevance checking with detailed analysis.

    Asks the LLM for a 0-100 confidence score, then maps the score onto a
    category/action band.

    Returns a dict with keys:
        score     -- int in [0, 100]
        category  -- relevance band name
        action    -- one of "accept_as_is", "rewrite_for_autism",
                     "conditional_rewrite", "reject"
        reasoning -- short human-readable justification
    """
    try:
        logger.info(f"Enhanced autism relevance check for: '{query[:50]}...'")

        # Use the enhanced confidence prompt
        confidence_prompt = Prompt_template_autism_confidence.format(query=query)
        response = call_llm(
            model="Qwen/Qwen3-32B",
            messages=[{"role": "user", "content": confidence_prompt}],
            reasoning_effort="none",
            timeout=15
        )

        # Extract the first integer in the reply and clamp it to [0, 100].
        import re
        confidence_score = 0
        try:
            numbers = re.findall(r'\d+', response)
            if numbers:
                confidence_score = max(0, min(100, int(numbers[0])))
        except (ValueError, TypeError):
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; narrowed to parse failures.
            confidence_score = 0

        # Map the numeric score onto a category and the action to take.
        if confidence_score >= 85:
            category = "directly_autism_related"
            action = "accept_as_is"
            reasoning = "Directly mentions autism or autism-specific topics"
        elif confidence_score >= 70:
            category = "highly_autism_relevant"
            action = "accept_as_is"
            reasoning = "Core autism symptoms or characteristics"
        elif confidence_score >= 55:
            category = "significantly_autism_relevant"
            action = "rewrite_for_autism"
            reasoning = "Common comorbidity or autism-related issue"
        elif confidence_score >= 40:
            category = "moderately_autism_relevant"
            action = "rewrite_for_autism"
            reasoning = "Broader developmental or family concern related to autism"
        elif confidence_score >= 25:
            category = "somewhat_autism_relevant"
            action = "conditional_rewrite"
            reasoning = "General topic with potential autism applications"
        else:
            category = "not_autism_relevant"
            action = "reject"
            reasoning = "Not related to autism or autism care"

        result = {
            "score": confidence_score,
            "category": category,
            "action": action,
            "reasoning": reasoning
        }

        logger.info(f"Enhanced relevance result: {result}")
        return result

    except Exception as e:
        logger.error(f"Error in enhanced_autism_relevance_check: {e}")
        return {
            "score": 0,
            "category": "error",
            "action": "reject",
            "reasoning": "Error during processing"
        }
|
128 |
+
def check_autism_confidence(query: str) -> int:
    """
    Check autism relevance confidence score (0-100).

    Returns the confidence score as an integer; 0 on any failure
    (unparseable reply, API error, etc.).
    """
    try:
        logger.info(f"Checking autism confidence for query: '{query[:50]}...'")

        confidence_prompt = Prompt_template_autism_confidence.format(query=query)
        response = call_llm(
            model="Qwen/Qwen3-32B",
            messages=[{"role": "user", "content": confidence_prompt}],
            reasoning_effort="none",
            timeout=15
        )

        # Extract the first integer from the reply and clamp it to [0, 100].
        import re
        confidence_score = 0
        try:
            numbers = re.findall(r'\d+', response)
            if numbers:
                confidence_score = max(0, min(100, int(numbers[0])))
            else:
                logger.warning(f"No numeric score found in response: {response}")
        except (ValueError, TypeError):
            # BUG FIX: was a bare `except:`; narrowed so interrupts propagate.
            logger.error(f"Failed to parse confidence score from: {response}")
            confidence_score = 0

        logger.info(f"Autism confidence score: {confidence_score}")
        return confidence_score

    except Exception as e:
        logger.error(f"Error in check_autism_confidence: {e}")
        return 0
|
168 |
+
def rewrite_query_for_autism(query: str) -> str:
    """
    Automatically rewrite a query to be autism-specific.

    Falls back to a generic autism framing of the original query whenever the
    LLM rewrite fails or comes back empty.
    """
    fallback = f"How does autism relate to {query.lower()}?"
    try:
        logger.info(f"Rewriting query for autism: '{query[:50]}...'")

        prompt = Prompt_template_autism_rewriter.format(query=query)
        candidate = call_llm(
            model="Qwen/Qwen3-32B",
            messages=[{"role": "user", "content": prompt}],
            reasoning_effort="none",
            timeout=15
        )

        # Treat the sentinel "Error" and blank replies as failures.
        candidate = "" if candidate == "Error" else candidate.strip()
        if not candidate:
            logger.warning("Rewriting failed, using fallback")
            candidate = fallback

        logger.info(f"Query rewritten to: '{candidate[:50]}...'")
        return candidate

    except Exception as e:
        logger.error(f"Error in rewrite_query_for_autism: {e}")
        return fallback
|
196 |
+
def check_answer_autism_relevance(answer: str) -> int:
    """
    Check if an answer is sufficiently related to autism (0-100 score).

    Used for document-based queries to filter non-autism answers.
    Returns 0 on any failure (unparseable reply, API error, etc.).
    """
    try:
        logger.info(f"Checking answer autism relevance for: '{answer[:50]}...'")

        relevance_prompt = Prompt_template_answer_autism_relevance.format(answer=answer)
        response = call_llm(
            model="Qwen/Qwen3-32B",
            messages=[{"role": "user", "content": relevance_prompt}],
            reasoning_effort="none",
            timeout=15
        )

        # Extract the first integer from the reply and clamp it to [0, 100].
        import re
        relevance_score = 0
        try:
            numbers = re.findall(r'\d+', response)
            if numbers:
                relevance_score = max(0, min(100, int(numbers[0])))
            else:
                logger.warning(f"No numeric score found in response: {response}")
        except (ValueError, TypeError):
            # BUG FIX: was a bare `except:`; narrowed so interrupts propagate.
            logger.error(f"Failed to parse relevance score from: {response}")
            relevance_score = 0

        logger.info(f"Answer autism relevance score: {relevance_score}")
        return relevance_score

    except Exception as e:
        logger.error(f"Error in check_answer_autism_relevance: {e}")
        return 0
|
234 |
+
def process_query_for_rewrite(query: str) -> tuple[str, bool, str]:
    """
    Enhanced query processing with sophisticated autism relevance detection.

    NEW ENHANCED LOGIC:
    1. Score 85-100 -> Directly autism-related, use as-is
    2. Score 70-84  -> Highly autism-relevant (core symptoms), use as-is
    3. Score 55-69  -> Significantly autism-relevant (comorbidities), rewrite for autism
    4. Score 40-54  -> Moderately autism-relevant, rewrite for autism
    5. Score 25-39  -> Somewhat relevant, conditional rewrite
    6. Score 0-24   -> Not autism-related, reject

    Returns: (processed_query, is_autism_related, rewritten_query_if_needed)
    """
    try:
        logger.info(f"Processing query with enhanced confidence logic: '{query[:50]}...'")
        start_time = time.time()

        # Step 1: Translate and correct the query
        logger.info("Step 1: Translating/correcting query")
        corrected_query = call_llm(
            model="Qwen/Qwen3-32B",
            messages=[{"role": "user", "content": Prompt_template_translation.format(query=query)}],
            reasoning_effort="none",
            timeout=15
        )
        if corrected_query == "Error":
            logger.warning("Translation failed, using original query")
            corrected_query = query

        # Step 2: Get enhanced autism relevance analysis
        logger.info("Step 2: Enhanced autism relevance checking")
        relevance_result = enhanced_autism_relevance_check(corrected_query)

        confidence_score = relevance_result["score"]
        action = relevance_result["action"]
        reasoning = relevance_result["reasoning"]

        logger.info(f"Relevance analysis: {confidence_score}% - {reasoning}")

        # Step 3: Take action based on enhanced analysis.
        # BUG FIX: the elapsed-time log below used to sit *after* an
        # if/elif/else in which every branch returned, so it was unreachable
        # dead code. The result is now computed first and the timing is
        # logged before returning.
        if action == "accept_as_is":
            logger.info(f"High relevance ({confidence_score}%) - accepting as-is: {reasoning}")
            result = (corrected_query, True, "")
        elif action == "rewrite_for_autism":
            logger.info(f"Moderate relevance ({confidence_score}%) - rewriting for autism: {reasoning}")
            result = (rewrite_query_for_autism(corrected_query), True, "")
        elif action == "conditional_rewrite":
            # For somewhat relevant queries, automatically rewrite
            # (could be enhanced with user confirmation).
            logger.info(f"Low-moderate relevance ({confidence_score}%) - conditionally rewriting: {reasoning}")
            result = (rewrite_query_for_autism(corrected_query), True, "")
        else:  # action == "reject"
            logger.info(f"Low relevance ({confidence_score}%) - rejecting: {reasoning}")
            result = (corrected_query, False, "")

        elapsed = time.time() - start_time
        logger.info(f"Enhanced query processing completed in {elapsed:.2f}s")
        return result

    except Exception as e:
        logger.error(f"Error in process_query_for_rewrite: {e}")
        # Fallback: return original query as not autism-related
        return query, False, ""
|
303 |
+
def get_non_autism_response() -> str:
    """Return a more human-like response for non-autism queries."""
    parts = [
        "Hi there! I appreciate you reaching out to me. I'm Wisal, and I specialize specifically in autism and Autism Spectrum Disorders. ",
        "I noticed your question isn't quite related to autism topics. I'd love to help you, but I'm most effective when answering ",
        "questions about autism, ASD, autism support strategies, therapies, or related concerns.\n\n",
        "Could you try asking me something about autism instead? I'm here and ready to help with any autism-related questions you might have! 😊",
    ]
    return "".join(parts)
|
310 |
+
def get_non_autism_answer_response() -> str:
    """Return a more human-like response when document answers are not autism-related."""
    message = (
        "I'm sorry, but the information I found in the document doesn't seem to be related to autism or Autism Spectrum Disorders. "
        + "Since I'm Wisal, your autism specialist, I want to make sure I'm providing you with relevant, autism-focused information. "
        + "Could you try asking a question that's more specifically about autism? I'm here to help with any autism-related topics! 😊"
    )
    return message
web_search.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import asyncio
import httpx
import nest_asyncio
from dotenv import load_dotenv

# Apply nested asyncio patch (allows re-entering a running loop, e.g. notebooks)
nest_asyncio.apply()

# BUG FIX: load_dotenv was imported but never called; invoke it so a local
# .env file can actually populate the environment.
load_dotenv()

# SECURITY NOTE: these credentials were committed to source control in plain
# text and should be rotated. Environment variables now take precedence; the
# literals remain only as backward-compatible fallbacks.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "AIzaSyCUCivstFpC9pq_jMHMYdlPrmh9Bx97dFo")

TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY", "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")

# NOTE: this assignment was duplicated twice in the original file; kept once.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "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")

QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY", "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIiwiZXhwIjoxNzUxMDUxNzg4fQ.I9J-K7OM0BtcNKgj2d4uVM8QYAHYfFCVAyP4rlZkK2E")

QDRANT_URL = os.environ.get("QDRANT_URL", "https://6a3aade6-e8ad-4a6c-a579-21f5af90b7e8.us-east4-0.gcp.cloud.qdrant.io")

WEAVIATE_URL = os.environ.get("WEAVIATE_URL", "yorcqe2sqswhcaivxvt9a.c0.us-west3.gcp.weaviate.cloud")

WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY", "d2d0VGdZQTBmdTFlOWdDZl9tT2h3WDVWd1NpT1dQWHdGK0xjR1hYeWxicUxHVnFRazRUSjY2VlRUVlkwPV92MjAw")

DEEPINFRA_API_KEY = os.environ.get("DEEPINFRA_API_KEY", "285LUJulGIprqT6hcPhiXtcrphU04FG4")

DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai"

# Try to import tavily with fallback
try:
    from tavily import TavilyClient
    tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
    TAVILY_AVAILABLE = True
except ImportError:
    print("Warning: Tavily package not found. Web search will use fallback mode.")
    tavily_client = None
    TAVILY_AVAILABLE = False
|
41 |
+
async def search_autism(query: str) -> dict:
    """Performs a web search for information about autism.

    Returns a dict with keys "results" (list of Tavily hits) and "answer"
    (Tavily's synthesized answer, or an explanatory fallback string).
    """
    if not TAVILY_AVAILABLE:
        print("Web search unavailable - tavily package not installed")
        return {
            "results": [],
            "answer": "Web search functionality is currently unavailable. Please ensure all dependencies are installed."
        }

    try:
        # BUG FIX: TavilyClient.search is synchronous; calling it directly
        # inside this coroutine blocked the event loop for the whole network
        # round-trip. Run it in a worker thread instead.
        response = await asyncio.to_thread(
            tavily_client.search,
            query=query,
            max_results=5,
            search_depth="advanced",
            topic="general",
            include_answer=True
        )
        return {
            "results": response.get("results", []),
            "answer": response.get("answer", "")
        }
    except Exception as e:
        print(f"Search error: {str(e)}")
        return {
            "results": [],
            "answer": f"Unable to perform web search: {str(e)}"
        }
|
71 |
+
# Test function for development
async def main():
    result = await search_autism("autism symptoms and treatments")

    print("Search Results:")
    for hit in result.get("results", []):
        print(f"- {hit.get('title')} ({hit.get('url')})")

    print("\nAnswer:")
    print(result.get("answer", "No answer provided."))

# Run the script
if __name__ == "__main__":
    asyncio.run(main())
|