#!/usr/bin/env python
# coding: utf-8

# ## Prior Authorization Review App: PriorAuthAI

# **Import general libraries**

# In[1]:


import os
import glob
from dotenv import load_dotenv
import gradio as gr
import asyncio

import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go


# **Langchain related imports**

# In[2]:


# Loaders and splitters live in the langchain_community / langchain_text_splitters
# packages on current LangChain versions
from langchain_community.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain


# **LLM Info**

# In[3]:


# Load the OpenAI API key from a local .env file
load_dotenv()
if not os.getenv('OPENAI_API_KEY'):
    raise ValueError("OPENAI_API_KEY not found; add it to your .env file or environment")
MODEL = "gpt-4o-mini"


# **Load Data**

# In[4]:


# Each subfolder of MCS_Policydocs holds the policy PDFs for one procedure
folders = glob.glob("MCS_Policydocs/*")

documents = []
for folder in folders:
    doc_type = os.path.basename(folder)  # Extract the procedure name (folder name)

    # Load all PDFs inside the procedure folder
    loader = DirectoryLoader(folder, glob="**/*.pdf", loader_cls=PyPDFLoader)
    docs_folder = loader.load()

    # Tag each document with its procedure name
    for doc in docs_folder:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)


# In[5]:


print(f"Loaded {len(documents)} documents")


# **Split the text into chunks for manageability and token limits**

# In[6]:


# Chunking is skipped here: PyPDFLoader already yields one Document per page,
# and the policy pages are small enough to embed whole. Uncomment to re-chunk:
# text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=300)
# chunks = text_splitter.split_documents(documents)
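
# If you do want finer chunks, here is a minimal sketch (parameters are
# illustrative): RecursiveCharacterTextSplitter falls back through separators
# (paragraphs, lines, words) before cutting mid-word, so it usually produces
# cleaner chunks than CharacterTextSplitter. To use it, embed `chunks` instead
# of `documents` in the vectorstore cell below.

# In[ ]:


from langchain_text_splitters import RecursiveCharacterTextSplitter

recursive_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=300)
chunks = recursive_splitter.split_documents(documents)
print(f"Would split {len(documents)} page-level documents into {len(chunks)} chunks")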


# **Populate Vector store with embeddings for each document**

# In[7]:


embeddings = OpenAIEmbeddings()  # we will use OpenAI embeddings

# Delete if already exists
db_name = "vector_db"
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

# Create embedding vectorstore
vectorstore = Chroma.from_documents(documents=documents, embedding=embeddings, persist_directory=db_name)

print(f"Vectorstore created with {vectorstore._collection.count()} documents")


# In[8]:


# Peek at one stored vector via Chroma's underlying collection
# (_collection is a private attribute, but handy for a quick check)
collection = vectorstore._collection
sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
dimensions = len(sample_embedding)
print(f"The vectors have {dimensions:,} dimensions")


# **RAG Implementation with Langchain**

# In[9]:


from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["question", "context"],
    template="""
    You are a **Medicare Administrative Contractor (MAC) AI Assistant**, assisting nurses and medical reviewers in evaluating prior authorization requests.
    Your responses must be **accurate, structured, and policy-backed**, ensuring compliance with Medicare LCD/NCD guidelines.
    At the start of a conversation, greet the user, introduce yourself as a MAC AI Assistant, and state that you can currently answer policy questions about these four procedures:
    1. Spinal Neurostimulation, 2. Cervical Fusion, 3. MRI for Chronic Back Pain, 4. Total Joint Arthroplasty

    🔹 **How to Respond:**
    ✅ **Prioritize accuracy and relevance based on the question.**
        - If you have sufficient knowledge, answer directly.
        - If the query requires policy-based details, retrieve information from the MCS_Policydocs knowledge base.
        - If there is a conflict, always prioritize the retrieved policy data over general LLM knowledge.

    ✅ **Adapt your response format to the nature of the query:**
        - If the query calls for structured data (e.g., approval criteria, required documentation, denials), use **clear bullet points**.
        - If the query is conversational, **respond naturally** without forcing structure.
        - If it is a follow-up, **infer context** from the previous question instead of asking for unnecessary clarification.

    ✅ **Retrieving Documents:**
        - **Do not retrieve or mention sources unless necessary to answer the query.**
        - **Offer** document references, but only show them if the user explicitly asks.
        - When citing sources, **ensure they are highly relevant** to the question asked.

    ✅ **Handling Follow-up Questions:**
        - If the user asks a follow-up (e.g., "What are the denial reasons?"), assume it refers to the **previous question** unless context suggests otherwise.
        - If the follow-up is vague, try to infer its meaning before asking for clarification.
        - Do not reset the conversation flow or greet the user again in a follow-up response.

    🔹 **Context (Retrieved LCD/NCD Policies, if applicable):**
    {context}

    **📌 Answer:**
    **Question:** {question}
    """
)



# In[17]:


# Initialize the LLM (a lower temperature may suit policy review,
# where deterministic answers are preferred)
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)


# In[18]:


# Set up conversation memory for the chat; output_key="answer" tells the memory
# which chain output to store, since the chain also returns source documents
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key="answer")

# The retriever is an abstraction over the VectorStore that will be used during RAG
retriever = vectorstore.as_retriever()
# retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 5})

# Set up the ConversationalRetrievalChain with the custom prompt
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True,  # keep retrieved documents alongside the answer
    combine_docs_chain_kwargs={"prompt": prompt}
)
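

# **Quick smoke test** (an optional sketch; the question is illustrative, and the
# memory is cleared afterwards so the Gradio chat below starts fresh)

# In[ ]:


result = conversation_chain.invoke({"question": "What documentation is required for a cervical fusion request?"})
print(result["answer"])
memory.clear()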


# **Now Implement in Gradio UI**

# In[19]:


retrieved_docs = {}  # cache of the documents retrieved for the most recent query

def chat(message, history):
    # Handle explicit requests for sources
    if message.lower() in ["show sources", "show documents", "provide references"]:
        docs = retrieved_docs.get("last_retrieval")
        if docs:
            response = "**Retrieved Documents:**"
            for i, doc in enumerate(docs):
                response += f"\n📄 {i+1}. {doc.metadata.get('source', 'Unknown')}\nSnippet: {doc.page_content[:200]}...\n"
            return response
        return "No sources available for the last query."

    # Run the RAG chain to generate a response
    result = conversation_chain.invoke({"question": message})
    answer = result["answer"]
    documents = result["source_documents"]

    # Store retrieved documents so a follow-up "show sources" can display them
    if documents:
        retrieved_docs["last_retrieval"] = documents

    # Offer documents, but only show them when requested
    return f"**Answer:** {answer}\n\n_(Type 'show sources' if you need document references.)_"


# In[20]:


# Launch the Gradio chat UI
ui = gr.ChatInterface(fn=chat, title="PriorAuthAI: RAG-based Prior Authorization Review Chatbot")
ui.launch()

