#!/usr/bin/env python
# coding: utf-8

# ## Prior Authorization Review APP: PriorAuthAI

# **Import general libraries**

# In[1]:

import os
import glob
from dotenv import load_dotenv
import gradio as gr
import asyncio
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go

# **Langchain related imports**

# In[2]:

from langchain.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# **LLM Info**

# In[3]:

load_dotenv()
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
MODEL = "gpt-4o-mini"
# **Load Data**

# In[4]:

# Each subfolder of MCS_Policydocs holds the policy PDFs for one procedure
folders = glob.glob("MCS_Policydocs/*")

documents = []
for folder in folders:
    doc_type = os.path.basename(folder)  # Extract the procedure name (folder name)
    # Load all PDFs inside the procedure folder
    loader = DirectoryLoader(folder, glob="**/*.pdf", loader_cls=PyPDFLoader)
    docs_folder = loader.load()
    # Tag each document with its procedure name
    for doc in docs_folder:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

# In[5]:

len(documents)
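
# A quick sanity check (illustrative sketch, not in the original flow): PyPDFLoader yields one
# Document per PDF page, each carrying its source path plus the doc_type tag added above.

# In[ ]:

print(documents[0].metadata)
print(documents[0].page_content[:300])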

# **Split the text into chunks for manageability and token limits**

# In[6]:

# text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=300)
# chunks = text_splitter.split_documents(documents)  # rechunk existing documents
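
# A diagnostic sketch (not part of the original flow): the splitter above is left commented out,
# so whole PDF pages are embedded as-is. This shows what the same 2000/300 settings would
# produce, without feeding the vector store below. The underscore-prefixed names are illustrative.

# In[ ]:

_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=300)
_chunks = _splitter.split_documents(documents)
print(f"{len(documents)} pages would become {len(_chunks)} chunks")
print("Procedures covered:", sorted({chunk.metadata['doc_type'] for chunk in _chunks}))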

# **Populate Vector store with embeddings for each document**

# In[7]:

embeddings = OpenAIEmbeddings()  # We will use OpenAI Embeddings

# Delete the collection if it already exists
db_name = "vector_db"
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

# Create embedding vectorstore
vectorstore = Chroma.from_documents(documents=documents, embedding=embeddings, persist_directory=db_name)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

# In[8]:

collection = vectorstore._collection
sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
dimensions = len(sample_embedding)
print(f"The vectors have {dimensions:,} dimensions")

# **RAG Implementation with Langchain**

# In[9]:

from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["question", "context"],
    template="""
You are a **Medicare Administrative Contractor (MAC) AI Assistant**, assisting nurses and medical reviewers in evaluating prior authorization requests.
Your responses must be **accurate, structured, and policy-backed**, ensuring compliance with Medicare LCD/NCD guidelines.

At the start of a conversation, greet the user, introduce yourself as a MAC AI Assistant, and state that you can currently answer policy questions about these four procedures:
1. Spinal Neurostimulation, 2. Cervical Fusion, 3. MRI for Chronic Back Pain, 4. Total Joint Arthroplasty

🔹 **How to Respond:**

✅ **Prioritize accuracy and relevance based on the question.**
- If the LLM has sufficient knowledge, answer directly.
- If the query requires policy-based details, retrieve information from the MCS_Policydocs knowledge base.
- If there's a conflict, always prioritize the retrieved policy data over general LLM knowledge.

✅ **Adapt your response format based on the nature of the query:**
- If the query requires structured data (e.g., approval criteria, required documentation, denials), use **clear bullet points**.
- If the query is conversational, **respond naturally** without forcing structure.
- If it's a follow-up, **infer context** from the previous question instead of asking for unnecessary clarification.

✅ **Retrieving Documents:**
- **Do not retrieve or mention sources unless necessary to answer the query.**
- **Offer** document references, but only show them if explicitly requested by the user.
- When retrieving sources, **ensure they are highly relevant** to the question asked.

✅ **Handling Follow-up Questions:**
- If the user asks a follow-up (e.g., "What are the denial reasons?"), assume it refers to the **previous question** unless context suggests otherwise.
- If the follow-up is vague, attempt to infer meaning before asking for clarification.
- Avoid resetting the conversation flow or greeting the user again in a follow-up response.

🔹 **Context (Retrieved LCD/NCD Policies, if applicable):**
{context}

**Question:** {question}

**📄 Answer:**
"""
)
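
# Quick sanity check (illustrative only, not part of the original flow): render the template with
# placeholder values to confirm both input variables are wired up before building the chain.

# In[ ]:

print(prompt.format(
    question="What documentation is required to approve a Cervical Fusion request?",
    context="(retrieved LCD/NCD policy excerpts would be inserted here)"
)[:600])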

# In[17]:

# Initialize LLM
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# In[18]:

# Set up conversation memory for chat; output_key tells the memory which chain output to store,
# since the chain also returns source documents
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key="answer")

# The retriever is an abstraction over the VectorStore that will be used during RAG
retriever = vectorstore.as_retriever()
# retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 5})

# Set up the ConversationalRetrievalChain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,                    # keep conversation history across turns
    return_source_documents=True,     # keep retrieved documents alongside the answer
    combine_docs_chain_kwargs={"prompt": prompt}
)
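
# Optional smoke test (a sketch, not part of the original flow): invoke the chain once directly
# to confirm memory, retrieval, and the custom prompt work together before wiring up the UI.
# Note that this turn will also land in the chat memory.

# In[ ]:

result = conversation_chain.invoke({"question": "Which procedures can you answer policy questions about?"})
print(result["answer"])
print(f"{len(result['source_documents'])} source documents retrieved")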

# **Now Implement in Gradio UI**

# In[19]:

def chat(message, history):
    global retrieved_docs

    # Ensure retrieved_docs exists
    if "retrieved_docs" not in globals():
        retrieved_docs = {}

    # Handle requests for sources
    if message.lower() in ["show sources", "show documents", "provide references"]:
        if "last_retrieval" in retrieved_docs:
            docs = retrieved_docs["last_retrieval"]
            response = "**Retrieved Documents:**"
            for i, doc in enumerate(docs):
                response += f"\n📄 {i+1}. {doc.metadata.get('source', 'Unknown')}\nSnippet: {doc.page_content[:200]}...\n"
            return response if docs else "No relevant documents were retrieved."
        return "No sources available for the last query."

    # Use LLM to generate a response
    result = conversation_chain.invoke({"question": message})
    answer = result["answer"]
    documents = result["source_documents"]

    # Store retrieved documents for follow-ups if needed
    if documents:
        retrieved_docs["last_retrieval"] = documents

    # Offer documents but show only if requested
    return f"**Answer:** {answer}\n\n_(Type 'show sources' if you need document references.)_"

# In[20]:

# Launch Gradio UI
ui = gr.ChatInterface(fn=chat, title="PriorAuthAI: RAG based Prior Authorization Review Chatbot")
ui.launch()

# In[ ]: