Sbnos committed
Commit d938a07 · verified · 1 Parent(s): 64aa216

modernising the application

Updated the app to call the Together chat-completions API directly, and added streaming of responses.
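For context, the streaming pattern the new app.py relies on boils down to the sketch below (a minimal sketch assuming the official together Python SDK and the same model name as in app.py; the guard on empty chunks is an assumption about deltas that can arrive with no content):

import os
from together import Together

client = Together(api_key=os.environ["TOGETHER_API_KEY"])

answer = ""
for chunk in client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
):
    # Streamed chunks can arrive without choices, and the final
    # chunk's delta content may be None: skip anything empty.
    if chunk.choices and chunk.choices[0].delta.content:
        answer += chunk.choices[0].delta.content
print(answer)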

Files changed (1): app.py (+106, −137)
app.py CHANGED
@@ -1,145 +1,114 @@
-import streamlit as st
 import os
-import asyncio
-from langchain.chains import create_history_aware_retriever, create_retrieval_chain
-from langchain.chains.combine_documents import create_stuff_documents_chain
-from langchain_community.vectorstores import Chroma
-from langchain_together import Together
-from langchain_community.chat_message_histories import StreamlitChatMessageHistory
-from langchain_community.document_loaders import WebBaseLoader
-from langchain_core.chat_history import BaseChatMessageHistory
-from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from langchain_core.runnables.history import RunnableWithMessageHistory
 from langchain.embeddings import HuggingFaceBgeEmbeddings
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-
-# Initialize the LLMs
-llm = Together(
-    model="mistralai/Mixtral-8x22B-Instruct-v0.1",
-    temperature=0.2,
-    top_k=12,
-    max_tokens=22048,
-    together_api_key=os.environ['pilotikval']
 )

-# Function to store chat history
-store = {}
-
-model_name = "BAAI/bge-base-en"
-encode_kwargs = {'normalize_embeddings': True}  # set True to compute cosine similarity
-
-embedding_function = HuggingFaceBgeEmbeddings(
-    model_name=model_name,
-    encode_kwargs=encode_kwargs
 )

-def get_session_history(session_id: str) -> BaseChatMessageHistory:
-    if session_id not in store:
-        store[session_id] = StreamlitChatMessageHistory(key=session_id)
-    return store[session_id]
-
-# Define the Streamlit app
-def app():
-    with st.sidebar:
-        st.title("dochatter")
-        option = st.selectbox(
-            'Which retriever would you like to use?',
-            ('General Medicine', 'RespiratoryFishman', 'RespiratoryMurray', 'MedMRCP2', 'OldMedicine')
-        )
-
-    # Define retrievers based on option
-    persist_directory = {
-        'General Medicine': "./oxfordmedbookdir/",
-        'Respiratory1': "./respfishmandbcud/",
-        'Respiratory2': "./respmurray/",
-        'Med2.2': "./medmrcp2store/",
-        'Med2.1': "./mrcpchromadb/"
-    }.get(option, "./mrcpchromadb/")
-
-    collection_name = {
-        'General Medicine': "oxfordmed",
-        'Respiratory1': "fishmannotescud",
-        'Respiratory2': "respmurraynotes",
-        'Med2.2': "medmrcp2notes",
-        'Med2.1': "mrcppassmednotes"
-    }.get(option, "mrcppassmednotes")
-
-    vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function, collection_name=collection_name)
-    retriever = vectordb.as_retriever(search_kwargs={"k": 5})
-
-    # Define the prompt templates
-    contextualize_q_system_prompt = (
-        "Given a chat history and the latest user question "
-        "which might reference context in the chat history, "
-        "formulate a standalone question which can be understood "
-        "without the chat history. Do NOT answer the question, "
-        "just reformulate it if needed and otherwise return it as is."
-    )
-    contextualize_q_prompt = ChatPromptTemplate.from_messages(
-        [
-            ("system", contextualize_q_system_prompt),
-            MessagesPlaceholder("chat_history"),
-            ("human", "{input}"),
-        ]
-    )
-    history_aware_retriever = create_history_aware_retriever(
-        llm, retriever, contextualize_q_prompt
-    )
-
-    system_prompt = (
-        "You are helping a doctor. Be as detailed and thorough as possible "
-        "Use the following pieces of retrieved context to answer "
-        "the question. If you don't know the answer, say that you "
-        "don't know."
-        "\n\n"
-        "{context}"
-    )
-    qa_prompt = ChatPromptTemplate.from_messages(
-        [
-            ("system", system_prompt),
-            MessagesPlaceholder("chat_history"),
-            ("human", "{input}"),
-        ]
-    )
-    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
-    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
-
-    # Statefully manage chat history
-    conversational_rag_chain = RunnableWithMessageHistory(
-        rag_chain,
-        get_session_history,
-        input_messages_key="input",
-        history_messages_key="chat_history",
-        output_messages_key="answer",
-    )
-
-    # Session state
-    if "messages" not in st.session_state.keys():
-        st.session_state.messages = [{"role": "assistant", "content": "How may I help you?"}]
-
-    st.header("Hello Doc!")
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.write(message["content"])
-
-    prompts2 = st.chat_input("Say something")
-
-    if prompts2:
-        st.session_state.messages.append({"role": "user", "content": prompts2})
-        with st.chat_message("user"):
-            st.write(prompts2)

-    if st.session_state.messages[-1]["role"] != "assistant":
-        with st.chat_message("assistant"):
-            with st.spinner("Thinking..."):
-                final_response = conversational_rag_chain.invoke(
-                    {
-                        "input": prompts2,
-                    },
-                    config={"configurable": {"session_id": "current_session"}}
-                )
-                st.write(final_response['answer'])
-                st.session_state.messages.append({"role": "assistant", "content": final_response['answer']})

-if __name__ == '__main__':
-    app()
 import os
+import streamlit as st
+from together import Together
+from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceBgeEmbeddings
+from langchain.chains import ConversationalRetrievalChain
+
+# --- Configuration ---
+TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")
+if not TOGETHER_API_KEY:
+    st.error("Missing TOGETHER_API_KEY environment variable.")
+    st.stop()
+
+# Initialize TogetherAI client
+client = Together(api_key=TOGETHER_API_KEY)
+
+# Embeddings setup
+EMBED_MODEL_NAME = "BAAI/bge-base-en"
+embeddings = HuggingFaceBgeEmbeddings(
+    model_name=EMBED_MODEL_NAME,
+    encode_kwargs={"normalize_embeddings": True},
 )

+# Sidebar: select collection
+st.sidebar.title("DocChatter RAG")
+collection = st.sidebar.selectbox(
+    "Choose a document collection:",
+    ['General Medicine', 'RespiratoryFishman', 'RespiratoryMurray', 'MedMRCP2', 'OldMedicine']
 )

+dirs = {
+    'General Medicine': './oxfordmedbookdir/',
+    'RespiratoryFishman': './respfishmandbcud/',
+    'RespiratoryMurray': './respmurray/',
+    'MedMRCP2': './medmrcp2store/',
+    'OldMedicine': './mrcpchromadb/'
+}
+cols = {
+    'General Medicine': 'oxfordmed',
+    'RespiratoryFishman': 'fishmannotescud',
+    'RespiratoryMurray': 'respmurraynotes',
+    'MedMRCP2': 'medmrcp2notes',
+    'OldMedicine': 'mrcppassmednotes'
+}
+
+persist_directory = dirs.get(collection)
+collection_name = cols.get(collection)
+
+# Load the persisted Chroma vector store
+vectorstore = Chroma(
+    collection_name=collection_name,
+    persist_directory=persist_directory,
+    embedding_function=embeddings
+)
+retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

+# System prompt template
+SYSTEM_PROMPT = (
+    "You are a helpful assistant for medical professionals. "
+    "Use the following context from medical documents to answer the question. "
+    "If you don't know, say you don't know.\n\nContext:\n{context}\n"
+)

+st.title("🩺 DocChatter RAG (Streaming)")
+
+# Initialize chat history
+if 'chat_history' not in st.session_state:
+    st.session_state.chat_history = []  # list of dicts {role, content}
+
+# Tabs
+chat_tab, clear_tab = st.tabs(["Chat", "Clear History"])
+with chat_tab:
+    # Display history
+    for msg in st.session_state.chat_history:
+        st.chat_message(msg['role']).write(msg['content'])
+
+    # User input
+    if prompt := st.chat_input("Ask anything about your docs..."):
+        # User message
+        st.chat_message("user").write(prompt)
+        st.session_state.chat_history.append({"role": "user", "content": prompt})
+
+        # Retrieve relevant docs
+        docs = retriever.get_relevant_documents(prompt)
+        context = "\n---\n".join([d.page_content for d in docs])
+
+        # Build messages for TogetherAI: system prompt with retrieved context,
+        # then the prior conversation (which already ends with the new prompt)
+        system_msg = {"role": "system", "content": SYSTEM_PROMPT.format(context=context)}
+        messages = [system_msg]
+        for msg in st.session_state.chat_history:
+            if msg['role'] in ('user', 'assistant'):
+                messages.append(msg)
+        # Prepare streaming response
+        response_container = st.chat_message("assistant")
+        placeholder = response_container.empty()
+        answer = ""
+        # Stream tokens, skipping chunks without choices and guarding
+        # against a None delta on the final chunk
+        for token in client.chat.completions.create(
+            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
+            messages=messages,
+            stream=True
+        ):
+            if token.choices and token.choices[0].delta.content:
+                answer += token.choices[0].delta.content
+                placeholder.write(answer)
+        # Save assistant message
+        st.session_state.chat_history.append({"role": "assistant", "content": answer})
+
+with clear_tab:
+    if st.button("🗑️ Clear chat history"):
+        st.session_state.chat_history = []
+        st.rerun()
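As before, the Space launches the file with streamlit run app.py. The new version expects a TOGETHER_API_KEY environment variable (the old build read the key from a variable named pilotikval) and the pre-built Chroma persist directories on disk. One design note: the vector store is re-opened on every Streamlit rerun; wrapping the Chroma setup in st.cache_resource (a hypothetical follow-up, not part of this commit) would avoid reloading it on each interaction.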