import streamlit as st
from langchain_community.document_loaders import PyPDFLoader

## The dockerized streamlit app reads the API key from os.getenv();
## otherwise fall back to st.secrets.
import os
api_key = os.getenv("LITELLM_KEY")
if api_key is None:
    api_key = st.secrets["LITELLM_KEY"]

st.title("RAG Demo")


'''
This demonstration combines an LLM trained specifically for text embedding (`embed-mistral` in our case) with a traditional "instruct"-tuned LLM (`llama3`) to create a retrieval-augmented generation (RAG) interface to a provided PDF document.  We can query the model and receive relatively precise citations to the matched text in the PDF, so answers can be verified against the source document.


Provide a URL to a PDF document you want to ask questions about.
Once the document has been downloaded and parsed, ask your questions in the chat dialog that will appear below.  The default example comes from a recent progress report on California's 30x30 initiative for biodiversity conservation.
'''

# Create a file uploader?
# st.sidebar.file_uploader("Choose a PDF file", type=["pdf"])
url = st.text_input("PDF URL", "https://www.resources.ca.gov/-/media/CNRA-Website/Files/2024_30x30_Pathways_Progress_Report.pdf")
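
# Hypothetical alternative to the URL box above: accept a local PDF with
# st.file_uploader.  PyPDFLoader expects a file path, so this sketch writes
# the uploaded bytes to a temporary file first.  The helper name
# `uploaded_pdf_to_path` and the wiring below are illustrative only and are
# not used by the rest of the app.
import tempfile

def uploaded_pdf_to_path(uploaded_file) -> str:
    """Persist a Streamlit UploadedFile to disk and return its path."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.getvalue())
        return tmp.name

# Example wiring (sketch):
# uploaded = st.sidebar.file_uploader("Choose a PDF file", type=["pdf"])
# if uploaded is not None:
#     docs = PyPDFLoader(uploaded_pdf_to_path(uploaded)).load()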



@st.cache_data
def pdf_loader(url):
    loader = PyPDFLoader(url)
    return loader.load()

docs = pdf_loader(url)


# Set up the language model
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model = "llama3", api_key = api_key, base_url = "https://llm.nrp-nautilus.io",  temperature=0)

# Set up the embedding model
from langchain_openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings(
    model = "embed-mistral", 
    api_key = api_key, 
    base_url = "https://llm.nrp-nautilus.io"
)

# Build a retriever over the document chunks
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = InMemoryVectorStore.from_documents(documents=splits, embedding=embedding)
retriever = vectorstore.as_retriever()

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


# Place the RAG chain inside a streamlit chat interface:

if prompt := st.chat_input("What is the goal of CA 30x30?"):
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        results = rag_chain.invoke({"input": prompt})
        st.write(results['answer'])

        with st.expander("See context matched"):
            st.write(results['context'][0].page_content)
            st.write(results['context'][0].metadata)


# adapt for memory / multi-question interaction with:
# https://python.langchain.com/docs/tutorials/qa_chat_history/
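
# A minimal sketch of that adaptation, assuming the `llm`, `retriever`, and
# `question_answer_chain` objects defined above.  The function is defined but
# never called, so it does not change the app's behavior; invoke the returned
# chain with {"input": ..., "chat_history": [...]} (e.g. messages kept in
# st.session_state between turns).
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

def build_conversational_rag_chain():
    # Rewrite the latest question so it stands alone without the chat history.
    contextualize_prompt = ChatPromptTemplate.from_messages(
        [
            ("system",
             "Given a chat history and the latest user question, "
             "reformulate the question so it can be understood without "
             "the chat history. Do NOT answer the question."),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    history_aware_retriever = create_history_aware_retriever(
        llm, retriever, contextualize_prompt
    )
    return create_retrieval_chain(history_aware_retriever, question_answer_chain)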

# Also see structured outputs.
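
# A minimal sketch of structured output, assuming the llama3 endpoint supports
# tool/function calling (not verified here).  The schema below is hypothetical;
# `with_structured_output` only builds a runnable, so nothing is invoked.
from pydantic import BaseModel, Field

class CitedAnswer(BaseModel):
    """Hypothetical schema: a concise answer plus the pages it relied on."""
    answer: str = Field(description="Concise answer to the question")
    pages: list[int] = Field(description="PDF page numbers used as evidence")

structured_llm = llm.with_structured_output(CitedAnswer)
# Example use (sketch): structured_llm.invoke("...question plus retrieved context...")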