In [18]:
!pip install streamlit langchain-community langchain-openai langchain-text-splitters requests pathlib pypdf



In [19]:
# NOTE(review): per the pip log recorded for this cell, upgrading langchain-chroma
# pulls in protobuf 5.29.5 (via onnxruntime -> chromadb), which streamlit 1.32.0
# rejects (it requires protobuf<5,>=3.20). The second command re-pins protobuf to
# 3.20.3, which in turn makes opentelemetry-proto 1.34.0 (requires protobuf>=5.0)
# report an incompatibility.
# TODO: confirm whether a newer streamlit release makes this pin unnecessary.
!pip install -U langchain-chroma
!pip install protobuf==3.20.3

Collecting protobuf (from onnxruntime>=1.14.1->chromadb>=1.0.9->langchain-chroma)
  Using cached protobuf-5.29.5-cp310-abi3-win_amd64.whl.metadata (592 bytes)
Using cached protobuf-5.29.5-cp310-abi3-win_amd64.whl (434 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.3
    Uninstalling protobuf-3.20.3:
      Successfully uninstalled protobuf-3.20.3
Successfully installed protobuf-5.29.5


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
streamlit 1.32.0 requires protobuf<5,>=3.20, but you have protobuf 5.29.5 which is incompatible.


Collecting protobuf==3.20.3
  Using cached protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Using cached protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 5.29.5
    Uninstalling protobuf-5.29.5:
      Successfully uninstalled protobuf-5.29.5
Successfully installed protobuf-3.20.3


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opentelemetry-proto 1.34.0 requires protobuf<6.0,>=5.0, but you have protobuf 3.20.3 which is incompatible.


In [20]:
## Handle secrets either from env vars or streamlit manager
import streamlit as st
import os


def _get_secret(name):
    """Return the secret called `name`.

    The environment variable of that name is used when it is set; otherwise the
    value is read from Streamlit's secrets manager (`st.secrets`).

    Args:
        name: name of the environment variable / secrets entry.

    Returns:
        The secret value.

    Raises:
        RuntimeError: if the secret is available from neither source. The
            underlying lookup error is chained as ``__cause__``.
    """
    value = os.getenv(name)
    if value is not None:
        return value
    try:
        return st.secrets[name]
    except (KeyError, FileNotFoundError) as err:
        # Missing key or missing secrets file: fail with an actionable message.
        raise RuntimeError(
            f"Secret {name!r} not found: set the {name} environment variable "
            "or add it to .streamlit/secrets.toml."
        ) from err


api_key = _get_secret("LITELLM_KEY")
cirrus_key = _get_secret("CIRRUS_KEY")


In [21]:
import pathlib
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma

def pdf_loader(path, loader_cls=None):
    """Load every PDF found directly inside the directory `path`.

    Only regular files whose suffix is ``.pdf`` (case-insensitive) are loaded;
    other files and subdirectories are skipped instead of being handed to the
    PDF parser. Files are processed in sorted order so the result does not
    depend on the order in which the filesystem lists the directory.

    Args:
        path: directory containing the PDF files.
        loader_cls: optional loader class, called as ``loader_cls(file_path)``
            and expected to provide ``load()`` returning a list. Defaults to
            ``PyPDFLoader``.

    Returns:
        A single list with the items returned by ``load()`` for every PDF.

    Raises:
        FileNotFoundError / NotADirectoryError: propagated from
            ``Path.iterdir()`` when `path` is not an existing directory.
    """
    if loader_cls is None:
        loader_cls = PyPDFLoader
    docs_dir = pathlib.Path(path)
    pdf_files = sorted(
        entry
        for entry in docs_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() == ".pdf"
    )
    all_documents = []
    for pdf_file in pdf_files:
        all_documents.extend(loader_cls(pdf_file).load())
    return all_documents

# Embedding client: the "cirrus" model served at the custom base_url below,
# authenticated with cirrus_key.
embedding = OpenAIEmbeddings(model="cirrus", api_key=cirrus_key, base_url="https://llm.cirrus.carlboettiger.info/v1")

# Splitter configuration: chunk_size=5000 with chunk_overlap=500.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=5000,
    chunk_overlap=500,
)

# Load the documents under hwc/ and cut them into chunks.
docs = pdf_loader('hwc/')
splits = text_splitter.split_documents(docs)

#With the help of a tutorial and a little bit of ChatGPT to take out some of the redundancies to make it all work
def calculate_chunk_ids(chunks):
    """Assign each chunk an ID of the form ``"{source}:{page}:{index}"``.

    ``source`` and ``page`` are read from ``chunk.metadata`` (a missing key is
    rendered as ``None``) and ``index`` counts the chunks already seen for that
    source/page pair, starting at 0. The ID is stored in
    ``chunk.metadata["id"]``; the chunks are modified in place.

    A counter is kept per page rather than only for the most recent page, so
    IDs stay unique even when the chunks of one page are not adjacent in
    ``chunks``. When each page's chunks are contiguous, the IDs are identical
    to a running index that restarts on every page change.

    Args:
        chunks: iterable of objects exposing a mutable ``metadata`` dict.

    Returns:
        The same ``chunks`` object that was passed in.
    """
    next_index_by_page = {}
    for chunk in chunks:
        source = chunk.metadata.get("source")
        page = chunk.metadata.get("page")
        page_id = f"{source}:{page}"
        index = next_index_by_page.get(page_id, 0)
        next_index_by_page[page_id] = index + 1
        chunk.metadata["id"] = f"{page_id}:{index}"
    return chunks

# Tag every chunk with its "source:page:index" ID (stored in chunk.metadata["id"]).
splits_with_ids = calculate_chunk_ids(splits)

CHROMA_PATH = "chroma"

# Open the Chroma store kept under CHROMA_PATH, using the embedding client above.
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding)

# Only the stored IDs are needed here, so nothing else is requested (include=[]).
existing_items = db.get(include=[])
existing_ids = set(existing_items["ids"])
print(f"Number of existing documents in DB: {len(existing_ids)}")

# Keep only chunks whose ID is not in the store yet, so re-running the cell
# does not re-add chunks that were already stored.
new_chunks = [chunk for chunk in splits_with_ids if chunk.metadata["id"] not in existing_ids]
new_chunk_ids = [chunk.metadata["id"] for chunk in new_chunks]

if new_chunks:
    db.add_documents(new_chunks, ids=new_chunk_ids)
    # Explicit persist() is deprecated in recent Chroma integrations (the store is
    # written to persist_directory automatically) and the method does not exist
    # on langchain_chroma.Chroma, so call it only where the class still has it.
    persist = getattr(db, "persist", None)
    if callable(persist):
        persist()
    print(f"Added {len(new_chunks)} new chunks to the database.")
else:
    print("No new chunks to add.")


Number of existing documents in DB: 768
No new chunks to add.


In [22]:

# NRP embedding model tends to throw errors
# embedding = OpenAIEmbeddings(model = "embed-mistral", api_key = api_key, base_url = "https://llm.nrp-nautilus.io")



In [23]:

"""
from langchain_community.embeddings.bedrock import BedrockEmbeddings
def get_embedding_function():
    embeddings = BedrockEmbeddings(credentials_profile_name = "default", region_name = "us-east-1"
    )
    return embeddings
"""
"""
from langchain_community.embeddings.ollama import OllamaEmbeddings
def get_embedding_function():
    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    return embeddings
"""
#TO ADD CHUNKS AND UNIQUE IDS TO THEM AS WELL

'\nfrom langchain_community.embeddings.ollama import OllamaEmbeddings\ndef get_embedding_function():\n    embeddings = OllamaEmbeddings(model="nomic-embed-text")\n    return embeddings\n'

In [48]:
# Expose the Chroma store `db` as a retriever; it is passed to the retrieval chain
# built in a later cell.
# NOTE(review): the original note on this cell read "slow part here, runs on remote
# GPU". Presumably that refers to embedding work done against the remote endpoint
# rather than to this call itself — confirm, and move the note if so.
retriever = db.as_retriever()

In [50]:


# Short name -> model identifier. Pick any of the models by its short name.
# To list the models:
#   curl -H "Authorization: Bearer $OPENAI_API_KEY" https://llm.nrp-nautilus.io/v1/models
models = {
    "llama3": "llama3-sdsc",
    "deepseek-small": "DeepSeek-R1-Distill-Qwen-32B",
    "deepseek": "deepseek-r1-qwen-qualcomm",
    "gemma3": "gemma3",
    "phi3": "phi3",
    "olmo": "olmo",
}

from langchain_openai import ChatOpenAI

# Chat client for the selected model (gemma3), authenticated with api_key.
llm = ChatOpenAI(
    model=models["gemma3"],
    api_key=api_key,
    base_url="https://llm.nrp-nautilus.io",
    temperature=0,
)


from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
# System message for the question-answering chain. Adjacent string literals are
# concatenated with no separator, so each sentence carries its own trailing
# space. {context} is the placeholder for the retrieved articles.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following scientific articles as the retrieved context to answer "
    "the question. Appropriately cite the articles from the context on which your answer is based using (Author, Year) format. "
    "Do not attempt to cite articles that are not in the context. "
    "If you don't know the answer, say that you don't know. "
    "Use up to five sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
# Chat template: the system instructions followed by the user's message ({input}).
prompt = ChatPromptTemplate.from_messages([("system", system_prompt), ("human", "{input}")])

# The bare string below is never assigned or executed; it is an unused draft of a
# manual retrieval step and references names (query_text, PROMPT_TEMPLATE) that
# are not defined in the visible cells.
"""
results = db.similarity_search_with_score(query_text, k = 5)
context_text = "\n\n---\n\n".join([doc_page_content for doc, _score in results])
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(context=context_text, question - query_text)
"""

# Document-combining chain built from the chat model and the template above.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Retrieval chain built from `retriever` and the chain above; the query cells call
# it as rag_chain.invoke({"input": ...}).
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


In [52]:
# The question is bound to `question` rather than `prompt`, so the
# ChatPromptTemplate named `prompt` (used to build the chain) is not replaced by a
# plain string in the notebook namespace.
question = "I live in Tanzania and am having issues with lions breaking into my boma and preying on cattle. What interventions might work best for me?"
results = rag_chain.invoke({"input": question})
results

{'input': 'I live in Tanzania and am having issues with lions breaking into my boma and preying on cattle. What interventions might work best for me?',
 'context': [Document(metadata={'title': 'Predicting intervention priorities for wildlife conflicts', 'moddate': '2025-05-27T12:12:25-07:00', 'author': '', 'keywords': '', 'page': 10, 'wps-journaldoi': '10.1111/(ISSN)1523-1739', 'creationdate': '2020-01-16T12:33:42+05:30', 'total_pages': 12, 'id': 'hwc\\Baynham-Herd et al. 2019.pdf:10:0', 'wps-articledoi': '10.1111/cobi.13372', 'wps-proclevel': '3', 'producer': 'Acrobat Distiller 10.1.10 (Windows); modified using iText 4.2.0 by 1T3XT', 'creator': 'LaTeX with hyperref package', 'source': 'hwc\\Baynham-Herd et al. 2019.pdf', 'page_label': '242', 'subject': 'Conservation Biology 2020.34:232-243'}, page_content='242 Conﬂict Intervention Priorities\nhelps foster more effective collaboration (Game et al.\n2013; Lute et al. 2018). Third, both the survey results\nand feedback were consistent wi

In [53]:
# The question is bound to `question` rather than `prompt`, so the
# ChatPromptTemplate named `prompt` (used to build the chain) is not replaced by a
# plain string in the notebook namespace.
question = "What are the most cost-effective prevention methods for elephants raiding my crops?"

results = rag_chain.invoke({"input": question})
results

{'input': 'What are the most cost-effective prevention methods for elephants raiding my crops?',
 'context': [Document(metadata={'creationdate': '2020-01-16T12:33:42+05:30', 'keywords': '', 'wps-articledoi': '10.1111/cobi.13372', 'wps-proclevel': '3', 'page': 10, 'creator': 'LaTeX with hyperref package', 'id': 'hwc\\Baynham-Herd et al. 2019.pdf:10:0', 'page_label': '242', 'total_pages': 12, 'moddate': '2025-05-27T12:12:25-07:00', 'wps-journaldoi': '10.1111/(ISSN)1523-1739', 'title': 'Predicting intervention priorities for wildlife conflicts', 'source': 'hwc\\Baynham-Herd et al. 2019.pdf', 'producer': 'Acrobat Distiller 10.1.10 (Windows); modified using iText 4.2.0 by 1T3XT', 'author': '', 'subject': 'Conservation Biology 2020.34:232-243'}, page_content='242 Conﬂict Intervention Priorities\nhelps foster more effective collaboration (Game et al.\n2013; Lute et al. 2018). Third, both the survey results\nand feedback were consistent with recent scholarship\n(Redpath et al. 2017) that highl

In [54]:
# Ask the RAG chain about protecting goats and cattle from jaguars; the returned
# value is displayed as the cell output.
rag_chain.invoke(
    {
        "input": "I have a small herd of goats and cattle and I am worried about jaguars preying on them. What preventative measures can I take?",
    }
)

{'input': 'I have a small herd of goats and cattle and I am worried about jaguars preying on them. What preventative measures can I take?',
 'context': [Document(metadata={'creationdate': '2022-06-23T12:24:35+08:00', 'id': 'hwc\\Tiller et al. 2022.pdf:7:0', 'source': 'hwc\\Tiller et al. 2022.pdf', 'subject': 'Human–elephant conflict is increasing across many parts of Asia and Africa. Mitigating elephant crop raiding has become a major focus of conservation intervention, however, many existing methods for tackling this problem are expensive and difficult to execute. Thus, there is a need for more affordable, farm-based methods. Testing these methods is key to ensuring their effectiveness and feasibility. In this study, we tested a novel olfactory deterrent, the “smelly elephant repellent”, a foul-smelling organic liquid, on 40 farms in Uganda and Kenya. Our results show that the repellent was effective at deterring elephants from crop raiding. Over the study period, 82% of 309 elephant 

In [55]:
# Ask the RAG chain about coyote predation on free-range calves; the returned value
# is displayed as the cell output.
rag_chain.invoke(
    {
        "input": "I am trying to prevent coyotes from eating the calves of my free-range cattle. What may work best?",
    }
)

{'input': 'I am trying to prevent coyotes from eating the calves of my free-range cattle. What may work best?',
 'context': [Document(metadata={'author': '', 'page': 10, 'keywords': '', 'moddate': '2025-05-27T12:12:25-07:00', 'wps-articledoi': '10.1111/cobi.13372', 'producer': 'Acrobat Distiller 10.1.10 (Windows); modified using iText 4.2.0 by 1T3XT', 'total_pages': 12, 'page_label': '242', 'creationdate': '2020-01-16T12:33:42+05:30', 'title': 'Predicting intervention priorities for wildlife conflicts', 'subject': 'Conservation Biology 2020.34:232-243', 'wps-proclevel': '3', 'id': 'hwc\\Baynham-Herd et al. 2019.pdf:10:0', 'creator': 'LaTeX with hyperref package', 'wps-journaldoi': '10.1111/(ISSN)1523-1739', 'source': 'hwc\\Baynham-Herd et al. 2019.pdf'}, page_content='242 Conﬂict Intervention Priorities\nhelps foster more effective collaboration (Game et al.\n2013; Lute et al. 2018). Third, both the survey results\nand feedback were consistent with recent scholarship\n(Redpath et al. 2

In [56]:
# Ask the RAG chain about low-cost ways to keep deer out of agricultural fields; the
# returned value is displayed as the cell output.
rag_chain.invoke(
    {
        "input": "We have major issues with deer raiding our large agricultural fields. Is there anything I can try to prevent this that won’t break the bank?",
    }
)

{'input': 'We have major issues with deer raiding our large agricultural fields. Is there anything I can try to prevent this that won’t break the bank?',
 'context': [Document(metadata={'doi': '10.1016/j.scitotenv.2022.156195', 'crossmarkdomains[2]': 'sciencedirect.com', 'producer': 'PyPDF', 'author': 'Charlotte Lorand', 'elsevierwebpdfspecifications': '7.0', 'creationdate': '2022-06-07T02:40:21+00:00', 'page_label': '6', 'crossmarkdomainexclusive': 'true', 'subject': 'Science of the Total Environment, 838 (2022) 156195. doi:10.1016/j.scitotenv.2022.156195', 'crossmarkmajorversiondate': '2010-04-23', 'crossmarkdomains[1]': 'elsevier.com', 'page': 5, 'title': "Effectiveness of interventions for managing human-large carnivore conflicts worldwide: Scare them off, don't remove them", 'source': 'hwc\\Lorand et al. 2022.pdf', 'id': 'hwc\\Lorand et al. 2022.pdf:5:0', 'robots': 'noindex', 'moddate': '2022-06-07T02:40:21+00:00', 'total_pages': 11, 'keywords': 'Human-carnivore coexistence; Letha

In [57]:
# Ask the RAG chain about community-level prevention of bears feeding on fruit trees
# and trash; the returned value is displayed as the cell output.
rag_chain.invoke(
    {
        "input": "We live in a suburban area and bears sometimes come into our town to eat from our fruit trees and trash. What are the best ways for us to prevent this as a community? We don’t want to have to get rid of our fruit trees…",
    }
)

{'input': 'We live in a suburban area and bears sometimes come into our town to eat from our fruit trees and trash. What are the best ways for us to prevent this as a community? We don’t want to have to get rid of our fruit trees…',
 'context': [Document(metadata={'keywords': '', 'page_label': '242', 'wps-proclevel': '3', 'title': 'Predicting intervention priorities for wildlife conflicts', 'creator': 'LaTeX with hyperref package', 'subject': 'Conservation Biology 2020.34:232-243', 'wps-articledoi': '10.1111/cobi.13372', 'author': '', 'producer': 'Acrobat Distiller 10.1.10 (Windows); modified using iText 4.2.0 by 1T3XT', 'creationdate': '2020-01-16T12:33:42+05:30', 'id': 'hwc\\Baynham-Herd et al. 2019.pdf:10:0', 'total_pages': 12, 'page': 10, 'source': 'hwc\\Baynham-Herd et al. 2019.pdf', 'wps-journaldoi': '10.1111/(ISSN)1523-1739', 'moddate': '2025-05-27T12:12:25-07:00'}, page_content='242 Conﬂict Intervention Priorities\nhelps foster more effective collaboration (Game et al.\n2013; L

In [58]:
# The question is bound to `question` rather than `prompt`, so the
# ChatPromptTemplate named `prompt` (used to build the chain) is not replaced by a
# plain string in the notebook namespace.
question = "What cattle husbandry strategies might be helpful to prevent conflict if we live in wolf country?"

rag_chain.invoke({"input": question})

{'input': 'What cattle husbandry strategies might be helpful to prevent conflict if we live in wolf country?',
 'context': [Document(metadata={'crossmarkdomainexclusive': 'true', 'id': 'hwc\\Lorand et al. 2022.pdf:9:2', 'total_pages': 11, 'keywords': 'Human-carnivore coexistence; Lethal control; Non-lethal management; Conservation interventions; Effectiveness; Evidence-based', 'elsevierwebpdfspecifications': '7.0', 'author': 'Charlotte Lorand', 'creationdate': '2022-06-07T02:40:21+00:00', 'doi': '10.1016/j.scitotenv.2022.156195', 'page': 9, 'robots': 'noindex', 'page_label': '10', 'subject': 'Science of the Total Environment, 838 (2022) 156195. doi:10.1016/j.scitotenv.2022.156195', 'source': 'hwc\\Lorand et al. 2022.pdf', 'crossmarkmajorversiondate': '2010-04-23', 'producer': 'PyPDF', 'crossmarkdomains[1]': 'elsevier.com', 'title': "Effectiveness of interventions for managing human-large carnivore conflicts worldwide: Scare them off, don't remove them", 'moddate': '2022-06-07T02:40:21+