Spaces:
Running
Running
:see_no_evil:
Browse files- .gitignore +3 -0
- Dockerfile +21 -0
- README.md +20 -1
- requirements.txt +14 -0
- src/app.py +124 -0
.gitignore
CHANGED
@@ -172,3 +172,6 @@ cython_debug/
|
|
172 |
|
173 |
# PyPI configuration file
|
174 |
.pypirc
|
|
|
|
|
|
|
|
172 |
|
173 |
# PyPI configuration file
|
174 |
.pypirc
|
175 |
+
|
176 |
+
*.pdf
|
177 |
+
|
Dockerfile
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9-slim

WORKDIR /app

# Build tooling and curl (curl is also needed at runtime by HEALTHCHECK).
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    software-properties-common \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies BEFORE copying the application source so that edits
# to src/ do not invalidate the (slow) pip layer of the Docker build cache.
COPY requirements.txt ./
RUN pip3 install --no-cache-dir -r requirements.txt

COPY src/ ./src/

EXPOSE 8501

HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

ENTRYPOINT ["streamlit", "run", "src/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md
CHANGED
@@ -1 +1,20 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Hwc Llm
|
3 |
+
emoji: 🚀
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: red
|
6 |
+
sdk: docker
|
7 |
+
app_port: 8501
|
8 |
+
tags:
|
9 |
+
- streamlit
|
10 |
+
pinned: false
|
11 |
+
short_description: Human-Wildlife Conflict LLM
|
12 |
+
license: bsd
|
13 |
+
---
|
14 |
+
|
15 |
+
# Welcome to Streamlit!
|
16 |
+
|
17 |
+
Edit `/src/app.py` to customize this app to your heart's desire. :heart:
|
18 |
+
|
19 |
+
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
20 |
+
forums](https://discuss.streamlit.io).
|
requirements.txt
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
bs4
langchain
langchain-chroma
langchain-community
langchain-core
langchain_openai
langchain-text-splitters
pypdf
requests
# NOTE: `os`, `zipfile`, and `pathlib` are Python standard-library modules,
# not PyPI packages — listing them breaks `pip install -r requirements.txt`.
# Duplicate entries for langchain-chroma and langchain-core were removed;
# pypdf was added because langchain_community's PyPDFLoader requires it.
src/app.py
ADDED
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader

## dockerized streamlit app wants to read from os.getenv(), otherwise use st.secrets
import os

# Key for the NRP LiteLLM gateway (chat model). The environment variable wins
# (Docker deployment); fall back to Streamlit secrets for local/Cloud runs.
api_key = os.getenv("LITELLM_KEY")
if api_key is None:
    api_key = st.secrets["LITELLM_KEY"]
# Key for the Cirrus embeddings endpoint, resolved the same way.
# NOTE(review): st.secrets[...] raises if the key is missing there too, so the
# app fails fast at startup when neither source provides a key.
cirrus_key = os.getenv("CIRRUS_KEY")
if cirrus_key is None:
    cirrus_key = st.secrets["CIRRUS_KEY"]

st.title("HWC LLM Testing")


# Bare string literal: rendered onto the page by Streamlit "magic".
'''
(Demo will take a while to load first while processing all data! Will be pre-processed in future...)
'''
|
19 |
+
|
20 |
+
# +
|
21 |
+
import bs4
|
22 |
+
from langchain import hub
|
23 |
+
from langchain_chroma import Chroma
|
24 |
+
from langchain_community.document_loaders import WebBaseLoader
|
25 |
+
from langchain_core.output_parsers import StrOutputParser
|
26 |
+
from langchain_core.runnables import RunnablePassthrough
|
27 |
+
from langchain_openai import OpenAIEmbeddings
|
28 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
29 |
+
|
30 |
+
import os
|
31 |
+
import requests
|
32 |
+
import zipfile
|
33 |
+
|
34 |
+
def download_and_unzip(url, output_dir):
    """Download a zip archive from *url* and extract it into *output_dir*.

    The archive is streamed to a temporary file (named after the URL's
    basename, in the current working directory), extracted, and removed.
    If *output_dir* already exists and is non-empty the download is skipped,
    so Streamlit script reruns don't re-fetch the archive.

    Args:
        url: HTTP(S) URL of the zip archive.
        output_dir: Directory to extract into (created if missing).

    Raises:
        requests.HTTPError: if the server responds with an error status.
        zipfile.BadZipFile: if the downloaded file is not a valid zip.
    """
    # Previous run already populated the directory — nothing to do.
    if os.path.isdir(output_dir) and os.listdir(output_dir):
        return
    os.makedirs(output_dir, exist_ok=True)

    zip_file_path = os.path.basename(url)
    # Stream in chunks rather than holding the entire archive in memory,
    # and fail loudly on 4xx/5xx instead of trying to unzip an error page.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(zip_file_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1 << 20):
                f.write(chunk)
    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(output_dir)
    finally:
        # Always remove the temporary archive, even if extraction fails.
        os.remove(zip_file_path)
|
44 |
+
|
45 |
+
# Source archive of Human-Wildlife Conflict PDFs; fetched once at startup.
url = "https://minio.carlboettiger.info/public-data/hwc.zip"
output_dir = "hwc"
# NOTE(review): the literal "hwc" is passed instead of the output_dir
# variable defined just above — same value, but prefer the variable.
download_and_unzip(url, "hwc")
|
48 |
+
|
49 |
+
import pathlib
|
50 |
+
@st.cache_data
def pdf_loader(path):
    """Load every PDF under *path* into a flat list of LangChain documents.

    Args:
        path: Directory containing the extracted PDF files.

    Returns:
        list: One document per PDF page, across all PDFs found.

    Cached with st.cache_data so Streamlit script reruns don't re-parse
    the PDFs.
    """
    all_documents = []
    docs_dir = pathlib.Path(path)
    # Only *.pdf files, sorted for deterministic ordering: iterdir() would
    # also yield subdirectories and zip artifacts (e.g. __MACOSX) that would
    # crash PyPDFLoader.
    for file in sorted(docs_dir.glob("*.pdf")):
        loader = PyPDFLoader(str(file))
        documents = loader.load()
        all_documents.extend(documents)
    return all_documents
|
59 |
+
|
60 |
+
docs = pdf_loader('hwc/')



# Set up the language model
from langchain_openai import ChatOpenAI
# Chat model served through the NRP Nautilus gateway (OpenAI-compatible API);
# temperature=0 for reproducible answers.
llm = ChatOpenAI(model = "llama3", api_key = api_key, base_url = "https://llm.nrp-nautilus.io", temperature=0)
## Cirrus instead:
# Embeddings come from a separate self-hosted Cirrus endpoint (also
# OpenAI-compatible), using its own key resolved at the top of the file.
embedding = OpenAIEmbeddings(
    model = "cirrus",
    api_key = cirrus_key,
    base_url = "https://llm.cirrus.carlboettiger.info/v1",
)
|
73 |
+
|
74 |
+
|
75 |
+
|
76 |
+
# Build a retrieval agent
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Chunk the page-level documents with overlap so answers that span a chunk
# boundary still retrieve coherent context.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
# Embeddings are computed here (network calls to the Cirrus endpoint) and the
# store is held in memory only — it is rebuilt on every fresh process start.
vectorstore = InMemoryVectorStore.from_documents(documents=splits, embedding=embedding)
retriever = vectorstore.as_retriever()

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
# System prompt for the "stuff" documents chain; {context} is filled with the
# retrieved chunks at invocation time.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
# Full RAG pipeline: retriever fetches chunks -> LLM answers over them.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
|
104 |
+
|
105 |
+
|
106 |
+
# Place agent inside a streamlit application:
|
107 |
+
|
108 |
+
# Chat loop: st.chat_input returns None until the user submits a question.
# The walrus target is named user_query (not `prompt`) so it does not shadow
# the module-level ChatPromptTemplate bound to `prompt` above.
if user_query := st.chat_input("What is the goal of CA 30x30?"):
    with st.chat_message("user"):
        st.markdown(user_query)

    with st.chat_message("assistant"):
        # create_retrieval_chain returns a dict with 'answer' and 'context'.
        results = rag_chain.invoke({"input": user_query})
        st.write(results['answer'])

        with st.expander("See context matched"):
            # Show only the top-ranked retrieved chunk and its source metadata.
            st.write(results['context'][0].page_content)
            st.write(results['context'][0].metadata)
|
119 |
+
|
120 |
+
|
121 |
+
# adapt for memory / multi-question interaction with:
|
122 |
+
# https://python.langchain.com/docs/tutorials/qa_chat_history/
|
123 |
+
|
124 |
+
# Also see structured outputs.
|