# Page header captured together with this file -- Spaces status: Build error
# app.py | |
import re | |
import os | |
import chainlit as cl | |
from typing import List | |
from pathlib import Path | |
from dotenv import load_dotenv | |
from langchain_openai import ChatOpenAI, OpenAIEmbeddings | |
from langchain.prompts import ChatPromptTemplate | |
from langchain.schema.runnable import Runnable, RunnablePassthrough, RunnableConfig | |
from langchain.schema import StrOutputParser | |
from langchain_community.document_loaders import ( | |
PyMuPDFLoader, | |
) | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.vectorstores.chroma import Chroma | |
from langchain.indexes import SQLRecordManager, index | |
from langchain.schema import Document | |
from langchain.callbacks.base import BaseCallbackHandler | |
from langchain.document_loaders import UnstructuredWordDocumentLoader, UnstructuredHTMLLoader, CSVLoader | |
from api_data import booking_agent_system | |
# ====================================================================================
# General queries go through the retriever, which supplies the prompt context.
# Booking queries are intercepted and processed separately, bypassing the retriever chain.
# ====================================================================================
# --------------------------------=== environment ===-------------------------------
# Load variables from a .env file before anything reads the environment.
load_dotenv()
# NOTE(review): neither value is referenced again in the visible part of this file;
# presumably the OpenAI clients and the booking helper pick them up from the
# environment themselves -- confirm before removing.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
auth_token = os.environ.get("DAYSOFF_API_TOKEN")

# --------------------------------=== globals ===-----------------------------------
# NOTE(review): process_documents() does not read chunk_size / chunk_overlap; its
# text splitter is configured with 1000 / 100. Decide which pair is intended.
chunk_size = 1024
chunk_overlap = 50
embeddings_model = OpenAIEmbeddings()
PDF_STORAGE_PATH = "./pdfs"
DOCS_STORAGE_PATH = "./data"

# --------------------------------=== model ===-------------------------------------
# streaming=True so the answer can be forwarded to the UI chunk by chunk.
model = ChatOpenAI(model_name="gpt-4", temperature=0.5, streaming=True)
# ----------------------------=== vectorstore setup ===----------------------------- | |
def _drop_empty_info_url_lines(text: str) -> str:
    """Return *text* without ``Info_Url:`` lines whose value is blank."""
    kept = []
    for line in text.split("\n"):
        # An "Info_Url:" line is kept only when something follows the first colon.
        if line.startswith("Info_Url:") and not line.split(":", 1)[1].strip():
            continue
        kept.append(line)
    return "\n".join(kept)


def process_documents(
    pdf_storage_path: str,
    docs_storage_path: str,
    chunk_size: int = 1000,
    chunk_overlap: int = 100,
) -> "Chroma":
    """Load, split and index the knowledge-base files; return the vector store.

    Args:
        pdf_storage_path: Directory scanned (non-recursively) for ``*.pdf`` files.
        docs_storage_path: Directory scanned (non-recursively) for ``.docx``,
            ``.html`` and ``.csv`` files; other suffixes are ignored.
        chunk_size: Maximum chunk length handed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks.

    Returns:
        The Chroma store built from every chunk, after it has also been passed
        through the record-manager-backed ``index`` call.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs: List[Document] = []

    for pdf_path in Path(pdf_storage_path).glob("*.pdf"):
        docs += text_splitter.split_documents(PyMuPDFLoader(str(pdf_path)).load())

    # Built inside the function so the loader classes are only looked up at call time.
    loaders_by_suffix = {
        ".docx": UnstructuredWordDocumentLoader,
        ".html": UnstructuredHTMLLoader,
        ".csv": CSVLoader,
    }
    for doc_path in Path(docs_storage_path).glob("*"):
        suffix = doc_path.suffix.lower()
        loader_cls = loaders_by_suffix.get(suffix)
        if loader_cls is None:
            continue
        documents = loader_cls(str(doc_path)).load()
        if suffix == ".csv":
            # NOTE(review): the Info_Url cleanup directly follows the CSV loader in
            # the original and is therefore applied to CSV rows only -- confirm.
            for doc in documents:
                doc.page_content = _drop_empty_info_url_lines(doc.page_content)
        docs += text_splitter.split_documents(documents)

    doc_search = Chroma.from_documents(docs, embeddings_model)

    # NOTE(review): from_documents() has already been given every chunk, and
    # index() is then run with the same chunks against the same store. Verify
    # whether this writes chunks twice; behaviour is left unchanged here.
    namespace = "chromadb/datasphere"
    record_manager = SQLRecordManager(
        namespace, db_url="sqlite:///record_manager_cache.sql"
    )
    record_manager.create_schema()
    index_result = index(
        docs,
        record_manager,
        doc_search,
        cleanup="incremental",
        source_id_key="source",
    )
    print(f"Indexing stats: {index_result}")
    return doc_search
# Built once at import time; main() derives its retriever from this store.
doc_search = process_documents(PDF_STORAGE_PATH, DOCS_STORAGE_PATH)
# ----------------------------=== @cl.set_starters ===------------------------------  # Booking informasjon, Daysoff
@cl.set_starters
async def set_starters():
    """Return the starter prompts offered to the user.

    Registered through ``@cl.set_starters``; without the decorator the function
    is defined but never handed to Chainlit.

    NOTE(review): labels and messages were recovered from mis-decoded text. The
    Norwegian letters are unambiguous; the styled labels "Booking informasjon"
    and "AI Kundeservice.." lost most of their bytes and should be confirmed.
    """
    return [
        cl.Starter(
            label="𝙁𝘼𝙌 𝙛𝙤𝙧 𝙖𝙣𝙨𝙖𝙩𝙩𝙚",
            message="Hva er spørsmål og svar dere ofte får fra ansatte i bedrifter med DaysOff firmahytteordning?",
            icon="/public/faq-1.svg",
        ),
        cl.Starter(
            label="𝙁𝘼𝙌 𝙛𝙤𝙧 𝙪𝙩𝙡𝙚𝙞𝙚𝙧𝙚",
            message="Hva er spørsmål og svar dere får fra utleiere?",
            icon="/public/faq-2.svg",
        ),
        cl.Starter(
            label="𝙋𝙚𝙧𝙨𝙤𝙣𝙫𝙚𝙧𝙣",
            message="Hvilke spørsmål får dere vanligvis om personvernspolicyen?",
            icon="/public/terminal.svg",
        ),
        cl.Starter(
            label="𝙱𝚘𝚘𝚔𝚒𝚗𝚐 𝚒𝚗𝚏𝚘𝚛𝚖𝚊𝚜𝚓𝚘𝚗",
            message="Halla, du! Ryktet sier du kan fiske opp info for et bookingnr.?",
            icon="/public/booking_id.svg",
        ),
        cl.Starter(
            label="𝘿𝙖𝙮𝙨ᴏꜰꜰ",
            message="Gi en kort oppsummering av hva daysoff.no dreier seg om",
            icon="/public/daysoff.svg",
        ),
        cl.Starter(
            label="𝘈𝘐 𝘒𝘶𝘯𝘥𝘦𝘴𝘦𝘳𝘷𝘪𝘤𝘦..",
            message="Hva er dette og hvem er du?",
            icon="/public/metric-space.svg",
        ),
    ]
# ----------------------------=== @cl.on_chat_start ===------------------------------ | |
@cl.on_chat_start
async def main():
    """Build the retrieval chain for a new chat and store it in the user session.

    Registered through ``@cl.on_chat_start``. The chain feeds the user's question
    to the retriever derived from ``doc_search``, joins the retrieved passages
    into ``{context}``, fills the system prompt, calls ``model`` and parses the
    result to a string. It is stored under the session key ``"runnable"``.
    """
    # ----------------------------=== system-instruct ===------------------------------
    # Prompt text is kept as found, apart from repairing mis-decoded characters
    # (Norwegian letters, curly quotes, guillemets, styled table headers).
    template = """
## Daysoff Kundeservice AI Support
You are a customer support assistant for Daysoff.
## Assistant behaviour
- languages: Norwegian (default), English, Polish, Latin, Spanish and Korean.
- response prefix: consistently adhere to not adding prefix “Answer:” or “Svar:” to your response
- human support: ```do not refer users to kundeservice@daysoff.no arbitrarily. Only give out this contact information if
there is a query you absolutely cannot handle yourself or if user insists on talking to human support```
- communication archetype (default): empathetic professional with feminine resonance
- style: focus on emotionally resonant storytelling that builds strong connections with users, inspired by industry-leading
content creators like Jon Morrow, Seth Godin, and Neil Patel
- emojis policy: use when appropriate for better engagement and clarity
- assistant name: “Agrippina”, inspired by Julia Agrippina (15-59 AD) for her remarkable organizational and administrative abilities.
- fun fact: there are 6,227,020,800 possible anagram combinations to evaluate for “Julia Agrippina”
## Assistant tasks
# Handle queries about booking information:
- concisely use the term “bookingnummer”
- always format booking-related answers using **markdown tables for clarity**.
- ```help user with details in their booking information:
example 1:
User: "Kan jeg sjekke inn tidlig?", if you do not have the bookingnumber already,
you should ask user for bookingnr, retrive the booking information and inform about the related check-in time.
example 2:
User: "Hvor mange gjester er på denne bookingen?", if you do not have the bookingnumber already,
you should ask user for bookingnr, retrive the booking information and inform about the related number of guests.
(etc.)
```
# Q&A with Daysoff Kundeservice AI Support
- Daysoff, general info: brand, firmahytte ordensregler, verticals, link to website:https://www.daysoff.no
- Daysoff, social media links:
[@daysoffnow] Instagram, [facebook.com/daysoff.no] Facebook, [linkedin.com/company/daysoff] Linkedin, [@DaysOffNow] Twitter/X
# Frequently Asked Questions
If user query is about FAQs, display FAQ accordingly.
> Notes: inform user to copy and paste the question from the currently displayed table they like answered.
"FAQ for Ansatte": ```Place the following questions in a markdown table:
|# 𝙁𝘼𝙌 𝙛𝙤𝙧 𝙖𝙣𝙨𝙖𝙩𝙩𝙚|
|:----------------|
|--- |```
Hvordan registrerer jeg meg som bruker?,
Når får jeg leieinstruks for min bestilling? Informasjon om nøkler etc.?,
Det står barneseng og barnestol under fasiliteter, må dette forhåndsbestilles?,
Kan jeg ta med hund eller katt?,
Jeg har lagt inn en bestilling hva skjer videre?,
Jeg har bestilt firmahytte, men kan ikke reise. Kan jeg endre navn på bestillingen til min kollega eller familiemedlem som vil reise i stedet for meg?",
"Kan jeg avbestille min reservasjon?,
Jeg har bestilt utvask. Hva må jeg gjøre i tillegg til dette?,
Jeg er medlem og eier en hytte! Kan jeg bli utleier i DaysOff?,
Bestille opphold?
"FAQ for Utleiere": ```Place the following questions in a markdown table:
|# 𝙁𝘼𝙌 𝙛𝙤𝙧 𝙪𝙩𝙡𝙚𝙞𝙚𝙧𝙚|
|:----------------|
|--- |```
Hva er betingelser for utleie?,
Hvor lang tid har jeg på å bekrefte en bestilling?,
Hvilke kanselleringsregler gjelder?,
Hvem er kundene deres?",
Kan jeg legge inn rabatterte priser for å lage egne kampanjer?,
Når mottar jeg betaling for leie?",
Jeg fikk en e-post om ny bestilling, men jeg finner den ikke i systemet?,
Hvordan registrerer jeg opptatte perioder i kalenderen?,
Jeg leier ut i andre kanaler. Hvordan kan jeg synkronisere kalenderne?
"Personvernspolicy FAQ": ```Place the following questions in a markdown table:
|# 𝙋𝙚𝙧𝙨𝙤𝙣𝙫𝙚𝙧𝙣𝙨𝙥𝙤𝙡𝙞𝙘𝙮 𝙁𝘼𝙌|
|:----------------|
|--- |```
Hvilke personlige opplysninger samler dere inn?,
Kan dere motta personlig informasjon fra tredjepart?,
Hvordan bruker dere mine personlige opplysninger?,
Med hvem deler dere mine personlige opplysninger?,
Adferdsmessig annonsering?,
Hvordan reagerer dere på «Spor ikke» forespørsler?,
Hva er mine rettigheter?,
Hvordan beskytter dere dataene mine?,
Hvilke data brudd prosedyrer har dere på plass?,
Hvem i deres team har tilgang til mine data?,
Hva er policyendringer?"
## Use the following context to interact with user:
=====================
{context}
=====================
Question: {question}
"""
    prompt = ChatPromptTemplate.from_template(template)

    # ------------------------------=== retriever ===-----------------------------------
    def format_docs(docs):
        """Join the retrieved documents' text, separated by blank lines."""
        return "\n\n".join([d.page_content for d in docs])

    retriever = doc_search.as_retriever()
    runnable = (
        # The raw question goes both to the retriever and, unchanged, to the prompt.
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )
    cl.user_session.set("runnable", runnable)
# ----------------------------=== @cl.on_message ===------------------------------ | |
# Booking numbers: six capital letters followed by six digits, as a whole word.
_BOOKING_NUMBER_RE = re.compile(r"\b[A-Z]{6}\d{6}\b")


class PostMessageHandler(BaseCallbackHandler):
    """Callback holder tied to the outgoing message; currently performs no action."""

    def __init__(self, msg: cl.Message):
        BaseCallbackHandler.__init__(self)
        self.msg = msg

    def on_llm_end(self, response, *, run_id, parent_run_id, **kwargs):
        # Intentionally empty: hook point for post-processing once the LLM finishes.
        pass


@cl.on_message
async def incoming(message: cl.Message):
    """Answer one user message.

    Registered through ``@cl.on_message``. A message containing a booking number
    is handed to ``booking_agent_system`` and bypasses the retriever chain. Any
    other message is streamed through the session's ``"runnable"`` chain, token
    by token, into a new Chainlit message.
    """
    if _BOOKING_NUMBER_RE.search(message.content):
        booking_msg = cl.Message(content="")
        await booking_agent_system(message, booking_msg)
        return

    # No booking number found: fall back to the retrieval chain.
    runnable = cl.user_session.get("runnable")  # type: Runnable
    msg = cl.Message(content="")

    async for chunk in runnable.astream(
        message.content,
        config=RunnableConfig(callbacks=[PostMessageHandler(msg)]),
    ):
        await msg.stream_token(chunk)
    await msg.send()