Spaces:

rohan13
/

Roar

Runtime error

File size: 9,967 Bytes

import os
import pickle
import langchain

import faiss
from langchain import HuggingFaceHub
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader, TextLoader, UnstructuredHTMLLoader, UnstructuredPDFLoader, UnstructuredWordDocumentLoader, PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings
from langchain.memory import ConversationBufferWindowMemory
from langchain.llms.openai import OpenAI, OpenAIChat
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.cache import InMemoryCache
from langchain.chat_models import ChatGooglePalm
import traceback


# Install an in-memory cache as LangChain's global LLM cache.
langchain.llm_cache = InMemoryCache()

# NOTE(review): a `global` statement at module level has no effect, and
# `model_name` is never assigned anywhere in the visible code.
global model_name

# Model labels that set_model() recognises.
models = ["GPT-3.5", "Flan UL2", "GPT-4", "Flan T5", "Palm"]

# File-name suffixes; get_file_path() prepends `models_folder` plus a
# backend prefix ("openai", "palm" or "hf") to build the actual path.
pickle_file = "_vs.pkl"
updated_pickle_file = "_vs_updated.pkl"
index_file = "_vs.index"
models_folder = "models/"

# Default LLM; replaced by set_model().
llm = ChatOpenAI(model_name="gpt-4", temperature=0.1)

# Embedding model used when building the FAISS index; replaced by
# set_embeddings().
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

# (question, answer) tuples passed to the chain; rewritten by
# generate_answer() and cleared by set_model_and_embeddings().
chat_history = []

# NOTE(review): not referenced anywhere else in the visible code.
memory = ConversationBufferWindowMemory(memory_key="chat_history", k=10)

# Active vector store; set by get_search_index() and add_to_index(),
# read by ingest() and generate_answer().
vectorstore_index = None

# Name substituted into the system prompt, read from the environment
# variable `name`; defaults to "Rohan" when the variable is unset.
name = os.environ.get("name", "Rohan")

# System prompt for the assistant. `{name}` is substituted immediately
# below; `{context}` is kept as a template placeholder.
system_template = """You are ROAR, {name}'s personal assistant supposed to ANSWER QUESTIONS ON HIS BEHALF. 
STRICTLY FOLLOW THIS: FOR OPINIONS, PREFERENCES, EXPERIENCES,ALWAYS ANSWER IN FIRST PERSON AS IF YOU ARE {name}. ALWAYS Generate this response with experiences/opinions using {name}'s RESUME available in context/vectorstore. 
Response should be in professional language and tone, impressive, catchy, and grammatically correct. 
Use {name}'s resume and your knowledge of his experience and skills to answer questions to the best of your ability. 
Answer the question as if you are assisting {name} or answering on his behalf.
----------------
This activity of answering questions on {name}'s behalf will be called Roar. 
For example: If someone wants to ask you a question, they will say "Roar it" and you will answer the question on {name}'s behalf by generating a response using {name}'s resume and your knowledge of his experience and skills.
Add a qwirky and funny line in the end to encourage the user to try more Roars as they are free.
----------------
{context}
"""
# Fill in `{name}` now. Passing context="{context}" writes the literal
# placeholder back into the string so it survives this format() call and
# remains available to the prompt template (presumably filled with the
# retrieved documents by the chain — confirm against LangChain).
system_template = system_template.format(name=name, context="{context}")

# Chat prompt: the system message above followed by the user's question.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
# Passed to the combine-docs step of the chain built in get_qa_chain().
CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)


def set_model_and_embeddings(model):
    """Switch the active LLM to ``model`` and start a fresh conversation.

    Despite the name, only the language model is switched: the matching
    ``set_embeddings`` call is disabled, so the module-level embeddings
    are left as they are.

    Args:
        model: Model label understood by ``set_model``.
    """
    global chat_history
    set_model(model)
    # Reset the history only after the model switch has gone through.
    chat_history = list()


def set_model(model):
    """Replace the module-level ``llm`` with the backend chosen by ``model``.

    Args:
        model: One of "GPT-3.5", "GPT-4", "Flan UL2", "Flan T5" or "Palm".
            Any other value falls back to the ``text-davinci-002``
            completion model.
    """
    global llm
    print("Setting model to " + str(model))
    if model == "GPT-3.5":
        print("Loading GPT-3.5")
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.5)
    elif model == "GPT-4":
        print("Loading GPT-4")
        # gpt-4 is a chat-completions model, so build it with the chat
        # wrapper (as the module-level default does) rather than the
        # completion-style ``OpenAI`` class.
        llm = ChatOpenAI(model_name="gpt-4", temperature=1)
    elif model == "Flan UL2":
        print("Loading Flan-UL2")
        llm = HuggingFaceHub(repo_id="google/flan-ul2", model_kwargs={"temperature": 0.1, "max_new_tokens":500})
    elif model == "Flan T5":
        print("Loading Flan T5")
        llm = HuggingFaceHub(repo_id="google/flan-t5-base", model_kwargs={"temperature": 0.1})
    elif model == "Palm":
        llm = ChatGooglePalm(temperature=0)
    else:
        # NOTE(review): the message says GPT-3.5 but the model actually
        # loaded is text-davinci-002.
        print("Loading GPT-3.5 from else")
        llm = OpenAI(model_name="text-davinci-002", temperature=0.1)


def set_embeddings(model):
    """Point the module-level ``embeddings`` at the backend matching ``model``.

    OpenAI labels select ``text-embedding-ada-002``; Flan labels select the
    Hugging Face ``all-MiniLM-L6-v2`` sentence transformer. Any other label
    (including "Palm") leaves the current embeddings untouched.

    Args:
        model: Model label such as "GPT-4" or "Flan T5".
    """
    global embeddings
    if model in ("GPT-3.5", "GPT-4"):
        print("Loading OpenAI embeddings")
        embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
        return
    if model in ("Flan UL2", "Flan T5"):
        print("Loading Hugging Face embeddings")
        embeddings = HuggingFaceHubEmbeddings(repo_id="sentence-transformers/all-MiniLM-L6-v2")


def get_search_index(model, first_time=False):
    """Load the vector store for ``model`` from disk, or build it if absent.

    The pickle that is checked for existence is the same one that is then
    loaded, so a store saved by ``add_to_index`` is actually picked up on
    later calls.

    Args:
        model: Model label; selects the file-name prefix via
            ``get_file_path``.
        first_time: When true, use the base pickle; otherwise use the
            updated pickle written by ``add_to_index``.

    Returns:
        The loaded or newly created search index. It is also stored in
        the module-level ``vectorstore_index``.
    """
    global vectorstore_index
    if not first_time:
        print("Using updated pickle file")
        file = updated_pickle_file
    else:
        print("Using base pickle file")
        file = pickle_file

    pickle_path = get_file_path(model, file)
    index_path = get_file_path(model, index_file)
    if (os.path.isfile(pickle_path) and os.path.isfile(index_path)
            and os.path.getsize(pickle_path) > 0):
        # Load the pickle that was just checked, not always the base one.
        search_index = load_index(model, file)
    else:
        search_index = create_index(model)

    vectorstore_index = search_index
    return search_index


def load_index(model, file=None):
    """Unpickle a previously saved search index.

    Args:
        model: Model label; selects the file-name prefix via
            ``get_file_path``.
        file: File-name suffix of the pickle to load. Defaults to the base
            ``pickle_file`` when omitted.

    Returns:
        The unpickled search index.
    """
    if file is None:
        file = pickle_file
    # SECURITY: pickle.load executes arbitrary code from the file; only
    # load pickles this application wrote itself.
    with open(get_file_path(model, file), "rb") as f:
        search_index = pickle.load(f)
        print("Loaded index")
    return search_index


def create_index(model):
    """Build a search index from the source documents and save it to disk.

    Documents are fetched, split into chunks and embedded. The FAISS index
    is written to the index file and the whole store is pickled to the
    base pickle file.

    Args:
        model: Model label; selects the file-name prefix via
            ``get_file_path``.

    Returns:
        The newly created search index.
    """
    sources = fetch_data_for_embeddings()
    source_chunks = split_docs(sources)
    search_index = search_index_from_docs(source_chunks)

    index_path = get_file_path(model, index_file)
    pickle_path = get_file_path(model, pickle_file)
    # Make sure the target folder exists; otherwise both writes below fail
    # on a fresh checkout.
    for path in (index_path, pickle_path):
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    faiss.write_index(search_index.index, index_path)
    # Save index to pickle file
    with open(pickle_path, "wb") as f:
        pickle.dump(search_index, f)
        print("Created index")
    return search_index


def get_file_path(model, file):
    """Return the on-disk path for ``file`` under the backend of ``model``.

    The path is ``models_folder`` + a backend prefix + ``file``. The prefix
    is "openai" for "GPT-3.5"/"GPT-4", "palm" for "Palm", and "hf" for
    everything else.

    Args:
        model: Model label.
        file: File-name suffix, e.g. ``pickle_file``.
    """
    if model in ("GPT-3.5", "GPT-4"):
        prefix = "openai"
    elif model == "Palm":
        prefix = "palm"
    else:
        prefix = "hf"
    return models_folder + prefix + file


def search_index_from_docs(source_chunks):
    """Embed ``source_chunks`` with the module-level ``embeddings``.

    Args:
        source_chunks: Document chunks to index.

    Returns:
        A FAISS vector store built from the chunks.
    """
    return FAISS.from_documents(source_chunks, embeddings)


def get_html_files():
    """Load every ``.html`` file found recursively under ``docs``.

    Returns:
        The documents produced by the directory loader.
    """
    html_loader = DirectoryLoader(
        'docs',
        glob="**/*.html",
        loader_cls=UnstructuredHTMLLoader,
        recursive=True,
    )
    return html_loader.load()


def fetch_data_for_embeddings():
    """Collect all source documents to embed: Word files first, then HTML.

    Returns:
        The combined document list.
    """
    documents = get_word_files()
    documents.extend(get_html_files())

    print(f"document list: {len(documents)}")
    return documents


def get_word_files():
    """Load every ``.docx`` file found recursively under ``docs``.

    Returns:
        The documents produced by the directory loader.
    """
    word_loader = DirectoryLoader(
        'docs',
        glob="**/*.docx",
        loader_cls=UnstructuredWordDocumentLoader,
        recursive=True,
    )
    return word_loader.load()

def split_docs(docs):
    """Split ``docs`` into chunks for embedding.

    Splits on single spaces with a chunk size of 800 and no overlap.

    Args:
        docs: Documents to split.

    Returns:
        The resulting chunks.
    """
    chunker = CharacterTextSplitter(separator=" ", chunk_size=800, chunk_overlap=0)
    pieces = chunker.split_documents(docs)
    print(f"chunks: {len(pieces)}")
    return pieces

def load_documents(file_paths):
    """Load each path with the loader that matches its file extension.

    Supported extensions are ``.txt``, ``.docx``, ``.html`` and ``.pdf``.
    Matching is case-insensitive, so ``REPORT.PDF`` is accepted.

    Args:
        file_paths: Iterable of file path strings.

    Returns:
        A flat list of the documents loaded from all paths.

    Raises:
        ValueError: If a path has an unsupported extension.
    """
    document_list = []
    for file_path in file_paths:
        # Compare on a lower-cased copy; the loader gets the original path.
        lowered = file_path.lower()
        if lowered.endswith(".txt"):
            loader = TextLoader(file_path)
        elif lowered.endswith(".docx"):
            loader = UnstructuredWordDocumentLoader(file_path)
        elif lowered.endswith(".html"):
            loader = UnstructuredHTMLLoader(file_path)
        elif lowered.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
        else:
            print("Unsupported file type")
            raise ValueError("Unsupported file type")
        document_list.extend(loader.load())

    print("Loaded " + str(len(document_list)) + " documents")
    return document_list

def add_to_index(docs, index, model):
    """Add ``docs`` to ``index`` and save the result as the updated pickle.

    After the pickle is written, the module-level ``vectorstore_index`` is
    pointed at ``index``.

    Args:
        docs: Documents to add.
        index: Vector store to extend; it is modified in place.
        model: Model label; selects the file-name prefix via
            ``get_file_path``.

    Returns:
        Always ``True``.
    """
    global vectorstore_index
    index.add_documents(docs)
    target_path = get_file_path(model, updated_pickle_file)
    with open(target_path, "wb") as handle:
        pickle.dump(index, handle)
    vectorstore_index = index
    print("Vetorstore index updated")
    return True
def ingest(file_paths, model):
    """Load, split and index the given files into the active vector store.

    Handles txt, docx, html and pdf files (see ``load_documents``). The
    chunks produced by ``split_docs`` are what gets indexed, so ingested
    files are chunked the same way as the base corpus.

    Args:
        file_paths: Paths of the files to ingest.
        model: Model label; selects where the updated pickle is written.

    Returns:
        ``True`` on success. ``False`` if any step raised; the traceback
        is printed.
    """
    print("Ingesting files")
    try:
        docs = load_documents(file_paths)
        # Index the chunks, not the unsplit documents.
        chunks = split_docs(docs)
        add_to_index(chunks, vectorstore_index, model)
        print("Ingestion complete")
    except Exception:
        # Boundary handler: report the failure to the caller as False.
        traceback.print_exc()
        return False
    return True


def get_qa_chain(vectorstore_index):
    """Build a conversational retrieval chain over ``vectorstore_index``.

    The chain uses the module-level ``llm``, a similarity-score-threshold
    retriever (threshold 0.8), ``CHAT_PROMPT`` for the combine-docs step
    and ``get_chat_history`` to render the history. It returns source
    documents and runs in verbose mode.

    Args:
        vectorstore_index: Vector store to retrieve from.

    Returns:
        The configured ``ConversationalRetrievalChain``.
    """
    print(llm)

    threshold_kwargs = {"score_threshold": .8}
    retriever = vectorstore_index.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs=threshold_kwargs,
    )

    return ConversationalRetrievalChain.from_llm(
        llm,
        retriever,
        return_source_documents=True,
        verbose=True,
        get_chat_history=get_chat_history,
        combine_docs_chain_kwargs={"prompt": CHAT_PROMPT},
    )


def get_chat_history(inputs) -> str:
    """Render (human, ai) message pairs as a plain-text transcript.

    Args:
        inputs: Iterable of two-item (human, ai) pairs.

    Returns:
        One ``Human:...`` line and one ``AI:...`` line per pair, joined
        with newlines; an empty string when there are no pairs.
    """
    return "\n".join(f"Human:{human}\nAI:{ai}" for human, ai in inputs)


def generate_answer(question) -> str:
    """Answer ``question`` with the retrieval chain and list its sources.

    Args:
        question: The user's question.

    Returns:
        The chain's answer followed by ``"\\nSOURCES: "`` and the
        de-duplicated source names (file name without directory or
        extension), in retrieval order, separated by ``",\\n"``.
    """
    global chat_history
    chain = get_qa_chain(vectorstore_index)

    result = chain(
        {"question": question, "chat_history": chat_history, "vectordbkwargs": {"search_distance": 0.6}})
    # NOTE(review): the history is replaced, not appended to, so only the
    # latest exchange is sent with the next question — confirm intent.
    chat_history = [(question, result["answer"])]
    print(result)

    sources = []
    for document in result['source_documents']:
        source_path = document.metadata.get('source')
        if not source_path:
            # Skip documents without a source instead of raising KeyError.
            continue
        sources.append(source_path.split('/')[-1].split('.')[0])
    print(sources)

    # dict.fromkeys de-duplicates while keeping first-seen order; a set
    # would make the order of the listed sources vary between runs.
    source = ',\n'.join(dict.fromkeys(sources))
    return result['answer'] + '\nSOURCES: ' + source