Spaces:
Sleeping
Sleeping
import os | |
import logging | |
from langchain_community.retrievers import BM25Retriever, TavilySearchAPIRetriever | |
from langchain.retrievers import EnsembleRetriever | |
from langchain_core.output_parsers import StrOutputParser | |
from basic_chain import get_model | |
from rag_chain import make_rag_chain | |
from remote_loader import load_web_page | |
from splitter import split_documents | |
from vector_store import create_vector_db | |
from dotenv import load_dotenv | |
def ensemble_retriever_from_docs(docs, embeddings=None): | |
texts = split_documents(docs) | |
vs = create_vector_db(texts, embeddings) | |
vs_retriever = vs.as_retriever() | |
bm25_retriever = BM25Retriever.from_texts([t.page_content for t in texts]) | |
# tavily_retriever = TavilySearchAPIRetriever(k=3, include_domains=['https://ilibrary.ru/text/107']) | |
tavily_retriever = MyTavilySearchAPIRetriever(k=3, include_domains=['https://equitygroupholdings.com']) | |
ensemble_retriever = EnsembleRetriever( | |
retrievers=[bm25_retriever, vs_retriever, tavily_retriever], | |
weights=[0.5, 0.5, 0.5]) | |
return ensemble_retriever | |
class MyTavilySearchAPIRetriever(TavilySearchAPIRetriever): | |
def _get_relevant_documents( | |
self, query: str, *, run_manager | |
): | |
try: | |
return super()._get_relevant_documents(query, run_manager=run_manager) | |
except Exception as e: | |
logging.error(f"TavilySearch error: {e}") | |
return [] | |
def main(): | |
load_dotenv() | |
problems_of_philosophy_by_russell = "https://www.gutenberg.org/ebooks/5827.html.images" | |
docs = load_web_page(problems_of_philosophy_by_russell) | |
ensemble_retriever = ensemble_retriever_from_docs(docs) | |
model = get_model("ChatGPT") | |
chain = make_rag_chain(model, ensemble_retriever) | StrOutputParser() | |
result = chain.invoke("What are the key problems of philosophy according to Russell?") | |
print(result) | |
if __name__ == "__main__": | |
# this is to quite parallel tokenizers warning. | |
os.environ["TOKENIZERS_PARALLELISM"] = "false" | |
main() | |