# Spaces: Sleeping -- appears to be hosting-page status text captured along
# with the file; it is not part of the program.
from openai import OpenAI | |
import streamlit as st | |
from langchain_openai import ChatOpenAI | |
from langchain_openai.embeddings import OpenAIEmbeddings | |
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
import markdown | |
from operator import itemgetter | |
from langchain.schema.runnable import RunnablePassthrough | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain.schema import Document | |
from dotenv import load_dotenv | |
from langchain_community.vectorstores import Qdrant | |
from PIL import Image, ImageEnhance | |
from tools import sentiment_analysis_util | |
#from langchain_qdrant import Qdrant | |
import os | |
import pandas as pd | |
import numpy as np | |
import datetime | |
# App config
# Load the .env file once and build the shared model clients a single time
# (the previous version repeated this whole sequence twice).
# os.environ[...] raises KeyError at startup when OPENAI_API_KEY is not set.
load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
base_llm = ChatOpenAI(model="gpt-4o")
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Page config
# NOTE(review): the glyphs in page_title / page_icon look mis-encoded
# (presumably emoji originally) -- confirm against the deployed app.
st.set_page_config(
    page_title="Narrativ π°",
    layout="wide",
    initial_sidebar_state="expanded",
    page_icon="π",
)

# Defaults; the sidebar widgets further down reassign all three on every run.
uploaded_file = None
topic = 'employment'
date = '2025-02-15'
# Custom CSS for centered content: a fixed-width main container, a narrower
# centered select box, and a helper class that centers text.
CENTERED_LAYOUT_CSS = """
<style>
.main-container {
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
.stSelectbox {
max-width: 400px;
margin: 0 auto;
}
/* Center all text elements */
.centered-text {
text-align: center;
}
</style>
"""
st.markdown(CENTERED_LAYOUT_CSS, unsafe_allow_html=True)
# Header: five columns, with the page title rendered in the double-width
# middle column.
header_columns = st.columns([1, 1, 2, 1, 1])
col1, col2, col3, col4, col5 = header_columns
from PIL import Image, ImageEnhance
with col3:
    st.markdown(
        "<h1 class='centered-text'>Search Narrativ</h1>",
        unsafe_allow_html=True,
    )
# Suggestions offered in the sidebar topic selector.
topic_suggestions = ["employment", "remote work", "unemployment"]

# Sentiment index table, indexed by the 'index' column.
data = pd.read_csv(
    './data/sentiment_index_hr_index_final2.csv',
    index_col='index',
    parse_dates=True,
)
# Convert the index to datetime, if not already done
data.index = pd.to_datetime(data.index)

# Unique index dates in ascending order.
sorted_dates = sorted(pd.unique(data.index))
# Dates formatted as 'YYYY-MM-DD', preceded by '' which stands for
# "no date selected" (it is the default option of the date selector).
date_suggestions = np.append(
    '',
    [pd.Timestamp(stamp).strftime('%Y-%m-%d') for stamp in sorted_dates],
)

# Define the allowed date range
# NOTE(review): neither bound is referenced in the visible part of the file.
start_date = datetime.date(2025, 1, 15)
end_date = datetime.date(2025, 1, 21)
# Sidebar: news controls (topic, optional date, summarize button) followed by
# the survey controls (file upload, results button).
sidebar = st.sidebar
with sidebar:
    # NOTE(review): the leading glyphs in the labels below look mis-encoded
    # (presumably emoji originally) -- confirm.
    st.subheader("π° News")

    topic_options = ["", *topic_suggestions]
    topic = st.selectbox(
        "Topic:",
        options=topic_options,
        index=0,
        key="topic_select",
        placeholder="Select or type a topic...",
    )

    # The options are numpy strings, so normalise the selection to a plain str;
    # '' (the first option) means no date was chosen.
    date = str(
        st.selectbox(
            "Date (optional):",
            options=date_suggestions,
            index=0,
            key="date_select",
            placeholder="Select or type a date...",
        )
    )

    prompt = st.button(
        "Summarize News",
        key="chat_button",
        use_container_width=True,
    )

    st.subheader("π Survey")
    uploaded_file = st.file_uploader("π Upload Pulse Survey (.txt)", type="txt")
    prompt_survey = st.button(
        "Survey results",
        key="chat_button1",
        use_container_width=True,
    )
# Handle search submission
# Create the chat history on the first run of a session, seeded with one
# assistant entry holding the current date selection and button state.
# NOTE(review): the original indentation was lost; the seed entry is assumed
# to belong inside this first-run branch -- confirm.
if 'messages' not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": f'{date} {prompt}'},
    ]
# "Summarize News" flow: load the matching articles, add live Reddit posts,
# index everything in an in-memory vector store, ask the LLM for a sentiment
# summary, and save the article context for follow-up questions.
if prompt:
    # Show the pre-rendered sentiment index chart above the summary.
    image = Image.open('./data/Sentiment_index_hr.png')
    st.image(image, output_format="PNG", clamp=True)

    # Always bind data_all (possibly empty) so the steps below still run when
    # loading fails; previously a failure here led to a NameError further down.
    data_all = np.array([], dtype=object)
    try:
        data = pd.read_csv(
            './data/sentiment_index_hr_index_final2.csv',
            index_col='index',
            parse_dates=True,
        )
        if date:
            # Keep only the rows of the selected day.
            data = data.loc[data.index == date]

        if data.empty:
            filtered_data = data
        else:
            # Keep rows in which any cell (as text) contains the topic.
            topic_mask = data.apply(
                lambda row: row.astype(str).str.contains(topic, na=False).any(),
                axis=1,
            )
            filtered_data = data[topic_mask]
            if not date and len(filtered_data) < 1:
                # No topic match and no date filter: fall back to every
                # article. The old fallback indexed the frame with a frame of
                # strings, which is not a valid mask and always raised.
                filtered_data = data

        data_all = filtered_data.values.flatten()
        if len(data_all) < 1:
            st.warning("No articles found that contain the prompt string.")
    except Exception as e:
        # Report the real failure instead of blaming a missing topic.
        st.error(f"Error loading news articles: {e}")

    # scrape in real time reddit news
    reddit_news_articles = sentiment_analysis_util.fetch_reddit_news('')

    # One bullet per non-missing value, articles first, Reddit posts after.
    docs_text = "\n".join([f"- {value}" for value in data_all if not pd.isna(value)])
    docs_text_reddit = "\n".join(
        [f"- {value}" for value in reddit_news_articles if not pd.isna(value)]
    )
    docs_text = docs_text + '\n' + 'Reddit news:' + '\n' + docs_text_reddit
    docs = [Document(page_content=docs_text)]

    # Explicit UTF-8 so non-ASCII post text cannot fail on other locales.
    with open('./data/reddit.txt', 'w', encoding="utf-8") as file:
        file.write(docs_text_reddit)

    # Chunk the combined text and index it in an in-memory Qdrant collection.
    split_documents = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1000,
        chunk_overlap=20
    ).split_documents(docs)
    vectorstore = Qdrant.from_documents(
        split_documents,
        embedding_model,
        location=":memory:",
        collection_name="langchainblogs"
    )
    retriever = vectorstore.as_retriever()
    print("Loaded Vectorstore")

    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": topic})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(topic)

    # Generate summarized message rationalize dominant sentiment
    RAG_PROMPT = """You are an HR analyst specializing in employment trends, workforce dynamics, and remote work adoption. Your task is to analyze news articles provided by a client on a specific topic. You will receive the full text of relevant articles, along with key data points. Your goal is to evaluate labor market conditions and provide insights into workforce changes.
Your Tasks:
1. Summarize Opinions:
Extract the key opinions and perspectives from the provided news articles, reddit posts and linkedin posts.
The news articles will include: title, URL, date, text, article source, sentiment index created by the company, sentiment index using HF (Hugging Face) model, and confidence for the HF index.
The reddit posts will include: title, URL, date, text.
Highlight any significant patterns, agreements, or disagreements across sources regarding job trends, hiring, layoffs, wages, or remote work policies.
Include sentiment from reddit articles! Explicitly mention the reddit source in the summary.
2. Analyze Sentiment:
Determine the overall sentiment (positive, negative, neutral) about labor market conditions based on the extracted opinions.
Provide a clear explanation of your sentiment conclusion, referencing specific points or trends from the articles.
3. Provide Chain-of-Thought Reasoning:
Detail your reasoning process step by step. Explain how you interpreted the articles, derived insights, and reached your sentiment conclusion.
Ensure the reasoning is logical, transparent, and grounded in the content provided.
4. Collect URL Sources:
From the provided context, select 5 critical and recent URL sources related to labor market trends and remote work policies.
Output Format:
Summary of Opinions: [Concise summary of key opinions]
Sentiment Analysis:
Sentiment: [Positive/Negative/Neutral]
Reasoning: [Detailed explanation here]
Chain-of-Thought Reasoning: [Step-by-step explanation]
Sources: [URLs for 5 most critical and recent articles on this topic]
Guidelines:
Maintain objectivity and precision in your analysis.
Focus on labor market trends, job market shifts, and remote work dynamics.
Use professional and analytical language suitable for client reports.
Respond in the language of the article (mostly English).
CONTEXT:
{context}
QUERY:
{question}
Use the provided context to answer the user's question. Only use the provided context to answer the question. If you do not know the answer, respond with "I don't know."""
    rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

    # RAG CHAIN: retrieve context for the question, then return both the
    # model response and the retrieved context.
    lcel_rag_chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": rag_prompt | base_llm, "context": itemgetter("context")}
    )
    try:
        summary = lcel_rag_chain.invoke({"question": topic})
        print(summary)
        st.chat_message("assistant").write((summary['response'].content))
    except Exception as e:
        st.error(f"Error generating summary: {e}")

    # Save the article context for the follow-up "news" agent, which reads the
    # same file back as UTF-8; write it with the matching encoding.
    # NOTE(review): str() of a large numpy array is abbreviated with "...",
    # so long article sets are saved truncated -- confirm whether intended.
    if date:
        context_path = './data/sentiment_index_hr_index_final_date.md'
    else:
        context_path = './data/sentiment_index_hr_index_final1.md'
    with open(context_path, 'w', encoding="utf-8") as file:
        file.write(str(data_all))
# "Survey results" flow: hand the uploaded file to the survey agent and show
# whatever it returns as an assistant chat message.
if prompt_survey:
    import survey_summary

    # Keep the upload in session state.
    st.session_state['uploaded_file'] = uploaded_file

    analysis = survey_summary.survey_agent('', uploaded_file)
    assistant_box = st.chat_message("assistant")
    assistant_box.write(str(analysis))
# Raw OpenAI client plus the default model name kept in session state.
client = OpenAI(api_key=OPENAI_API_KEY)
if "openai_model" not in st.session_state:
    st.session_state["openai_model"] = "gpt-4o"

# Free-text follow-up question; a suggested-keyword button may override it.
prompt1 = st.chat_input("Type your additional questions here...")

# Suggested keywords with enhanced styling
suggested_keywords = [
    "Latest News",
    "News on remote work",
    "Survey sentiment",
    "Employee satisfaction",
    "How many employees?",
]
st.markdown("**Suggested Keywords:**")
cols = st.columns(len(suggested_keywords))
# One button per keyword, each in its own column; a clicked button replaces
# the typed question with its keyword.
for keyword_column, keyword in zip(cols, suggested_keywords):
    if keyword_column.button(keyword, key=keyword):
        prompt1 = keyword
# Follow-up question flow: a supervisor LLM call picks a route ('history',
# 'data', 'survey' or 'news'), the matching agent (if any) adds its answer to
# the chat history, and a second supervisor call writes the final reply.
if prompt1:
    st.session_state.messages.append({"role": "user", "content": prompt1})

    # Survey text is needed for routing and, if chosen, by the survey agent;
    # read it once.
    with open('./data/employee_pulse_survey.txt', 'r') as file:
        survey_txt = file.read()

    # Decide if call SQL agent, SURVEY agent or SENTIMENT agent
    database_columns = pd.read_csv('./data/hr_data.csv').columns
    routing_reply = base_llm.invoke(f"""You are the Supervisor of the company. In your team you have, general conversation analyst, data analyst, survey analyst and news article analyst.
If the question {prompt1} can be answered from the history of the conversation:{st.session_state.messages[-10:]} or you can use your knowledge and do not need to call the team members, respond 'history'.
If not: decide if the question: '{prompt1}' is about data available in the database, based on the following columns: {database_columns}, it has information about all employees. If yes, respond 'data'.
If not: decide if the question is asking about the survey: {survey_txt}. If yes, respond 'survey'.
If not: decide if the question is asking about news articles on employment trends or remote work. If yes, respond 'news'.
Your response will be either 'history' or 'data' or 'survey' or 'news'.
Don't answer anything else.
Survey: {survey_txt}""")
    st.write(routing_reply.content)

    # Substring match on the lower-cased reply; 'history' matches no branch,
    # so the final supervisor call answers from the conversation alone.
    route = routing_reply.content.lower()
    if 'data' in route:
        # SQL AGENT
        import sql_agent
        response = sql_agent.sql_agent(f'the question is: {prompt1} and the history is: {st.session_state.messages[-10:]}')
        st.session_state.messages.append({"role": "sql_agent", "content": response})
    elif 'news' in route:
        # SENTIMENT AGENT: answer from the article context saved by the
        # "Summarize News" flow (the dated file when a date is selected).
        if date:
            file_path = './data/sentiment_index_hr_index_final_date.md'
        else:
            file_path = './data/sentiment_index_hr_index_final1.md'
        try:
            with open(file_path, "r", encoding="utf-8") as file_content:
                docs = file_content.read()
        except Exception as e:
            st.error(f"Error loading context: {e}")
            docs = ""
        response = base_llm.invoke(f"""You are a data analyst, the question is: {prompt1}, the conversation history is: {st.session_state.messages[-10:]} and the context is from {docs}""")
        # Store the reply text, not the whole message object, so history
        # entries stay plain strings like the supervisor's own entry.
        st.session_state.messages.append({"role": "news_agent", "content": response.content})
    elif 'survey' in route:
        # SURVEY AGENT (reuses the survey text read above)
        import survey_agent1
        response = survey_agent1.analyze_survey_document(survey_txt, f'the question is: {prompt1} and the history is: {st.session_state.messages[-10:]}')
        st.session_state.messages.append({"role": "survey_agent", "content": response})

    # Go back to the MAIN SUPERVISOR
    print('History:', st.session_state.messages[-10:])
    response = base_llm.invoke(f"""You are a supervisor, who collects the answers from the team and give the final answer to the user.
Take the last response, 'response', from your team member: SQL agent, SURVEY agent or SENTIMENT agent and give the final answer to the user.
The user's question is: {prompt1},
the responses from the team are: {st.session_state.messages[-10:]}""")
    st.chat_message("supervisor").write(str(response.content))
    st.session_state.messages.append({"role": "supervisor", "content": response.content})
# with st.chat_message("user"): | |
# st.markdown(prompt1) | |
# # Display assistant response in chat message container | |
# with st.chat_message("assistant"): | |
# try: | |
# stream = client.chat.completions.create( | |
# model=st.session_state["openai_model"], | |
# messages=[ | |
# {"role": m["role"], "content": m["content"]} | |
# for m in st.session_state.messages[:-10] | |
# ], | |
# stream=True, | |
# ) | |
# response = st.write_stream(stream) | |
# st.session_state.messages.append({"role": "supervisor", "content": response}) | |
# except Exception as e: | |
# st.error(f"Error generating response: {e}") | |