Spaces:
Sleeping
Sleeping
adding files
Browse files- .DS_Store +0 -0
- README.md +56 -7
- app_social.py +383 -0
- data/sentiment_index_hr.md +0 -0
- requirements.txt +12 -0
- sql_agent.py +34 -0
- survey_agent1.py +125 -0
- survey_summary.py +93 -0
- tools/.DS_Store +0 -0
- tools/__pycache__/sentiment_analysis_util.cpython-311.pyc +0 -0
- tools/sentiment_analysis_util.py +260 -0
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
README.md
CHANGED
@@ -1,10 +1,59 @@
|
|
1 |
---
|
2 |
-
title: Narrativ
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
-
sdk:
|
7 |
-
|
8 |
---
|
9 |
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Narrativ
|
3 |
+
emoji: 📉
|
4 |
+
colorFrom: pink
|
5 |
+
colorTo: yellow
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.37.1 # The latest supported version
|
8 |
---
|
9 |
|
10 |
+
|
11 |
+
# Narrativ - Employment Trends and Employee Satisfaction Analyzer
|
12 |
+
|
13 |
+
**Narrativ** aggregates and processes data related to employment trends and employee satisfaction from various sources, including internal HR databases and pulse surveys. The app analyzes the data, provides insights into employee satisfaction, and summarizes key trends in employment.
|
14 |
+
|
15 |
+
## Features:
|
16 |
+
|
17 |
+
- **Data Aggregation**: The app collects data from multiple sources, including internal HR databases and employee pulse surveys.
|
18 |
+
- **Employee Satisfaction Analysis**: Each survey response is analyzed to gauge employee satisfaction, with an index indicating overall satisfaction levels.
|
19 |
+
- **Trend Summarization**: The app provides concise summaries of employment trends, highlighting key information about workforce dynamics.
|
20 |
+
- **Comprehensive Insights**: The app generates a report with satisfaction analysis and key employment trends for quick understanding.
|
21 |
+
|
22 |
+
## Supported Sources:
|
23 |
+
|
24 |
+
- **Internal HR Databases**: Data from company HR systems.
|
25 |
+
- **Pulse Surveys**: Regular surveys conducted to assess employee satisfaction and engagement.
|
26 |
+
|
27 |
+
## Tasks:
|
28 |
+
|
29 |
+
### 1. Aggregate Data:
|
30 |
+
- The app collects data from the above sources.
|
31 |
+
- It processes the data for further analysis.
|
32 |
+
|
33 |
+
### 2. Satisfaction Analysis:
|
34 |
+
- The app applies analysis algorithms to determine overall employee satisfaction levels.
|
35 |
+
- It generates a satisfaction index for each survey, reflecting the strength of employee sentiment.
|
36 |
+
|
37 |
+
### 3. Summarize Trends:
|
38 |
+
- The app extracts and summarizes the most important employment trends or key points discussed in the data.
|
39 |
+
- The summary highlights major workforce dynamics and provides a quick overview of the trends.
|
40 |
+
|
41 |
+
### 4. Display Insights:
|
42 |
+
- Each survey’s satisfaction index and trend summary are displayed, making it easy to analyze and understand workforce dynamics.
|
43 |
+
|
44 |
+
## Output Format:
|
45 |
+
|
46 |
+
- **Data Source:** [Name of source, e.g., Internal HR Database, Pulse Survey]
|
47 |
+
- **Title:** [Title of the report]
|
48 |
+
- **Satisfaction Index:** [Satisfaction index created by HR Assistant]
|
49 |
+
- **Summary:** [Concise summary of key employment trends]
|
50 |
+
- **Full Report:** [Link to the full report or the report body]
|
51 |
+
|
52 |
+
## Installation:
|
53 |
+
|
54 |
+
1. Clone the repository:
|
55 |
+
```bash
|
56 |
+
git clone <repository-url>
```
|
57 |
+
|
58 |
+
### 💬 Feedback
|
59 |
+
Try it out!
|
app_social.py
ADDED
@@ -0,0 +1,383 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
import streamlit as st
|
3 |
+
from langchain_openai import ChatOpenAI
|
4 |
+
from langchain_openai.embeddings import OpenAIEmbeddings
|
5 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
6 |
+
import markdown
|
7 |
+
from operator import itemgetter
|
8 |
+
from langchain.schema.runnable import RunnablePassthrough
|
9 |
+
from langchain_core.prompts import ChatPromptTemplate
|
10 |
+
from langchain.schema import Document
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
from langchain_community.vectorstores import Qdrant
|
13 |
+
from PIL import Image, ImageEnhance
|
14 |
+
from tools import sentiment_analysis_util
|
15 |
+
#from langchain_qdrant import Qdrant
|
16 |
+
import os
|
17 |
+
import pandas as pd
|
18 |
+
import numpy as np
|
19 |
+
import datetime
|
20 |
+
|
21 |
+
|
22 |
+
# App config
# Environment, LLM and embedding model are initialised exactly once here
# (the original repeated this whole section a second time after the page config).
load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]  # KeyError if the key is not configured
base_llm = ChatOpenAI(model="gpt-4o")
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Page config
st.set_page_config(
    page_title="Narrativ 📰",
    layout="wide",
    initial_sidebar_state="expanded",
    page_icon="🔍",
)

# Defaults; the sidebar widgets further down rebind all three names.
uploaded_file = None
topic = 'employment'
date = '2025-02-15'
|
46 |
+
|
47 |
+
# Custom CSS for centered content
st.markdown("""
<style>
.main-container {
max-width: 800px;
margin: 0 auto;
padding: 20px;
}

.stSelectbox {
max-width: 400px;
margin: 0 auto;
}

/* Center all text elements */
.centered-text {
text-align: center;
}
</style>
""", unsafe_allow_html=True)

# Header: five columns, only the wide middle one is used so the title is centred.
_, _, col3, _, _ = st.columns([1, 1, 2, 1, 1])

with col3:
    st.markdown("<h1 class='centered-text'>Search Narrativ</h1>", unsafe_allow_html=True)
|
74 |
+
|
75 |
+
|
76 |
+
# Suggestions
topic_suggestions = [
    "employment",
    "remote work",
    "unemployment",
]

data = pd.read_csv(
    './data/sentiment_index_hr_index_final2.csv',
    index_col='index',
    parse_dates=True,
)

# Convert the index to datetime, if read_csv did not already do so
data.index = pd.to_datetime(data.index)

# Unique dates in chronological order; missing (NaT) entries are dropped
# because they cannot be formatted with strftime.
unique_dates = data.index.dropna().unique().sort_values()

# 'YYYY-MM-DD' strings, preceded by an empty entry so the date stays optional.
# A plain list of str is used instead of a NumPy array so the selectbox
# hands back ordinary Python strings.
date_suggestions = [''] + [day.strftime('%Y-%m-%d') for day in unique_dates]

# Define the allowed date range
start_date = datetime.date(2025, 1, 15)
end_date = datetime.date(2025, 1, 21)
|
101 |
+
|
102 |
+
sidebar = st.sidebar

with sidebar:
    # --- News controls ---
    st.subheader("📰 News")

    topic_options = [""] + topic_suggestions
    topic = st.selectbox(
        "Topic:",
        options=topic_options,
        index=0,
        key="topic_select",
        placeholder="Select or type a topic...",
    )

    selected_date = st.selectbox(
        "Date (optional):",
        options=date_suggestions,
        index=0,
        key="date_select",
        placeholder="Select or type a date...",
    )
    # Normalise whatever the widget returns to a plain str.
    date = str(selected_date)
    prompt = st.button("Summarize News", key="chat_button", use_container_width=True)

    # --- Survey controls ---
    st.subheader("📊 Survey")
    uploaded_file = st.file_uploader("📂 Upload Pulse Survey (.txt)", type="txt")
    prompt_survey = st.button("Survey results", key="chat_button1", use_container_width=True)

# Handle search submission

# The chat history lives in the session state so it survives Streamlit reruns.
if 'messages' not in st.session_state:
    st.session_state.messages = []

# NOTE(review): this runs on every script rerun and records the selected date
# plus the button state as an "assistant" message — looks like debugging
# residue; confirm it is intended before relying on the history.
st.session_state.messages.append({"role": "assistant", "content": f'{date} {prompt}'})
|
134 |
+
|
135 |
+
|
136 |
+
if prompt:
    # Show the pre-rendered sentiment index chart.
    image = Image.open('./data/Sentiment_index_hr.png')
    st.image(image, output_format="PNG", clamp=True)

    # data_all is defined up front so the rest of the branch still works
    # (with Reddit news only) when the CSV cannot be loaded or filtered.
    data_all = np.array([], dtype=object)
    try:
        data = pd.read_csv(
            './data/sentiment_index_hr_index_final2.csv',
            index_col='index',
            parse_dates=True,
        )

        # Optional restriction to the date picked in the sidebar.
        if date:
            data = data.loc[data.index == date]

        if data.empty:
            filtered_data = data
        else:
            # Keep the rows in which any column mentions the topic.
            topic_mask = data.apply(
                lambda row: row.astype(str).str.contains(topic, na=False).any(),
                axis=1,
            )
            filtered_data = data[topic_mask]
            # Without a date filter, fall back to every article when the
            # topic matches nothing.
            if not date and filtered_data.empty:
                filtered_data = data

        data_all = filtered_data.values.flatten()
        if len(data_all) < 1:
            st.warning("No articles found that contain the prompt string.")
    except Exception as e:
        # Report the real cause instead of asking the user for a topic.
        st.error(f"Could not load the news articles: {e}")

    # scrape in real time reddit news
    reddit_news_articles = sentiment_analysis_util.fetch_reddit_news('')
    docs_text = "\n".join(f"- {value}" for value in data_all if not pd.isna(value))
    docs_text_reddit = "\n".join(f"- {value}" for value in reddit_news_articles if not pd.isna(value))
    docs_text = docs_text + '\n' + 'Reddit news:' + '\n' + docs_text_reddit
    docs = [Document(page_content=docs_text)]

    with open('./data/reddit.txt', 'w') as file:
        file.write(docs_text_reddit)

    # Chunk the combined text and index it in an in-memory Qdrant collection.
    split_documents = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1000,
        chunk_overlap=20,
    ).split_documents(docs)

    vectorstore = Qdrant.from_documents(
        split_documents,
        embedding_model,
        location=":memory:",
        collection_name="langchainblogs",
    )

    retriever = vectorstore.as_retriever()

    print("Loaded Vectorstore")

    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": topic})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(topic)

    # Generate summarized message rationalize dominant sentiment
    RAG_PROMPT ="""You are an HR analyst specializing in employment trends, workforce dynamics, and remote work adoption. Your task is to analyze news articles provided by a client on a specific topic. You will receive the full text of relevant articles, along with key data points. Your goal is to evaluate labor market conditions and provide insights into workforce changes.
Your Tasks:
1. Summarize Opinions:

Extract the key opinions and perspectives from the provided news articles, reddit posts and linkedin posts.
The news articles will include: title, URL, date, text, article source, sentiment index created by the company, sentiment index using HF (Hugging Face) model, and confidence for the HF index.
The reddit posts will include: title, URL, date, text.
Highlight any significant patterns, agreements, or disagreements across sources regarding job trends, hiring, layoffs, wages, or remote work policies.
Include sentiment from reddit articles! Explicitly mention the reddit source in the summary.

2. Analyze Sentiment:

Determine the overall sentiment (positive, negative, neutral) about labor market conditions based on the extracted opinions.
Provide a clear explanation of your sentiment conclusion, referencing specific points or trends from the articles.

3. Provide Chain-of-Thought Reasoning:

Detail your reasoning process step by step. Explain how you interpreted the articles, derived insights, and reached your sentiment conclusion.
Ensure the reasoning is logical, transparent, and grounded in the content provided.

4. Collect URL Sources:

From the provided context, select 5 critical and recent URL sources related to labor market trends and remote work policies.

Output Format:

Summary of Opinions: [Concise summary of key opinions]
Sentiment Analysis:
Sentiment: [Positive/Negative/Neutral]
Reasoning: [Detailed explanation here]
Chain-of-Thought Reasoning: [Step-by-step explanation]
Sources: [URLs for 5 most critical and recent articles on this topic]

Guidelines:

Maintain objectivity and precision in your analysis.
Focus on labor market trends, job market shifts, and remote work dynamics.
Use professional and analytical language suitable for client reports.
Respond in the language of the article (mostly English).

CONTEXT:

{context}
QUERY:

{question}

Use the provided context to answer the user's question. Only use the provided context to answer the question. If you do not know the answer, respond with "I don't know."""
    rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
    # RAG CHAIN: retrieve chunks for the question, then return both the LLM
    # response and the retrieved context.
    lcel_rag_chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": rag_prompt | base_llm, "context": itemgetter("context")}
    )

    try:
        summary = lcel_rag_chain.invoke({"question": topic})
        print(summary)
        st.chat_message("assistant").write((summary['response'].content))
    except Exception as e:
        st.error(f"Error generating summary: {e}")

    # Persist the article context for the follow-up chat. The follow-up
    # branch reads these files as UTF-8, so they are written as UTF-8 too.
    if date:
        context_path = './data/sentiment_index_hr_index_final_date.md'
    else:
        context_path = './data/sentiment_index_hr_index_final1.md'
    with open(context_path, 'w', encoding='utf-8') as file:
        file.write(str(data_all))
|
284 |
+
|
285 |
+
if prompt_survey:
    # Imported lazily: only needed when the survey button was pressed.
    import survey_summary

    st.session_state['uploaded_file'] = uploaded_file
    survey_result = survey_summary.survey_agent('', uploaded_file)
    analysis = survey_result
    st.chat_message("assistant").write(str(analysis))

client = OpenAI(api_key=OPENAI_API_KEY)

if "openai_model" not in st.session_state:
    st.session_state["openai_model"] = "gpt-4o"

# Free-text follow-up question; None until the user submits something.
prompt1 = st.chat_input("Type your additional questions here...")

# Suggested keywords with enhanced styling
suggested_keywords = [
    "Latest News",
    "News on remote work",
    "Survey sentiment",
    "Employee satisfaction",
    "How many employees?",
]
st.markdown("**Suggested Keywords:**")
cols = st.columns(len(suggested_keywords))
# One button per keyword, each in its own column; a click replaces the
# typed question with that keyword.
for column, keyword in zip(cols, suggested_keywords):
    clicked = column.button(keyword, key=keyword)
    if clicked:
        prompt1 = keyword
|
305 |
+
|
306 |
+
if prompt1:
    st.session_state.messages.append({"role": "user", "content": prompt1})

    # The survey text is read once and reused by the router prompt and by
    # the survey agent below.
    with open('./data/employee_pulse_survey.txt', 'r') as file:
        survey_txt = file.read()

    # Decide if call SQL agent, SURVEY agent or SENTIMENT agent
    database_columns = pd.read_csv('./data/hr_data.csv').columns
    response = base_llm.invoke(f"""You are the Supervisor of the company. In your team you have, general conversation analyst, data analyst, survey analyst and news article analyst.
If the question {prompt1} can be answered from the history of the conversation:{st.session_state.messages[-10:]} or you can use your knowledge and do not need to call the team members, respond 'history'.
If not: decide if the question: '{prompt1}' is about data available in the database, based on the following columns: {database_columns}, it has information about all employees. If yes, respond 'data'.
If not: decide if the question is asking about the survey: {survey_txt}. If yes, respond 'survey'.
If not: decide if the question is asking about news articles on employment trends or remote work. If yes, respond 'news'.
Your response will be either 'history' or 'data' or 'survey' or 'news'.
Don't answer anything else.
Survey: {survey_txt}""")
    st.write(response.content)

    # Routing keyword chosen by the supervisor; any other answer (e.g.
    # 'history') skips the team and goes straight to the final answer.
    route = response.content.lower()

    if 'data' in route:
        # SQL AGENT
        import sql_agent

        response = sql_agent.sql_agent(f'the question is: {prompt1} and the history is: {st.session_state.messages[-10:]}')
        st.session_state.messages.append({"role": "sql_agent", "content": response})

    elif 'news' in route:
        # SENTIMENT AGENT: context comes from the file written by the last
        # news summary (date-specific file when a date is selected).
        if date:
            file_path = './data/sentiment_index_hr_index_final_date.md'
        else:
            file_path = './data/sentiment_index_hr_index_final1.md'

        try:
            with open(file_path, "r", encoding="utf-8") as file_content:
                docs = file_content.read()
        except Exception as e:
            st.error(f"Error loading context: {e}")
            docs = ""

        response = base_llm.invoke(f"""You are a data analyst, the question is: {prompt1}, the conversation history is: {st.session_state.messages[-10:]} and the context is from {docs}""")
        # Store the text only, like the other agents do; the original stored
        # the whole message object in the history.
        st.session_state.messages.append({"role": "news_agent", "content": response.content})

    elif 'survey' in route:
        # SURVEY AGENT
        import survey_agent1

        response = survey_agent1.analyze_survey_document(survey_txt, f'the question is: {prompt1} and the history is: {st.session_state.messages[-10:]}')
        st.session_state.messages.append({"role": "survey_agent", "content": response})

    # Go back to the MAIN SUPERVISOR, which turns the team output into the
    # final answer shown to the user.
    print('History:', st.session_state.messages[-10:])
    response = base_llm.invoke(f"""You are a supervisor, who collects the answers from the team and give the final answer to the user.
Take the last response, 'response', from your team member: SQL agent, SURVEY agent or SENTIMENT agent and give the final answer to the user.
The user's question is: {prompt1},
the responses from the team are: {st.session_state.messages[-10:]}""")

    st.chat_message("supervisor").write(str(response.content))
    st.session_state.messages.append({"role": "supervisor", "content": response.content})
|
366 |
+
|
367 |
+
# with st.chat_message("user"):
|
368 |
+
# st.markdown(prompt1)
|
369 |
+
# # Display assistant response in chat message container
|
370 |
+
# with st.chat_message("assistant"):
|
371 |
+
# try:
|
372 |
+
# stream = client.chat.completions.create(
|
373 |
+
# model=st.session_state["openai_model"],
|
374 |
+
# messages=[
|
375 |
+
# {"role": m["role"], "content": m["content"]}
|
376 |
+
# for m in st.session_state.messages[:-10]
|
377 |
+
# ],
|
378 |
+
# stream=True,
|
379 |
+
# )
|
380 |
+
# response = st.write_stream(stream)
|
381 |
+
# st.session_state.messages.append({"role": "supervisor", "content": response})
|
382 |
+
# except Exception as e:
|
383 |
+
# st.error(f"Error generating response: {e}")
|
data/sentiment_index_hr.md
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==1.37.1
|
2 |
+
cohere==4.37
|
3 |
+
openai<2.0.0
|
4 |
+
tiktoken<1
|
5 |
+
python-dotenv==1.0.0
|
6 |
+
langchain==0.2.5
|
7 |
+
langchain_core==0.2.9
|
8 |
+
langchain_community==0.2.5
|
9 |
+
langchain-text-splitters==0.2.1
|
10 |
+
langchain_openai==0.1.8
|
11 |
+
qdrant-client==1.13.0
|
12 |
+
Markdown==3.7
|
sql_agent.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sqlalchemy import create_engine
|
2 |
+
from langchain_openai import ChatOpenAI
|
3 |
+
from langchain_community.utilities import SQLDatabase
|
4 |
+
from langchain_community.agent_toolkits.sql.base import create_sql_agent # Updated Import
|
5 |
+
import streamlit as st
|
6 |
+
import os
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
|
9 |
+
load_dotenv()
|
10 |
+
|
11 |
+
def sql_agent(prompt, database_url="sqlite:///data/main.db", model_name="gpt-4o"):
    """Answer a natural-language question with a LangChain SQL agent.

    Args:
        prompt: Question (optionally with conversation history) passed to the
            agent as its ``input``.
        database_url: SQLAlchemy URL of the database to query. Defaults to
            the SQLite file the original code hard-coded.
        model_name: Chat model used by the agent.

    Returns:
        The ``output`` entry of the agent's result.
    """
    # NOTE(review): no API key is passed explicitly; presumably ChatOpenAI
    # picks up OPENAI_API_KEY from the environment populated by load_dotenv()
    # at import time — confirm.
    engine = create_engine(database_url)
    try:
        db = SQLDatabase(engine)

        # temperature=0 as in the original configuration
        llm = ChatOpenAI(temperature=0, model_name=model_name)

        agent_executor = create_sql_agent(
            llm=llm,
            db=db,
            agent_type="openai-tools",
            verbose=True,
        )

        return agent_executor.invoke({"input": prompt})['output']
    finally:
        # A new engine is created on every call; release its connection pool
        # so repeated questions do not accumulate open connections.
        engine.dispose()
|
survey_agent1.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import openai
|
3 |
+
import os
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
from langchain_openai import ChatOpenAI
|
6 |
+
from langchain_core.prompts import ChatPromptTemplate
|
7 |
+
from langchain_core.runnables import RunnablePassthrough
|
8 |
+
from langchain_openai.embeddings import OpenAIEmbeddings
|
9 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
10 |
+
from langchain_community.vectorstores import Qdrant
|
11 |
+
from langchain_core.documents import Document
|
12 |
+
from operator import itemgetter
|
13 |
+
|
14 |
+
|
15 |
+
# Load API key from .env file
|
16 |
+
load_dotenv()
|
17 |
+
base_llm = ChatOpenAI(model="gpt-4o")
|
18 |
+
openai.api_key = os.getenv("OPENAI_API_KEY")
|
19 |
+
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
|
20 |
+
|
21 |
+
|
22 |
+
def analyze_survey_document(survey_text, question):
    """Answer a question about a pulse survey using retrieval over its text.

    The survey is split into token-based chunks, embedded into an in-memory
    Qdrant collection, and the chunks retrieved for ``question`` are given to
    the chat model as context.

    Args:
        survey_text: Full text of the survey (questions and answers).
        question: Question to answer from the survey.

    Returns:
        The text content of the model's response.

    Raises:
        ValueError: If ``survey_text`` yields no chunks (empty or
            whitespace-only), since there is nothing to index.
    """
    prompt = """
You are an HR analyst answering questions about employee pulse survey responses. Below is the entire survey with questions and corresponding employee answers.

**context:**
{context}

**question:**
{question}

Answer all questions based on the context. If you don't know say I don't know.
"""
    rag_prompt = ChatPromptTemplate.from_template(prompt)

    # Token-based chunking of the survey text
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500,
        chunk_overlap=50,
    )
    chunks = text_splitter.split_text(survey_text)
    if not chunks:
        # Fail with a clear message instead of an obscure error from the
        # vector store when there is no text to embed.
        raise ValueError("survey_text is empty; nothing to analyze")

    # In-memory vector store built from the raw chunk strings
    qdrant_vectorstore = Qdrant.from_texts(
        chunks,
        embedding_model,
        location=":memory:",
        collection_name="formatted_text",
    )
    retriever = qdrant_vectorstore.as_retriever()

    # Retrieval + generation chain: the context is looked up from the
    # question, so only the question has to be supplied on invoke.
    lcel_rag_chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": rag_prompt | base_llm, "context": itemgetter("context")}
    )
    response = lcel_rag_chain.invoke({"question": question})['response'].content

    return response
|
66 |
+
|
67 |
+
def chat_with_ai(chat_history):
    """Send the chat history to gpt-4o and return the reply text.

    Any exception is caught and returned as an ``"Error: ..."`` string
    instead of being raised.
    """
    try:
        completion = openai.chat.completions.create(
            model="gpt-4o",
            messages=chat_history,
        )
        first_choice = completion.choices[0]
        reply = first_choice.message.content
    except Exception as e:
        reply = f"Error: {str(e)}"
    return reply
|
77 |
+
|
78 |
+
def survey_agent(prompt, uploaded_file):
    """Render the survey analysis and a small chat UI in Streamlit.

    Args:
        prompt: Question to ask about the survey. When empty, a generic
            request for the key findings is used instead.
        uploaded_file: Uploaded survey file (object with ``read()`` returning
            bytes) or a falsy value to use the bundled survey text.

    Returns:
        The AI's answer to the chat question asked in this run, or ``None``
        when no question was submitted.
    """
    st.subheader("📊 Employee Pulse Survey Analysis & AI Chat")

    # Prefer the uploaded survey; otherwise fall back to the bundled file.
    if uploaded_file:
        survey_text = uploaded_file.read().decode("utf-8").strip()
    else:
        with open('./data/employee_pulse_survey.txt', 'r') as file:
            survey_text = file.read()

    # Defined up front so the final return never hits an unbound name.
    ai_response = None

    if survey_text:
        st.write("✅ File uploaded successfully! Analyzing responses...")

        # analyze_survey_document requires a question; the original call
        # omitted it and raised TypeError.
        question = prompt or "Summarize the key findings of this survey."

        with st.spinner("🔍 Analyzing entire survey..."):
            analysis = analyze_survey_document(survey_text, question)
        st.session_state["survey_summary"] = analysis
        st.markdown(analysis)

    # AI Chat with the survey analysis
    st.header("💬 Chat with AI about the Survey")
    st.write("Ask questions about the pulse survey insights.")

    # Seed the conversation with the survey summary on first use.
    if "messages" not in st.session_state:
        st.session_state["messages"] = [
            {"role": "system", "content": "You are an HR expert analyzing a pulse survey. Provide insights based on the summary below."},
            {"role": "user", "content": st.session_state.get("survey_summary", "No survey uploaded yet.")}
        ]

    user_input = st.text_input("🔍 Ask a question about the survey results:")

    if st.button("Ask AI"):
        if user_input:
            st.session_state["messages"].append({"role": "user", "content": user_input})
            with st.spinner("🔍 Thinking..."):
                ai_response = chat_with_ai(st.session_state["messages"])
            st.session_state["messages"].append({"role": "assistant", "content": ai_response})
            st.markdown(f"**AI:** {ai_response}")
        else:
            st.warning("⚠️ Please enter a question.")

    return ai_response
|
survey_summary.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import openai
|
3 |
+
import os
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import pandas as pd
|
6 |
+
# Load API key from .env file
|
7 |
+
load_dotenv()
|
8 |
+
openai.api_key = os.getenv("OPENAI_API_KEY")
|
9 |
+
|
10 |
+
def analyze_survey_document(survey_text):
    """Ask the OpenAI chat model for a written analysis of a pulse survey.

    The complete survey text (questions, answers and any LinkedIn posts the
    caller appended) is embedded into a single prompt. No retrieval step is
    performed here; the model only sees what is passed in.

    Args:
        survey_text: Full survey document as one string.

    Returns:
        The model's analysis as a string. If building or sending the request
        raises, a string of the form ``"Error: <message>"`` is returned
        instead, so callers can display the result without extra handling.
    """
    # The prompt text is kept flush-left so the model receives plain markdown
    # rather than lines indented like a code block.
    prompt = f"""
You are an HR analyst evaluating employee pulse survey responses.
Below is the entire survey with questions and corresponding employee answers.
Also, you have access to LinkedIn posts for your employees Katerina Gawthorpe, Steven Gawthorpe, Deepali Khalkar, and Tyler Robinson. Look at their posts and include them in your analysis about what your employees are up to. Their roles are data scientists and AI experts; notice if they are personally interested in AI and mention it.

**Your tasks:**
1. **Summarize responses for each question** - Capture overall themes and sentiments.
2. **Identify key areas of dissatisfaction** - Highlight the most frequent complaints.
3. **Provide recommendations** - Suggest how the company can address these concerns.
4. **Include LinkedIn posts** - Look at their posts and include them in your analysis about what your employees are up to. Their roles are data scientists and AI experts; notice if they are personally interested in AI and mention it.
**Survey Responses:**
{survey_text}

**Your analysis should include:**
- **Summary for each question**
- **Overall themes across all questions**
- **Most common complaints**
- **Actionable recommendations**
"""

    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are an HR analyst summarizing an employee pulse survey."},
                {"role": "user", "content": prompt},
            ],
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort boundary: the UI shows this text to the user
        # instead of crashing the Streamlit page.
        return f"Error: {str(e)}"
|
41 |
+
|
42 |
+
def chat_with_ai(chat_history):
    """Send a running conversation to the OpenAI chat model and return its reply.

    Args:
        chat_history: List of ``{"role": ..., "content": ...}`` message dicts;
            it is handed to the API unchanged.

    Returns:
        The text of the first completion choice, or ``"Error: <message>"``
        when the request (or reading its result) raises an exception.
    """
    try:
        completion = openai.chat.completions.create(
            messages=chat_history,
            model="gpt-4o",
        )
        reply = completion.choices[0].message.content
    except Exception as exc:
        # Failures are reported as text so the chat UI can render them.
        return f"Error: {str(exc)}"
    return reply
|
52 |
+
|
53 |
+
def _linkedin_posts_as_text(csv_path):
    """Return the LinkedIn posts stored in *csv_path* as a bulleted text list."""
    posts = pd.read_csv(
        csv_path,
        usecols=['postTimestamp', 'postUrl', 'postContent', 'author'],
    ).dropna()

    selected = pd.DataFrame()
    # Newlines inside a post are flattened so every value stays on one bullet.
    selected['text'] = posts['postContent'].str.replace('\n', ' ')
    selected['author'] = posts['author'].str.replace('\n', ' ')
    selected['date'] = pd.to_datetime(posts['postTimestamp']).dt.strftime('%Y-%m-%d')
    selected['url'] = posts['postUrl']

    # Row-major flattening yields text, author, date, url for each post in turn.
    values = selected.values.flatten()
    return "\n".join(f"- {value}" for value in values if not pd.isna(value))


def survey_agent(prompt, uploaded_file):
    """Analyze a pulse survey together with the stored LinkedIn posts.

    The survey comes from *uploaded_file* when one is supplied, otherwise from
    the bundled ``./data/employee_pulse_survey.txt``. The LinkedIn posts from
    ``./data/linkedin_post_result.csv`` are appended under a
    ``LinkedIn posts:`` heading before the text is sent for analysis.

    Args:
        prompt: Not used by this function; kept so existing callers that pass
            it keep working.
        uploaded_file: Optional file-like object whose ``read()`` returns
            UTF-8 encoded bytes, or a falsy value to use the bundled survey.

    Returns:
        The string returned by ``analyze_survey_document``.
    """
    st.subheader("📊 Employee Pulse Survey Analysis & AI Chat")

    # Previously the fallback variable was only assigned when no file was
    # uploaded but was used unconditionally, so every upload crashed with an
    # unbound-variable error. Pick the survey source explicitly instead, and
    # only touch the bundled file when it is actually needed.
    if uploaded_file:
        survey_body = uploaded_file.read().decode("utf-8").strip()
    else:
        with open('./data/employee_pulse_survey.txt', 'r', encoding='utf-8') as file:
            survey_body = file.read()

    docs_text_linkedin = _linkedin_posts_as_text('./data/linkedin_post_result.csv')
    survey_text = survey_body + '\n' + 'LinkedIn posts:' + '\n' + docs_text_linkedin

    st.write("✅ File uploaded successfully! Analyzing responses...")
    with st.spinner("🔍 Analyzing entire survey..."):
        analysis = analyze_survey_document(survey_text)

    return analysis
|
tools/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
tools/__pycache__/sentiment_analysis_util.cpython-311.pyc
ADDED
Binary file (6.07 kB). View file
|
|
tools/sentiment_analysis_util.py
ADDED
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
from transformers import pipeline
|
5 |
+
import pandas as pd
|
6 |
+
#from langchain_openai import ChatOpenAI
|
7 |
+
import praw
|
8 |
+
from datetime import datetime
|
9 |
+
import numpy as np
|
10 |
+
#from tavily import TavilyClient
|
11 |
+
|
12 |
+
load_dotenv()
|
13 |
+
#TAVILY_API_KEY = os.environ["TAVILY_API_KEY"]
|
14 |
+
|
15 |
+
# def fetch_news(topic):
|
16 |
+
|
17 |
+
# """ Fetches news articles within a specified date range.
|
18 |
+
|
19 |
+
# Args:
|
20 |
+
# - topic (str): Topic of interest
|
21 |
+
|
22 |
+
# Returns:
|
23 |
+
# - list: A list of dictionaries containing news. """
|
24 |
+
|
25 |
+
# load_dotenv()
|
26 |
+
# days_to_fetch_news = os.environ["DAYS_TO_FETCH_NEWS"]
|
27 |
+
|
28 |
+
# googlenews = GoogleNews()
|
29 |
+
# googlenews.set_period(days_to_fetch_news)
|
30 |
+
# googlenews.get_news(topic)
|
31 |
+
# news_json=googlenews.get_texts()
|
32 |
+
# urls=googlenews.get_links()
|
33 |
+
|
34 |
+
# no_of_news_articles_to_fetch = os.environ["NO_OF_NEWS_ARTICLES_TO_FETCH"]
|
35 |
+
# news_article_list = []
|
36 |
+
# counter = 0
|
37 |
+
# for article in news_json:
|
38 |
+
|
39 |
+
# if(counter >= int(no_of_news_articles_to_fetch)):
|
40 |
+
# break
|
41 |
+
|
42 |
+
# relevant_info = {
|
43 |
+
# 'News_Article': article,
|
44 |
+
# 'URL': urls[counter]
|
45 |
+
# }
|
46 |
+
# news_article_list.append(relevant_info)
|
47 |
+
# counter+=1
|
48 |
+
# return news_article_list
|
49 |
+
|
50 |
+
def fetch_tavily_news(topic):
    """Fetch news about a topic through Tavily's search-context call.

    Args:
        topic (str): Topic of interest.

    Returns:
        An empty list when the response contains no "url" marker; otherwise a
        NumPy array of dicts, each with the keys 'https' and 'topic_answer'.
    """
    # NOTE(review): the `TavilyClient` import and the `TAVILY_API_KEY`
    # assignment appear commented out at the top of this module, so this call
    # presumably raises NameError as the file stands -- confirm before use.
    client = TavilyClient(api_key=TAVILY_API_KEY)

    # Run a context search query for the requested topic.
    context = client.get_search_context(query=f"Give me news on {topic}")

    collected = []

    # The text before the first "url" marker is skipped; each later chunk is
    # presumably one search hit (the response layout is not visible here).
    for chunk in context.split("url")[1:]:
        fields = chunk.split("\\\\\\")
        link = fields[2].split('"')[1]
        summary = fields[-3]
        collected = np.append(collected, {'https': link, 'topic_answer': summary})

    return collected
|
75 |
+
|
76 |
+
def fetch_reddit_news(topic):
    """Collect recent job-related Reddit headlines from a fixed set of searches.

    Each (subreddit, query) pair below is searched with Reddit's ``day`` time
    filter and every hit is formatted as
    ``"<title>, Date: <YYYY-MM-DD HH:MM:SS>, URL:<url>"`` (timestamp in UTC).

    Args:
        topic: Currently unused; the searches are hard-coded. Kept so existing
            callers that pass a topic keep working.

    Returns:
        set[str]: The formatted headlines, de-duplicated across searches.

    Raises:
        KeyError: If REDDIT_USER_AGENT, REDDIT_CLIENT_ID or
            REDDIT_CLIENT_SECRET is missing from the environment.
    """
    # Local import: the module header only brings in `datetime` itself.
    from datetime import timezone

    load_dotenv()
    REDDIT_USER_AGENT = os.environ["REDDIT_USER_AGENT"]
    REDDIT_CLIENT_ID = os.environ["REDDIT_CLIENT_ID"]
    REDDIT_CLIENT_SECRET = os.environ["REDDIT_CLIENT_SECRET"]

    # https://medium.com/geekculture/a-complete-guide-to-web-scraping-reddit-with-python-16e292317a52
    reddit = praw.Reddit(
        client_id=REDDIT_CLIENT_ID,
        client_secret=REDDIT_CLIENT_SECRET,
        user_agent=REDDIT_USER_AGENT,
    )

    # (subreddit, search query) pairs, in the order they were originally run.
    searches = (
        ('nova', 'job'),
        ('fednews', 'labor'),
        ('fednews', 'job'),
        ('fednews', 'employment'),
        ('fednews', 'layoff'),
        ('washingtondc', 'job'),
    )

    headlines = set()
    for subreddit_name, query in searches:
        for submission in reddit.subreddit(subreddit_name).search(query, time_filter='day'):
            # Timezone-aware replacement for the deprecated
            # datetime.utcfromtimestamp(); the formatted text is unchanged.
            created = datetime.fromtimestamp(int(submission.created_utc), tz=timezone.utc)
            headlines.add(
                submission.title
                + ', Date: ' + created.strftime('%Y-%m-%d %H:%M:%S')
                + ', URL:' + submission.url
            )

    # A wider fallback (searching 'washingtondc' for `topic` over a year, then
    # without a time filter, when fewer than 10 headlines were found) was
    # disabled in the original code and remains disabled.

    return headlines
|
117 |
+
|
118 |
+
_SENTIMENT_MODEL = 'tabularisai/robust-sentiment-analysis'
# Previously tried: 'mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis'
_sentiment_classifier = None


def _get_sentiment_classifier():
    """Build the sentiment pipeline on first use and reuse it afterwards."""
    global _sentiment_classifier
    if _sentiment_classifier is None:
        _sentiment_classifier = pipeline(model=_SENTIMENT_MODEL)
    return _sentiment_classifier


def analyze_sentiment(article):
    """Classify the sentiment of one article or headline.

    Args:
        article: The item to classify. It is converted with ``str()`` before
            being passed to the classifier, so any printable object works.

    Returns:
        dict: ``{'News_Article': article, 'Sentiment': <pipeline output>}``.
        'News_Article' is the input exactly as received; 'Sentiment' is the
        raw result of the pipeline call (presumably a list of label/score
        dicts -- the disabled driver code reads ``[0]['label']`` from it).
    """
    # Constructing the pipeline loads the model, so it is created once and
    # shared across calls instead of being rebuilt for every article.
    classifier = _get_sentiment_classifier()
    sentiment_result = classifier(str(article))

    analysis_result = {
        'News_Article': article,
        'Sentiment': sentiment_result,
    }

    return analysis_result
|
142 |
+
|
143 |
+
|
144 |
+
# def generate_summary_of_sentiment(sentiment_analysis_results): #, dominant_sentiment):
|
145 |
+
|
146 |
+
|
147 |
+
# news_article_sentiment = str(sentiment_analysis_results)
|
148 |
+
# print("News article sentiment : " + news_article_sentiment)
|
149 |
+
|
150 |
+
|
151 |
+
# OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
|
152 |
+
# model = ChatOpenAI(
|
153 |
+
# model="gpt-4o",
|
154 |
+
# temperature=0,
|
155 |
+
# max_tokens=None,
|
156 |
+
# timeout=None,
|
157 |
+
# max_retries=2,
|
158 |
+
# api_key=OPENAI_API_KEY, # if you prefer to pass api key in directly instaed of using env vars
|
159 |
+
# # base_url="...",
|
160 |
+
# # organization="...",
|
161 |
+
# # other params...
|
162 |
+
# )
|
163 |
+
|
164 |
+
# messages=[
|
165 |
+
# {"role": "system", "content": "You are a helpful assistant that looks at all news articles with their sentiment, hyperlink and date in front of the article text, the articles MUST be ordered by date!, and generate a summary rationalizing dominant sentiment. At the end of the summary, add URL links with dates for all the articles in the markdown format for streamlit. Make sure the articles as well as the links are ordered descending by Date!!!!!!! Example of adding the URLs: The Check out the links: [link](%s) % url, 2024-03-01. "},
|
166 |
+
# {"role": "user", "content": f"News articles and their sentiments: {news_article_sentiment}"} #, and dominant sentiment is: {dominant_sentiment}"}
|
167 |
+
# ]
|
168 |
+
# response = model.invoke(messages)
|
169 |
+
|
170 |
+
|
171 |
+
# summary = response.content
|
172 |
+
# print ("+++++++++++++++++++++++++++++++++++++++++++++++")
|
173 |
+
# print(summary)
|
174 |
+
# print ("+++++++++++++++++++++++++++++++++++++++++++++++")
|
175 |
+
# return summary
|
176 |
+
|
177 |
+
|
178 |
+
# def plot_sentiment_graph(sentiment_analysis_results):
|
179 |
+
# """
|
180 |
+
# Plots a sentiment analysis graph
|
181 |
+
|
182 |
+
# Args:
|
183 |
+
# - sentiment_analysis_result): (dict): Dictionary containing 'Review Title : Summary', 'Rating', and 'Sentiment' keys.
|
184 |
+
|
185 |
+
# Returns:
|
186 |
+
# - dict: A dictionary containing sentiment analysis results.
|
187 |
+
# """
|
188 |
+
# df = pd.DataFrame(sentiment_analysis_results)
|
189 |
+
# print(df)
|
190 |
+
|
191 |
+
# #Group by Rating, sentiment value count
|
192 |
+
# grouped = df['Sentiment'].value_counts()
|
193 |
+
|
194 |
+
# sentiment_counts = df['Sentiment'].value_counts()
|
195 |
+
|
196 |
+
# # Plotting pie chart
|
197 |
+
# # fig = plt.figure(figsize=(5, 3))
|
198 |
+
# # plt.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=140)
|
199 |
+
# # plt.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
|
200 |
+
|
201 |
+
# #Open below when u running this program locally and c
|
202 |
+
# #plt.show()
|
203 |
+
|
204 |
+
# return sentiment_counts
|
205 |
+
|
206 |
+
|
207 |
+
# def get_dominant_sentiment (sentiment_analysis_results):
|
208 |
+
# """
|
209 |
+
# Returns overall sentiment, negative or positive or neutral depending on the count of negative sentiment vs positive sentiment
|
210 |
+
|
211 |
+
# Args:
|
212 |
+
# - sentiment_analysis_result): (dict): Dictionary containing 'summary', 'headline', and 'created_at' keys.
|
213 |
+
|
214 |
+
# Returns:
|
215 |
+
# - dict: A dictionary containing sentiment analysis results.
|
216 |
+
# """
|
217 |
+
# df = pd.DataFrame(sentiment_analysis_results)
|
218 |
+
|
219 |
+
# # Group by the 'sentiment' column and count the occurrences of each sentiment value
|
220 |
+
# print(df)
|
221 |
+
# print(df['Sentiment'])
|
222 |
+
# sentiment_counts = df['Sentiment'].value_counts().reset_index()
|
223 |
+
# sentiment_counts.columns = ['sentiment', 'count']
|
224 |
+
# print(sentiment_counts)
|
225 |
+
|
226 |
+
# # Find the sentiment with the highest count
|
227 |
+
# dominant_sentiment = sentiment_counts.loc[sentiment_counts['count'].idxmax()]
|
228 |
+
|
229 |
+
# return dominant_sentiment['sentiment']
|
230 |
+
|
231 |
+
# #starting point of the program
|
232 |
+
# if __name__ == '__main__':
|
233 |
+
|
234 |
+
# #fetch news
|
235 |
+
# news_articles = fetch_news('AAPL')
|
236 |
+
|
237 |
+
# analysis_results = []
|
238 |
+
|
239 |
+
# #Perform sentiment analysis for each product review
|
240 |
+
# for article in news_articles:
|
241 |
+
# sentiment_analysis_result = analyze_sentiment(article['News_Article'])
|
242 |
+
|
243 |
+
# # Display sentiment analysis results
|
244 |
+
# print(f'News Article: {sentiment_analysis_result["News_Article"]} : Sentiment: {sentiment_analysis_result["Sentiment"]}', '\n')
|
245 |
+
|
246 |
+
# result = {
|
247 |
+
# 'News_Article': sentiment_analysis_result["News_Article"],
|
248 |
+
# 'Sentiment': sentiment_analysis_result["Sentiment"][0]['label']
|
249 |
+
# }
|
250 |
+
|
251 |
+
# analysis_results.append(result)
|
252 |
+
|
253 |
+
|
254 |
+
# #Graph dominant sentiment based on sentiment analysis data of reviews
|
255 |
+
# dominant_sentiment = get_dominant_sentiment(analysis_results)
|
256 |
+
# print(dominant_sentiment)
|
257 |
+
|
258 |
+
# #Plot graph
|
259 |
+
# plot_sentiment_graph(analysis_results)
|
260 |
+
|