Spaces:
Sleeping
Sleeping
Commit
·
4ff6c1a
1
Parent(s):
1f0f3cb
progress more (3.1)
Browse files- app.py +83 -32
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -11,6 +11,14 @@ from langchain_community.chat_models import ChatOpenAI
|
|
| 11 |
from langchain.prompts import PromptTemplate
|
| 12 |
from langchain_core.runnables import RunnablePassthrough
|
| 13 |
from transformers import pipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# Initialize sentiment analyzers
|
| 16 |
finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
|
|
@@ -261,25 +269,62 @@ def create_analysis_data(df):
|
|
| 261 |
def create_output_file(df, uploaded_file):
|
| 262 |
wb = load_workbook("sample_file.xlsx")
|
| 263 |
|
| 264 |
-
#
|
| 265 |
-
|
| 266 |
'Объект': df['Объект'].unique(),
|
| 267 |
-
'Всего
|
| 268 |
'Негативные': df[df['Sentiment'] == 'Negative'].groupby('Объект').size().fillna(0).astype(int),
|
| 269 |
-
'Позитивные': df[df['Sentiment'] == 'Positive'].groupby('Объект').size().fillna(0).astype(int)
|
| 270 |
-
|
| 271 |
-
lambda x: x.value_counts().index[0] if len(x) > 0 else 'Неопределенный'
|
| 272 |
-
)
|
| 273 |
-
})
|
| 274 |
|
| 275 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
|
| 277 |
-
#
|
| 278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
-
# Update '
|
| 281 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
if 'Тех.приложение' not in wb.sheetnames:
|
| 284 |
wb.create_sheet('Тех.приложение')
|
| 285 |
ws = wb['Тех.приложение']
|
|
@@ -293,25 +338,27 @@ def create_output_file(df, uploaded_file):
|
|
| 293 |
return output
|
| 294 |
|
| 295 |
def main():
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
|
|
|
|
|
|
| 315 |
|
| 316 |
if 'processed_df' not in st.session_state:
|
| 317 |
st.session_state.processed_df = None
|
|
@@ -338,6 +385,10 @@ def main():
|
|
| 338 |
formatted_time = format_elapsed_time(elapsed_time)
|
| 339 |
st.success(f"Обработка и анализ завершены за {formatted_time}.")
|
| 340 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
st.download_button(
|
| 342 |
label="Скачать результат анализа",
|
| 343 |
data=output,
|
|
|
|
| 11 |
from langchain.prompts import PromptTemplate
|
| 12 |
from langchain_core.runnables import RunnablePassthrough
|
| 13 |
from transformers import pipeline
|
| 14 |
+
from reportlab.lib import colors
|
| 15 |
+
from reportlab.lib.pagesizes import letter
|
| 16 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
|
| 17 |
+
from reportlab.lib.styles import getSampleStyleSheet
|
| 18 |
+
from io import StringIO
|
| 19 |
+
import contextlib
|
| 20 |
+
|
| 21 |
+
|
| 22 |
|
| 23 |
# Initialize sentiment analyzers
|
| 24 |
finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
|
|
|
|
| 269 |
def create_output_file(df, uploaded_file):
|
| 270 |
wb = load_workbook("sample_file.xlsx")
|
| 271 |
|
| 272 |
+
# Sort entities by number of negative publications
|
| 273 |
+
entity_stats = pd.DataFrame({
|
| 274 |
'Объект': df['Объект'].unique(),
|
| 275 |
+
'Всего': df.groupby('Объект').size(),
|
| 276 |
'Негативные': df[df['Sentiment'] == 'Negative'].groupby('Объект').size().fillna(0).astype(int),
|
| 277 |
+
'Позитивные': df[df['Sentiment'] == 'Positive'].groupby('Объект').size().fillna(0).astype(int)
|
| 278 |
+
}).sort_values('Негативные', ascending=False)
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
+
# Calculate most negative impact for each entity
|
| 281 |
+
entity_impacts = {}
|
| 282 |
+
for entity in df['Объект'].unique():
|
| 283 |
+
entity_df = df[df['Объект'] == entity]
|
| 284 |
+
negative_impacts = entity_df[entity_df['Sentiment'] == 'Negative']['Impact']
|
| 285 |
+
entity_impacts[entity] = negative_impacts.iloc[0] if len(negative_impacts) > 0 else 'Неопределенный эффект'
|
| 286 |
|
| 287 |
+
# Update 'Сводка' sheet
|
| 288 |
+
ws = wb['Сводка']
|
| 289 |
+
for idx, (entity, row) in enumerate(entity_stats.iterrows(), start=4):
|
| 290 |
+
ws.cell(row=idx, column=5, value=entity) # Column E
|
| 291 |
+
ws.cell(row=idx, column=6, value=row['Всего']) # Column F
|
| 292 |
+
ws.cell(row=idx, column=7, value=row['Негативные']) # Column G
|
| 293 |
+
ws.cell(row=idx, column=8, value=row['Позитивные']) # Column H
|
| 294 |
+
ws.cell(row=idx, column=9, value=entity_impacts[entity]) # Column I
|
| 295 |
|
| 296 |
+
# Update 'Значимые' sheet
|
| 297 |
+
ws = wb['Значимые']
|
| 298 |
+
row_idx = 3
|
| 299 |
+
for _, row in df.iterrows():
|
| 300 |
+
if row['Sentiment'] in ['Negative', 'Positive']:
|
| 301 |
+
ws.cell(row=row_idx, column=3, value=row['Объект']) # Column C
|
| 302 |
+
ws.cell(row=row_idx, column=4, value='релевантно') # Column D
|
| 303 |
+
ws.cell(row=row_idx, column=5, value=row['Sentiment']) # Column E
|
| 304 |
+
ws.cell(row=row_idx, column=6, value=row['Impact']) # Column F
|
| 305 |
+
ws.cell(row=row_idx, column=7, value=row['Заголовок']) # Column G
|
| 306 |
+
ws.cell(row=row_idx, column=8, value=row['Выдержки из текста']) # Column H
|
| 307 |
+
row_idx += 1
|
| 308 |
+
|
| 309 |
+
# Copy 'Публикации' sheet
|
| 310 |
+
original_df = pd.read_excel(uploaded_file, sheet_name='Публикации')
|
| 311 |
+
ws = wb['Публикации']
|
| 312 |
+
for r_idx, row in enumerate(dataframe_to_rows(original_df, index=False, header=True), start=1):
|
| 313 |
+
for c_idx, value in enumerate(row, start=1):
|
| 314 |
+
ws.cell(row=r_idx, column=c_idx, value=value)
|
| 315 |
|
| 316 |
+
# Update 'Анализ' sheet
|
| 317 |
+
ws = wb['Анализ']
|
| 318 |
+
row_idx = 4
|
| 319 |
+
for _, row in df[df['Sentiment'] == 'Negative'].iterrows():
|
| 320 |
+
ws.cell(row=row_idx, column=5, value=row['Объект']) # Column E
|
| 321 |
+
ws.cell(row=row_idx, column=6, value=row['Заголовок']) # Column F
|
| 322 |
+
ws.cell(row=row_idx, column=7, value="Риск убытка") # Column G
|
| 323 |
+
ws.cell(row=row_idx, column=9, value=row['Выдержки из текста']) # Column I
|
| 324 |
+
row_idx += 1
|
| 325 |
+
|
| 326 |
+
# Update 'Тех.приложение' sheet
|
| 327 |
+
tech_df = df[['Объект', 'Заголовок', 'Выдержки из текста', 'Translated', 'Sentiment', 'Impact', 'Reasoning']]
|
| 328 |
if 'Тех.приложение' not in wb.sheetnames:
|
| 329 |
wb.create_sheet('Тех.приложение')
|
| 330 |
ws = wb['Тех.приложение']
|
|
|
|
| 338 |
return output
|
| 339 |
|
| 340 |
def main():
|
| 341 |
+
# Capture all output for PDF
|
| 342 |
+
with capture_streamlit_output() as output:
|
| 343 |
+
st.markdown(
|
| 344 |
+
"""
|
| 345 |
+
<style>
|
| 346 |
+
.signature {
|
| 347 |
+
position: fixed;
|
| 348 |
+
right: 12px;
|
| 349 |
+
bottom: 12px;
|
| 350 |
+
font-size: 14px;
|
| 351 |
+
color: #FF0000;
|
| 352 |
+
opacity: 0.9;
|
| 353 |
+
z-index: 999;
|
| 354 |
+
}
|
| 355 |
+
</style>
|
| 356 |
+
<div class="signature">denis.pokrovsky.npff</div>
|
| 357 |
+
""",
|
| 358 |
+
unsafe_allow_html=True
|
| 359 |
+
)
|
| 360 |
+
|
| 361 |
+
st.title("::: анализ мониторинга новостей СКАН-ИНТЕРФАКС (v.3.1):::")
|
| 362 |
|
| 363 |
if 'processed_df' not in st.session_state:
|
| 364 |
st.session_state.processed_df = None
|
|
|
|
| 385 |
formatted_time = format_elapsed_time(elapsed_time)
|
| 386 |
st.success(f"Обработка и анализ завершены за {formatted_time}.")
|
| 387 |
|
| 388 |
+
if st.session_state.processed_df is not None:
|
| 389 |
+
save_to_pdf(output) # Save the captured output to PDF
|
| 390 |
+
|
| 391 |
+
|
| 392 |
st.download_button(
|
| 393 |
label="Скачать результат анализа",
|
| 394 |
data=output,
|
requirements.txt
CHANGED
|
@@ -16,3 +16,4 @@ huggingface_hub
|
|
| 16 |
accelerate>=0.26.0
|
| 17 |
openai
|
| 18 |
wordcloud
|
|
|
|
|
|
| 16 |
accelerate>=0.26.0
|
| 17 |
openai
|
| 18 |
wordcloud
|
| 19 |
+
reportlab==4.1.0
|