Spaces:
Sleeping
Sleeping
import gradio as gr | |
import spaces | |
import pandas as pd | |
import torch | |
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer | |
from transformers import AutoModelForCausalLM | |
import time | |
import plotly.graph_objects as go | |
from datetime import datetime | |
from deep_translator import GoogleTranslator | |
from googletrans import Translator as LegacyTranslator | |
import io | |
from openpyxl import load_workbook | |
from openpyxl.utils.dataframe import dataframe_to_rows | |
class EventDetector:
    """Classify news excerpts by event type and by majority-vote sentiment.

    On construction the class loads ``google/mt5-small`` (used to generate a
    free-text summary) and three ``sentiment-analysis`` pipelines
    (``ProsusAI/finbert``, ``cardiffnlp/twitter-roberta-base-sentiment`` and
    ``yiyanghkust/finbert-tone``) whose predictions are combined by vote.
    """

    # Label returned when no keyword group matches or the input is unusable.
    _NO_EVENT = "Нет"

    # (event label, keyword stems) pairs, checked in order; the first group
    # with a stem found in the lower-cased text wins. Matching is a plain
    # substring test, so a stem also matches inside a longer word.
    _EVENT_KEYWORDS = (
        ("Отчетность", ("отчет", "выручка", "прибыль", "ebitda")),
        ("РЦБ", ("облигаци", "купон", "дефолт")),
        ("Суд", ("суд", "иск", "арбитраж")),
    )

    # Upper bound on tokens fed to any model. Without truncation an over-long
    # excerpt can make a classifier call raise, which analyze_sentiment would
    # then report as a misleading "Neutral".
    _MAX_INPUT_TOKENS = 512

    # Raw pipeline labels (lower-cased) that map to each polarity; anything
    # else is treated as neutral.
    _POSITIVE_LABELS = frozenset({"positive", "label_2", "pos"})
    _NEGATIVE_LABELS = frozenset({"negative", "label_0", "neg"})

    def __init__(self):
        """Load the tokenizer, the seq2seq model and the three sentiment pipelines."""
        self.model_name = "google/mt5-small"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = self.model.to(self.device)

        # Initialize sentiment analyzers on the same device as the generator.
        self.finbert = pipeline(
            "sentiment-analysis", model="ProsusAI/finbert", device=self.device
        )
        self.roberta = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment",
            device=self.device,
        )
        self.finbert_tone = pipeline(
            "sentiment-analysis", model="yiyanghkust/finbert-tone", device=self.device
        )

    def _classify_event(self, text):
        """Return the label of the first keyword group found in *text*."""
        lowered = text.lower()  # lower-case once instead of once per branch
        for label, stems in self._EVENT_KEYWORDS:
            if any(stem in lowered for stem in stems):
                return label
        return self._NO_EVENT

    def detect_events(self, text, entity):
        """Return ``(event_type, summary)`` for a news excerpt about *entity*.

        ``event_type`` comes from keyword matching alone; ``summary`` is the
        decoded output of the seq2seq model. Empty *text* or *entity* yields
        ``("Нет", "Invalid input")``. If anything fails, ``summary`` is
        ``"Error: <message>"``; the keyword-derived ``event_type`` is kept
        when it was already determined, because it does not depend on the
        model.
        """
        if not text or not entity:
            return self._NO_EVENT, "Invalid input"

        event_type = self._NO_EVENT
        try:
            # Classify before touching the model so that a generation failure
            # cannot discard a valid keyword match.
            event_type = self._classify_event(text)

            prompt = (
                f"<s>Analyze the following news about {entity}:\n"
                f"Text: {text}\n"
                "Task: Identify the main event type and provide a brief summary.</s>"
            )
            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=self._MAX_INPUT_TOKENS,
            ).to(self.device)
            outputs = self.model.generate(
                **inputs, max_length=300, num_return_sequences=1
            )
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return event_type, response
        except Exception as exc:
            # Best-effort: one bad row must not abort processing of the file.
            return event_type, f"Error: {exc}"

    def analyze_sentiment(self, text):
        """Return ``"Positive"``, ``"Negative"`` or ``"Neutral"`` for *text*.

        Each of the three pipelines casts one vote; a polarity is returned
        only when at least two models agree on it, otherwise ``"Neutral"``.
        Empty text and any model failure also yield ``"Neutral"``.
        """
        if not text:
            return "Neutral"
        try:
            votes = [
                self._get_sentiment(
                    classifier(
                        text,
                        truncation=True,
                        max_length=self._MAX_INPUT_TOKENS,
                    )[0]
                )
                for classifier in (self.finbert, self.roberta, self.finbert_tone)
            ]
        except Exception:
            # Best-effort: a failing model must not break the whole batch.
            return "Neutral"

        # With three votes at most one label can reach two; a neutral
        # majority and a three-way split both resolve to "Neutral".
        for polarity in ("Positive", "Negative"):
            if votes.count(polarity) >= 2:
                return polarity
        return "Neutral"

    def _get_sentiment(self, result):
        """Map one pipeline prediction dict (its ``'label'`` key) to a polarity."""
        label = result['label'].lower()
        if label in self._POSITIVE_LABELS:
            return "Positive"
        if label in self._NEGATIVE_LABELS:
            return "Negative"
        return "Neutral"
def _cell_text(value):
    """Return a spreadsheet cell as text, mapping missing values to ``''``."""
    # Blank Excel cells arrive as NaN/None; str() would turn them into the
    # literal text "nan"/"None", which would then be analysed as real news.
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ''
    return str(value)


def process_file(file):
    """Run event detection and sentiment analysis over every row of an Excel file.

    Args:
        file: Either a path to the workbook or an object exposing the path
            as ``.name`` (both shapes are produced by upload widgets).

    Returns:
        A DataFrame with the columns 'Объект', 'Заголовок', 'Sentiment',
        'Event_Type', 'Event_Summary' and 'Текст', one row per input row.
        On any failure the error is logged and an empty DataFrame with the
        same columns is returned, so callers always receive a DataFrame.
    """
    import logging  # local import keeps this block self-contained

    columns = ['Объект', 'Заголовок', 'Sentiment', 'Event_Type', 'Event_Summary', 'Текст']
    try:
        # A plain path has no ``.name`` attribute, so fall back to the value itself.
        path = getattr(file, 'name', file)
        df = pd.read_excel(path)
        detector = EventDetector()

        processed_rows = []
        for _, row in df.iterrows():
            text = _cell_text(row.get('Выдержки из текста', ''))
            entity = _cell_text(row.get('Объект', ''))
            event_type, event_summary = detector.detect_events(text, entity)
            processed_rows.append({
                'Объект': entity,
                'Заголовок': _cell_text(row.get('Заголовок', '')),
                'Sentiment': detector.analyze_sentiment(text),
                'Event_Type': event_type,
                'Event_Summary': event_summary,
                'Текст': text,
            })
        # Passing ``columns`` keeps the schema stable even for a sheet with no rows.
        return pd.DataFrame(processed_rows, columns=columns)
    except Exception:
        # Return an empty DataFrame instead of raising, but leave a trace of why.
        logging.getLogger(__name__).exception("Failed to process uploaded file")
        return pd.DataFrame(columns=columns)
def analyze(file):
    """Process an uploaded file and build its two summary charts.

    Returns a ``(table, sentiment_figure, events_figure)`` triple. All three
    are ``None`` when no file was given; the figures are ``None`` when the
    table is empty or when chart construction fails.
    """
    if file is None:
        return None, None, None

    table = process_file(file)
    sentiment_figure = events_figure = None
    if not table.empty:
        try:
            sentiment_figure, events_figure = create_visualizations(table)
        except Exception:
            # Charts are optional: still hand back the table on failure.
            sentiment_figure = events_figure = None
    return table, sentiment_figure, events_figure
def create_visualizations(df):
    """Build the sentiment pie chart and the event-type bar chart.

    Args:
        df: DataFrame with 'Sentiment' and 'Event_Type' columns.

    Returns:
        ``(fig_sentiment, fig_events)``, or ``(None, None)`` when *df* is
        ``None`` or empty.
    """
    if df is None or df.empty:
        return None, None

    # Colours are looked up per label. value_counts() orders slices by
    # frequency, so a positional colour list would paint a different
    # sentiment red from one dataset to the next.
    sentiment_colors = {
        'Negative': '#FF6B6B',
        'Positive': '#4ECDC4',
        'Neutral': '#95A5A6',
    }
    fallback_color = '#95A5A6'

    # Create sentiment distribution plot
    sentiments = df['Sentiment'].value_counts()
    fig_sentiment = go.Figure(data=[go.Pie(
        labels=sentiments.index,
        values=sentiments.values,
        marker_colors=[
            sentiment_colors.get(label, fallback_color)
            for label in sentiments.index
        ],
    )])

    # Create events distribution plot
    events = df['Event_Type'].value_counts()
    fig_events = go.Figure(data=[go.Bar(
        x=events.index,
        y=events.values,
        marker_color='#2196F3',
    )])

    return fig_sentiment, fig_events
def create_interface():
    """Build and return the Gradio Blocks application.

    The layout is a file upload, a start button, a results table and two
    plots. Clicking the button runs the module-level ``analyze`` on the
    uploaded file and fills the table and both plots with its result.
    """
    with gr.Blocks() as app:
        gr.Markdown("# AI-анализ мониторинга новостей")

        with gr.Row():
            file_input = gr.File(label="Загрузите Excel файл")

        with gr.Row():
            analyze_btn = gr.Button("Начать анализ")

        with gr.Row():
            with gr.Column():
                stats = gr.DataFrame(label="Результаты анализа")

        with gr.Row():
            with gr.Column():
                sentiment_plot = gr.Plot(label="Распределение тональности")
            with gr.Column():
                events_plot = gr.Plot(label="Распределение событий")

        # Use the module-level analyze() rather than a nested copy: the copy
        # shadowed it and lacked its handling of chart-construction errors.
        analyze_btn.click(
            analyze,
            inputs=[file_input],
            outputs=[stats, sentiment_plot, events_plot],
        )

    return app
# Script entry point: build the UI and start serving it when this file is
# executed directly (nothing is launched on import).
if __name__ == "__main__":
    app = create_interface()
    app.launch()