"""Gradio app that tags news items from an Excel file with an event type and a sentiment."""

import functools
import io
import logging
import time
from collections import Counter
from datetime import datetime

# Third-party imports keep their original relative order (``spaces`` before
# ``torch``) so that any import-order side effects are unchanged.
import gradio as gr
import spaces
import pandas as pd
import torch
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import AutoModelForCausalLM
import plotly.graph_objects as go
from deep_translator import GoogleTranslator
from googletrans import Translator as LegacyTranslator
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows

logger = logging.getLogger(__name__)

# Column layout of the result table produced by ``process_file``.
RESULT_COLUMNS = ['Объект', 'Заголовок', 'Sentiment', 'Event_Type', 'Event_Summary', 'Текст']

# Ordered (event type, keyword stems) pairs; the first group with a match wins.
_EVENT_KEYWORDS = (
    ("Отчетность", ('отчет', 'выручка', 'прибыль', 'ebitda')),
    ("РЦБ", ('облигаци', 'купон', 'дефолт')),
    ("Суд", ('суд', 'иск', 'арбитраж')),
)

# Fixed colour per sentiment so a label keeps its colour regardless of how
# frequent it is in a given file.
_SENTIMENT_COLORS = {
    "Negative": '#FF6B6B',
    "Positive": '#4ECDC4',
    "Neutral": '#95A5A6',
}
_FALLBACK_COLOR = '#95A5A6'

# Upper bound on tokens passed to each model.
_MAX_INPUT_TOKENS = 512


def _classify_event_type(text):
    """Return the event type whose keyword stems occur in *text*, or "Нет"."""
    lowered = text.lower()
    for event_type, stems in _EVENT_KEYWORDS:
        if any(stem in lowered for stem in stems):
            return event_type
    return "Нет"


class EventDetector:
    """Bundle of a seq2seq summariser and three sentiment pipelines.

    All models are loaded in ``__init__`` and moved to CUDA when available.
    """

    def __init__(self):
        self.model_name = "google/mt5-small"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = self.model.to(self.device)

        # Initialize sentiment analyzers
        self.finbert = pipeline(
            "sentiment-analysis", model="ProsusAI/finbert", device=self.device
        )
        self.roberta = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment",
            device=self.device,
        )
        self.finbert_tone = pipeline(
            "sentiment-analysis", model="yiyanghkust/finbert-tone", device=self.device
        )

    @spaces.GPU(duration=120)
    def detect_events(self, text, entity):
        """Classify the event type of *text* and generate a model response.

        Args:
            text: News text to analyse.
            entity: Name of the entity the news is about.

        Returns:
            A ``(event_type, summary)`` tuple. ``event_type`` is one of
            "Отчетность", "РЦБ", "Суд" or "Нет". On empty input the summary
            is "Invalid input"; on failure it is ``"Error: <message>"``.
        """
        if not text or not entity:
            return "Нет", "Invalid input"

        try:
            prompt = (
                f"Analyze the following news about {entity}: "
                f"Text: {text} "
                "Task: Identify the main event type and provide a brief summary."
            )
            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=_MAX_INPUT_TOKENS,
            ).to(self.device)
            outputs = self.model.generate(**inputs, max_length=300, num_return_sequences=1)
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            # The event type comes from keyword matching on the raw text,
            # not from the generated response.
            return _classify_event_type(text), response
        except Exception as e:
            logger.exception("Event detection failed")
            return "Нет", f"Error: {e}"

    @spaces.GPU(duration=60)
    def analyze_sentiment(self, text):
        """Return the majority sentiment of the three pipelines for *text*.

        Returns:
            "Positive", "Negative" or "Neutral". "Neutral" is also returned
            for empty text, when no label gets at least two votes, or when
            any pipeline raises.
        """
        if not text:
            return "Neutral"

        try:
            # Truncate explicitly: without it, inputs longer than the model
            # limit raise and every long article would fall back to "Neutral".
            votes = [
                self._get_sentiment(
                    analyzer(text, truncation=True, max_length=_MAX_INPUT_TOKENS)[0]
                )
                for analyzer in (self.finbert, self.roberta, self.finbert_tone)
            ]
        except Exception:
            logger.exception("Sentiment analysis failed")
            return "Neutral"

        label, count = Counter(votes).most_common(1)[0]
        return label if count >= 2 else "Neutral"

    def _get_sentiment(self, result):
        """Map a pipeline result dict to "Positive", "Negative" or "Neutral".

        ``result['label']`` is compared case-insensitively against the
        known positive / negative spellings; anything else is "Neutral".
        """
        label = result['label'].lower()
        if label in ["positive", "label_2", "pos"]:
            return "Positive"
        elif label in ["negative", "label_0", "neg"]:
            return "Negative"
        return "Neutral"


@functools.lru_cache(maxsize=1)
def _get_detector():
    """Return a shared ``EventDetector``, loading the models only once."""
    return EventDetector()


def _cell_text(row, column):
    """Return ``row[column]`` as a string, mapping missing/NaN cells to ''."""
    value = row.get(column, '')
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ''
    return str(value)


def process_file(file):
    """Run event and sentiment analysis over every row of an Excel file.

    Args:
        file: Either a path string or an object exposing the path as
            ``.name`` (both forms are handed out by Gradio's file upload).

    Returns:
        A DataFrame with ``RESULT_COLUMNS``. It is empty when the file
        cannot be read or processing fails; the error is logged.
    """
    try:
        path = getattr(file, "name", file)
        df = pd.read_excel(path)
        detector = _get_detector()
        processed_rows = []

        for _, row in df.iterrows():
            text = _cell_text(row, 'Выдержки из текста')
            entity = _cell_text(row, 'Объект')

            event_type, event_summary = detector.detect_events(text, entity)
            sentiment = detector.analyze_sentiment(text)

            processed_rows.append({
                'Объект': entity,
                'Заголовок': _cell_text(row, 'Заголовок'),
                'Sentiment': sentiment,
                'Event_Type': event_type,
                'Event_Summary': event_summary,
                'Текст': text,
            })

        return pd.DataFrame(processed_rows, columns=RESULT_COLUMNS)
    except Exception:
        logger.exception("Failed to process uploaded file")
        # Return empty DataFrame instead of string
        return pd.DataFrame(columns=RESULT_COLUMNS)


def analyze(file):
    """Gradio callback: process *file* and build both charts.

    Returns:
        ``(dataframe, sentiment_figure, events_figure)``. All three are
        ``None`` when no file is given; the figures are ``None`` when the
        result table is empty or chart creation fails.
    """
    if file is None:
        return None, None, None

    df = process_file(file)
    if df.empty:
        return df, None, None

    try:
        fig_sentiment, fig_events = create_visualizations(df)
    except Exception:
        logger.exception("Failed to build visualizations")
        return df, None, None
    return df, fig_sentiment, fig_events


def create_visualizations(df):
    """Build a sentiment pie chart and an event-type bar chart from *df*.

    Args:
        df: DataFrame with 'Sentiment' and 'Event_Type' columns.

    Returns:
        ``(fig_sentiment, fig_events)``, or ``(None, None)`` when *df* is
        ``None`` or empty.
    """
    if df is None or df.empty:
        return None, None

    # Create sentiment distribution plot
    sentiments = df['Sentiment'].value_counts()
    fig_sentiment = go.Figure(data=[go.Pie(
        labels=sentiments.index,
        values=sentiments.values,
        # Colours are looked up per label; value_counts() orders by
        # frequency, so a positional list would shuffle them between files.
        marker_colors=[
            _SENTIMENT_COLORS.get(label, _FALLBACK_COLOR) for label in sentiments.index
        ],
    )])

    # Create events distribution plot
    events = df['Event_Type'].value_counts()
    fig_events = go.Figure(data=[go.Bar(
        x=events.index,
        y=events.values,
        marker_color='#2196F3',
    )])

    return fig_sentiment, fig_events


def create_interface():
    """Assemble and return the Gradio Blocks application."""
    with gr.Blocks() as app:
        gr.Markdown("# AI-анализ мониторинга новостей")

        with gr.Row():
            file_input = gr.File(label="Загрузите Excel файл")

        with gr.Row():
            analyze_btn = gr.Button("Начать анализ")

        with gr.Row():
            with gr.Column():
                stats = gr.DataFrame(label="Результаты анализа")

        with gr.Row():
            with gr.Column():
                sentiment_plot = gr.Plot(label="Распределение тональности")
            with gr.Column():
                events_plot = gr.Plot(label="Распределение событий")

        # Use the module-level ``analyze`` so the empty-result and error
        # handling there is actually applied to button clicks.
        analyze_btn.click(
            analyze,
            inputs=[file_input],
            outputs=[stats, sentiment_plot, events_plot],
        )

    return app


if __name__ == "__main__":
    app = create_interface()
    app.launch()