Spaces:

pentarosarium
/

gprocess

Sleeping

File size: 6,860 Bytes

import io
import logging
import time
from datetime import datetime

import gradio as gr
import pandas as pd
import plotly.graph_objects as go
import spaces
import torch
from deep_translator import GoogleTranslator
from googletrans import Translator as LegacyTranslator
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from transformers import AutoModelForCausalLM
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer

class EventDetector:
    """Classify news text by event type and sentiment.

    On construction this loads the ``google/mt5-small`` seq2seq model (used to
    generate a free-text response about the news item) and three
    ``sentiment-analysis`` pipelines whose verdicts are combined by majority
    vote.
    """

    # Upper bound on the number of tokens fed to any model; longer inputs are
    # truncated instead of raising inside the model.
    _MAX_INPUT_TOKENS = 512

    # (event type, keyword stems) pairs tested in order against the
    # lower-cased text; the first type with a matching stem wins.
    _EVENT_KEYWORDS = (
        ("Отчетность", ('отчет', 'выручка', 'прибыль', 'ebitda')),
        ("РЦБ", ('облигаци', 'купон', 'дефолт')),
        ("Суд", ('суд', 'иск', 'арбитраж')),
    )

    def __init__(self):
        self.model_name = "google/mt5-small"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = self.model.to(self.device)

        # Initialize sentiment analyzers
        self.finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert", device=self.device)
        self.roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment", device=self.device)
        self.finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", device=self.device)

    @spaces.GPU(duration=120)
    def detect_events(self, text, entity):
        """Return ``(event_type, summary)`` for a news text about *entity*.

        The event type comes from keyword matching on the text; the summary
        is whatever the seq2seq model generates for the prompt.

        Args:
            text: News text to analyse.
            entity: Name of the entity the news is about.

        Returns:
            A 2-tuple of strings. ``("Нет", "Invalid input")`` when either
            argument is empty, and ``("Нет", "Error: ...")`` when tokenizing
            or generation raises.
        """
        if not text or not entity:
            return "Нет", "Invalid input"

        try:
            prompt = f"""<s>Analyze the following news about {entity}:
            Text: {text}
            Task: Identify the main event type and provide a brief summary.</s>"""

            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=self._MAX_INPUT_TOKENS,
            ).to(self.device)

            outputs = self.model.generate(**inputs, max_length=300, num_return_sequences=1)
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            # Lower-case once; every keyword stem is tested against this copy.
            lowered = text.lower()
            event_type = "Нет"
            for candidate, stems in self._EVENT_KEYWORDS:
                if any(stem in lowered for stem in stems):
                    event_type = candidate
                    break

            return event_type, response

        except Exception as e:
            return "Нет", f"Error: {str(e)}"

    @spaces.GPU(duration=60)
    def analyze_sentiment(self, text):
        """Return the majority sentiment of *text* across the three pipelines.

        Args:
            text: Text to classify.

        Returns:
            ``"Positive"``, ``"Negative"`` or ``"Neutral"``. ``"Neutral"`` is
            also the answer for empty text, when no label gets at least two
            votes, and when any pipeline raises (the error is logged).
        """
        if not text:
            return "Neutral"

        try:
            # Truncate explicitly: without it, inputs longer than the models'
            # context raise and every long article would fall through to the
            # "Neutral" fallback below.
            results = [
                self._get_sentiment(
                    classifier(text, truncation=True, max_length=self._MAX_INPUT_TOKENS)[0]
                )
                for classifier in (self.finbert, self.roberta, self.finbert_tone)
            ]

            # Return majority sentiment
            sentiment_counts = pd.Series(results).value_counts()
            return sentiment_counts.index[0] if sentiment_counts.iloc[0] >= 2 else "Neutral"

        except Exception:
            # Best effort: keep the batch going, but leave a trace of why.
            logging.getLogger(__name__).exception("Sentiment analysis failed")
            return "Neutral"

    def _get_sentiment(self, result):
        """Map one pipeline result dict to Positive / Negative / Neutral.

        The ``label`` value is compared case-insensitively. ``label_2`` and
        ``label_0`` are presumably the generic class ids of the RoBERTa model
        (confirm against its model card). Anything unrecognised is "Neutral".
        """
        label = result['label'].lower()
        if label in ["positive", "label_2", "pos"]:
            return "Positive"
        elif label in ["negative", "label_0", "neg"]:
            return "Negative"
        return "Neutral"

def process_file(file):
    """Run event detection and sentiment analysis on every row of an Excel file.

    Args:
        file: Path to the workbook, or an upload object that exposes the path
            as its ``name`` attribute. ``None`` yields an empty result.

    Returns:
        A DataFrame with one row per input row and the columns ``Объект``,
        ``Заголовок``, ``Sentiment``, ``Event_Type``, ``Event_Summary`` and
        ``Текст``. If reading or processing fails, the error is logged and an
        empty DataFrame with the same columns is returned.
    """
    columns = ['Объект', 'Заголовок', 'Sentiment', 'Event_Type', 'Event_Summary', 'Текст']

    def cell(row, column):
        # Missing columns and empty (NaN) cells become '' instead of the
        # literal string 'nan', which would otherwise be analysed as text.
        value = row.get(column, '')
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ''
        return str(value)

    if file is None:
        return pd.DataFrame(columns=columns)

    try:
        # Accept both a plain path and an object carrying the path in `.name`.
        path = getattr(file, 'name', file)
        df = pd.read_excel(path)
        detector = EventDetector()
        processed_rows = []

        for _, row in df.iterrows():
            text = cell(row, 'Выдержки из текста')
            entity = cell(row, 'Объект')

            event_type, event_summary = detector.detect_events(text, entity)
            sentiment = detector.analyze_sentiment(text)

            processed_rows.append({
                'Объект': entity,
                'Заголовок': cell(row, 'Заголовок'),
                'Sentiment': sentiment,
                'Event_Type': event_type,
                'Event_Summary': event_summary,
                'Текст': text
            })

        # Passing `columns` keeps the schema stable even for a sheet with no rows.
        return pd.DataFrame(processed_rows, columns=columns)

    except Exception:
        # Boundary handler: callers expect a DataFrame, never an exception,
        # so log the failure and return an empty frame.
        logging.getLogger(__name__).exception("Failed to process file %r", file)
        return pd.DataFrame(columns=columns)

def analyze(file):
    """Process an uploaded file and build its two summary charts.

    Args:
        file: The uploaded file, or ``None`` when nothing was uploaded.

    Returns:
        A 3-tuple ``(df, fig_sentiment, fig_events)``. All three are ``None``
        when *file* is ``None``. Both figures are ``None`` when the processed
        DataFrame is empty or when building the charts fails.
    """
    if file is None:
        return None, None, None

    df = process_file(file)
    if df.empty:
        return df, None, None

    try:
        fig_sentiment, fig_events = create_visualizations(df)
    except Exception:
        # Charts are optional: still return the table, but record the failure
        # instead of dropping it silently.
        logging.getLogger(__name__).exception("Failed to build visualizations")
        return df, None, None
    return df, fig_sentiment, fig_events

def create_visualizations(df):
    """Build the sentiment pie chart and the event-type bar chart.

    Args:
        df: DataFrame with ``Sentiment`` and ``Event_Type`` columns, or
            ``None``.

    Returns:
        A 2-tuple ``(fig_sentiment, fig_events)`` of Plotly figures, or
        ``(None, None)`` when *df* is ``None`` or empty.
    """
    if df is None or df.empty:
        return None, None

    # One fixed colour per sentiment. Assigning colours by position would tie
    # them to the frequency ranking, so the same label could change colour
    # from one dataset to the next.
    sentiment_colors = {
        'Negative': '#FF6B6B',
        'Positive': '#4ECDC4',
        'Neutral': '#95A5A6',
    }
    fallback_color = '#95A5A6'

    # Create sentiment distribution plot
    sentiments = df['Sentiment'].value_counts()
    fig_sentiment = go.Figure(data=[go.Pie(
        labels=sentiments.index,
        values=sentiments.values,
        marker_colors=[sentiment_colors.get(label, fallback_color) for label in sentiments.index]
    )])

    # Create events distribution plot
    events = df['Event_Type'].value_counts()
    fig_events = go.Figure(data=[go.Bar(
        x=events.index,
        y=events.values,
        marker_color='#2196F3'
    )])

    return fig_sentiment, fig_events

def create_interface():
    """Build the Gradio Blocks UI and wire the analyse button.

    The layout is a file upload, a start button, a results table and two
    plots (sentiment and event distributions).

    Returns:
        The constructed ``gr.Blocks`` app; the caller is expected to launch it.
    """
    with gr.Blocks() as app:
        gr.Markdown("# AI-анализ мониторинга новостей")

        with gr.Row():
            file_input = gr.File(label="Загрузите Excel файл")

        with gr.Row():
            analyze_btn = gr.Button("Начать анализ")

        with gr.Row():
            with gr.Column():
                stats = gr.DataFrame(label="Результаты анализа")

        with gr.Row():
            with gr.Column():
                sentiment_plot = gr.Plot(label="Распределение тональности")
            with gr.Column():
                events_plot = gr.Plot(label="Распределение событий")

        # Use the module-level `analyze` rather than a nested copy, so the
        # button gets its handling of empty results and chart failures.
        analyze_btn.click(
            analyze,
            inputs=[file_input],
            outputs=[stats, sentiment_plot, events_plot]
        )

    return app

if __name__ == "__main__":
    # Build the Gradio app and start serving it when run as a script.
    app = create_interface()
    app.launch()