Spaces:

pentarosarium
/

gprocess

Sleeping

App Files Community

pentarosarium committed on Nov 20, 2024

Commit

feb6866

1 Parent(s): 33771c2

v.1.17

Browse files

Files changed (1) hide show

app.py +114 -63

app.py CHANGED Viewed

@@ -211,17 +211,52 @@ class EventDetector:
             logger.error(f"Event detection error: {str(e)}")
             return "Нет", f"Error: {str(e)}"
-    @spaces.GPU(duration=20)  # Reduced duration
     def analyze_sentiment(self, text):
         try:
             current_time = time.time()
             if current_time - self.last_gpu_use < 2:
                 time.sleep(2)
-            # Rest of the method remains the same...
-            self.last_gpu_use = time.time()
-            return sentiment_result
         except Exception as e:
             logger.error(f"Sentiment analysis error: {e}")
@@ -270,67 +305,83 @@ def process_file(file_obj):
         processed_rows = []
         total = len(df)
-        # Process in smaller batches
-        BATCH_SIZE = 5
         for batch_start in range(0, total, BATCH_SIZE):
-            if control.should_stop():
-                break
-            batch_end = min(batch_start + BATCH_SIZE, total)
-            batch = df.iloc[batch_start:batch_end]
-            # Initialize models for this batch
-            detector.initialize_models()
-            for idx, row in batch.iterrows():
-                try:
-                    text = str(row.get('Выдержки из текста', ''))
-                    if not text.strip():
-                        continue
-                    entity = str(row.get('Объект', ''))
-                    if not entity.strip():
                         continue
-                    # Process event detection with GPU
-                    event_type, event_summary = detector.detect_events(text, entity)
-                    # Small delay to avoid quota issues
-                    time.sleep(0.5)
-                    # Process sentiment analysis with GPU
-                    sentiment = detector.analyze_sentiment(text)
-                    # Small delay after GPU operations
-                    time.sleep(0.5)
-                    processed_rows.append({
-                        'Объект': entity,
-                        'Заголовок': str(row.get('Заголовок', '')),
-                        'Sentiment': sentiment,
-                        'Event_Type': event_type,
-                        'Event_Summary': event_summary,
-                        'Текст': text[:1000]
-                    })
-                    logger.info(f"Processed {idx + 1}/{total} rows")
-                except Exception as e:
-                    logger.error(f"Error processing row {idx}: {str(e)}")
-                    if "GPU quota" in str(e):
-                        # Wait longer if we hit quota limits
-                        time.sleep(5)
-                    continue
-            # Release GPU resources after each batch
-            torch.cuda.empty_cache()
-            # Wait between batches
-            time.sleep(2)
-            # Create intermediate results
-            if processed_rows:
-                result_df = pd.DataFrame(processed_rows)
-                yield result_df, None, None, f"Обработано {len(processed_rows)}/{total} строк"
         # Final results
         if processed_rows:
@@ -348,7 +399,7 @@ def create_interface():
     control = ProcessControl()
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        gr.Markdown("# AI-анализ мониторинга новостей v.1.16")
         with gr.Row():
             file_input = gr.File(

             logger.error(f"Event detection error: {str(e)}")
             return "Нет", f"Error: {str(e)}"
+    @spaces.GPU(duration=20)
     def analyze_sentiment(self, text):
         try:
+            if not self.initialized:
+                if not self.initialize_models():
+                    return "Neutral"
             current_time = time.time()
             if current_time - self.last_gpu_use < 2:
                 time.sleep(2)
+            truncated_text = text[:500]
+            results = []
+            try:
+                inputs = [truncated_text]
+                sentiment_results = []
+                # Process each model separately with delay
+                if self.finbert:
+                    finbert_result = self.finbert(inputs, truncation=True, max_length=512)[0]
+                    results.append(self.get_sentiment_label(finbert_result))
+                    time.sleep(0.5)
+                if self.roberta:
+                    roberta_result = self.roberta(inputs, truncation=True, max_length=512)[0]
+                    results.append(self.get_sentiment_label(roberta_result))
+                    time.sleep(0.5)
+                if self.finbert_tone:
+                    finbert_tone_result = self.finbert_tone(inputs, truncation=True, max_length=512)[0]
+                    results.append(self.get_sentiment_label(finbert_tone_result))
+                # Get majority vote
+                if results:
+                    sentiment_counts = pd.Series(results).value_counts()
+                    final_sentiment = sentiment_counts.index[0] if sentiment_counts.iloc[0] >= 2 else "Neutral"
+                else:
+                    final_sentiment = "Neutral"
+                self.last_gpu_use = time.time()
+                return final_sentiment
+            except Exception as e:
+                logger.error(f"Model inference error: {e}")
+                return "Neutral"
         except Exception as e:
             logger.error(f"Sentiment analysis error: {e}")
         processed_rows = []
         total = len(df)
+        # Process in smaller batches with quota management
+        BATCH_SIZE = 3  # Reduced batch size
+        QUOTA_WAIT_TIME = 60  # Wait time when quota is exceeded
         for batch_start in range(0, total, BATCH_SIZE):
+            try:
+                batch_end = min(batch_start + BATCH_SIZE, total)
+                batch = df.iloc[batch_start:batch_end]
+                # Initialize models for batch
+                if not detector.initialized:
+                    detector.initialize_models()
+                    time.sleep(1)  # Wait after initialization
+                for idx, row in batch.iterrows():
+                    try:
+                        text = str(row.get('Выдержки из текста', ''))
+                        if not text.strip():
+                            continue
+                        entity = str(row.get('Объект', ''))
+                        if not entity.strip():
+                            continue
+                        # Process with GPU quota management
+                        event_type = "Нет"
+                        event_summary = ""
+                        sentiment = "Neutral"
+                        try:
+                            event_type, event_summary = detector.detect_events(text, entity)
+                            time.sleep(1)  # Wait between GPU operations
+                            sentiment = detector.analyze_sentiment(text)
+                        except Exception as e:
+                            if "GPU quota" in str(e):
+                                logger.warning("GPU quota exceeded, waiting...")
+                                time.sleep(QUOTA_WAIT_TIME)
+                                continue
+                            else:
+                                raise e
+                        processed_rows.append({
+                            'Объект': entity,
+                            'Заголовок': str(row.get('Заголовок', '')),
+                            'Sentiment': sentiment,
+                            'Event_Type': event_type,
+                            'Event_Summary': event_summary,
+                            'Текст': text[:1000]
+                        })
+                        logger.info(f"Processed {idx + 1}/{total} rows")
+                    except Exception as e:
+                        logger.error(f"Error processing row {idx}: {str(e)}")
                         continue
+                # Create intermediate results
+                if processed_rows:
+                    intermediate_df = pd.DataFrame(processed_rows)
+                    yield (
+                        intermediate_df,
+                        None,
+                        None,
+                        f"Обработано {len(processed_rows)}/{total} строк"
+                    )
+                # Wait between batches
+                time.sleep(2)
+                # Cleanup GPU resources after each batch
+                torch.cuda.empty_cache()
+            except Exception as e:
+                logger.error(f"Batch processing error: {str(e)}")
+                if "GPU quota" in str(e):
+                    time.sleep(QUOTA_WAIT_TIME)
+                continue
         # Final results
         if processed_rows:
     control = ProcessControl()
     with gr.Blocks(theme=gr.themes.Soft()) as app:
+        gr.Markdown("# AI-анализ мониторинга новостей v.1.17")
         with gr.Row():
             file_input = gr.File(