pentarosarium committed on
Commit
7b3ce5a
·
1 Parent(s): 051d547
Files changed (1) hide show
  1. app.py +114 -114
app.py CHANGED
@@ -645,7 +645,7 @@ def create_interface():
645
  control = ProcessControl()
646
 
647
  with gr.Blocks(theme=gr.themes.Soft()) as app:
648
- gr.Markdown("# AI-анализ мониторинга новостей v.1.33")
649
 
650
  with gr.Row():
651
  file_input = gr.File(
@@ -704,130 +704,130 @@ def create_interface():
704
  control.request_stop()
705
  return "Остановка обработки..."
706
 
707
- @spaces.GPU(duration=300)
708
- def analyze(file_bytes):
709
- if file_bytes is None:
710
- gr.Warning("Пожалуйста, загрузите файл")
711
- return None, None, None, False, None, "Ожидание файла...", ""
712
-
713
- try:
714
- # Reset stop flag
715
- control.reset()
716
-
717
- file_obj = io.BytesIO(file_bytes)
718
- logger.info("File loaded into BytesIO successfully")
719
-
720
- detector = EventDetector()
721
-
722
- # Read and deduplicate data
723
- df = pd.read_excel(file_obj, sheet_name='Публикации')
724
- original_count = len(df)
725
- df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
726
- removed_count = original_count - len(df)
727
- dedup_message = f"Удалено {removed_count} дубликатов из {original_count} записей"
728
- logger.info(f"Removed {removed_count} duplicate entries")
729
-
730
- processed_rows = []
731
- total = len(df)
732
- batch_size = 3
733
-
734
- for batch_start in range(0, total, batch_size):
735
- if control.should_stop():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
736
  if processed_rows:
737
- result_df = pd.DataFrame(processed_rows)
738
- output_bytes_io = create_output_file(result_df, file_obj)
739
  if output_bytes_io:
740
  output_bytes = output_bytes_io.getvalue() # Convert BytesIO to bytes
741
- fig_sentiment, fig_events = create_visualizations(result_df)
742
  return (
743
- result_df,
744
  fig_sentiment,
745
  fig_events,
746
  True,
747
  output_bytes, # Return bytes instead of BytesIO
748
- f"Обработка остановлена. Обработано {len(processed_rows)}/{total} строк",
749
  dedup_message
750
  )
751
- break
752
-
753
- batch_end = min(batch_start + batch_size, total)
754
- batch = df.iloc[batch_start:batch_end]
755
-
756
- for idx, row in batch.iterrows():
757
- try:
758
- text = str(row.get('Выдержки из текста', '')).strip()
759
- entity = str(row.get('Объект', '')).strip()
760
-
761
- if not text or not entity:
762
- continue
763
-
764
- # Process with GPU
765
- results = detector.process_text(text, entity)
766
-
767
- processed_rows.append({
768
- 'Объект': entity,
769
- 'Заголовок': str(row.get('Заголовок', '')),
770
- 'Translated': results['translated_text'],
771
- 'Sentiment': results['sentiment'],
772
- 'Impact': results['impact'],
773
- 'Reasoning': results['reasoning'],
774
- 'Event_Type': results['event_type'],
775
- 'Event_Summary': results['event_summary'],
776
- 'Выдержки из текста': text[:1000]
777
- })
778
 
779
- except Exception as e:
780
- logger.error(f"Error processing row {idx}: {str(e)}")
781
- continue
782
-
783
- # Create intermediate results
784
- if processed_rows:
785
- result_df = pd.DataFrame(processed_rows)
786
- output_bytes_io = create_output_file(result_df, file_obj)
787
- if output_bytes_io:
788
- output_bytes = output_bytes_io.getvalue() # Convert BytesIO to bytes
789
- fig_sentiment, fig_events = create_visualizations(result_df)
790
- yield (
791
- result_df,
792
- fig_sentiment,
793
- fig_events,
794
- True,
795
- output_bytes, # Return bytes instead of BytesIO
796
- f"Обработано {len(processed_rows)}/{total} строк",
797
- dedup_message
798
- )
799
-
800
- # Cleanup GPU resources after batch
801
- torch.cuda.empty_cache()
802
- time.sleep(2)
803
-
804
- # Create final results
805
- if processed_rows:
806
- final_df = pd.DataFrame(processed_rows)
807
- output_bytes_io = create_output_file(final_df, file_obj)
808
- if output_bytes_io:
809
- output_bytes = output_bytes_io.getvalue() # Convert BytesIO to bytes
810
- fig_sentiment, fig_events = create_visualizations(final_df)
811
- return (
812
- final_df,
813
- fig_sentiment,
814
- fig_events,
815
- True,
816
- output_bytes, # Return bytes instead of BytesIO
817
- "Обработка завершена!",
818
- dedup_message
819
- )
820
- else:
821
- return None, None, None, False, None, "Нет обработанных данных", ""
822
-
823
- except Exception as e:
824
- error_msg = f"Ошибка анализа: {str(e)}"
825
- logger.error(error_msg)
826
- gr.Error(error_msg)
827
- return None, None, None, False, None, error_msg, ""
828
- finally:
829
- if detector:
830
- detector.cleanup()
831
 
832
  def trigger_download(show_button, file_content):
833
  """Handle download button visibility and file content"""
 
645
  control = ProcessControl()
646
 
647
  with gr.Blocks(theme=gr.themes.Soft()) as app:
648
+ gr.Markdown("# AI-анализ мониторинга новостей v.1.34")
649
 
650
  with gr.Row():
651
  file_input = gr.File(
 
704
  control.request_stop()
705
  return "Остановка обработки..."
706
 
707
+ @spaces.GPU(duration=300)
708
+ def analyze(file_bytes):
709
+ if file_bytes is None:
710
+ gr.Warning("Пожалуйста, загрузите файл")
711
+ return None, None, None, False, None, "Ожидание файла...", ""
712
+
713
+ try:
714
+ # Reset stop flag
715
+ control.reset()
716
+
717
+ file_obj = io.BytesIO(file_bytes)
718
+ logger.info("File loaded into BytesIO successfully")
719
+
720
+ detector = EventDetector()
721
+
722
+ # Read and deduplicate data
723
+ df = pd.read_excel(file_obj, sheet_name='Публикации')
724
+ original_count = len(df)
725
+ df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
726
+ removed_count = original_count - len(df)
727
+ dedup_message = f"Удалено {removed_count} дубликатов из {original_count} записей"
728
+ logger.info(f"Removed {removed_count} duplicate entries")
729
+
730
+ processed_rows = []
731
+ total = len(df)
732
+ batch_size = 3
733
+
734
+ for batch_start in range(0, total, batch_size):
735
+ if control.should_stop():
736
+ if processed_rows:
737
+ result_df = pd.DataFrame(processed_rows)
738
+ output_bytes_io = create_output_file(result_df, file_obj)
739
+ if output_bytes_io:
740
+ output_bytes = output_bytes_io.getvalue() # Convert BytesIO to bytes
741
+ fig_sentiment, fig_events = create_visualizations(result_df)
742
+ return (
743
+ result_df,
744
+ fig_sentiment,
745
+ fig_events,
746
+ True,
747
+ output_bytes, # Return bytes instead of BytesIO
748
+ f"Обработка остановлена. Обработано {len(processed_rows)}/{total} строк",
749
+ dedup_message
750
+ )
751
+ break
752
+
753
+ batch_end = min(batch_start + batch_size, total)
754
+ batch = df.iloc[batch_start:batch_end]
755
+
756
+ for idx, row in batch.iterrows():
757
+ try:
758
+ text = str(row.get('Выдержки из текста', '')).strip()
759
+ entity = str(row.get('Объект', '')).strip()
760
+
761
+ if not text or not entity:
762
+ continue
763
+
764
+ # Process with GPU
765
+ results = detector.process_text(text, entity)
766
+
767
+ processed_rows.append({
768
+ 'Объект': entity,
769
+ 'Заголовок': str(row.get('Заголовок', '')),
770
+ 'Translated': results['translated_text'],
771
+ 'Sentiment': results['sentiment'],
772
+ 'Impact': results['impact'],
773
+ 'Reasoning': results['reasoning'],
774
+ 'Event_Type': results['event_type'],
775
+ 'Event_Summary': results['event_summary'],
776
+ 'Выдержки из текста': text[:1000]
777
+ })
778
+
779
+ except Exception as e:
780
+ logger.error(f"Error processing row {idx}: {str(e)}")
781
+ continue
782
+
783
+ # Create intermediate results
784
+ if processed_rows:
785
+ result_df = pd.DataFrame(processed_rows)
786
+ output_bytes_io = create_output_file(result_df, file_obj)
787
+ if output_bytes_io:
788
+ output_bytes = output_bytes_io.getvalue() # Convert BytesIO to bytes
789
+ fig_sentiment, fig_events = create_visualizations(result_df)
790
+ yield (
791
+ result_df,
792
+ fig_sentiment,
793
+ fig_events,
794
+ True,
795
+ output_bytes, # Return bytes instead of BytesIO
796
+ f"Обработано {len(processed_rows)}/{total} строк",
797
+ dedup_message
798
+ )
799
+
800
+ # Cleanup GPU resources after batch
801
+ torch.cuda.empty_cache()
802
+ time.sleep(2)
803
+
804
+ # Create final results
805
  if processed_rows:
806
+ final_df = pd.DataFrame(processed_rows)
807
+ output_bytes_io = create_output_file(final_df, file_obj)
808
  if output_bytes_io:
809
  output_bytes = output_bytes_io.getvalue() # Convert BytesIO to bytes
810
+ fig_sentiment, fig_events = create_visualizations(final_df)
811
  return (
812
+ final_df,
813
  fig_sentiment,
814
  fig_events,
815
  True,
816
  output_bytes, # Return bytes instead of BytesIO
817
+ "Обработка завершена!",
818
  dedup_message
819
  )
820
+ else:
821
+ return None, None, None, False, None, "Нет обработанных данных", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
822
 
823
+ except Exception as e:
824
+ error_msg = f"Ошибка анализа: {str(e)}"
825
+ logger.error(error_msg)
826
+ gr.Error(error_msg)
827
+ return None, None, None, False, None, error_msg, ""
828
+ finally:
829
+ if detector:
830
+ detector.cleanup()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
831
 
832
  def trigger_download(show_button, file_content):
833
  """Handle download button visibility and file content"""