pentarosarium committed on
Commit
feb6866
·
1 Parent(s): 33771c2
Files changed (1) hide show
  1. app.py +114 -63
app.py CHANGED
@@ -211,17 +211,52 @@ class EventDetector:
211
  logger.error(f"Event detection error: {str(e)}")
212
  return "Нет", f"Error: {str(e)}"
213
 
214
- @spaces.GPU(duration=20) # Reduced duration
215
  def analyze_sentiment(self, text):
216
  try:
 
 
 
 
217
  current_time = time.time()
218
  if current_time - self.last_gpu_use < 2:
219
  time.sleep(2)
220
-
221
- # Rest of the method remains the same...
222
 
223
- self.last_gpu_use = time.time()
224
- return sentiment_result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
  except Exception as e:
227
  logger.error(f"Sentiment analysis error: {e}")
@@ -270,67 +305,83 @@ def process_file(file_obj):
270
  processed_rows = []
271
  total = len(df)
272
 
273
- # Process in smaller batches
274
- BATCH_SIZE = 5
 
 
275
  for batch_start in range(0, total, BATCH_SIZE):
276
- if control.should_stop():
277
- break
 
278
 
279
- batch_end = min(batch_start + BATCH_SIZE, total)
280
- batch = df.iloc[batch_start:batch_end]
281
-
282
- # Initialize models for this batch
283
- detector.initialize_models()
284
-
285
- for idx, row in batch.iterrows():
286
- try:
287
- text = str(row.get('Выдержки из текста', ''))
288
- if not text.strip():
289
- continue
 
 
 
 
 
 
 
 
290
 
291
- entity = str(row.get('Объект', ''))
292
- if not entity.strip():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
  continue
294
-
295
- # Process event detection with GPU
296
- event_type, event_summary = detector.detect_events(text, entity)
297
-
298
- # Small delay to avoid quota issues
299
- time.sleep(0.5)
300
-
301
- # Process sentiment analysis with GPU
302
- sentiment = detector.analyze_sentiment(text)
303
-
304
- # Small delay after GPU operations
305
- time.sleep(0.5)
306
-
307
- processed_rows.append({
308
- 'Объект': entity,
309
- 'Заголовок': str(row.get('Заголовок', '')),
310
- 'Sentiment': sentiment,
311
- 'Event_Type': event_type,
312
- 'Event_Summary': event_summary,
313
- 'Текст': text[:1000]
314
- })
315
-
316
- logger.info(f"Processed {idx + 1}/{total} rows")
317
-
318
- except Exception as e:
319
- logger.error(f"Error processing row {idx}: {str(e)}")
320
- if "GPU quota" in str(e):
321
- # Wait longer if we hit quota limits
322
- time.sleep(5)
323
- continue
324
-
325
- # Release GPU resources after each batch
326
- torch.cuda.empty_cache()
327
- # Wait between batches
328
- time.sleep(2)
329
-
330
- # Create intermediate results
331
- if processed_rows:
332
- result_df = pd.DataFrame(processed_rows)
333
- yield result_df, None, None, f"Обработано {len(processed_rows)}/{total} строк"
334
 
335
  # Final results
336
  if processed_rows:
@@ -348,7 +399,7 @@ def create_interface():
348
  control = ProcessControl()
349
 
350
  with gr.Blocks(theme=gr.themes.Soft()) as app:
351
- gr.Markdown("# AI-анализ мониторинга новостей v.1.16")
352
 
353
  with gr.Row():
354
  file_input = gr.File(
 
211
  logger.error(f"Event detection error: {str(e)}")
212
  return "Нет", f"Error: {str(e)}"
213
 
214
+ @spaces.GPU(duration=20)
215
  def analyze_sentiment(self, text):
216
  try:
217
+ if not self.initialized:
218
+ if not self.initialize_models():
219
+ return "Neutral"
220
+
221
  current_time = time.time()
222
  if current_time - self.last_gpu_use < 2:
223
  time.sleep(2)
 
 
224
 
225
+ truncated_text = text[:500]
226
+ results = []
227
+
228
+ try:
229
+ inputs = [truncated_text]
230
+ sentiment_results = []
231
+
232
+ # Process each model separately with delay
233
+ if self.finbert:
234
+ finbert_result = self.finbert(inputs, truncation=True, max_length=512)[0]
235
+ results.append(self.get_sentiment_label(finbert_result))
236
+ time.sleep(0.5)
237
+
238
+ if self.roberta:
239
+ roberta_result = self.roberta(inputs, truncation=True, max_length=512)[0]
240
+ results.append(self.get_sentiment_label(roberta_result))
241
+ time.sleep(0.5)
242
+
243
+ if self.finbert_tone:
244
+ finbert_tone_result = self.finbert_tone(inputs, truncation=True, max_length=512)[0]
245
+ results.append(self.get_sentiment_label(finbert_tone_result))
246
+
247
+ # Get majority vote
248
+ if results:
249
+ sentiment_counts = pd.Series(results).value_counts()
250
+ final_sentiment = sentiment_counts.index[0] if sentiment_counts.iloc[0] >= 2 else "Neutral"
251
+ else:
252
+ final_sentiment = "Neutral"
253
+
254
+ self.last_gpu_use = time.time()
255
+ return final_sentiment
256
+
257
+ except Exception as e:
258
+ logger.error(f"Model inference error: {e}")
259
+ return "Neutral"
260
 
261
  except Exception as e:
262
  logger.error(f"Sentiment analysis error: {e}")
 
305
  processed_rows = []
306
  total = len(df)
307
 
308
+ # Process in smaller batches with quota management
309
+ BATCH_SIZE = 3 # Reduced batch size
310
+ QUOTA_WAIT_TIME = 60 # Wait time when quota is exceeded
311
+
312
  for batch_start in range(0, total, BATCH_SIZE):
313
+ try:
314
+ batch_end = min(batch_start + BATCH_SIZE, total)
315
+ batch = df.iloc[batch_start:batch_end]
316
 
317
+ # Initialize models for batch
318
+ if not detector.initialized:
319
+ detector.initialize_models()
320
+ time.sleep(1) # Wait after initialization
321
+
322
+ for idx, row in batch.iterrows():
323
+ try:
324
+ text = str(row.get('Выдержки из текста', ''))
325
+ if not text.strip():
326
+ continue
327
+
328
+ entity = str(row.get('Объект', ''))
329
+ if not entity.strip():
330
+ continue
331
+
332
+ # Process with GPU quota management
333
+ event_type = "Нет"
334
+ event_summary = ""
335
+ sentiment = "Neutral"
336
 
337
+ try:
338
+ event_type, event_summary = detector.detect_events(text, entity)
339
+ time.sleep(1) # Wait between GPU operations
340
+ sentiment = detector.analyze_sentiment(text)
341
+ except Exception as e:
342
+ if "GPU quota" in str(e):
343
+ logger.warning("GPU quota exceeded, waiting...")
344
+ time.sleep(QUOTA_WAIT_TIME)
345
+ continue
346
+ else:
347
+ raise e
348
+
349
+ processed_rows.append({
350
+ 'Объект': entity,
351
+ 'Заголовок': str(row.get('Заголовок', '')),
352
+ 'Sentiment': sentiment,
353
+ 'Event_Type': event_type,
354
+ 'Event_Summary': event_summary,
355
+ 'Текст': text[:1000]
356
+ })
357
+
358
+ logger.info(f"Processed {idx + 1}/{total} rows")
359
+
360
+ except Exception as e:
361
+ logger.error(f"Error processing row {idx}: {str(e)}")
362
  continue
363
+
364
+ # Create intermediate results
365
+ if processed_rows:
366
+ intermediate_df = pd.DataFrame(processed_rows)
367
+ yield (
368
+ intermediate_df,
369
+ None,
370
+ None,
371
+ f"Обработано {len(processed_rows)}/{total} строк"
372
+ )
373
+
374
+ # Wait between batches
375
+ time.sleep(2)
376
+
377
+ # Cleanup GPU resources after each batch
378
+ torch.cuda.empty_cache()
379
+
380
+ except Exception as e:
381
+ logger.error(f"Batch processing error: {str(e)}")
382
+ if "GPU quota" in str(e):
383
+ time.sleep(QUOTA_WAIT_TIME)
384
+ continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
  # Final results
387
  if processed_rows:
 
399
  control = ProcessControl()
400
 
401
  with gr.Blocks(theme=gr.themes.Soft()) as app:
402
+ gr.Markdown("# AI-анализ мониторинга новостей v.1.17")
403
 
404
  with gr.Row():
405
  file_input = gr.File(