Spaces:
Sleeping
Sleeping
Commit
·
feb6866
1
Parent(s):
33771c2
v.1.17
Browse files
app.py
CHANGED
@@ -211,17 +211,52 @@ class EventDetector:
|
|
211 |
logger.error(f"Event detection error: {str(e)}")
|
212 |
return "Нет", f"Error: {str(e)}"
|
213 |
|
214 |
-
@spaces.GPU(duration=20)
|
215 |
def analyze_sentiment(self, text):
|
216 |
try:
|
|
|
|
|
|
|
|
|
217 |
current_time = time.time()
|
218 |
if current_time - self.last_gpu_use < 2:
|
219 |
time.sleep(2)
|
220 |
-
|
221 |
-
# Rest of the method remains the same...
|
222 |
|
223 |
-
|
224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
|
226 |
except Exception as e:
|
227 |
logger.error(f"Sentiment analysis error: {e}")
|
@@ -270,67 +305,83 @@ def process_file(file_obj):
|
|
270 |
processed_rows = []
|
271 |
total = len(df)
|
272 |
|
273 |
-
# Process in smaller batches
|
274 |
-
BATCH_SIZE =
|
|
|
|
|
275 |
for batch_start in range(0, total, BATCH_SIZE):
|
276 |
-
|
277 |
-
|
|
|
278 |
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
290 |
|
291 |
-
|
292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
293 |
continue
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
logger.info(f"Processed {idx + 1}/{total} rows")
|
317 |
-
|
318 |
-
except Exception as e:
|
319 |
-
logger.error(f"Error processing row {idx}: {str(e)}")
|
320 |
-
if "GPU quota" in str(e):
|
321 |
-
# Wait longer if we hit quota limits
|
322 |
-
time.sleep(5)
|
323 |
-
continue
|
324 |
-
|
325 |
-
# Release GPU resources after each batch
|
326 |
-
torch.cuda.empty_cache()
|
327 |
-
# Wait between batches
|
328 |
-
time.sleep(2)
|
329 |
-
|
330 |
-
# Create intermediate results
|
331 |
-
if processed_rows:
|
332 |
-
result_df = pd.DataFrame(processed_rows)
|
333 |
-
yield result_df, None, None, f"Обработано {len(processed_rows)}/{total} строк"
|
334 |
|
335 |
# Final results
|
336 |
if processed_rows:
|
@@ -348,7 +399,7 @@ def create_interface():
|
|
348 |
control = ProcessControl()
|
349 |
|
350 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
351 |
-
gr.Markdown("# AI-анализ мониторинга новостей v.1.
|
352 |
|
353 |
with gr.Row():
|
354 |
file_input = gr.File(
|
|
|
211 |
logger.error(f"Event detection error: {str(e)}")
|
212 |
return "Нет", f"Error: {str(e)}"
|
213 |
|
214 |
+
@spaces.GPU(duration=20)
|
215 |
def analyze_sentiment(self, text):
|
216 |
try:
|
217 |
+
if not self.initialized:
|
218 |
+
if not self.initialize_models():
|
219 |
+
return "Neutral"
|
220 |
+
|
221 |
current_time = time.time()
|
222 |
if current_time - self.last_gpu_use < 2:
|
223 |
time.sleep(2)
|
|
|
|
|
224 |
|
225 |
+
truncated_text = text[:500]
|
226 |
+
results = []
|
227 |
+
|
228 |
+
try:
|
229 |
+
inputs = [truncated_text]
|
230 |
+
sentiment_results = []
|
231 |
+
|
232 |
+
# Process each model separately with delay
|
233 |
+
if self.finbert:
|
234 |
+
finbert_result = self.finbert(inputs, truncation=True, max_length=512)[0]
|
235 |
+
results.append(self.get_sentiment_label(finbert_result))
|
236 |
+
time.sleep(0.5)
|
237 |
+
|
238 |
+
if self.roberta:
|
239 |
+
roberta_result = self.roberta(inputs, truncation=True, max_length=512)[0]
|
240 |
+
results.append(self.get_sentiment_label(roberta_result))
|
241 |
+
time.sleep(0.5)
|
242 |
+
|
243 |
+
if self.finbert_tone:
|
244 |
+
finbert_tone_result = self.finbert_tone(inputs, truncation=True, max_length=512)[0]
|
245 |
+
results.append(self.get_sentiment_label(finbert_tone_result))
|
246 |
+
|
247 |
+
# Get majority vote
|
248 |
+
if results:
|
249 |
+
sentiment_counts = pd.Series(results).value_counts()
|
250 |
+
final_sentiment = sentiment_counts.index[0] if sentiment_counts.iloc[0] >= 2 else "Neutral"
|
251 |
+
else:
|
252 |
+
final_sentiment = "Neutral"
|
253 |
+
|
254 |
+
self.last_gpu_use = time.time()
|
255 |
+
return final_sentiment
|
256 |
+
|
257 |
+
except Exception as e:
|
258 |
+
logger.error(f"Model inference error: {e}")
|
259 |
+
return "Neutral"
|
260 |
|
261 |
except Exception as e:
|
262 |
logger.error(f"Sentiment analysis error: {e}")
|
|
|
305 |
processed_rows = []
|
306 |
total = len(df)
|
307 |
|
308 |
+
# Process in smaller batches with quota management
|
309 |
+
BATCH_SIZE = 3 # Reduced batch size
|
310 |
+
QUOTA_WAIT_TIME = 60 # Wait time when quota is exceeded
|
311 |
+
|
312 |
for batch_start in range(0, total, BATCH_SIZE):
|
313 |
+
try:
|
314 |
+
batch_end = min(batch_start + BATCH_SIZE, total)
|
315 |
+
batch = df.iloc[batch_start:batch_end]
|
316 |
|
317 |
+
# Initialize models for batch
|
318 |
+
if not detector.initialized:
|
319 |
+
detector.initialize_models()
|
320 |
+
time.sleep(1) # Wait after initialization
|
321 |
+
|
322 |
+
for idx, row in batch.iterrows():
|
323 |
+
try:
|
324 |
+
text = str(row.get('Выдержки из текста', ''))
|
325 |
+
if not text.strip():
|
326 |
+
continue
|
327 |
+
|
328 |
+
entity = str(row.get('Объект', ''))
|
329 |
+
if not entity.strip():
|
330 |
+
continue
|
331 |
+
|
332 |
+
# Process with GPU quota management
|
333 |
+
event_type = "Нет"
|
334 |
+
event_summary = ""
|
335 |
+
sentiment = "Neutral"
|
336 |
|
337 |
+
try:
|
338 |
+
event_type, event_summary = detector.detect_events(text, entity)
|
339 |
+
time.sleep(1) # Wait between GPU operations
|
340 |
+
sentiment = detector.analyze_sentiment(text)
|
341 |
+
except Exception as e:
|
342 |
+
if "GPU quota" in str(e):
|
343 |
+
logger.warning("GPU quota exceeded, waiting...")
|
344 |
+
time.sleep(QUOTA_WAIT_TIME)
|
345 |
+
continue
|
346 |
+
else:
|
347 |
+
raise e
|
348 |
+
|
349 |
+
processed_rows.append({
|
350 |
+
'Объект': entity,
|
351 |
+
'Заголовок': str(row.get('Заголовок', '')),
|
352 |
+
'Sentiment': sentiment,
|
353 |
+
'Event_Type': event_type,
|
354 |
+
'Event_Summary': event_summary,
|
355 |
+
'Текст': text[:1000]
|
356 |
+
})
|
357 |
+
|
358 |
+
logger.info(f"Processed {idx + 1}/{total} rows")
|
359 |
+
|
360 |
+
except Exception as e:
|
361 |
+
logger.error(f"Error processing row {idx}: {str(e)}")
|
362 |
continue
|
363 |
+
|
364 |
+
# Create intermediate results
|
365 |
+
if processed_rows:
|
366 |
+
intermediate_df = pd.DataFrame(processed_rows)
|
367 |
+
yield (
|
368 |
+
intermediate_df,
|
369 |
+
None,
|
370 |
+
None,
|
371 |
+
f"Обработано {len(processed_rows)}/{total} строк"
|
372 |
+
)
|
373 |
+
|
374 |
+
# Wait between batches
|
375 |
+
time.sleep(2)
|
376 |
+
|
377 |
+
# Cleanup GPU resources after each batch
|
378 |
+
torch.cuda.empty_cache()
|
379 |
+
|
380 |
+
except Exception as e:
|
381 |
+
logger.error(f"Batch processing error: {str(e)}")
|
382 |
+
if "GPU quota" in str(e):
|
383 |
+
time.sleep(QUOTA_WAIT_TIME)
|
384 |
+
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
385 |
|
386 |
# Final results
|
387 |
if processed_rows:
|
|
|
399 |
control = ProcessControl()
|
400 |
|
401 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
402 |
+
gr.Markdown("# AI-анализ мониторинга новостей v.1.17")
|
403 |
|
404 |
with gr.Row():
|
405 |
file_input = gr.File(
|