Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,6 @@
|
|
2 |
"""
|
3 |
GAIA Benchmark AI Agent - Hugging Face Space
|
4 |
============================================
|
5 |
-
|
6 |
A Gradio-based web interface for running GAIA benchmark evaluations
|
7 |
on Hugging Face Spaces with GPU acceleration.
|
8 |
"""
|
@@ -45,16 +44,6 @@ logging.basicConfig(level=logging.INFO)
|
|
45 |
logger = logging.getLogger(__name__)
|
46 |
|
47 |
# ================================
|
48 |
-
# MAIN APPLICATION
|
49 |
-
# ================================
|
50 |
-
|
51 |
-
if __name__ == "__main__":
|
52 |
-
app = create_gaia_app()
|
53 |
-
app.launch(
|
54 |
-
server_name="0.0.0.0",
|
55 |
-
server_port=7860,
|
56 |
-
share=False
|
57 |
-
)
|
58 |
# CORE DATA STRUCTURES
|
59 |
# ================================
|
60 |
|
@@ -90,9 +79,7 @@ class GAIAPromptManager:
|
|
90 |
"""Manages GAIA-specific prompting and formatting"""
|
91 |
|
92 |
GAIA_SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
|
93 |
-
|
94 |
FINAL ANSWER: [YOUR FINAL ANSWER]
|
95 |
-
|
96 |
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
|
97 |
|
98 |
@staticmethod
|
@@ -453,18 +440,15 @@ class GAIASpaceAgent:
|
|
453 |
|
454 |
summary = f"""
|
455 |
# 📊 GAIA Evaluation Summary
|
456 |
-
|
457 |
## Overall Statistics
|
458 |
- **Total Questions**: {total}
|
459 |
- **Successful**: {successful}
|
460 |
- **Errors**: {errors}
|
461 |
- **Success Rate**: {(successful/total*100):.1f}%
|
462 |
-
|
463 |
## Performance Metrics
|
464 |
- **Average Processing Time**: {avg_time:.2f}s
|
465 |
- **Total Processing Time**: {total_time:.2f}s
|
466 |
- **Questions per Minute**: {(total/(total_time/60)):.1f}
|
467 |
-
|
468 |
## Model Information
|
469 |
- **Model**: {self.current_model}
|
470 |
- **Device**: {self.model_manager.device.upper() if self.model_manager else 'Unknown'}
|
@@ -480,17 +464,11 @@ class GAIASpaceAgent:
|
|
480 |
|
481 |
detailed += f"""
|
482 |
## Question {i}: {question.task_id} {status}
|
483 |
-
|
484 |
**Question**: {question.question}
|
485 |
-
|
486 |
**Model Answer**: {result.final_answer}
|
487 |
-
|
488 |
**Expected Answer**: {question.final_answer if question.final_answer else 'N/A'}
|
489 |
-
|
490 |
**Processing Time**: {result.processing_time:.2f}s
|
491 |
-
|
492 |
**Level**: {question.level}
|
493 |
-
|
494 |
---
|
495 |
"""
|
496 |
|
@@ -850,3 +828,14 @@ def create_gaia_app():
|
|
850 |
|
851 |
return app
|
852 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
"""
|
3 |
GAIA Benchmark AI Agent - Hugging Face Space
|
4 |
============================================
|
|
|
5 |
A Gradio-based web interface for running GAIA benchmark evaluations
|
6 |
on Hugging Face Spaces with GPU acceleration.
|
7 |
"""
|
|
|
44 |
logger = logging.getLogger(__name__)
|
45 |
|
46 |
# ================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
# CORE DATA STRUCTURES
|
48 |
# ================================
|
49 |
|
|
|
79 |
"""Manages GAIA-specific prompting and formatting"""
|
80 |
|
81 |
GAIA_SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
|
|
|
82 |
FINAL ANSWER: [YOUR FINAL ANSWER]
|
|
|
83 |
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
|
84 |
|
85 |
@staticmethod
|
|
|
440 |
|
441 |
summary = f"""
|
442 |
# 📊 GAIA Evaluation Summary
|
|
|
443 |
## Overall Statistics
|
444 |
- **Total Questions**: {total}
|
445 |
- **Successful**: {successful}
|
446 |
- **Errors**: {errors}
|
447 |
- **Success Rate**: {(successful/total*100):.1f}%
|
|
|
448 |
## Performance Metrics
|
449 |
- **Average Processing Time**: {avg_time:.2f}s
|
450 |
- **Total Processing Time**: {total_time:.2f}s
|
451 |
- **Questions per Minute**: {(total/(total_time/60)):.1f}
|
|
|
452 |
## Model Information
|
453 |
- **Model**: {self.current_model}
|
454 |
- **Device**: {self.model_manager.device.upper() if self.model_manager else 'Unknown'}
|
|
|
464 |
|
465 |
detailed += f"""
|
466 |
## Question {i}: {question.task_id} {status}
|
|
|
467 |
**Question**: {question.question}
|
|
|
468 |
**Model Answer**: {result.final_answer}
|
|
|
469 |
**Expected Answer**: {question.final_answer if question.final_answer else 'N/A'}
|
|
|
470 |
**Processing Time**: {result.processing_time:.2f}s
|
|
|
471 |
**Level**: {question.level}
|
|
|
472 |
---
|
473 |
"""
|
474 |
|
|
|
828 |
|
829 |
return app
|
830 |
|
831 |
+
# ================================
|
832 |
+
# MAIN APPLICATION
|
833 |
+
# ================================
|
834 |
+
|
835 |
+
if __name__ == "__main__":
|
836 |
+
app = create_gaia_app()
|
837 |
+
app.launch(
|
838 |
+
server_name="0.0.0.0",
|
839 |
+
server_port=7860,
|
840 |
+
share=False
|
841 |
+
)
|