Update app.py

app.py (CHANGED)
@@ -30,14 +30,14 @@ from transformers import (
 from datasets import load_dataset
 from huggingface_hub import HfApi, hf_hub_download
 
-# Import leaderboard integration
+# Import leaderboard integration
 from gaia_leaderboard_integration import (
     enhanced_gaia_agent,
-    run_custom_benchmark_interface,
+    run_custom_benchmark_interface,
     load_test_questions_interface,
-    preview_dataset_structure_interface,
+    preview_dataset_structure_interface,
     get_leaderboard_info,
-    get_question_selection_info
+    get_question_selection_info
 )
 
 # Setup logging
@@ -45,6 +45,16 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 # ================================
+# MAIN APPLICATION
+# ================================
+
+if __name__ == "__main__":
+    app = create_gaia_app()
+    app.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )
 # CORE DATA STRUCTURES
 # ================================
 
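Note that, as this hunk is rendered, the commit inserts the `__main__` guard near the top of app.py, before `create_gaia_app` is defined in a later hunk; Python executes a module top to bottom, so running the file this way would raise a `NameError` at the `create_gaia_app()` call. A minimal sketch of the conventional ordering, assuming the same launch settings (the placeholder body is illustrative only):

```python
import gradio as gr

def create_gaia_app() -> gr.Blocks:
    # Factory that builds the UI; the real implementation follows in later hunks.
    with gr.Blocks(title="GAIA Benchmark AI Agent") as app:
        gr.Markdown("🧠 GAIA Benchmark AI Agent")
    return app

# Define first, launch last: the guard only runs once every name it uses exists.
if __name__ == "__main__":
    app = create_gaia_app()
    app.launch(server_name="0.0.0.0", server_port=7860, share=False)
```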
@@ -114,7 +124,6 @@ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma sepa
 class HFSpaceModelManager:
     """Hugging Face Spaces optimized model manager"""
 
-    # Space-friendly models with different capabilities
     SPACE_MODELS = {
         "Fast & Light": {
             "name": "microsoft/DialoGPT-medium",
@@ -160,7 +169,6 @@ class HFSpaceModelManager:
         if progress_callback:
             progress_callback(0.1, "Loading tokenizer...")
 
-        # Load tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         if self.tokenizer.pad_token is None:
             self.tokenizer.pad_token = self.tokenizer.eos_token
@@ -168,7 +176,6 @@ class HFSpaceModelManager:
         if progress_callback:
             progress_callback(0.3, "Configuring model...")
 
-        # Configure quantization for GPU spaces
         quantization_config = None
         if self.device == "cuda" and "7b" in self.model_name.lower():
            quantization_config = BitsAndBytesConfig(
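The hunk above only creates a `BitsAndBytesConfig` when running on CUDA with a 7B-class checkpoint, but the diff cuts off before the constructor arguments. A minimal sketch of a typical config for this pattern; the specific values are assumptions, not the Space's committed settings:

```python
import torch
from transformers import BitsAndBytesConfig

# A common 4-bit NF4 recipe for fitting ~7B models on a T4-class GPU.
# These exact choices are assumed; the committed config is not visible here.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # store weights in 4-bit precision
    bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization
    bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
    bnb_4bit_use_double_quant=True,        # also quantize the scaling factors
)
```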
@@ -181,7 +188,6 @@ class HFSpaceModelManager:
         if progress_callback:
             progress_callback(0.6, "Loading model weights...")
 
-        # Load model
         self.model = AutoModelForCausalLM.from_pretrained(
             self.model_name,
             quantization_config=quantization_config,
@@ -193,7 +199,6 @@ class HFSpaceModelManager:
         if progress_callback:
             progress_callback(0.9, "Creating pipeline...")
 
-        # Create pipeline
         self.pipeline = pipeline(
             "text-generation",
             model=self.model,
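End to end, the load path in these hunks amounts to tokenizer, model, then a `text-generation` pipeline. A condensed, runnable sketch of that sequence for the CPU case (no quantization); the generation kwargs are assumptions, since the diff truncates them:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "microsoft/DialoGPT-medium"  # the "Fast & Light" entry above

tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # DialoGPT ships without a pad token

model = AutoModelForCausalLM.from_pretrained(model_name)

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
result = generator("What is the capital of France?", max_new_tokens=32)
print(result[0]["generated_text"])
```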
@@ -221,7 +226,6 @@ class HFSpaceModelManager:
             return "❌ Model not loaded. Please load a model first."
 
         try:
-            # Truncate prompt if too long
             max_input_length = 1000
             if len(prompt) > max_input_length:
                 prompt = prompt[:max_input_length] + "..."
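The truncation above counts characters, not tokens, so the effective context budget drifts with how the text tokenizes. A token-aware variant, shown as a sketch of an alternative technique rather than what this Space does, would let the tokenizer do the clipping:

```python
def truncate_prompt(prompt: str, tokenizer, max_tokens: int = 512) -> str:
    # Hypothetical helper: clip to a fixed token budget instead of a character count.
    ids = tokenizer.encode(prompt, truncation=True, max_length=max_tokens)
    return tokenizer.decode(ids, skip_special_tokens=True)
```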
@@ -351,13 +355,10 @@ class GAIASpaceAgent:
         self.model_manager = HFSpaceModelManager(model_choice)
         self.current_model = model_choice
 
-        # Load model with progress updates
         def progress_callback(value, desc):
             progress(value, desc=desc)
 
         result = self.model_manager.load_model(progress_callback)
-
-        # Clear any previous results when changing models
         self.evaluation_results = []
 
         return result
@@ -374,22 +375,15 @@ class GAIASpaceAgent:
 
         try:
             progress(0.2, desc="Creating GAIA prompt...")
-
-            # Create GAIA prompt
             prompt = self.prompt_manager.create_gaia_prompt(question_text)
 
             progress(0.4, desc="Generating response...")
-
-            # Generate response
             raw_response = self.model_manager.generate_response(prompt)
 
             progress(0.8, desc="Extracting final answer...")
-
-            # Extract final answer and reasoning
             final_answer, reasoning = self.prompt_manager.extract_final_answer(raw_response)
 
             processing_time = time.time() - start_time
-
             progress(1.0, desc="Complete!")
 
             return final_answer, raw_response, reasoning, processing_time
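`extract_final_answer` itself is outside this diff. Given the GAIA prompt convention visible in an earlier hunk header ("YOUR FINAL ANSWER should be a number OR as few words as possible..."), a plausible sketch is below; the function body and regex are assumptions, not the file's actual implementation:

```python
import re

def extract_final_answer(raw_response: str) -> tuple[str, str]:
    # Hypothetical sketch: split the response into reasoning and the text
    # following a "FINAL ANSWER:" marker, as the GAIA prompt requests.
    match = re.search(r"FINAL ANSWER:\s*(.+)", raw_response, re.IGNORECASE)
    if match:
        final_answer = match.group(1).strip()
        reasoning = raw_response[: match.start()].strip()
    else:
        # Fall back to the last non-empty line when the marker is missing.
        lines = [line for line in raw_response.strip().splitlines() if line.strip()]
        final_answer = lines[-1] if lines else ""
        reasoning = raw_response.strip()
    return final_answer, reasoning
```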
@@ -415,17 +409,11 @@ class GAIASpaceAgent:
                     desc=f"Processing question {i + 1}/{total_questions}: {question.task_id}")
 
                 start_time = time.time()
-
-                # Create prompt and generate response
                 prompt = self.prompt_manager.create_gaia_prompt(question.question)
                 raw_response = self.model_manager.generate_response(prompt)
-
-                # Extract final answer
                 final_answer, reasoning = self.prompt_manager.extract_final_answer(raw_response)
-
                 processing_time = time.time() - start_time
 
-                # Create response object
                 response = GAIAResponse(
                     task_id=question.task_id,
                     model_answer=raw_response,
@@ -449,13 +437,8 @@ class GAIASpaceAgent:
             results.append(error_response)
             self.evaluation_results.append(error_response)
 
-        # Generate summary
         summary = self._generate_summary(results)
-
-        # Generate detailed results
         detailed_results = self._generate_detailed_results(results, questions)
-
-        # Generate downloadable JSONL
         jsonl_content = self._generate_jsonl(results)
 
         return summary, detailed_results, jsonl_content
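`_generate_jsonl` is also outside this diff, but the submission format it must produce (one JSON object per line, per the Information tab deleted further down) makes the shape easy to sketch. The helper below is an assumption that mirrors that format, not the file's actual method:

```python
import json

def generate_jsonl(results) -> str:
    # One submission object per line: task_id, model_answer, reasoning_trace.
    # Field names follow the GAIA leaderboard format shown later in this diff.
    lines = [
        json.dumps({
            "task_id": r.task_id,
            "model_answer": r.model_answer,
            "reasoning_trace": r.reasoning_trace,
        })
        for r in results
    ]
    return "\n".join(lines)
```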
@@ -530,7 +513,6 @@ class GAIASpaceAgent:
 # GLOBAL AGENT INSTANCE
 # ================================
 
-# Initialize global agent
 gaia_agent = GAIASpaceAgent()
 
 # ================================
@@ -562,20 +544,17 @@ def batch_evaluate_interface(dataset_choice: str, max_questions: int, progress=g
 
     progress(0.1, desc="Loading dataset...")
 
-    # Load questions based on choice
     if dataset_choice == "Sample Questions":
         questions = GAIADatasetManager.get_sample_questions()
         status_msg = f"✅ Loaded {len(questions)} sample questions"
     else:
         questions, status_msg = GAIADatasetManager.load_gaia_dataset("test", max_questions)
 
-    # Limit questions
     if max_questions and len(questions) > max_questions:
         questions = questions[:max_questions]
 
     progress(0.2, desc=f"{status_msg}. Starting evaluation...")
 
-    # Run evaluation
     summary, detailed, jsonl = gaia_agent.batch_evaluate(questions, progress)
 
     return summary, detailed, jsonl
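`GAIADatasetManager.load_gaia_dataset` is defined elsewhere in the repo. Loading the GAIA metadata with the `datasets` library typically looks like the sketch below; the config name and field access follow the dataset's documented layout but should be treated as assumptions, and the dataset is gated (you must accept its terms and authenticate with a Hugging Face token first):

```python
from datasets import load_dataset

# Gated dataset: requires `huggingface-cli login` and accepted access terms.
# "2023_all" is the assumed config name for the full 2023 question set.
ds = load_dataset("gaia-benchmark/GAIA", "2023_all", split="test")

for row in ds.select(range(3)):
    print(row["task_id"], row["Level"], row["Question"][:80])
```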
@@ -602,26 +581,11 @@ def create_gaia_app():
 
     with gr.Blocks(
         title="GAIA Benchmark AI Agent",
-        theme=gr.themes.Soft(),
-        css="""
-        .gradio-container {
-            font-family: 'Arial', sans-serif;
-        }
-        .main-header {
-            text-align: center;
-            background: linear-gradient(45deg, #2196F3, #21CBF3);
-            -webkit-background-clip: text;
-            -webkit-text-fill-color: transparent;
-            font-size: 2.5em;
-            font-weight: bold;
-            margin-bottom: 20px;
-        }
-        """
+        theme=gr.themes.Soft()
     ) as app:
 
-        # Header
         gr.HTML("""
-        <div class="main-header">
+        <div style="text-align: center; font-size: 2.5em; font-weight: bold; margin-bottom: 20px;">
             🧠 GAIA Benchmark AI Agent
         </div>
         <p style="text-align: center; font-size: 1.2em; color: #666;">
@@ -631,9 +595,7 @@ def create_gaia_app():
 
         with gr.Tabs():
 
-            # ===============================
             # TAB 1: MODEL SETUP
-            # ===============================
             with gr.Tab("🔧 Model Setup"):
                 gr.Markdown("## Choose and Load Your Model")
 
@@ -642,8 +604,7 @@ def create_gaia_app():
                 model_dropdown = gr.Dropdown(
                     choices=list(HFSpaceModelManager.SPACE_MODELS.keys()),
                     value="Fast & Light",
-                    label="Select Model",
-                    info="Choose based on your quality vs speed preference"
+                    label="Select Model"
                 )
 
                 model_info = gr.Markdown(
@@ -655,7 +616,7 @@ def create_gaia_app():
 
             with gr.Column(scale=1):
                 gpu_info = gr.Markdown(f"""
-                ###
+                ### System Info
                 **CUDA Available**: {torch.cuda.is_available()}
                 {f"**GPU**: {torch.cuda.get_device_name(0)}" if torch.cuda.is_available() else "**Device**: CPU"}
                 """)
@@ -666,23 +627,19 @@ def create_gaia_app():
                     interactive=False
                 )
 
-                # Update model info when selection changes
                 model_dropdown.change(
                     fn=get_model_info,
                     inputs=[model_dropdown],
                     outputs=[model_info]
                 )
 
-                # Load model when button clicked
                 load_btn.click(
                     fn=load_model_interface,
                     inputs=[model_dropdown],
                     outputs=[model_status]
                 )
 
-            # ===============================
             # TAB 2: SINGLE QUESTION
-            # ===============================
             with gr.Tab("❓ Single Question"):
                 gr.Markdown("## Test Individual Questions")
 
@@ -696,8 +653,7 @@ def create_gaia_app():
 
                 process_btn = gr.Button("🤔 Process Question", variant="primary")
 
-
-                gr.Markdown("### 💡 Example Questions:")
+                gr.Markdown("### Example Questions:")
                 example_questions = [
                     "What is the capital of France?",
                     "Calculate 144 divided by 12",
@@ -705,11 +661,8 @@ def create_gaia_app():
                     "Convert 100 degrees Celsius to Fahrenheit"
                 ]
 
-                for example in example_questions:
-                    gr.Button(
-                        f"📝 {example}",
-                        size="sm"
-                    ).click(
+                for example in example_questions:
+                    gr.Button(f"📝 {example}", size="sm").click(
                         lambda x=example: x,
                         outputs=[question_input]
                     )
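One detail worth keeping from both versions of this loop: `lambda x=example: x` binds the current `example` as a default argument at definition time. A plain `lambda: example` would close over the loop variable itself, so every button would fill in the last question. The sketch below demonstrates the difference:

```python
# Late binding: all three closures share the loop variable, which ends at "c".
funcs_late = [lambda: q for q in ["a", "b", "c"]]

# Default-argument trick: each closure captures the value at that iteration.
funcs_bound = [lambda q=q: q for q in ["a", "b", "c"]]

print([f() for f in funcs_late])   # ['c', 'c', 'c']
print([f() for f in funcs_bound])  # ['a', 'b', 'c']
```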
@@ -739,16 +692,13 @@ def create_gaia_app():
                     interactive=False
                 )
 
-                # Process single question
                 process_btn.click(
                     fn=single_question_interface,
                     inputs=[question_input],
                     outputs=[final_answer_output, full_response, reasoning_trace, processing_time]
                 )
 
-            # ===============================
             # TAB 3: BATCH EVALUATION
-            # ===============================
             with gr.Tab("📊 Batch Evaluation"):
                 gr.Markdown("## Evaluate Multiple Questions")
 
@@ -756,8 +706,7 @@ def create_gaia_app():
                 dataset_choice = gr.Radio(
                     choices=["Sample Questions", "GAIA Test Set"],
                     value="Sample Questions",
-                    label="Dataset Choice",
-                    info="Start with sample questions to test your setup"
+                    label="Dataset Choice"
                 )
 
                 max_questions = gr.Slider(
@@ -765,8 +714,7 @@ def create_gaia_app():
                     maximum=50,
                     value=5,
                     step=1,
-                    label="Max Questions",
-                    info="Number of questions to evaluate"
+                    label="Max Questions"
                 )
 
                 evaluate_btn = gr.Button("🚀 Start Batch Evaluation", variant="primary", size="lg")
@@ -789,11 +737,9 @@ def create_gaia_app():
                     value="Run an evaluation to see detailed results"
                 )
 
-                # Batch evaluation
                 def batch_eval_with_download(*args):
                     summary, detailed, jsonl_content = batch_evaluate_interface(*args)
 
-                    # Save JSONL for download
                     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                     filename = f"gaia_results_{timestamp}.jsonl"
 
@@ -811,47 +757,29 @@ def create_gaia_app():
                 outputs=[download_output]
             )
 
-            #
-            # TAB 4: FULL BENCHMARK (ENHANCED FOR 300 QUESTIONS)
-            # ===============================
+            # TAB 4: FULL BENCHMARK
             with gr.Tab("🏆 Full Benchmark"):
                 gr.Markdown("## Official GAIA Leaderboard Benchmark")
 
                 with gr.Row():
                     with gr.Column():
-                        gr.Markdown(get_leaderboard_info())
-
-                    with gr.Column():
-                        # Test questions preview
                         test_preview_btn = gr.Button("🔍 Preview Test Questions", variant="secondary")
                         test_preview_output = gr.Markdown(
                             value="Click above to preview official test questions"
                         )
 
-                        # Dataset structure preview (NEW)
                         dataset_structure_btn = gr.Button("📁 Preview Dataset Structure", variant="secondary")
                         dataset_structure_output = gr.Markdown(
                             value="Click above to see actual GAIA dataset structure"
                         )
-
-
-
-
-                with gr.Row():
-                    # Preset buttons for common configurations
-                    quick_test_btn = gr.Button("🚀 Quick Test (20 questions)", variant="secondary")
-                    medium_test_btn = gr.Button("📊 Medium Test (50 questions)", variant="secondary")
-                    full_benchmark_btn = gr.Button("🏆 FULL BENCHMARK (300 questions)", variant="primary", size="lg")
-
-                # Advanced configuration (collapsible)
-                with gr.Accordion("🎛️ Advanced Configuration", open=False):
-                    with gr.Row():
-                        custom_count = gr.Slider(
+
+                    with gr.Column():
+                        question_count = gr.Slider(
                             minimum=10,
-                            maximum=300,
-                            value=
+                            maximum=300,
+                            value=20,
                             step=10,
-                            label="
+                            label="Number of Questions"
                         )
 
                 selection_strategy = gr.Dropdown(
@@ -859,22 +787,9 @@ def create_gaia_app():
                     value="balanced",
                     label="Selection Strategy"
                 )
-
-
-
-                # Show selection info
-                selection_info = gr.Markdown(get_question_selection_info())
-
-                # Warning message for full benchmark
-                gr.Markdown("""
-                **⚠️ Full 300-Question Benchmark Warning**:
-                - **Time**: 1-3 hours depending on model and hardware
-                - **Cost**: ~$1-3 on GPU (T4 Small recommended)
-                - **Purpose**: Official leaderboard submission
-                - **Recommendation**: Test with smaller batches first
-                """)
+
+                benchmark_btn = gr.Button("🎯 Run Benchmark", variant="primary", size="lg")
 
-                # Results section
                 benchmark_status = gr.Textbox(
                     label="📊 Benchmark Status",
                     value="Ready to run benchmark",
@@ -889,7 +804,6 @@ def create_gaia_app():
                 )
 
                 with gr.Column():
-                    # Download files
                     submission_file = gr.File(
                         label="💾 Download Submission File (JSONL)",
                         visible=False
@@ -899,20 +813,8 @@ def create_gaia_app():
                     label="📋 Download Metadata File",
                     visible=False
                 )
-
-                gr.Markdown("""
-                ### 📤 Leaderboard Submission Steps
-                1. **Download** the JSONL file above
-                2. **Visit** [GAIA Leaderboard](https://huggingface.co/spaces/gaia-benchmark/leaderboard)
-                3. **Upload** your submission file
-                4. **View** your model's ranking!
-                """)
 
-            #
-            # EVENT HANDLERS (FIXED FUNCTION CALLS)
-            # ================================
-
-            # Preview functions
+            # Event handlers
             test_preview_btn.click(
                 fn=lambda: load_test_questions_interface(max_questions=10, selection_type="balanced"),
                 outputs=[test_preview_output]
@@ -923,240 +825,29 @@ def create_gaia_app():
                 outputs=[dataset_structure_output]
             )
 
-
-            def run_quick_test(progress=gr.Progress()):
-                return run_custom_benchmark_interface(20, "balanced", progress)
-
-            def run_medium_test(progress=gr.Progress()):
-                return run_custom_benchmark_interface(50, "balanced", progress)
-
-            def run_full_300_benchmark(progress=gr.Progress()):
-                return run_custom_benchmark_interface(300, "balanced", progress)
-
-            def run_custom_benchmark_wrapper(count, strategy, progress=gr.Progress()):
+            def run_benchmark_wrapper(count, strategy, progress=gr.Progress()):
                 return run_custom_benchmark_interface(count, strategy, progress)
 
-            # Helper function to show download files
             def show_download_files(status, report, sub_file, meta_file):
                 return (
                     status,
                     report,
                     sub_file,
                     meta_file,
-                    gr.update(visible=True),
-                    gr.update(visible=True)
+                    gr.update(visible=True),
+                    gr.update(visible=True)
                 )
 
-
-            quick_test_btn.click(
-                fn=run_quick_test,
+            benchmark_btn.click(
+                fn=run_benchmark_wrapper,
+                inputs=[question_count, selection_strategy],
                 outputs=[benchmark_status, benchmark_report, submission_file, metadata_file]
             ).then(
                 fn=show_download_files,
                 inputs=[benchmark_status, benchmark_report, submission_file, metadata_file],
                 outputs=[benchmark_status, benchmark_report, submission_file, metadata_file, submission_file, metadata_file]
             )
-
-            medium_test_btn.click(
-                fn=run_medium_test,
-                outputs=[benchmark_status, benchmark_report, submission_file, metadata_file]
-            ).then(
-                fn=show_download_files,
-                inputs=[benchmark_status, benchmark_report, submission_file, metadata_file],
-                outputs=[benchmark_status, benchmark_report, submission_file, metadata_file, submission_file, metadata_file]
-            )
-
-            # FULL 300-question benchmark
-            full_benchmark_btn.click(
-                fn=run_full_300_benchmark,
-                outputs=[benchmark_status, benchmark_report, submission_file, metadata_file]
-            ).then(
-                fn=show_download_files,
-                inputs=[benchmark_status, benchmark_report, submission_file, metadata_file],
-                outputs=[benchmark_status, benchmark_report, submission_file, metadata_file, submission_file, metadata_file]
-            )
-
-            # Custom benchmark
-            custom_benchmark_btn.click(
-                fn=run_custom_benchmark_wrapper,
-                inputs=[custom_count, selection_strategy],
-                outputs=[benchmark_status, benchmark_report, submission_file, metadata_file]
-            ).then(
-                fn=show_download_files,
-                inputs=[benchmark_status, benchmark_report, submission_file, metadata_file],
-                outputs=[benchmark_status, benchmark_report, submission_file, metadata_file, submission_file, metadata_file]
-            )
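The pattern in this hunk, one `.click()` that produces results and a chained `.then()` that flips file components visible via `gr.update(visible=True)`, is standard Gradio event chaining. A self-contained sketch of the same mechanics (component names here are illustrative):

```python
import gradio as gr

def run_job():
    # Stand-in for the benchmark call: writes a result file, returns status + path.
    with open("results.jsonl", "w") as f:
        f.write('{"task_id": "demo", "model_answer": "42"}\n')
    return "done", "results.jsonl"

with gr.Blocks() as demo:
    btn = gr.Button("Run")
    status = gr.Textbox(label="Status")
    out_file = gr.File(label="Download", visible=False)

    # The first event fills the components; the chained .then() reveals the file.
    btn.click(fn=run_job, outputs=[status, out_file]).then(
        fn=lambda: gr.update(visible=True),
        outputs=[out_file],
    )

demo.launch()
```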
-
-            # ===============================
-            # TAB 5: INFORMATION (UPDATED)
-            # ===============================
-            with gr.Tab("ℹ️ Information"):
-                gr.Markdown("""
-                # 🧠 GAIA Benchmark AI Agent
-
-                ## What is GAIA?
-                GAIA (General AI Assistant) is a benchmark designed to test AI assistants on real-world questions that require:
-                - **Reasoning**: Multi-step logical thinking
-                - **Multi-modality**: Handling text, images, and other file types
-                - **Web browsing**: Finding and using external information
-                - **Tool use**: Calculator, code execution, etc.
-
-                ## 🏆 GAIA Public Leaderboard
-                GAIA provides a **public leaderboard hosted on Hugging Face** where you can:
-                - Test your models against **300 official testing questions**
-                - Compare performance with state-of-the-art systems
-                - Track progress in AI reasoning capabilities
-                - Contribute to research community benchmarks
-
-                **Leaderboard URL**: [https://huggingface.co/spaces/gaia-benchmark/leaderboard](https://huggingface.co/spaces/gaia-benchmark/leaderboard)
-
-                ## 🎯 How to Use This Space
-
-                ### 1. Model Setup
-                - Choose a model based on your needs (speed vs quality)
-                - Load the model (this may take a few minutes)
-                - Wait for "Model loaded successfully" message
-
-                ### 2. Test Single Questions
-                - Start with the "Single Question" tab
-                - Try example questions to verify everything works
-                - Enter your own questions to test model capabilities
-
-                ### 3. Batch Evaluation
-                - Use "Sample Questions" first to test your setup
-                - Then try "GAIA Test Set" for real benchmark evaluation
-                - Download results in JSONL format for submission
-
-                ### 4. Full Benchmark (Enhanced!)
-                - **Quick Tests**: 20 or 50 questions for rapid iteration
-                - **Custom Configuration**: Choose exact question count and strategy
-                - **Full 300-Question Benchmark**: Complete official evaluation
-                - **Leaderboard Ready**: Automatic JSONL generation for submission
-
-                ## 📊 Model Recommendations
-
-                | Model | Best For | Memory | Speed | Quality | 300Q Time | Cost (T4) |
-                |-------|----------|---------|-------|---------|-----------|-----------|
-                | Fast & Light | Quick testing | Low | Fast | Good | 45-75 min | ~$0.60-1.00 |
-                | Balanced | General use | Medium | Medium | Better | 60-120 min | ~$1.00-2.00 |
-                | High Quality | Best results | High | Slow | Best | 90-180 min | ~$1.50-3.00 |
-                | Instruction Following | Complex reasoning | High | Medium | Excellent | 75-150 min | ~$1.25-2.50 |
-
-                ## 🏅 Benchmark Performance Expectations
-
-                Based on current leaderboard standings, expect these performance ranges:
-
-                | Difficulty Level | Top Models | Good Models | Baseline Models |
-                |------------------|------------|-------------|-----------------|
-                | **Level 1** (Basic) | 85-95% | 70-85% | 50-70% |
-                | **Level 2** (Intermediate) | 65-80% | 45-65% | 25-45% |
-                | **Level 3** (Advanced) | 35-60% | 20-35% | 10-20% |
-                | **Overall Average** | 65-75% | 45-65% | 30-45% |
-
-                ## 🚀 Flexible Benchmarking Features
-
-                ### 🎯 **Custom Question Selection**
-                - **Question Count**: Choose 10-300 questions
-                - **Selection Strategies**: Balanced, Random, Sequential
-                - **Level Distribution**: Automatic balancing across difficulties
-                - **Reproducible**: Consistent results with same settings
-
-                ### 📊 **Smart Sampling**
-                - **Balanced**: Realistic distribution (40% L1, 35% L2, 25% L3)
-                - **Representative**: Questions from all difficulty levels
-                - **Efficient**: Test fewer questions while maintaining quality
-
-                ### ⚡ **Quick Options**
-                - **Quick Test (20Q)**: 5-15 minutes, ~$0.10-0.25
-                - **Medium Test (50Q)**: 15-30 minutes, ~$0.25-0.50
-                - **Full Benchmark (300Q)**: 1-3 hours, ~$1-3
-
-                ## 🔄 Continuous Benchmarking Workflow
-
-                1. **Development**: Start with Quick Test (20 questions)
-                2. **Validation**: Use Medium Test (50 questions) for validation
-                3. **Optimization**: Iterate on model improvements
-                4. **Benchmarking**: Run Full Benchmark (300 questions) when ready
-                5. **Submission**: Upload to official GAIA leaderboard
-                6. **Analysis**: Compare with other models and iterate
-
-                ## 📋 Official Dataset Integration
-
-                ### **Metadata.jsonl Structure**
-                - **Questions**: Stored in `2023/validation/metadata.jsonl` and `2023/test/metadata.jsonl`
-                - **Additional Files**: Some questions reference images, documents, or data files
-                - **Format**: Each line contains one question in JSON format
-                - **Fields**: `task_id`, `Question`, `Level`, `file_name` (optional), `Final answer` (validation only)
-
-                ### **Submission Format**
-                Results are saved in official GAIA leaderboard format:
-                ```json
-                {"task_id": "gaia_001", "model_answer": "[FULL RESPONSE]", "reasoning_trace": "[STEP-BY-STEP REASONING]"}
-                {"task_id": "gaia_002", "model_answer": "[FULL RESPONSE]", "reasoning_trace": "[STEP-BY-STEP REASONING]"}
-                ```
-
-                ## ⚡ Pro Tips for Best Results
-
-                ### **Performance Optimization**
-                1. **Start Small**: Always test with Quick Test first
-                2. **Choose Wisely**: Balance speed vs quality based on your goals
-                3. **Monitor Resources**: Use GPU acceleration for larger models
-                4. **Validate Format**: Ensure JSONL files are properly formatted
-
-                ### **Leaderboard Strategy**
-                1. **Baseline First**: Get initial results with Quick Test
-                2. **Iterate Quickly**: Test improvements on Medium Test
-                3. **Full Benchmark**: Run complete evaluation when ready
-                4. **Compare Results**: Analyze performance across difficulty levels
-                5. **Document Approach**: Include model details and methodology
-
-                ### **Cost Management**
-                - **Development**: Use Quick Test (20Q) for rapid iteration (~$0.10-0.25)
-                - **Validation**: Use Medium Test (50Q) for validation (~$0.25-0.50)
-                - **Production**: Use Full Benchmark (300Q) for final submission (~$1-3)
-                - **Hardware**: T4 Small GPU recommended for best price/performance
-
-                ### **Common Pitfalls to Avoid**
-                - Don't run full benchmark on untested models
-                - Ensure stable internet connection for long evaluations
-                - Verify submission file format before uploading
-                - Check GPU memory usage for large models
-                - Save intermediate results during long runs
-
-                ## 🎯 Getting Started Checklist
-
-                - [ ] **Load Model**: Choose and load a model in "Model Setup"
-                - [ ] **Test Single**: Try example questions in "Single Question"
-                - [ ] **Quick Test**: Run 20-question benchmark to verify setup
-                - [ ] **Preview Dataset**: Check "Preview Test Questions" in Full Benchmark
-                - [ ] **Medium Test**: Run 50-question validation benchmark
-                - [ ] **Full Benchmark**: Run complete 300-question evaluation when ready
-                - [ ] **Download Files**: Get JSONL submission and metadata files
-                - [ ] **Submit**: Upload to GAIA leaderboard
-                - [ ] **Compare**: Analyze your results against other models!
-
-                ## 🔗 Resources
-                - [GAIA Paper](https://arxiv.org/abs/2311.12983) - Original research paper
-                - [GAIA Leaderboard](https://huggingface.co/spaces/gaia-benchmark/leaderboard) - Official rankings
-                - [GAIA Dataset](https://huggingface.co/datasets/gaia-benchmark/GAIA) - Official dataset repository
-                - [Hugging Face Spaces](https://huggingface.co/docs/hub/spaces) - Deployment documentation
-
-                ---
-
-                **Ready to start benchmarking?** Begin with the Model Setup tab, then progress through Quick Test → Medium Test → Full Benchmark. Good luck climbing the leaderboard! 🚀
-                """)
 
     return app
 
-# ================================
-# MAIN APPLICATION
-# ================================
-
-if __name__ == "__main__":
-    # Create and launch the Gradio app
-    app = create_gaia_app()
-    app.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False
-    )
+# ================================