Spaces:
Configuration error
Configuration error
oremaz
committed on
Update agent.py
Browse files
agent.py
CHANGED
|
@@ -533,38 +533,9 @@ class EnhancedGAIAAgent:
|
|
| 533 |
1. THINK: Analyze the GAIA question thoroughly
|
| 534 |
2. ACT: Use your specialist tools IF RELEVANT
|
| 535 |
3. OBSERVE: Review results from specialist tools
|
| 536 |
-
4. REPEAT: Continue until you have the final answer.
|
| 537 |
|
| 538 |
-
CRITICAL
|
| 539 |
-
**Numbers (no commas, no units unless specified):**
|
| 540 |
-
Question: "How many research papers were published by the university between 2010 and 2020?"
|
| 541 |
-
CORRECT: 156
|
| 542 |
-
WRONG: "The answer is 156 papers" or "156 papers" or "one hundred fifty-six" or " 156 research papers were published by the university between 2010 and 2020"
|
| 543 |
-
|
| 544 |
-
**Strings (exact words, no articles, no explanations):**
|
| 545 |
-
Question: "What is the last name of the software engineer mentioned in the report?"
|
| 546 |
-
CORRECT: Martinez
|
| 547 |
-
WRONG: "The last name is Martinez" or "Dr. Martinez" or "martinez"
|
| 548 |
-
|
| 549 |
-
**Lists (comma-separated with spaces, alphabetized when requested):**
|
| 550 |
-
Question: "List the programming languages from this job description, alphabetized:"
|
| 551 |
-
CORRECT: C++, Java, JavaScript, Python, Ruby, TypeScript
|
| 552 |
-
WRONG: "C++,Java,JavaScript" or "1. C++ 2. Java" or "[C++, Java]"
|
| 553 |
-
|
| 554 |
-
**First/Last names only:**
|
| 555 |
-
Question: "Give only the first name of the developer who created the framework."
|
| 556 |
-
CORRECT: Sarah
|
| 557 |
-
WRONG: "Sarah Johnson" or "The first name is Sarah"
|
| 558 |
-
|
| 559 |
-
**Country codes:**
|
| 560 |
-
Question: "Give the ISO country code as your answer."
|
| 561 |
-
CORRECT: FRA
|
| 562 |
-
WRONG: "The ISO code is FRA" or "France (FRA)"
|
| 563 |
-
|
| 564 |
-
**Technical notation:**
|
| 565 |
-
Question: "Provide your response in standard notation."
|
| 566 |
-
CORRECT: 3.14e+8
|
| 567 |
-
WRONG: "The value is 3.14e+8" or "314 million"
|
| 568 |
|
| 569 |
ABSOLUTE RULES:
|
| 570 |
- NO explanations, NO additional text, NO units unless specifically requested
|
|
@@ -577,25 +548,81 @@ class EnhancedGAIAAgent:
|
|
| 577 |
max_steps = 10,
|
| 578 |
verbose = True
|
| 579 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 580 |
|
| 581 |
async def solve_gaia_question(self, question_data: Dict[str, Any]) -> str:
|
| 582 |
question = question_data.get("Question", "")
|
| 583 |
task_id = question_data.get("task_id", "")
|
|
|
|
| 584 |
context_prompt = f"""
|
| 585 |
GAIA Task ID: {task_id}
|
| 586 |
Question: {question}
|
| 587 |
{f"Associated files: {question_data.get('file_name', '')}" if 'file_name' in question_data else 'No files provided'}
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
2. Use specialist tools ONLY when their specific expertise is needed
|
| 591 |
-
3. Provide a precise, exact answer in GAIA format
|
| 592 |
-
Begin your reasoning process:
|
| 593 |
"""
|
|
|
|
| 594 |
try:
|
| 595 |
from llama_index.core.workflow import Context
|
| 596 |
ctx = Context(self.coordinator)
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 600 |
except Exception as e:
|
| 601 |
return f"Error processing question: {str(e)}"
|
|
|
|
| 533 |
1. THINK: Analyze the GAIA question thoroughly
|
| 534 |
2. ACT: Use your specialist tools IF RELEVANT
|
| 535 |
3. OBSERVE: Review results from specialist tools
|
| 536 |
+
4. REPEAT: Continue until you have the final answer.
|
| 537 |
|
| 538 |
+
CRITICAL: Your final answer must be EXACT and CONCISE as required by GAIA format:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
|
| 540 |
ABSOLUTE RULES:
|
| 541 |
- NO explanations, NO additional text, NO units unless specifically requested
|
|
|
|
| 548 |
max_steps = 10,
|
| 549 |
verbose = True
|
| 550 |
)
|
| 551 |
+
|
| 552 |
+
def format_gaia_answer(self, raw_response: str, original_question: str) -> str:
    """
    Post-process the agent response to extract the exact GAIA format answer.

    A few-shot extraction prompt is built from the question and the raw
    agent response and sent to ``proj_llm`` (a name defined outside this
    block). The completion is stripped of surrounding whitespace and of an
    echoed leading ``Answer:`` label before being returned.

    Args:
        raw_response: Free-form text produced by the agent.
        original_question: The question the agent was asked.

    Returns:
        The extracted answer. If the formatting call raises, or yields
        nothing after clean-up, the result of
        ``self._extract_fallback_answer(raw_response)`` is returned instead.
    """
    format_prompt = f"""Extract the exact answer from the response below. Follow GAIA formatting rules strictly.

Examples:

Question: "How many research papers were published by the university between 2010 and 2020?"
Response: "Based on my analysis of the data, I found that the university published 156 research papers between 2010 and 2020."
Answer: 156

Question: "What is the last name of the software engineer mentioned in the report?"
Response: "After reviewing the document, the software engineer mentioned is Dr. Martinez who developed the system."
Answer: Martinez

Question: "List the programming languages from this job description, alphabetized:"
Response: "The job description mentions several programming languages including Python, Java, C++, and JavaScript. When alphabetized, these are: C++, Java, JavaScript, Python"
Answer: C++, Java, JavaScript, Python

Question: "Give only the first name of the developer who created the framework."
Response: "The framework was created by Sarah Johnson, a senior developer at the company."
Answer: Sarah

Question: "Give the ISO country code as your answer."
Response: "The country in question is France, which has the ISO code FRA."
Answer: FRA

Question: "Provide your response in standard notation."
Response: "The calculated value is 314 million, which in standard notation is 3.14e+8"
Answer: 3.14e+8

Now extract the exact answer:

Question: {original_question}
Response: {raw_response}
Answer:"""

    try:
        # Use a simple, fast LLM for formatting
        formatting_response = proj_llm.complete(format_prompt)
        answer = str(formatting_response).strip()
    except Exception as e:
        print(f"Error in formatting: {e}")
        return self._extract_fallback_answer(raw_response)

    # The prompt ends with "Answer:"; a model that repeats that cue would
    # otherwise leak the label into the final answer.
    label = "answer:"
    if answer.lower().startswith(label):
        answer = answer[len(label):].strip()

    # An empty completion is not a usable answer; take the same fallback
    # path as a failed formatting call.
    if not answer:
        return self._extract_fallback_answer(raw_response)

    return answer
|
| 600 |
+
|
| 601 |
|
| 602 |
async def solve_gaia_question(self, question_data: Dict[str, Any]) -> str:
    """
    Run the coordinator on one GAIA question and return the formatted answer.

    Args:
        question_data: Mapping read for the keys ``"Question"``,
            ``"task_id"`` and, optionally, ``"file_name"``.

    Returns:
        The output of ``self.format_gaia_answer`` applied to the
        coordinator's response, or a string starting with
        ``"Error processing question: "`` if anything in the run raises.
    """
    question = question_data.get("Question", "")
    task_id = question_data.get("task_id", "")

    # Treat a present-but-empty file_name ("" or None) the same as a missing
    # one, so the prompt never shows a dangling "Associated files: " label.
    file_name = question_data.get("file_name")
    files_line = f"Associated files: {file_name}" if file_name else "No files provided"

    context_prompt = f"""
GAIA Task ID: {task_id}
Question: {question}
{files_line}

Analyze this question and provide your reasoning and final answer.
"""

    try:
        from llama_index.core.workflow import Context
        ctx = Context(self.coordinator)
        raw_response = await self.coordinator.run(ctx=ctx, user_msg=context_prompt)

        # Post-process to extract exact GAIA format
        formatted_answer = self.format_gaia_answer(str(raw_response), question)

        print(f"Raw response: {raw_response}")
        print(f"Formatted answer: {formatted_answer}")

        return formatted_answer

    except Exception as e:
        # Failures are reported as the answer string rather than raised.
        return f"Error processing question: {str(e)}"
|