|
from smolagents import FinalAnswerTool |
|
from datasets import load_dataset, Dataset |
|
import json |
|
import os |
|
|
|
# Hugging Face access token taken from the environment (None when unset).
HF_ACCESS_TOKEN = os.environ.get("HF_ACCESS_TOKEN")

# "validation" split of the "2023_level1" configuration of the GAIA benchmark,
# loaded once at import time and shared by the lookup code in this module.
# NOTE(review): trust_remote_code=True permits the dataset repository's own
# loading script to run locally -- confirm this is still required and acceptable.
gaia_dataset = load_dataset(
    "gaia-benchmark/GAIA",
    "2023_level1",
    split="validation",
    token=HF_ACCESS_TOKEN,
    trust_remote_code=True,
)
|
|
|
def get_example_by_feature_value(dataset: Dataset, feature_name: str, feature_value: str):
    """Return the first example whose ``feature_name`` equals ``feature_value``.

    Args:
        dataset: Iterable of examples that support ``example[feature_name]``.
        feature_name: Key to read from each example.
        feature_value: Value the feature has to equal.

    Returns:
        The first matching example in iteration order, or ``None`` when no
        example matches.
    """
    # Lazy scan: stops at the first hit, falls back to None when exhausted.
    matches = (row for row in dataset if row[feature_name] == feature_value)
    return next(matches, None)
|
|
|
|
|
def get_question(task_id: str) -> str:
    """Build the prompt text for the GAIA example identified by ``task_id``.

    The text contains, in order: the question, the attached file path (only
    when the example has a non-empty ``file_name``), the tools listed in the
    annotator metadata, and the annotator's solution steps. Every section is
    terminated by a blank line.

    Args:
        task_id: Value of the ``task_id`` feature of the wanted example.

    Returns:
        The assembled prompt text.

    Raises:
        ValueError: If no example in ``gaia_dataset`` has this ``task_id``.
    """
    question_data = get_example_by_feature_value(gaia_dataset, "task_id", task_id)
    if question_data is None:
        # Without this guard the first subscript below fails with an opaque
        # "'NoneType' object is not subscriptable" TypeError.
        raise ValueError(f"No GAIA example found with task_id {task_id!r}")

    metadata = question_data["Annotator Metadata"]

    parts = ["Question: ", question_data["Question"], "\n\n"]

    # The file path is only meaningful for examples that ship an attachment.
    if question_data["file_name"]:
        parts += ["File path: ", question_data["file_path"], "\n\n"]

    parts += ["Tools required:\n", metadata["Tools"], "\n\n"]
    parts += [
        "Approximately, the problem can be solved as follows::\n",
        metadata["Steps"],
        "\n\n",
    ]

    return "".join(parts)
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: print the prompt built for one hard-coded task id.
    # The variable is deliberately not called ``id`` so the ``id`` builtin
    # is not shadowed at module scope.
    sample_task_id = "a1e91b78-d3d8-4675-bb8d-62741b4b68a6"

    print(get_question(sample_task_id))