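# NOTE: the lines "Spaces: / Runtime error / Runtime error" appeared here in the
# captured page; they look like a hosting-page status banner and are not part
# of the script.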
import json
import os

import requests
# Constants
# Base URL of the scoring service; task files are fetched from
# "<DEFAULT_API_URL>/files/<task_id>".
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# JSON file containing the list of questions, resolved against the current
# working directory.
QUESTIONS_FILE = "questions.json"
# Directory downloaded files are saved into, relative to the current working
# directory (the script is expected to be run from the 'questions' directory).
DOWNLOAD_DIR = "files"
def download_required_files(): | |
""" | |
Reads questions.json, identifies tasks requiring additional files, | |
downloads them using their task_id, and saves them to the current directory. | |
""" | |
if not os.path.exists(QUESTIONS_FILE): | |
print(f"Error: {QUESTIONS_FILE} not found. Please run the question fetcher script first.") | |
return | |
try: | |
with open(QUESTIONS_FILE, 'r', encoding='utf-8') as f: | |
questions_data = json.load(f) | |
except json.JSONDecodeError as e: | |
print(f"Error decoding {QUESTIONS_FILE}: {e}") | |
return | |
except IOError as e: | |
print(f"Error reading {QUESTIONS_FILE}: {e}") | |
return | |
if not isinstance(questions_data, list): | |
print(f"Error: Expected a list of questions in {QUESTIONS_FILE}, but got {type(questions_data)}.") | |
return | |
print(f"Found {len(questions_data)} questions in {QUESTIONS_FILE}.") | |
download_count = 0 | |
# Ensure the download directory exists | |
if not os.path.exists(DOWNLOAD_DIR): | |
os.makedirs(DOWNLOAD_DIR) | |
print(f"Created directory: {DOWNLOAD_DIR}") | |
for item in questions_data: | |
task_id = item.get("task_id") | |
file_name = item.get("file_name") | |
if task_id and file_name: # Check if both task_id and file_name are present and non-empty | |
file_url = f"{DEFAULT_API_URL}/files/{task_id}" | |
local_file_path = os.path.join(DOWNLOAD_DIR, file_name) | |
print(f"\nProcessing task_id: {task_id}, file_name: {file_name}") | |
print(f"Attempting to download from: {file_url}") | |
try: | |
# The curl example includes 'accept: application/json'. | |
# For file downloads, this might be unusual, but we'll follow the example. | |
# stream=True is good for downloading files, especially large ones. | |
response = requests.get(file_url, headers={"accept": "application/json"}, stream=True, timeout=30) | |
response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx) | |
with open(local_file_path, 'wb') as f_out: | |
for chunk in response.iter_content(chunk_size=8192): | |
f_out.write(chunk) | |
print(f"Successfully downloaded and saved to: {local_file_path}") | |
download_count += 1 | |
except requests.exceptions.HTTPError as e: | |
print(f"HTTP error downloading {file_name} for task {task_id}: {e}") | |
if e.response is not None: | |
print(f"Response status: {e.response.status_code}") | |
print(f"Response text (first 500 chars): {e.response.text[:500]}") | |
except requests.exceptions.RequestException as e: | |
print(f"Error downloading {file_name} for task {task_id}: {e}") | |
except IOError as e: | |
print(f"Error saving {file_name} to {local_file_path}: {e}") | |
except Exception as e: | |
print(f"An unexpected error occurred while processing {task_id}: {e}") | |
elif file_name: # task_id is missing but file_name is present | |
print(f"\nSkipping item with file_name '{file_name}' because task_id is missing: {item}") | |
if download_count > 0: | |
print(f"\nFinished downloading. Total files downloaded: {download_count}") | |
else: | |
print("\nNo files were identified for download or all downloads failed.") | |
if __name__ == "__main__":
    # Script entry point: run the downloader with the module-level defaults.
    # The download directory is created by download_required_files() itself.
    print("--- Starting File Downloader ---")
    download_required_files()
    print("--- File Downloader Finished ---")