CasperCvO's picture
feat: initialize project with question fetching functionality and dependencies
6f31603
import os
import json
import requests
# Constants
# Base URL of the scoring service; each attachment is requested from
# "<DEFAULT_API_URL>/files/<task_id>".
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# JSON file that is expected to contain a list of question objects.
QUESTIONS_FILE = "questions.json"
DOWNLOAD_DIR = "files" # Directory to save downloaded files (relative to script's CWD, expected to be 'questions' dir)
def download_required_files():
    """
    Download the attachment of every question in QUESTIONS_FILE that names one.

    QUESTIONS_FILE must contain a JSON list of objects. For each object with
    both a non-empty ``task_id`` and ``file_name``, the file is fetched from
    ``{DEFAULT_API_URL}/files/{task_id}`` and stored inside DOWNLOAD_DIR under
    the base name of ``file_name`` (any directory components are discarded so
    a crafted name cannot write outside DOWNLOAD_DIR).

    Problems are reported with print() rather than raised: a missing or
    unreadable QUESTIONS_FILE ends the run early, while a failure on one item
    only skips that item. Returns None.
    """
    if not os.path.exists(QUESTIONS_FILE):
        print(f"Error: {QUESTIONS_FILE} not found. Please run the question fetcher script first.")
        return

    try:
        with open(QUESTIONS_FILE, 'r', encoding='utf-8') as f:
            questions_data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # UnicodeDecodeError: the file is not valid UTF-8, so it cannot be decoded either.
        print(f"Error decoding {QUESTIONS_FILE}: {e}")
        return
    except IOError as e:
        print(f"Error reading {QUESTIONS_FILE}: {e}")
        return

    if not isinstance(questions_data, list):
        print(f"Error: Expected a list of questions in {QUESTIONS_FILE}, but got {type(questions_data)}.")
        return

    print(f"Found {len(questions_data)} questions in {QUESTIONS_FILE}.")
    download_count = 0

    # Ensure the download directory exists. exist_ok=True keeps this safe if
    # another process creates it between the check and the call.
    if not os.path.exists(DOWNLOAD_DIR):
        os.makedirs(DOWNLOAD_DIR, exist_ok=True)
        print(f"Created directory: {DOWNLOAD_DIR}")

    for item in questions_data:
        if not isinstance(item, dict):
            # A stray string/number/null in the list must not abort the whole run.
            print(f"\nSkipping malformed entry (expected a JSON object): {item!r}")
            continue

        task_id = item.get("task_id")
        file_name = item.get("file_name")
        if not file_name:
            # Question has no attachment; nothing to do.
            continue
        if not task_id:
            print(f"\nSkipping item with file_name '{file_name}' because task_id is missing: {item}")
            continue

        # file_name comes from remote data: keep only its final component
        # (treating both '/' and '\\' as separators) so that values such as
        # "../../x" or "/etc/x" cannot escape DOWNLOAD_DIR.
        safe_name = os.path.basename(str(file_name).replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            print(f"\nSkipping task_id {task_id}: file_name {file_name!r} is not a usable file name.")
            continue

        file_url = f"{DEFAULT_API_URL}/files/{task_id}"
        local_file_path = os.path.join(DOWNLOAD_DIR, safe_name)
        # Data is streamed to a sibling ".part" file and renamed on success,
        # so an interrupted download never leaves a truncated file at the
        # final path or clobbers a previously downloaded good copy.
        partial_path = local_file_path + ".part"

        print(f"\nProcessing task_id: {task_id}, file_name: {file_name}")
        print(f"Attempting to download from: {file_url}")

        response = None
        try:
            # The curl example includes 'accept: application/json'.
            # For file downloads, this might be unusual, but we'll follow the example.
            # stream=True avoids loading large files fully into memory.
            response = requests.get(file_url, headers={"accept": "application/json"}, stream=True, timeout=30)
            response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)

            with open(partial_path, 'wb') as f_out:
                for chunk in response.iter_content(chunk_size=8192):
                    f_out.write(chunk)
            os.replace(partial_path, local_file_path)

            print(f"Successfully downloaded and saved to: {local_file_path}")
            download_count += 1
        except requests.exceptions.HTTPError as e:
            print(f"HTTP error downloading {file_name} for task {task_id}: {e}")
            if e.response is not None:
                print(f"Response status: {e.response.status_code}")
                print(f"Response text (first 500 chars): {e.response.text[:500]}")
        except requests.exceptions.RequestException as e:
            print(f"Error downloading {file_name} for task {task_id}: {e}")
        except IOError as e:
            print(f"Error saving {file_name} to {local_file_path}: {e}")
        except Exception as e:
            print(f"An unexpected error occurred while processing {task_id}: {e}")
        finally:
            # Runs after the handlers above, so the error body has already
            # been read before the streamed connection is released.
            if response is not None:
                response.close()
            # Still present only if the download did not complete.
            if os.path.exists(partial_path):
                try:
                    os.remove(partial_path)
                except OSError as e:
                    print(f"Could not remove partial file {partial_path}: {e}")

    if download_count > 0:
        print(f"\nFinished downloading. Total files downloaded: {download_count}")
    else:
        print("\nNo files were identified for download or all downloads failed.")
if __name__ == "__main__":
    # Script entry point: print a start banner, run the downloader once,
    # then print a finish banner.
    start_banner = "--- Starting File Downloader ---"
    finish_banner = "--- File Downloader Finished ---"

    print(start_banner)
    download_required_files()
    print(finish_banner)