import os
import json
import requests

# Constants
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_FILE = "questions.json"
DOWNLOAD_DIR = "files"  # Directory to save downloaded files (relative to script's CWD, expected to be 'questions' dir)


def download_required_files():
    """
    Reads questions.json, identifies tasks requiring additional files,
    downloads them using their task_id, and saves them into DOWNLOAD_DIR.

    Expects QUESTIONS_FILE to contain a JSON list of question dicts; each
    dict may carry a "task_id" and a "file_name". Only items with both
    fields non-empty are downloaded, from DEFAULT_API_URL/files/<task_id>.

    Returns:
        None. Progress and errors are reported via print().
    """
    if not os.path.exists(QUESTIONS_FILE):
        print(f"Error: {QUESTIONS_FILE} not found. Please run the question fetcher script first.")
        return

    try:
        with open(QUESTIONS_FILE, 'r', encoding='utf-8') as f:
            questions_data = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error decoding {QUESTIONS_FILE}: {e}")
        return
    except IOError as e:
        print(f"Error reading {QUESTIONS_FILE}: {e}")
        return

    if not isinstance(questions_data, list):
        print(f"Error: Expected a list of questions in {QUESTIONS_FILE}, but got {type(questions_data)}.")
        return

    print(f"Found {len(questions_data)} questions in {QUESTIONS_FILE}.")
    download_count = 0

    # Ensure the download directory exists. exist_ok=True avoids the
    # check-then-create race of the exists()/makedirs() pair.
    if not os.path.isdir(DOWNLOAD_DIR):
        os.makedirs(DOWNLOAD_DIR, exist_ok=True)
        print(f"Created directory: {DOWNLOAD_DIR}")

    for item in questions_data:
        # Guard against malformed entries: a non-dict item would raise an
        # opaque AttributeError on .get(); skip it with a clear message.
        if not isinstance(item, dict):
            print(f"\nSkipping non-dict item in {QUESTIONS_FILE}: {item!r}")
            continue

        task_id = item.get("task_id")
        file_name = item.get("file_name")

        if task_id and file_name:  # Check if both task_id and file_name are present and non-empty
            file_url = f"{DEFAULT_API_URL}/files/{task_id}"
            local_file_path = os.path.join(DOWNLOAD_DIR, file_name)

            print(f"\nProcessing task_id: {task_id}, file_name: {file_name}")
            print(f"Attempting to download from: {file_url}")

            try:
                # The curl example includes 'accept: application/json'.
                # For file downloads, this might be unusual, but we'll follow the example.
                # stream=True downloads in chunks; the context manager guarantees
                # the underlying connection is released even on error.
                with requests.get(file_url, headers={"accept": "application/json"}, stream=True, timeout=30) as response:
                    response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)

                    with open(local_file_path, 'wb') as f_out:
                        for chunk in response.iter_content(chunk_size=8192):
                            # iter_content may yield empty keep-alive chunks; skip them.
                            if chunk:
                                f_out.write(chunk)

                print(f"Successfully downloaded and saved to: {local_file_path}")
                download_count += 1
            except requests.exceptions.HTTPError as e:
                print(f"HTTP error downloading {file_name} for task {task_id}: {e}")
                if e.response is not None:
                    print(f"Response status: {e.response.status_code}")
                    print(f"Response text (first 500 chars): {e.response.text[:500]}")
            except requests.exceptions.RequestException as e:
                print(f"Error downloading {file_name} for task {task_id}: {e}")
            except IOError as e:
                print(f"Error saving {file_name} to {local_file_path}: {e}")
            except Exception as e:
                print(f"An unexpected error occurred while processing {task_id}: {e}")
        elif file_name:  # task_id is missing but file_name is present
            print(f"\nSkipping item with file_name '{file_name}' because task_id is missing: {item}")

    if download_count > 0:
        print(f"\nFinished downloading. Total files downloaded: {download_count}")
    else:
        print("\nNo files were identified for download or all downloads failed.")


if __name__ == "__main__":
    print("--- Starting File Downloader ---")
    # Ensure the questions directory exists (it should if questionfetch.py ran)
    # os.makedirs(QUESTIONS_DIR, exist_ok=True) # Removed as files are saved in current directory
    download_required_files()
    print("--- File Downloader Finished ---")