Final_Assignment_agentcourse

Runtime error

App Files Files Community

Final_Assignment_agentcourse / questions /filedownloader.py

CasperCvO

feat: initialize project with question fetching functionality and dependencies

6f31603 2 months ago

raw

history blame contribute delete

4.07 kB

	import os
	import json
	import requests

	# Constants
	# Base URL of the scoring service; download_required_files() requests
	# "{DEFAULT_API_URL}/files/{task_id}" for each attachment.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
	# JSON file read by download_required_files(); it must hold a list of
	# question objects. The path is relative, so it is resolved against the
	# current working directory.
	QUESTIONS_FILE = "questions.json"
	# Created on demand if it does not exist yet.
	DOWNLOAD_DIR = "files" # Directory to save downloaded files (relative to script's CWD, expected to be 'questions' dir)

	def _load_questions():
	    """Return the question list parsed from QUESTIONS_FILE, or None on failure.

	    Every failure (missing file, unreadable file, invalid UTF-8 or JSON,
	    top-level value that is not a list) is reported on stdout instead of
	    being raised.
	    """
	    if not os.path.exists(QUESTIONS_FILE):
	        print(f"Error: {QUESTIONS_FILE} not found. Please run the question fetcher script first.")
	        return None

	    try:
	        with open(QUESTIONS_FILE, 'r', encoding='utf-8') as f:
	            questions_data = json.load(f)
	    except (json.JSONDecodeError, UnicodeDecodeError) as e:
	        # UnicodeDecodeError is raised while reading a file that is not
	        # valid UTF-8; it is neither a JSONDecodeError nor an OSError.
	        print(f"Error decoding {QUESTIONS_FILE}: {e}")
	        return None
	    except OSError as e:
	        print(f"Error reading {QUESTIONS_FILE}: {e}")
	        return None

	    if not isinstance(questions_data, list):
	        print(f"Error: Expected a list of questions in {QUESTIONS_FILE}, but got {type(questions_data)}.")
	        return None
	    return questions_data


	def _download_file(task_id, file_name):
	    """Download the file attached to *task_id* into DOWNLOAD_DIR.

	    The body is streamed to a temporary ``.part`` file and renamed onto the
	    final path only after the whole download succeeded, so a failed transfer
	    never leaves a truncated file behind or clobbers an earlier good copy.

	    Args:
	        task_id: Identifier interpolated into the download URL.
	        file_name: Name taken from the questions file (a ``str``). Only its
	            final path component is used for the local file.

	    Returns:
	        True if the file was saved, False otherwise. Errors are printed,
	        never raised.
	    """
	    # file_name comes from downloaded data: keep only the last path component
	    # (for both "/" and "\\" separators) so it cannot escape DOWNLOAD_DIR.
	    safe_name = os.path.basename(file_name.replace("\\", "/"))
	    if safe_name in ("", ".", ".."):
	        print(f"\nSkipping task {task_id}: file_name {file_name!r} is not a usable file name.")
	        return False

	    file_url = f"{DEFAULT_API_URL}/files/{task_id}"
	    local_file_path = os.path.join(DOWNLOAD_DIR, safe_name)
	    partial_path = local_file_path + ".part"

	    print(f"\nProcessing task_id: {task_id}, file_name: {file_name}")
	    print(f"Attempting to download from: {file_url}")

	    response = None
	    succeeded = False
	    try:
	        # The curl example includes 'accept: application/json'.
	        # For file downloads, this might be unusual, but we'll follow the example.
	        # stream=True avoids loading the whole file into memory.
	        response = requests.get(file_url, headers={"accept": "application/json"}, stream=True, timeout=30)
	        response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx)

	        with open(partial_path, 'wb') as f_out:
	            for chunk in response.iter_content(chunk_size=8192):
	                f_out.write(chunk)

	        # Publish the file only once it is complete.
	        os.replace(partial_path, local_file_path)
	        succeeded = True
	        print(f"Successfully downloaded and saved to: {local_file_path}")

	    except requests.exceptions.HTTPError as e:
	        print(f"HTTP error downloading {file_name} for task {task_id}: {e}")
	        if e.response is not None:
	            print(f"Response status: {e.response.status_code}")
	            print(f"Response text (first 500 chars): {e.response.text[:500]}")
	    except requests.exceptions.RequestException as e:
	        print(f"Error downloading {file_name} for task {task_id}: {e}")
	    except OSError as e:
	        # Must stay after the requests handlers: RequestException itself
	        # derives from IOError/OSError.
	        print(f"Error saving {file_name} to {local_file_path}: {e}")
	    except Exception as e:
	        # Per-item boundary: one bad download must not abort the others.
	        print(f"An unexpected error occurred while processing {task_id}: {e}")
	    finally:
	        # With stream=True the connection stays open until it is closed
	        # explicitly. Closing here (not via "with") keeps the error body
	        # readable inside the HTTPError handler above.
	        if response is not None:
	            response.close()
	        if not succeeded and os.path.exists(partial_path):
	            try:
	                os.remove(partial_path)
	            except OSError as e:
	                print(f"Could not remove partial file {partial_path}: {e}")
	    return succeeded


	def download_required_files():
	    """Download the attachment of every question that declares one.

	    Reads QUESTIONS_FILE (a JSON list of question objects). For each object
	    with a non-empty "task_id" and "file_name", the file served at
	    "{DEFAULT_API_URL}/files/{task_id}" is saved inside DOWNLOAD_DIR under
	    the base name of "file_name". DOWNLOAD_DIR is created if missing.

	    Progress and every error are printed to stdout; nothing is raised for a
	    missing or invalid questions file, a malformed entry, or a failed
	    download, and one failure does not stop the remaining downloads.

	    Returns:
	        None.
	    """
	    questions_data = _load_questions()
	    if questions_data is None:
	        return

	    print(f"Found {len(questions_data)} questions in {QUESTIONS_FILE}.")

	    # Ensure the download directory exists
	    if not os.path.isdir(DOWNLOAD_DIR):
	        try:
	            os.makedirs(DOWNLOAD_DIR, exist_ok=True)
	        except OSError as e:
	            # E.g. a regular file already occupies the DOWNLOAD_DIR path.
	            print(f"Error creating directory {DOWNLOAD_DIR}: {e}")
	            return
	        print(f"Created directory: {DOWNLOAD_DIR}")

	    download_count = 0
	    for item in questions_data:
	        if not isinstance(item, dict):
	            print(f"\nSkipping malformed entry (expected a JSON object): {item!r}")
	            continue

	        task_id = item.get("task_id")
	        file_name = item.get("file_name")

	        if not file_name:
	            # No attachment for this question; nothing to do or report.
	            continue
	        if not task_id:
	            print(f"\nSkipping item with file_name '{file_name}' because task_id is missing: {item}")
	            continue
	        if not isinstance(file_name, str):
	            print(f"\nSkipping task {task_id}: file_name is not a string: {file_name!r}")
	            continue

	        if _download_file(task_id, file_name):
	            download_count += 1

	    if download_count > 0:
	        print(f"\nFinished downloading. Total files downloaded: {download_count}")
	    else:
	        print("\nNo files were identified for download or all downloads failed.")

	if __name__ == "__main__":
	    # Script entry point: runs only when the file is executed directly,
	    # never on import. QUESTIONS_FILE and DOWNLOAD_DIR are relative paths,
	    # so both are resolved against the current working directory.
	    print("--- Starting File Downloader ---")
	    download_required_files()
	    print("--- File Downloader Finished ---")