# File size: 4,068 Bytes
# Revision: 6f31603
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import json
import requests

# Constants
# Base URL of the scoring service; attachments are requested from
# "<DEFAULT_API_URL>/files/<task_id>".
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# JSON file holding the list of questions. The path is relative, so it is
# resolved against the current working directory. Presumably written by a
# separate question-fetcher script -- confirm against that script.
QUESTIONS_FILE = "questions.json"
DOWNLOAD_DIR = "files"  # Directory to save downloaded files (relative to script's CWD, expected to be 'questions' dir)

def _download_task_file(task_id, file_name):
    """
    Download the file attached to a single task into DOWNLOAD_DIR.

    The body is streamed to a temporary "<name>.part" file and only moved to
    its final name once the download has completed, so a failed or
    interrupted transfer never leaves a truncated file behind.

    Args:
        task_id: Identifier appended to "<DEFAULT_API_URL>/files/" to build the URL.
        file_name: Bare file name (no directory components) to save under DOWNLOAD_DIR.

    Returns:
        True if the file was fully downloaded and saved, False otherwise.
        Failures are reported on stdout instead of being raised.
    """
    file_url = f"{DEFAULT_API_URL}/files/{task_id}"
    local_file_path = os.path.join(DOWNLOAD_DIR, file_name)
    partial_path = local_file_path + ".part"

    print(f"\nProcessing task_id: {task_id}, file_name: {file_name}")
    print(f"Attempting to download from: {file_url}")

    response = None
    try:
        # The curl example includes 'accept: application/json'.
        # For file downloads, this might be unusual, but we'll follow the example.
        # stream=True avoids loading large files into memory in one piece.
        response = requests.get(
            file_url,
            headers={"accept": "application/json"},
            stream=True,
            timeout=30,
        )
        response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)

        with open(partial_path, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip empty keep-alive chunks
                    f_out.write(chunk)

        # Atomic within one filesystem: the final name only ever holds a complete file.
        os.replace(partial_path, local_file_path)

        print(f"Successfully downloaded and saved to: {local_file_path}")
        return True

    except requests.exceptions.HTTPError as e:
        print(f"HTTP error downloading {file_name} for task {task_id}: {e}")
        if e.response is not None:
            print(f"Response status: {e.response.status_code}")
            print(f"Response text (first 500 chars): {e.response.text[:500]}")
    except requests.exceptions.RequestException as e:
        print(f"Error downloading {file_name} for task {task_id}: {e}")
    except IOError as e:
        print(f"Error saving {file_name} to {local_file_path}: {e}")
    except Exception as e:
        # Last-resort boundary: one bad item must not abort the remaining downloads.
        print(f"An unexpected error occurred while processing {task_id}: {e}")
    finally:
        # Close only after the handlers above ran: the HTTPError handler still
        # reads the response body, which needs the connection to be open.
        if response is not None:
            response.close()
        # Best-effort removal of a leftover partial file (absent on success).
        if os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError as e:
                print(f"Warning: could not remove partial file {partial_path}: {e}")
    return False


def download_required_files():
    """
    Reads questions.json, identifies tasks requiring additional files,
    downloads them using their task_id, and saves them under DOWNLOAD_DIR.

    Entries are skipped (with a message where something is wrong) when they are
    not JSON objects, have no file_name, have a file_name but no task_id, or
    have a file_name that does not reduce to a usable bare file name. Directory
    components in file_name are stripped so that nothing is ever written
    outside DOWNLOAD_DIR.

    All problems are reported on stdout; the function returns None.
    """
    try:
        with open(QUESTIONS_FILE, 'r', encoding='utf-8') as f:
            questions_data = json.load(f)
    except FileNotFoundError:
        print(f"Error: {QUESTIONS_FILE} not found. Please run the question fetcher script first.")
        return
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # UnicodeDecodeError: the file exists but is not valid UTF-8.
        print(f"Error decoding {QUESTIONS_FILE}: {e}")
        return
    except IOError as e:
        print(f"Error reading {QUESTIONS_FILE}: {e}")
        return

    if not isinstance(questions_data, list):
        print(f"Error: Expected a list of questions in {QUESTIONS_FILE}, but got {type(questions_data)}.")
        return

    print(f"Found {len(questions_data)} questions in {QUESTIONS_FILE}.")
    download_count = 0

    # Ensure the download directory exists
    if not os.path.isdir(DOWNLOAD_DIR):
        try:
            os.makedirs(DOWNLOAD_DIR, exist_ok=True)
        except OSError as e:
            # e.g. DOWNLOAD_DIR exists as a regular file, or permission denied
            print(f"Error creating directory {DOWNLOAD_DIR}: {e}")
            return
        print(f"Created directory: {DOWNLOAD_DIR}")

    for item in questions_data:
        if not isinstance(item, dict):
            print(f"\nSkipping malformed entry (expected a JSON object): {item!r}")
            continue

        task_id = item.get("task_id")
        file_name = item.get("file_name")

        if not file_name:
            continue  # this question has no attached file
        if not task_id:
            print(f"\nSkipping item with file_name '{file_name}' because task_id is missing: {item}")
            continue

        # file_name comes from external data: keep only its last path component
        # (treating both '/' and '\\' as separators) so it cannot escape DOWNLOAD_DIR.
        if isinstance(file_name, str):
            safe_name = os.path.basename(file_name.replace("\\", "/"))
        else:
            safe_name = ""
        if safe_name in ("", ".", ".."):
            print(f"\nSkipping task {task_id}: unusable file_name {file_name!r}")
            continue
        if safe_name != file_name:
            print(f"\nNote: file_name {file_name!r} contains directory components; saving as '{safe_name}'")

        if _download_task_file(task_id, safe_name):
            download_count += 1

    if download_count > 0:
        print(f"\nFinished downloading. Total files downloaded: {download_count}")
    else:
        print("\nNo files were identified for download or all downloads failed.")

if __name__ == "__main__":
    # Script entry point: bracket the download run with start/finish banners.
    # No directory setup is done here; download_required_files() creates the
    # download directory itself when it is missing.
    print("--- Starting File Downloader ---")
    download_required_files()
    print("--- File Downloader Finished ---")