from urllib.parse import urlparse
import time

from test import get_cookies
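
# NOTE: `get_cookies` is not defined in this file. Judging by how the main
# block below indexes its result, it is assumed to return a mapping shaped
# roughly like this (names and values here are illustrative only):
#   {"youtube": {"raw_cookies": "SID=abc123; HSID=def456",
#                "source_url": "https://www.youtube.com/"}}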


def infer_base_domain(original_source_url):
    """
    Infers a correct base domain for YouTube/Google cookies from a
    potentially problematic original_source_url.
    Prioritizes common YouTube/Google domains.
    """
    if "youtube.com" in original_source_url:
        return "youtube.com"
    elif "google.com" in original_source_url:
        return "google.com"
    elif "googleusercontent.com" in original_source_url:  # Generic googleusercontent fallback
        return "googleusercontent.com"
    # Fallback if no specific match; this may still give bad results when the
    # original_source_url itself is completely unrecognizable.
    parsed_url = urlparse(original_source_url)
    if parsed_url.hostname:
        return parsed_url.hostname
    return "unknown.com"  # Should ideally not be hit for YouTube/Google cookies


def convert_raw_cookie_string_to_netscape_fixed_domain(raw_cookie_str, original_source_url):
    """
    Converts a raw cookie string to Netscape format, inferring a correct
    base domain for YouTube/Google cookies.
    """
    base_domain = infer_base_domain(original_source_url)
    lines = [
        "# Netscape HTTP Cookie File",
        "# http://curl.haxx.se/rfc/cookie_spec.html",
        "# This file was generated by script - Domain inferred for YouTube/Google compatibility."
    ]
    for raw_cookie in raw_cookie_str.split(';'):
        cookie = raw_cookie.strip().split('=', 1)
        if len(cookie) != 2:
            continue
        name, value = cookie[0], cookie[1]
        # Domain field: a leading dot makes the cookie apply to subdomains too.
        domain_field = f".{base_domain}"
        # Heuristic for HttpOnly based on common cookie name prefixes.
        # Note: the true HttpOnly status isn't available from a raw
        # "name=value" string, so this is a guess.
        is_httponly = name.startswith(("__Secure-", "__Host-", "SID", "SSID", "LSID"))
        if is_httponly:
            # The Netscape convention for HttpOnly is "#HttpOnly_" prefixed to
            # the domain; domain_field already carries its leading dot.
            domain_field = f"#HttpOnly_{domain_field}"
        # The 'flag' column (include subdomains) is generally TRUE for
        # dot-prefixed domains.
        include_subdomains = "TRUE"
        # The 'path' column is usually '/' for general site cookies.
        path = "/"
        # The 'secure' column: guessed from the __Secure- prefix; better if
        # known from the source.
        secure = "TRUE" if name.startswith("__Secure-") else "FALSE"
        # Expiration: a fixed, far-future date is often sufficient for yt-dlp.
        expires = str(int(time.time()) + 180 * 24 * 3600)  # 180 days from now
        # Columns: domain, subdomain flag, path, secure, expiry, name, value.
        line = f"{domain_field}\t{include_subdomains}\t{path}\t{secure}\t{expires}\t{name}\t{value}"
        lines.append(line)
    return lines
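
# Example of one emitted line for a cookie named SID (columns are
# tab-separated; the value and expiry epoch below are hypothetical):
#   #HttpOnly_.youtube.com  TRUE  /  FALSE  1750000000  SID  abc123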


# --- Main Execution Block (adapt to your API structure) ---
# Assuming 'cookie_entry' is received at your API endpoint.
# For testing purposes, this uses the data returned by get_cookies()
# (imported from test above):
cookie_entry_data = get_cookies()
raw_str = cookie_entry_data['youtube']['raw_cookies']
source = cookie_entry_data['youtube']['source_url']

# Convert to Netscape format.
output_lines = convert_raw_cookie_string_to_netscape_fixed_domain(raw_str, source)

# Save to file (this would happen on your API server).
with open("cookies.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(output_lines))

print("✅ Saved as cookies.txt in Netscape format with inferred domains.")