import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# 🚨 DEBUG: List token-related environment variables (values partially masked)
print("🔍 DEBUG: Listing relevant environment variables:")
for key, value in os.environ.items():
    if "HF" in key or "TOKEN" in key or "SECRET" in key:  # Only show relevant secrets
        print(f"{key} = {value[:5]}...{value[-5:]} (Masked for security)")

# ✅ Get the Hugging Face token from the environment and make sure it is set
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("❌ HF_TOKEN is not set! Go to Hugging Face Spaces → Settings → Secrets and add your token.")
print(f"✅ HF_TOKEN detected: {HF_TOKEN[:5]}...{HF_TOKEN[-5:]} (Masked for security)")

# ✅ Define the model to load
model_name = "meta-llama/Llama-3.2-1B-Instruct"

# ✅ Load tokenizer & model with authentication (`token=` replaces the deprecated `use_auth_token=`)
print(f"🔄 Loading model: {model_name} ...")
tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    token=HF_TOKEN,
)
print(f"✅ Model '{model_name}' loaded successfully!")

# ✅ Chatbot function: tokenize the prompt, generate a completion, and decode it
def chatbot(prompt):
    # Move inputs to whatever device the model was placed on (works with or without a GPU)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=200)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# ✅ Launch the Gradio interface
print("🚀 Launching chatbot...")
gr.Interface(fn=chatbot, inputs="text", outputs="text", title="Llama Chatbot").launch()