# app.py
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
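# Note (assumption about this Space's setup): transformers and torch (plus accelerate,
# if used) should be listed in the Space's requirements.txt so they are installed at build time.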
# --- 1. Model and Tokenizer Configuration ---
# The model repository to load; the Space will download it from the Hugging Face Hub automatically.
model_name = "likhonsheikh/sheikh-coder-v1-3b"
print("Starting script...")
# --- 2. Load the Model ---
# We'll wrap this in a try-except block to provide clear error messages if something goes wrong on the Space.
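# Note: trust_remote_code=True executes custom modeling code shipped with the model repository,
# so it should only be enabled for repositories you trust.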
try:
    # Use torch_dtype="auto" to let transformers choose the best precision (like bfloat16 on newer GPUs).
    # This can significantly speed up inference and reduce memory usage.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        torch_dtype="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Move the model to the GPU if one is available on the Space's hardware.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    model_loaded = True
    print(f"Model '{model_name}' loaded successfully on device: {device}")
except Exception as e:
    model_loaded = False
    error_message = str(e)
    print(f"FATAL: Failed to load model. Error: {error_message}")
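# The exception is deliberately not re-raised, so the Gradio UI can still start and
# report the failure to users via generate_code() below.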
# --- 3. Define the Prediction Function ---
def generate_code(prompt):
    """
    This function takes a text prompt and returns the model's completion.
    """
    if not model_loaded:
        # If the model failed to load, show an error in the UI.
        raise gr.Error(f"Model failed to load: {error_message}")
    try:
        # Tokenize the input prompt and move it to the same device as the model.
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
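        # Note: with no sampling arguments, generate() defaults to greedy decoding;
        # do_sample=True with temperature/top_p could be passed for more varied completions.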
        # Generate the output from the model
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,  # Limit the number of new tokens to generate
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id  # Set pad token to avoid warnings
        )
        # Decode the generated tokens into a string
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return generated_text
    except Exception as e:
        print(f"Error during generation: {str(e)}")
        raise gr.Error(f"An error occurred during code generation: {str(e)}")
# --- 4. Create the Gradio Interface ---
demo = gr.Interface(
    fn=generate_code,
    inputs=gr.Textbox(
        lines=5,
        label="Enter your code snippet or question:",
        placeholder="def fibonacci(n):"
    ),
    outputs=gr.Textbox(label="AI Sheikh's Response:", lines=10),
    title="AI Sheikh Coder (3B Model)",
    description="A Gradio app for the sheikh-coder-v1-3b model. Provide a starting piece of code or a question, and the AI will complete it. Model loading can take a minute on boot.",
    examples=[
        ["def factorial(n):"],
        ["import pandas as pd\n# create a dataframe with 3 columns: 'name', 'age', 'city'"],
        ["# A python function to check if a number is prime"]
    ]
)
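# Optionally, demo.queue() could be called here to control how many requests run
# concurrently on the Space's limited hardware.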
# --- 5. Launch the App (for Hugging Face Spaces) ---
# The demo.launch() command is all that's needed to start the web server.
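# When running locally rather than on a Space, arguments such as server_name,
# server_port, or share=True can be passed to demo.launch() if needed.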
if __name__ == "__main__":
    demo.launch()