# app.py
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- 1. Model and Tokenizer Configuration ---
# The Space downloads this model from the Hugging Face Hub automatically.
model_name = "likhonsheikh/sheikh-coder-v1-3b"

print("Starting script...")

# --- 2. Load the Model ---
# Wrap loading in try/except so the Space logs a clear error message if
# anything goes wrong (e.g., the model repo is unreachable).
try:
    # torch_dtype="auto" lets transformers pick the best precision for the
    # hardware (e.g., bfloat16 on recent GPUs), which can significantly
    # speed up inference and reduce memory usage.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        torch_dtype="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Move the model to the GPU if the Space's hardware has one.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    model.eval()  # inference mode: disables dropout and other training behavior

    model_loaded = True
    print(f"Model '{model_name}' loaded successfully on device: {device}")
except Exception as e:
    model_loaded = False
    error_message = str(e)
    print(f"FATAL: Failed to load model. Error: {error_message}")

# --- 3. Define the Prediction Function ---
def generate_code(prompt):
    """Take a text prompt and return the model's completion."""
    if not model_loaded:
        # Surface the load failure in the UI instead of failing silently.
        raise gr.Error(f"Model failed to load: {error_message}")

    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt first.")

    try:
        # Tokenize the prompt and move the tensors to the model's device.
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        # Generate the completion. no_grad() skips gradient bookkeeping,
        # which saves memory during inference.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,  # cap the number of newly generated tokens
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id  # avoids a padding warning
            )

        # Decode the generated token IDs back into a string.
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return generated_text
    except Exception as e:
        print(f"Error during generation: {str(e)}")
        raise gr.Error(f"An error occurred during code generation: {str(e)}")

# --- 4. Create the Gradio Interface ---
demo = gr.Interface(
    fn=generate_code,
    inputs=gr.Textbox(
        lines=5,
        label="Enter your code snippet or question:",
        placeholder="def fibonacci(n):"
    ),
    outputs=gr.Textbox(label="AI Sheikh's Response:", lines=10),
    title="AI Sheikh Coder (3B Model)",
    description=(
        "A Gradio app for the sheikh-coder-v1-3b model. Provide a starting "
        "piece of code or a question, and the AI will complete it. "
        "Model loading can take a minute on boot."
    ),
    examples=[
        ["def factorial(n):"],
        ["import pandas as pd\n# create a dataframe with 3 columns: 'name', 'age', 'city'"],
        ["# A python function to check if a number is prime"]
    ]
)

# --- 5. Launch the App (for Hugging Face Spaces) ---
# demo.launch() is all that's needed to start the web server.
if __name__ == "__main__":
    demo.launch()
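
# --- Optional: sampled decoding (a sketch, not wired into the app above) ---
# The generate() call above uses greedy decoding by default, so the same
# prompt always yields the same completion. transformers' generate() also
# accepts standard sampling arguments; the values below are illustrative
# assumptions, not tuned settings for this model:
#
#     with torch.no_grad():
#         outputs = model.generate(
#             **inputs,
#             max_new_tokens=256,
#             do_sample=True,      # sample instead of always taking the argmax
#             temperature=0.7,     # assumed starting point; lower = more deterministic
#             top_p=0.95,          # nucleus sampling: keep the top 95% probability mass
#             pad_token_id=tokenizer.eos_token_id,
#         )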
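
# --- Optional: return only the completion, not the echoed prompt ---
# For causal LMs, outputs[0] contains the prompt tokens followed by the newly
# generated tokens, so decoding the whole sequence (as generate_code does)
# returns the prompt verbatim plus the completion. To show only the new text,
# slice off the prompt length first. A minimal sketch reusing the same
# `inputs`/`outputs` names as above:
#
#     prompt_length = inputs["input_ids"].shape[-1]
#     completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)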
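
# --- Optional: enable Gradio's request queue ---
# On CPU Spaces, generating 256 new tokens from a 3B model can take long
# enough for HTTP requests to time out. Gradio ships a built-in queue for
# long-running calls; a minimal sketch that would replace the launch line
# above (queue defaults vary by Gradio version, so check your version's docs):
#
#     if __name__ == "__main__":
#         demo.queue().launch()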