# app.py
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# --- 1. Model and Tokenizer Configuration ---
# We are using the specific model you mentioned earlier.
# The Space will download this from the Hugging Face Hub automatically.
model_name = "likhonsheikh/sheikh-coder-v1-3b"

print("Starting script...")
# --- 2. Load the Model ---
# We'll wrap this in a try-except block to provide clear error messages
# if something goes wrong on the Space.
try:
    # Use torch_dtype="auto" to let transformers choose the best precision
    # (like bfloat16 on newer GPUs). This can significantly speed up inference
    # and reduce memory usage.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        torch_dtype="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Move the model to the GPU if one is available on the Space's hardware.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    model_loaded = True
    print(f"Model '{model_name}' loaded successfully on device: {device}")
except Exception as e:
    model_loaded = False
    error_message = str(e)
    print(f"FATAL: Failed to load model. Error: {error_message}")
# --- 3. Define the Prediction Function ---
def generate_code(prompt):
    """
    Take a text prompt and return the model's completion.
    """
    if not model_loaded:
        # If the model failed to load, surface the error in the UI.
        raise gr.Error(f"Model failed to load: {error_message}")
    try:
        # Tokenize the input prompt and move it to the same device as the model.
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        # Generate the output from the model.
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,  # Limit the number of new tokens to generate
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id  # Set pad token to avoid warnings
        )

        # Decode the generated tokens into a string.
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return generated_text
    except Exception as e:
        print(f"Error during generation: {str(e)}")
        raise gr.Error(f"An error occurred during code generation: {str(e)}")
# --- 4. Create the Gradio Interface ---
demo = gr.Interface(
    fn=generate_code,
    inputs=gr.Textbox(
        lines=5,
        label="Enter your code snippet or question:",
        placeholder="def fibonacci(n):"
    ),
    outputs=gr.Textbox(label="AI Sheikh's Response:", lines=10),
    title="AI Sheikh Coder (3B Model)",
    description=(
        "A Gradio app for the sheikh-coder-v1-3b model. Provide a starting piece of code "
        "or a question, and the AI will complete it. Model loading can take a minute on boot."
    ),
    examples=[
        ["def factorial(n):"],
        ["import pandas as pd\n# create a dataframe with 3 columns: 'name', 'age', 'city'"],
        ["# A python function to check if a number is prime"]
    ]
)
# --- 5. Launch the App (for Hugging Face Spaces) ---
# The demo.launch() command is all that's needed to start the web server.
if __name__ == "__main__":
    demo.launch()
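
# --- Note on dependencies (an assumption, not part of the original script) ---
# For this app.py to boot on a Space, a requirements.txt file is assumed to sit
# alongside it and list at least the packages imported above, for example:
#     gradio
#     torch
#     transformers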