"""Gradio demo that samples text continuations from a SmolLM2 checkpoint."""

import gradio as gr
import torch
from transformers import AutoTokenizer

from model import SmolLM2  # Ensure this imports your model correctly

# Load the model and tokenizer
model_path = "smollm2_final.pt"
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/cosmo2-tokenizer")  # Adjust if necessary

# Load model configuration
model_config = {
    "bos_token_id": 0,
    "eos_token_id": 0,
    "hidden_act": "silu",
    "hidden_size": 576,
    "initializer_range": 0.041666666666666664,
    "intermediate_size": 1536,
    "is_llama_config": True,
    "max_position_embeddings": 2048,
    "num_attention_heads": 9,
    "num_hidden_layers": 30,
    "num_key_value_heads": 3,
    "pad_token_id": None,
    "pretraining_tp": 1,
    "rms_norm_eps": 1.0e-05,
    "rope_interleaved": False,
    "rope_scaling": None,
    "rope_theta": 10000.0,
    "tie_word_embeddings": True,
    "use_cache": True,
    "vocab_size": 49152,
}

# Initialize the model with the configuration
model = SmolLM2(model_config)

# Load the model weights with map_location to handle CPU-only environments.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source; consider weights_only=True if the installed torch supports it.
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
model.eval()  # Set the model to evaluation mode


def generate_text(prompt, length, num_sequences):
    """Sample continuations of ``prompt`` and return them as one string.

    Args:
        prompt: Text to continue.
        length: Number of tokens to allow beyond the tokenised prompt; it is
            added to the prompt's token count to form ``max_length``.
        num_sequences: How many independent samples to draw.

    Returns:
        The decoded samples, each prefixed with ``"Sequence <k>:"`` and
        separated by blank lines, or a short hint when the prompt is empty.
    """
    # UI sliders can hand over floats; range() and max_length require ints.
    length = int(length)
    num_sequences = int(num_sequences)

    if not prompt:
        return "Please enter a prompt."

    input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"]
    prompt_token_count = input_ids.shape[-1]
    if prompt_token_count == 0:
        # Nothing to condition on; avoid passing a zero-length sequence on.
        return "Please enter a prompt."

    # Loop-invariant: the budget is the prompt plus the requested extra tokens.
    max_length = length + prompt_token_count

    generated_texts = []
    # Inference only: no autograd graph is needed while sampling.
    with torch.no_grad():
        for _ in range(num_sequences):
            generated_sequence = model.generate(
                input_ids,
                max_length=max_length,
                pad_token_id=tokenizer.pad_token_id,
                do_sample=True,
                temperature=0.8,
                top_k=50,
                top_p=0.95,
            )
            # Decode the generated sequence
            generated_texts.append(
                tokenizer.decode(generated_sequence[0], skip_special_tokens=True)
            )

    # Format the output
    return "\n\n".join(
        f"Sequence {i + 1}:\n{text}" for i, text in enumerate(generated_texts)
    )


# Create Gradio interface
with gr.Blocks() as app:
    gr.Markdown("# SmolLM2 Text Generator")

    prompt_input = gr.Textbox(
        label="Enter your text prompt",
        placeholder="Type your prompt here...",
    )
    length_slider = gr.Slider(
        minimum=10,
        maximum=200,
        label="Predict Additional Text of Length",
        value=50,
    )
    # Step set to 1 for integer values
    num_sequences_slider = gr.Slider(
        minimum=1,
        maximum=5,
        label="Number of Sequences to Generate",
        value=1,
        step=1,
    )

    generate_button = gr.Button("Generate Text")
    output_text = gr.Textbox(label="Generated Text", interactive=False)

    generate_button.click(
        fn=generate_text,
        inputs=[prompt_input, length_slider, num_sequences_slider],
        outputs=output_text,
    )

# Launch the app
app.launch()