"""Gradio front end that streams completions from a causal language model.

The module loads the tokenizer and model named by ``MODEL_NAME`` at import
time, exposes ``generate_response`` as a streaming generator, and launches a
single-textbox Gradio interface around it.
"""

import logging
from threading import Thread

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load model & tokenizer
MODEL_NAME = "ubiodee/Cardano_plutus"

try:
    logger.info("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    logger.info("Loading model...")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="auto",
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
    )
    model.eval()
    logger.info("Model and tokenizer loaded successfully.")
except Exception as e:
    # Top-level boundary: record the traceback, then let startup fail loudly.
    logger.exception("Error loading model or tokenizer: %s", e)
    raise


# Prompt template to guide the model (simple, since no model card details)
def format_prompt(user_prompt):
    """Return *user_prompt* wrapped in the ``User:``/``Assistant:`` template."""
    return f"User: {user_prompt}\nAssistant:"


# Response function with proper streaming
def generate_response(user_prompt):
    """Stream the model's reply to *user_prompt*.

    Generation runs in a background thread and decoded text arrives through
    a ``TextIteratorStreamer``.

    Args:
        user_prompt: Raw text entered by the user.

    Yields:
        The accumulated response so far, with surrounding whitespace
        stripped, once per decoded chunk. If anything fails, a single
        ``"Error: ..."`` string is yielded instead of raising.
    """
    try:
        logger.info("Processing prompt...")
        prompt = format_prompt(user_prompt)
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        # Use streamer for token-by-token generation
        streamer = TextIteratorStreamer(
            tokenizer, skip_prompt=True, skip_special_tokens=True
        )

        # The tokenizer may not define a pad token; fall back to EOS so
        # generate() is never handed pad_token_id=None.
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = tokenizer.eos_token_id

        generation_kwargs = {
            **inputs,
            "streamer": streamer,
            "max_new_tokens": 300,
            # Sampling is enabled, but with low temperature/top_p values.
            "do_sample": True,
            "temperature": 0.1,
            "top_p": 0.1,
            "eos_token_id": tokenizer.eos_token_id,
            "pad_token_id": pad_token_id,
        }

        worker_errors = []

        def _run_generation():
            """Run generate(); on failure, record the error and end the stream."""
            try:
                model.generate(**generation_kwargs)
            except Exception as exc:
                worker_errors.append(exc)
                # Without the stop signal the consumer loop below would
                # block forever waiting on the streamer's queue.
                streamer.end()

        # Run generation in a separate thread to avoid blocking
        thread = Thread(target=_run_generation)
        thread.start()

        generated_text = ""
        for new_text in streamer:
            generated_text += new_text
            yield generated_text.strip()

        thread.join()
        if worker_errors:
            # Surface the worker's failure through the handler below.
            raise worker_errors[0]

        logger.info("Response generated successfully.")
    except Exception as e:
        logger.exception("Error during generation: %s", e)
        yield f"Error: {str(e)}"


# Gradio UI
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(
        label="Enter your prompt",
        lines=4,
        placeholder="Ask about Plutus or Cardano...",
    ),
    outputs=gr.Textbox(label="Model Response"),
    title="Cardano Plutus AI Assistant",
    description="Your Cardano AI Builder..",
    allow_flagging="never",
)

# Launch the app
try:
    logger.info("Launching Gradio interface...")
    demo.launch()
except Exception as e:
    logger.exception("Error launching Gradio: %s", e)
    raise