"""Codette Terminal: a Gradio chat app with optional image and video generation.

Text comes either from an OpenAI fine-tuned chat model or from a local
Hugging Face ``text-generation`` pipeline. Images and video are produced by
diffusers pipelines that are loaded once at import time.
"""
import math
import os
import tempfile
import time

import gradio as gr
import imageio
import torch
from diffusers import DiffusionPipeline
from openai import OpenAI
from transformers import pipeline

# ---------- Load OpenAI Key from Hugging Face Secret ----------
# Guarded like the diffusion loaders below: a missing key must not prevent the
# local (non-OpenAI) models from being usable.
try:
    client = OpenAI(api_key=os.getenv("OPENAI_KEY"))
except Exception as e:
    print(f"[OpenAI Client Error]: {e}")
    client = None

# ---------- Configuration ----------
FINE_TUNED_MODEL = "Codette Fine-Tuned (v9)"

AVAILABLE_MODELS = {
    FINE_TUNED_MODEL: "ft:gpt-4.1-2025-04-14:raiffs-bits:codette-final:BO907H7Z",
    "GPT-2 (small, fast)": "gpt2",
    "Falcon (TII UAE)": "tiiuae/falcon-7b-instruct",
    "Mistral (OpenAccess)": "mistralai/Mistral-7B-v0.1",
}

device = "cuda" if torch.cuda.is_available() else "cpu"
text_model_cache = {}   # model display name -> loaded text-generation pipeline
chat_memory = {}        # session id -> list of transcript lines
last_usage_time = {}    # session id -> time.time() of last fine-tuned request
MAX_PROMPTS_PER_SESSION = 5
THROTTLE_SECONDS = 30

# Transcript line prefixes. The per-session prompt limit counts lines that
# start with USER_PREFIX, so writer and counter must share this constant.
USER_PREFIX = "🖋️ You >"
BOT_PREFIX = "🧠 Codette >"
VISIBLE_LINES = 10      # number of trailing transcript lines shown in the UI


def _load_diffusion_pipeline(model_id, label):
    """Load a diffusers pipeline onto ``device``; return None on failure.

    ``label`` is only used in the printed error message.
    """
    try:
        pipe = DiffusionPipeline.from_pretrained(
            model_id,
            safety_checker=None,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        )
        pipe.to(device)
    except Exception as e:
        print(f"[{label} Model Load Error]: {e}")
        return None
    return pipe


# ---------- Load Image Generator ----------
image_generator = _load_diffusion_pipeline("runwayml/stable-diffusion-v1-5", "Image")
image_enabled = image_generator is not None

# ---------- Load Video Generator ----------
video_pipeline = _load_diffusion_pipeline("damo-vilab/text-to-video-ms-1.7b", "Video")
video_enabled = video_pipeline is not None


def _generate_text(prompt, model_name):
    """Run the selected text model and return ``(text, error)``.

    Exactly one element of the pair is None: ``text`` on failure, ``error``
    on success. ``error`` is a ready-to-display message.
    """
    if model_name == FINE_TUNED_MODEL:
        try:
            response = client.chat.completions.create(
                model=AVAILABLE_MODELS[model_name],
                messages=[{"role": "user", "content": prompt}],
                temperature=0.7,
                max_tokens=256,
            )
            return response.choices[0].message.content.strip(), None
        except Exception as e:
            return None, f"[OpenAI error]: {e}"

    # Local models are loaded lazily on first use and cached by display name.
    if model_name not in text_model_cache:
        try:
            text_model_cache[model_name] = pipeline(
                "text-generation",
                model=AVAILABLE_MODELS[model_name],
                device=0 if device == "cuda" else -1,
            )
        except Exception as e:
            return None, f"[Text model error]: {e}"

    try:
        generated = text_model_cache[model_name](
            prompt,
            max_length=100,
            do_sample=True,
            num_return_sequences=1,
        )
        return generated[0]["generated_text"].strip(), None
    except Exception as e:
        return None, f"[Generation error]: {e}"


def _unbatch_frames(frames):
    """Return the frames of the first video if ``frames`` carries a batch axis.

    NOTE(review): recent diffusers releases appear to return text-to-video
    frames batched (one sequence of frames per prompt) while older ones
    returned the frame list directly - confirm against the pinned version.
    Both shapes are accepted here.
    """
    first = frames[0]
    if getattr(first, "ndim", None) == 4 or isinstance(first, (list, tuple)):
        return first
    return frames


def _render_video(prompt, video_steps, fps):
    """Generate a video for ``prompt`` and return the path of the .mp4 file."""
    result = video_pipeline(prompt, num_inference_steps=int(video_steps))
    frames = _unbatch_frames(result.frames)

    # Close our handle before imageio writes to the path; the file is kept
    # (delete=False) because the caller hands the path back to the UI.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    try:
        imageio.mimsave(video_path, frames, fps=fps)
    except Exception:
        # Do not leave an empty/partial temp file behind on failure.
        if os.path.exists(video_path):
            os.remove(video_path)
        raise
    return video_path


# ---------- Main Terminal with Rate Limits ----------
def codette_terminal_limited(prompt, model_name, generate_image, generate_video,
                             session_id, batch_size, video_steps, fps):
    """Handle one prompt: stream the text reply, then attach optional media.

    This is a generator. Every yielded item is a 3-tuple
    ``(transcript_text, images, video_path)``; ``images`` and ``video_path``
    are None except possibly on the final yield.

    Args:
        prompt: User text. ``exit`` / ``quit`` (any case, surrounding
            whitespace ignored) clears the session transcript.
        model_name: A key of ``AVAILABLE_MODELS``.
        generate_image: Whether to run the image pipeline after the reply.
        generate_video: Whether to run the video pipeline after the reply.
        session_id: Key into ``chat_memory`` / ``last_usage_time``.
        batch_size: Number of images to request.
        video_steps: Inference steps for the video pipeline.
        fps: Frame rate used when encoding the video.

    Only the fine-tuned model is rate limited: at most
    ``MAX_PROMPTS_PER_SESSION`` prompts per session transcript and one
    request every ``THROTTLE_SECONDS`` seconds per session.
    """
    history = chat_memory.setdefault(session_id, [])

    command = (prompt or "").strip().lower()
    if not command:
        # Nothing to send; show the current transcript without touching any
        # model or consuming rate-limited quota.
        yield "\n".join(history[-VISIBLE_LINES:]), None, None
        return

    if command in ("exit", "quit"):
        chat_memory[session_id] = []
        yield "🧠 Codette signing off... Session reset.", None, None
        return

    if model_name == FINE_TUNED_MODEL:
        if client is None:
            yield "[OpenAI error]: client unavailable; set the OPENAI_KEY secret.", None, None
            return

        prompts_used = sum(1 for line in history if line.startswith(USER_PREFIX))
        if prompts_used >= MAX_PROMPTS_PER_SESSION:
            yield f"[🛑 Limit] Max {MAX_PROMPTS_PER_SESSION} prompts per session.", None, None
            return

        now = time.time()
        remaining = THROTTLE_SECONDS - (now - last_usage_time.get(session_id, 0))
        if remaining > 0:
            # ceil so the message never says "0 sec" while still blocking.
            yield f"[⏳ Wait] Try again in {math.ceil(remaining)} sec.", None, None
            return
        last_usage_time[session_id] = now

    output, error = _generate_text(prompt, model_name)
    if error is not None:
        yield error, None, None
        return

    # Stream text output one character at a time. The visible window is the
    # last VISIBLE_LINES transcript lines including the two new ones, so the
    # unchanging part is built once outside the loop.
    user_line = f"{USER_PREFIX} {prompt}"
    window_head = "\n".join(history[-(VISIBLE_LINES - 2):] + [user_line])
    for end in range(1, len(output) + 1):
        yield f"{window_head}\n{BOT_PREFIX} {output[:end]}", None, None
        time.sleep(0.01)

    chat_memory[session_id].append(user_line)
    chat_memory[session_id].append(f"{BOT_PREFIX} {output}")

    imgs, vid = None, None
    media_errors = []   # shown in the final output, not stored in the transcript

    if generate_image and image_enabled:
        try:
            imgs = image_generator(prompt, num_images_per_prompt=int(batch_size)).images
        except Exception as e:
            media_errors.append(f"[Image error]: {e}")

    if generate_video and video_enabled:
        try:
            vid = _render_video(prompt, video_steps, fps)
        except Exception as e:
            media_errors.append(f"[Video error]: {e}")

    final_lines = chat_memory[session_id][-VISIBLE_LINES:] + media_errors
    yield "\n".join(final_lines), imgs, vid


# ---------- Gradio UI ----------
with gr.Blocks(title="🧬 Codette Terminal – Streamed AI Chat") as demo:
    gr.Markdown("## 🧬 Codette Terminal (Chat + Image + Video + Fine-Tuned AI)")
    gr.Markdown("Type a prompt, choose a model, and generate responses. Type `'exit'` to reset the session.")

    with gr.Row():
        # NOTE(review): this hidden value is the same constant for every
        # visitor, so all users share one transcript and one rate-limit
        # bucket. Confirm whether that global cap is intended before
        # switching to a per-visitor id.
        session_id = gr.Textbox(value="session_default", visible=False)
        model_dropdown = gr.Dropdown(
            choices=list(AVAILABLE_MODELS.keys()),
            value="GPT-2 (small, fast)",
            label="Language Model",
        )

    with gr.Row():
        generate_image_toggle = gr.Checkbox(
            label="Generate Image(s)?", value=False, interactive=image_enabled
        )
        generate_video_toggle = gr.Checkbox(
            label="Generate Video?", value=False, interactive=video_enabled
        )

    with gr.Row():
        batch_size_slider = gr.Slider(
            label="Number of Images", minimum=1, maximum=4, step=1, value=1
        )
        video_steps_slider = gr.Slider(
            label="Video Inference Steps", minimum=10, maximum=100, step=10, value=50
        )
        fps_slider = gr.Slider(label="Video FPS", minimum=4, maximum=24, step=2, value=8)

    with gr.Row():
        user_input = gr.Textbox(
            label="Your Prompt",
            placeholder="e.g. A robot dreaming on Mars",
            lines=1,
        )

    with gr.Row():
        output_text = gr.Textbox(label="Codette Output", lines=15, interactive=False)

    with gr.Row():
        output_image = gr.Gallery(label="Generated Image(s)", columns=2)
        output_video = gr.Video(label="Generated Video")

    user_input.submit(
        codette_terminal_limited,
        inputs=[
            user_input,
            model_dropdown,
            generate_image_toggle,
            generate_video_toggle,
            session_id,
            batch_size_slider,
            video_steps_slider,
            fps_slider,
        ],
        outputs=[output_text, output_image, output_video],
    )

# ---------- Launch ----------
if __name__ == "__main__":
    demo.launch(mcp_server=True)