import gradio as gr
from gradio_client import Client, handle_file
from PIL import Image, ImageOps
import os
import tempfile

# UI dimension label -> (target_width, target_height).
_DIMENSIONS = {
    "Square (1024x1024)": (1024, 1024),
    "Portrait (768x1360)": (768, 1360),
    "Landscape (1360x768)": (1360, 768),
}


def resize_and_pad(image_path, dimension):
    """Fit the image at *image_path* inside the selected target box and pad with white.

    The image is scaled (aspect ratio preserved) so it fits entirely within the
    target dimensions, then centered on a white RGB canvas of exactly that size.

    Args:
        image_path: Path to the source image readable by PIL.
        dimension: One of the UI labels in ``_DIMENSIONS``.

    Returns:
        Path of the padded PNG written to the system temp directory.

    Raises:
        ValueError: If *dimension* is not a recognized label.
    """
    img = Image.open(image_path)
    original_width, original_height = img.size

    try:
        target_width, target_height = _DIMENSIONS[dimension]
    except KeyError:
        raise ValueError("Invalid dimension selected") from None

    # Scale by the tighter of the two ratios so the image fits in the box.
    scaling_factor = min(target_width / original_width,
                         target_height / original_height)
    new_width = int(original_width * scaling_factor)
    new_height = int(original_height * scaling_factor)

    img = img.resize((new_width, new_height), Image.LANCZOS)

    # White canvas at the exact target size; paste the resized image centered.
    new_img = Image.new("RGB", (target_width, target_height), (255, 255, 255))
    offset = ((target_width - new_width) // 2,
              (target_height - new_height) // 2)
    new_img.paste(img, offset)

    padded_image_path = os.path.join(tempfile.gettempdir(), "padded_image.png")
    new_img.save(padded_image_path)
    return padded_image_path


def image_to_image(prompt, image, dimension, flux_client):
    """Edit *image* with the FLUX.1-Kontext Space according to *prompt*.

    The input is first resized/padded to *dimension*, and the user prompt is
    prefixed with a fixed instruction before being sent to the API.

    Returns:
        Path of the generated image (first element of the API result).
    """
    padded_image_path = resize_and_pad(image, dimension)
    prompt = ("Transforms a user-provided input image based on their prompt "
              "while maintaining consistency and fidelity to the original "
              "image. Generate an output image that accurately reflects the "
              "user's requested modifications. \n" + prompt)
    image = flux_client.predict(
        input_image=handle_file(padded_image_path),
        prompt=prompt,
        guidance_scale=2.5,
        steps=25,
        randomize_seed=True,
        api_name="/infer",
    )
    print(image)
    # Best-effort cleanup of the temporary padded image; a failed delete
    # (file already gone, permissions) must not break the pipeline.
    try:
        os.remove(padded_image_path)
    except OSError:
        pass
    return image[0]


def image_to_video(prompt, image, duration, ltx_client):
    """Animate *image* into a short video via the LTX-Video Space.

    Returns the raw API result (presumably the video file path/tuple —
    forwarded to the ``gr.Video`` output unchanged).
    """
    result = ltx_client.predict(
        prompt=prompt,
        input_image_url=None,
        middle_image_url=handle_file(image),
        final_image_url=None,
        # BUG FIX: forward the user's slider value; this was hard-coded to 4,
        # which made the "Video Duration" control a no-op.
        duration_ui=duration,
        api_name="/generate_video",
    )
    print(result)
    return result


def personalized_video(prompt, image, dimension, duration, request: gr.Request):
    """Generator pipeline: yield the edited image first, then the final video.

    Yields twice so the UI shows the edited image while the video renders.
    """
    # Forward the per-request IP token to the downstream Spaces' clients.
    x_ip_token = request.headers['x-ip-token']
    flux_client = Client("black-forest-labs/FLUX.1-Kontext-Dev",
                         headers={"x-ip-token": x_ip_token})
    ltx_client = Client("KingNish/ltx-video-distilled",
                        headers={"x-ip-token": x_ip_token})
    image = image_to_image(prompt, image, dimension, flux_client)
    yield image, None
    video = image_to_video(prompt, image, duration, ltx_client)
    yield image, video


with gr.Blocks() as demo:
    gr.Markdown("## Personalized Video Generator")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Input Image", type="filepath")
            prompt = gr.Textbox(label="Prompt")
            dimension = gr.Radio(
                ["Square (1024x1024)", "Portrait (768x1360)",
                 "Landscape (1360x768)"],
                label="Dimension",
                value="Square (1024x1024)",
            )
            duration = gr.Slider(minimum=1, maximum=5, value=4,
                                 label="Video Duration")
            submit_button = gr.Button("Submit")
        with gr.Column():
            edited_image = gr.Image(label="Edited Image")
            output_video = gr.Video(label="Output Video")
    submit_button.click(personalized_video,
                        [prompt, input_image, dimension, duration],
                        [edited_image, output_video])
    prompt.submit(personalized_video,
                  [prompt, input_image, dimension, duration],
                  [edited_image, output_video])

demo.launch(show_error=True)