"""Gradio demo: generate an image from a text prompt or an uploaded image.

The script loads a T5 language model, a StyleGAN2 synthesis network and an
ImageNet-trained InceptionV3 classifier at import time, then builds and
launches a Gradio interface around :func:`image_generation`.
"""
from io import BytesIO

import gradio as gr
import numpy as np
import tensorflow as tf
from PIL import Image
from stylegan2.tf_api import G_synthesis as StyleGAN2
from tensorflow.keras.applications.inception_v3 import (
    InceptionV3,
    decode_predictions,
    preprocess_input,
)
from tensorflow.keras.preprocessing.image import img_to_array, load_img  # noqa: F401
from transformers import AutoTokenizer, TFAutoModelWithLMHead

# InceptionV3 with its ImageNet classification head only accepts 299x299 RGB
# inputs (see the Keras applications documentation).
INCEPTION_INPUT_SIZE = (299, 299)

# Number of ImageNet labels fed into the caption prompt.
TOP_LABELS = 3

# Load pre-trained image captioning model
tokenizer = AutoTokenizer.from_pretrained("t5-large")
model = TFAutoModelWithLMHead.from_pretrained("t5-large")

# Load pre-trained StyleGAN2 model
# NOTE(review): the constructor / load_weights contract of `stylegan2.tf_api`
# is not visible from this file -- confirm it accepts a .pkl path like this.
g = StyleGAN2()
g.load_weights('models/stylegan2-ffhq-config-f.pkl')

# Load pre-trained InceptionV3 model used to recognise uploaded images
inception_v3 = InceptionV3(weights='imagenet')


def preprocess_image(image):
    """Turn a PIL image into a single-item batch ready for InceptionV3.

    Args:
        image: A ``PIL.Image.Image`` in any mode or size.

    Returns:
        A float array with a leading batch axis of length 1, resized to
        ``INCEPTION_INPUT_SIZE`` and scaled by ``preprocess_input``.
    """
    # Force three channels first: RGBA or greyscale uploads would otherwise
    # produce an array whose channel count the network rejects.
    image = image.convert("RGB").resize(INCEPTION_INPUT_SIZE)
    image_array = img_to_array(image)
    image_array = preprocess_input(image_array)
    return np.expand_dims(image_array, axis=0)


def generate_image(description):
    """Generate an image from a text description using T5 and StyleGAN2.

    Args:
        description: Free-form text describing the desired image.

    Returns:
        A ``PIL.Image.Image`` in RGB mode.
    """
    z = tf.random.normal([1, g.input_shape[1]])
    text = "generate image of a " + description
    input_ids = tokenizer.encode(text, return_tensors='tf')
    output = model.generate(input_ids=input_ids)
    caption = tokenizer.decode(output[0], skip_special_tokens=True)
    # NOTE(review): whether the synthesis network accepts a caption as a
    # second positional argument cannot be confirmed from this file.
    image = g(z, caption)
    # Clip before the uint8 cast so out-of-range values saturate instead of
    # wrapping around.
    # NOTE(review): assumes the generator output is scaled to [0, 1] -- confirm.
    pixels = np.clip(image.numpy()[0] * 255, 0, 255).astype(np.uint8)
    return Image.fromarray(pixels, mode='RGB')


def _load_pil_image(image_file):
    """Return a PIL image for an ndarray, a PIL image or a file-like object."""
    if isinstance(image_file, Image.Image):
        return image_file
    if isinstance(image_file, np.ndarray):
        # Gradio's Image input hands over a NumPy array by default
        # (type="numpy"), so this is the path taken by the web UI.
        return Image.fromarray(image_file.astype(np.uint8))
    # Fall back to the original contract: a binary file-like object.
    return Image.open(BytesIO(image_file.read()))


def generate_description(image_file):
    """Generate a text description of an uploaded image with InceptionV3 and T5.

    The image is classified by InceptionV3 and the top ImageNet labels are
    placed in the T5 prompt, so that the returned text depends on the image
    content rather than on a fixed prompt alone.

    Args:
        image_file: A NumPy image array, a ``PIL.Image.Image`` or a binary
            file-like object exposing ``read()``.

    Returns:
        The decoded T5 output as a string.
    """
    batch = preprocess_image(_load_pil_image(image_file))
    predictions = inception_v3.predict(batch)
    # decode_predictions yields (class_id, class_name, score) tuples; the
    # class names use underscores between words.
    top_labels = [
        class_name.replace("_", " ")
        for _, class_name, _ in decode_predictions(predictions, top=TOP_LABELS)[0]
    ]
    prompt = "generate a description of an image of " + ", ".join(top_labels)
    input_text = tokenizer.encode(prompt, return_tensors="tf")
    output = model.generate(
        input_ids=input_text,
        attention_mask=tf.ones(input_text.shape),
        max_length=50,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)


# Define function to create the web application using Gradio
def image_generation(text_input, image_file):
    """Produce an image from an uploaded image if present, else from text.

    Args:
        text_input: Text prompt; used only when no image is supplied.
        image_file: Uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        The generated ``PIL.Image.Image``.
    """
    if image_file is None:
        # Generate image from user input text
        return generate_image(text_input)

    # Generate text description of uploaded image, then an image from it
    description = generate_description(image_file)
    return generate_image(description)


# Define Gradio interface
inputs = [
    gr.inputs.Textbox(label="Input text"),
    # optional=True lets the form be submitted without an upload, matching
    # the "(optional)" label.
    gr.inputs.Image(label="Upload an image (optional)", optional=True),
]
outputs = gr.outputs.Image(label="Generated Image")

gr.Interface(
    fn=image_generation,
    inputs=inputs,
    outputs=outputs,
    title="Image Generation from Text",
    description="Generate high-quality images from text descriptions.",
    theme="default",
    layout="vertical",
    examples=[
        ["a red sports car on a mountain road"],
        ["a cute puppy"],
        ["an elegant woman with a hat and a scarf"],
        ["a scenic beach with palm trees and blue water"],
        ["a golden retriever sitting on a couch"],
        ["a delicious pizza with pepperoni and cheese"],
        ["a futuristic city with tall buildings and flying cars"],
        ["an adorable kitten playing with a ball of yarn"],
    ],
).launch(debug=True)