Spaces:

WillemVH
/

Image_To_Text_Description

Running

App Files Files Community

Image_To_Text_Description / app.py

WillemVH

Update app.py

802588f verified 2 months ago

raw

history blame contribute delete

4.04 kB

	from PIL import Image
	from io import BytesIO
	import base64
	import os
	from groq import Groq
	import gradio as gr

	# Read the Groq API key from the environment and build the module-wide
	# client that the chat handler uses for its completion requests.
	api_key = os.environ.get("GROQ_API_KEY")
	client = Groq(api_key=api_key)

	def encode_image(image_path):
	    """Load an image, scale it to a height of 512 px and return base64 JPEG text.

	    The image is converted to RGB, resized with LANCZOS resampling so that its
	    height becomes 512 px (the width follows the aspect ratio, so images
	    shorter than 512 px are scaled up as well), re-encoded as JPEG in memory
	    and base64-encoded.

	    Args:
	        image_path: Path of the image file to read.

	    Returns:
	        The base64 encoding of the JPEG bytes as a ``str``, or ``None`` when
	        the file is missing or any other error occurs (the error is printed).
	    """
	    base_height = 512
	    try:
	        # Open through a context manager so the underlying file handle is
	        # always released; ``convert`` returns an independent in-memory copy
	        # that stays valid after the source image is closed.
	        with Image.open(image_path) as source:
	            image = source.convert("RGB")

	        # Scale to the target height while keeping the aspect ratio. The
	        # width is clamped to 1 px so that a very tall, narrow image does not
	        # round down to a zero-width target size.
	        h_percent = base_height / float(image.size[1])
	        w_size = max(1, int(float(image.size[0]) * h_percent))
	        image = image.resize((w_size, base_height), Image.LANCZOS)

	        # Serialise to an in-memory JPEG and base64-encode the raw bytes.
	        buffered = BytesIO()
	        image.save(buffered, format="JPEG")
	        return base64.b64encode(buffered.getvalue()).decode("utf-8")
	    except FileNotFoundError:
	        print(f"Error: The file {image_path} was not found.")
	        return None
	    except Exception as e:
	        # Best-effort boundary: any decode/resize/encode failure is reported
	        # to the caller as None instead of being raised.
	        print(f"Error: {e}")
	        return None

	def feifeichat(image):
	    """Stream a short description of the given picture from the Groq model.

	    Generator: the picture is encoded with ``encode_image``, sent together
	    with a fixed instruction prompt to the chat-completions endpoint in
	    streaming mode, and the reply accumulated so far is yielded after every
	    chunk that carries text.

	    Args:
	        image: Value handed over by the image input (``None`` when nothing
	            was uploaded); it is passed on to ``encode_image``.

	    Yields:
	        ``"Please upload a photo"`` when ``image`` is ``None``,
	        ``"Error processing image"`` when encoding fails, otherwise the
	        growing reply text. Any exception is printed and replaced by a
	        generic error message.
	    """
	    try:
	        if image is None:
	            yield "Please upload a photo"
	            return

	        encoded = encode_image(image)
	        if not encoded:
	            yield "Error processing image"
	            return

	        # Groq expects a multi-part user message: one text part carrying the
	        # instruction and one image part carrying a base64 data URL.
	        instruction = "Describe the following image in exhaustive detail, covering all visual elements, context, and potential interpretations. Break down the description into clear sections if needed. Include: Overview – Summarize the main subject and setting. Composition & Layout – Describe the arrangement of elements, perspective, and framing. Objects & Subjects – List and detail every visible object, person, animal, or significant item. Colors & Lighting – Note dominant colors, contrasts, shadows, highlights, and overall mood. Textures & Patterns – Describe surface details, materials, and repetitive designs. Text & Symbols – Mention any visible text, logos, or symbolic elements. Atmosphere & Emotion – Interpret the tone, mood, and emotional impact. Possible Context – Suggest where/when the image might be set or its purpose. Unusual or Notable Details – Highlight anything particularly striking or out of place. Be precise, objective, and thorough. If the image is abstract or ambiguous, provide multiple plausible interpretations. Please also only use plain text without newlines to answer. Now, take this data and kind of compress it using the english language so its VERY short. Of course, only say the short version. No saying the detailed description. Sorry. Thanks!"
	        text_part = {"type": "text", "text": instruction}
	        image_part = {
	            "type": "image_url",
	            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
	        }

	        stream = client.chat.completions.create(
	            model="meta-llama/llama-4-scout-17b-16e-instruct",
	            messages=[{"role": "user", "content": [text_part, image_part]}],
	            stream=True,
	        )

	        # Skip chunks without choices or without text; after every chunk that
	        # does carry text, emit the whole reply received so far.
	        reply_so_far = ""
	        for piece in stream:
	            if not piece.choices:
	                continue
	            delta_text = piece.choices[0].delta.content
	            if not delta_text:
	                continue
	            reply_so_far += delta_text
	            yield reply_so_far

	    except Exception as err:
	        print(f"Error: {err}")
	        yield "An error occurred while processing your request"

	# Single-tab UI: an image input, a text output and a submit button whose
	# click sends the uploaded picture through feifeichat.
	with gr.Blocks() as demo:
	    gr.Markdown("Image To Flux Prompt")
	    with gr.Tab("Image To Flux Prompt"):
	        # type="filepath" makes the handler receive the upload as a file path.
	        input_img = gr.Image(type="filepath", label="Input Picture", height=320)
	        output_text = gr.Textbox(label="Flux Prompt")
	        submit_btn = gr.Button("Submit")

	    # feifeichat is a generator; its yielded values go to the output textbox.
	    submit_btn.click(feifeichat, inputs=input_img, outputs=output_text)

	demo.launch()