|
from PIL import Image |
|
from io import BytesIO |
|
import base64 |
|
import os |
|
from groq import Groq |
|
import gradio as gr |
|
|
|
# The Groq credential is taken from the GROQ_API_KEY environment variable;
# the lookup yields None when the variable is not set.
api_key = os.environ.get("GROQ_API_KEY")

# Module-level Groq client built from that key.
client = Groq(api_key=api_key)
|
|
|
def encode_image(image_path, base_height=512):
    """Load an image, scale it to a fixed height, and return it as base64 JPEG.

    The image is converted to RGB, resized with LANCZOS resampling so that
    its height equals ``base_height`` while keeping the aspect ratio,
    re-encoded as JPEG in memory, and finally base64-encoded.

    Args:
        image_path: Path of the image file to read.
        base_height: Target height in pixels. Defaults to 512.

    Returns:
        The base64-encoded JPEG as a ``str``, or ``None`` when the file is
        missing or any other error occurs (the error is printed).
    """
    try:
        # Use the context manager so the underlying file handle is released
        # as soon as the pixel data has been copied by ``convert``.
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        width, height = image.size
        h_percent = base_height / float(height)
        # Clamp to 1 px: a very tall, narrow image would otherwise round
        # down to a zero width, which ``resize`` rejects.
        w_size = max(1, int(float(width) * h_percent))
        image = image.resize((w_size, base_height), Image.LANCZOS)

        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")
    except FileNotFoundError:
        print(f"Error: The file {image_path} was not found.")
        return None
    except Exception as e:
        # Best-effort contract: report the problem and signal failure with
        # None instead of propagating to the UI callback.
        print(f"Error: {e}")
        return None
|
|
|
# Instruction sent to the vision model together with the uploaded image.
# Section separators are plain ASCII hyphens: the previous literal carried a
# mis-encoded character in place of the dash.
IMAGE_PROMPT = (
    "Describe the following image in exhaustive detail, covering all visual "
    "elements, context, and potential interpretations. Break down the "
    "description into clear sections if needed. Include: "
    "Overview - Summarize the main subject and setting. "
    "Composition & Layout - Describe the arrangement of elements, perspective, and framing. "
    "Objects & Subjects - List and detail every visible object, person, animal, or significant item. "
    "Colors & Lighting - Note dominant colors, contrasts, shadows, highlights, and overall mood. "
    "Textures & Patterns - Describe surface details, materials, and repetitive designs. "
    "Text & Symbols - Mention any visible text, logos, or symbolic elements. "
    "Atmosphere & Emotion - Interpret the tone, mood, and emotional impact. "
    "Possible Context - Suggest where/when the image might be set or its purpose. "
    "Unusual or Notable Details - Highlight anything particularly striking or out of place. "
    "Be precise, objective, and thorough. If the image is abstract or ambiguous, "
    "provide multiple plausible interpretations. "
    "Please also only use plain text without newlines to answer. "
    "Now, take this data and kind of compress it using the english language so its VERY short. "
    "Of course, only say the short version. No saying the detailed description. Sorry. Thanks!"
)


def _build_messages(base64_image):
    """Return the single user message pairing the prompt with the image data URL."""
    return [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": IMAGE_PROMPT},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    },
                },
            ],
        }
    ]


def feifeichat(image):
    """Stream a short description of the uploaded image.

    This is a generator: every yielded value is the full text accumulated so
    far, so a consumer can simply display the latest value.

    Args:
        image: Path of the image file, or ``None`` when nothing was uploaded.

    Yields:
        ``"Please upload a photo"`` when ``image`` is ``None``;
        ``"Error processing image"`` when the image cannot be encoded;
        otherwise the growing response text as chunks arrive; or
        ``"An error occurred while processing your request"`` if anything
        raises along the way (the exception is printed).
    """
    try:
        if image is None:
            yield "Please upload a photo"
            return

        base64_image = encode_image(image)
        if not base64_image:
            yield "Error processing image"
            return

        response = client.chat.completions.create(
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            messages=_build_messages(base64_image),
            stream=True,
        )

        partial_message = ""
        for chunk in response:
            # Skip chunks that carry no choices or no text delta.
            if chunk.choices and chunk.choices[0].delta.content:
                partial_message += chunk.choices[0].delta.content
                yield partial_message
    except Exception as e:
        # Boundary handler for the UI callback: report the failure and show a
        # generic message instead of letting the exception escape.
        print(f"Error: {e}")
        yield "An error occurred while processing your request"
|
|
|
# Single-tab Gradio interface: an image input, a textbox for the resulting
# prompt, and a button that triggers feifeichat.
with gr.Blocks() as demo:
    gr.Markdown("Image To Flux Prompt")

    with gr.Tab("Image To Flux Prompt"):
        # type="filepath" hands the callback a path on disk, which is what
        # feifeichat forwards to encode_image.
        input_img = gr.Image(type="filepath", label="Input Picture", height=320)
        output_text = gr.Textbox(label="Flux Prompt")
        submit_btn = gr.Button("Submit")

    # feifeichat is a generator; its yielded strings go to output_text.
    submit_btn.click(feifeichat, inputs=input_img, outputs=output_text)

demo.launch()