import base64
import os
import uuid

import cv2
import numpy as np
import streamlit as st
from gtts import gTTS
from openai import OpenAI
from PIL import Image

# --- Configuration ---
# Read the OpenRouter key from the environment rather than hardcoding a secret in source.
API_KEY = os.getenv("OPENROUTER_API_KEY")

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=API_KEY
)

# --- Helper Functions ---
def describe_image(image_url):
    """Ask the vision model to describe the image (a hosted URL or a base64 data URL)."""
    response = client.chat.completions.create(
        model="opengvlab/internvl3-14b:free",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Describe this image clearly, including objects, scene, and any "
                            "visible text. Also warn about potential hazards like wet floors, "
                            "stairs, and obstacles."
                        ),
                    },
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }
        ],
    )
    return response.choices[0].message.content


def speak(text, filename=None):
    """Convert text to speech with gTTS and return the path of the saved MP3."""
    if not filename:
        filename = f"audio_{uuid.uuid4()}.mp3"
    tts = gTTS(text=text, lang='en')
    tts.save(filename)
    return filename


def image_to_array(uploaded_image):
    """Load an uploaded/captured image as an RGB NumPy array."""
    img = Image.open(uploaded_image).convert('RGB')  # Ensure 3 channels
    return np.array(img)


def array_to_base64(img_array):
    """Encode an RGB array as a JPEG base64 data URL."""
    # OpenCV expects BGR channel order, so convert before encoding.
    bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    _, buffer = cv2.imencode('.jpg', bgr)
    encoded = base64.b64encode(buffer.tobytes()).decode('utf-8')
    return f"data:image/jpeg;base64,{encoded}"


# --- Streamlit UI ---
st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
st.title("👁️ AI Visual Assistant for the Blind")
st.markdown("Use your **camera** to capture the world around you.")

st.subheader("📸 Take a Picture")
camera_image = st.camera_input("Capture a frame from your camera")

if camera_image is not None:
    st.image(camera_image, caption="Captured Frame", use_column_width=True)

    with st.spinner("Analyzing the scene..."):
        # Send the captured frame itself as a base64 data URL; OpenAI-compatible
        # vision endpoints accept data URLs in the image_url field, so no
        # external hosting or temporary file is needed.
        image_url = array_to_base64(image_to_array(camera_image))
        description = describe_image(image_url)

    st.subheader("📝 Description")
    st.write(description)

    st.subheader("🔊 Audio Narration")
    audio_file = speak(description)
    with open(audio_file, 'rb') as f:
        audio_bytes = f.read()
    st.audio(audio_bytes, format='audio/mp3')

    # Cleanup
    os.remove(audio_file)

st.markdown("---")
st.markdown("*Built with 💡 using Streamlit, OpenRouter, and gTTS.*")
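
# Running the app locally (a minimal sketch; the filename app.py and the
# OPENROUTER_API_KEY variable name are assumptions matching the configuration above):
#   pip install streamlit openai pillow gTTS numpy opencv-python-headless  # or opencv-python
#   export OPENROUTER_API_KEY="sk-or-v1-..."
#   streamlit run app.py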