"""Streamlit live-vision assistant: capture/upload an image and analyze it
with an OpenRouter-hosted vision model, streaming the answer to the page."""

import base64
import io
import os

import cv2
import numpy as np
import streamlit as st
from openai import OpenAI
from PIL import Image

# Configure app
st.set_page_config(
    page_title="AI Vision Assistant",
    page_icon="🔍",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS (keep your existing CSS here)
st.markdown("""
""", unsafe_allow_html=True)

# App title and description
st.title("🔍 Optimus Alpha | Live Vision Assistant")


@st.cache_resource
def get_client():
    """Return a cached OpenAI client configured for the OpenRouter API.

    SECURITY: an OpenRouter API key was previously hard-coded here, which
    means it is leaked in version control — revoke that key. The key is now
    read from Streamlit secrets (``OPENROUTER_API_KEY``) with an environment
    variable fallback, so no credential lives in the source.
    """
    api_key = st.secrets.get(
        "OPENROUTER_API_KEY",
        os.getenv("OPENROUTER_API_KEY", ""),
    )
    return OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=api_key,
    )


# ===== Live Camera Section =====
st.subheader("Live Camera Feed")
run_camera = st.checkbox("Enable Camera", value=False)
FRAME_WINDOW = st.empty()
captured_image = None  # RGB numpy frame once the user clicks "Capture Image"

if run_camera:
    cap = cv2.VideoCapture(0)
    try:
        capture_button = st.button("Capture Image")
        stop_button = st.button("Stop Camera")

        if stop_button:
            run_camera = False
            # st.experimental_rerun() was deprecated/removed; st.rerun() is
            # the supported replacement. It raises internally, so the
            # finally block below still releases the camera.
            st.rerun()

        while run_camera:
            ret, frame = cap.read()
            if not ret:
                st.error("Failed to access camera")
                break

            # OpenCV delivers BGR; Streamlit/PIL expect RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            FRAME_WINDOW.image(frame)

            if capture_button:
                captured_image = frame
                run_camera = False
                break
    finally:
        # Release the device on every exit path (stop, capture, or a failed
        # read) — the original leaked the capture handle on the error break.
        cap.release()
else:
    FRAME_WINDOW.info("Camera is currently off")

# ===== Image Processing Section =====
col1, col2 = st.columns([1, 2])

with col1:
    st.subheader("Image Source")

    # Option to use captured image or upload
    if captured_image is not None:
        st.image(captured_image, caption="Captured Image", width=300)
        use_captured = True
    else:
        use_captured = False

    uploaded_file = st.file_uploader(
        "Or upload an image",
        type=["jpg", "jpeg", "png"],
        disabled=use_captured,
    )

    # Determine which image to use
    if use_captured:
        image = Image.fromarray(captured_image)
    elif uploaded_file:
        image = Image.open(uploaded_file)
    else:
        image = None

with col2:
    st.subheader("AI Analysis")
    user_prompt = st.text_input(
        "Your question about the image:",
        placeholder="e.g. 'What objects do you see?' or 'Explain this diagram'",
        key="user_prompt",
    )

    if st.button("Analyze", type="primary") and image:
        try:
            # Convert image to base64. JPEG has no alpha channel, so force
            # RGB first — otherwise RGBA/P-mode PNG uploads raise OSError.
            buffered = io.BytesIO()
            image.convert("RGB").save(buffered, format="JPEG")
            image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

            # Prepare messages
            messages = [
                {
                    "role": "system",
                    "content": """You are a real-time vision assistant. Analyze the current camera feed or uploaded image and: 1. Identify objects, people, text clearly 2. Answer follow-up questions precisely 3. Format responses with bullet points 4. Highlight urgent/important findings""",
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": user_prompt if user_prompt else "Describe what you see in detail",
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image_base64}"
                            },
                        },
                    ],
                },
            ]

            # Stream the response, re-rendering the accumulated text on
            # every received chunk.
            response_container = st.empty()
            full_response = ""
            client = get_client()
            stream = client.chat.completions.create(
                model="openrouter/optimus-alpha",
                messages=messages,
                stream=True,
            )
            for chunk in stream:
                if chunk.choices[0].delta.content is not None:
                    full_response += chunk.choices[0].delta.content
                    response_container.markdown(f"""
{full_response}
""", unsafe_allow_html=True)
        except Exception as e:
            # Surface any capture/encode/API failure to the UI rather than
            # crashing the script run.
            st.error(f"Error: {str(e)}")

# Sidebar (keep your existing sidebar)
with st.sidebar:
    st.image("blob.png", width=200)
    st.markdown("""
*Powered by OpenRouter*
""")
    st.markdown("---")
    st.markdown("Made with ❤️ by Koshur AI")