Spaces:

dixisouls
/

VQA

Sleeping

App Files Files Community

dixisouls committed on 23 days ago

Commit

eacbbc9

1 Parent(s): cd7dd06

Initial Commit

Browse files

Files changed (12) hide show

app/__init__.py +3 -0
app/config.py +49 -0
app/main.py +68 -0
app/models/__init__.py +3 -0
app/models/vqa_model.py +83 -0
app/routers/__init__.py +3 -0
app/routers/vqa.py +185 -0
app/services/__init__.py +3 -0
app/services/model_service.py +177 -0
app/services/session_service.py +166 -0
app/utils/__init__.py +3 -0
app/utils/image_utils.py +81 -0

app/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@

+"""
+Package initialization for app
+"""

app/config.py ADDED Viewed

	@@ -0,0 +1,49 @@

+"""
+Configuration settings for the application
+"""
+import os
+from pydantic_settings import BaseSettings
+from dotenv import load_dotenv
+from pathlib import Path
+# Load .env file if it exists
+load_dotenv()
+class Settings(BaseSettings):
+    """Application settings"""
+    # App settings
+    APP_NAME: str = "VizWiz VQA API"
+    DEBUG: bool = os.getenv("DEBUG", "False").lower() == "true"
+    # Model settings
+    MODEL_PATH: str = os.getenv("MODEL_PATH", "./models/vqa_model_best.pt")
+    TEXT_MODEL: str = os.getenv("TEXT_MODEL", "bert-base-uncased")
+    VISION_MODEL: str = os.getenv("VISION_MODEL", "google/vit-base-patch16-384")
+    HUGGINGFACE_TOKEN: str = os.getenv("HUGGINGFACE_TOKEN", "")
+    # Hugging Face model repository settings
+    HF_MODEL_REPO: str = os.getenv("HF_MODEL_REPO", "dixisouls/VQA")
+    HF_MODEL_FILENAME: str = os.getenv("HF_MODEL_FILENAME", "model.pt")
+    # API settings
+    MAX_UPLOAD_SIZE: int = 10 * 1024 * 1024  # 10MB
+    # Storage settings
+    UPLOAD_DIR: str = os.getenv("UPLOAD_DIR", "./uploads")
+    MAX_SESSION_AGE: int = 60 * 30  # 30 minutes
+    # CORS settings
+    ALLOW_ORIGINS: list[str] = ["*"]
+    class Config:
+        env_file = ".env"
+        case_sensitive = True
+# Global settings instance
+# Built once at import time; other modules import this object
+settings = Settings()
+# Ensure upload directory exists
+# (runs as an import side effect, including any missing parent directories)
+Path(settings.UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
+# Ensure models directory exists
+# (the directory part of MODEL_PATH, i.e. where the checkpoint file lives)
+Path(os.path.dirname(settings.MODEL_PATH)).mkdir(parents=True, exist_ok=True)

app/main.py ADDED Viewed

	@@ -0,0 +1,68 @@

+"""
+Main FastAPI application entry point
+"""
+import os
+import logging
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from contextlib import asynccontextmanager
+from app.routers import vqa
+from app.services.model_service import ModelService
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+# Initialize model service in a lifespan context manager
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Load model on startup
+    logger.info("Loading VQA model...")
+    app.state.model_service = ModelService()
+    app.state.model_service.load_model()
+    logger.info("VQA model loaded successfully")
+    yield
+    # Clean up resources on shutdown
+    logger.info("Shutting down...")
+# Initialize FastAPI app
+# The lifespan handler is what attaches the model service to app.state
+app = FastAPI(
+    title="VizWiz VQA API",
+    description="API for Visual Question Answering on images",
+    version="1.0.0",
+    lifespan=lifespan
+)
+# Add CORS middleware
+# NOTE(review): every origin, method and header is allowed together with
+# credentials; confirm this is intended outside development
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allow all origins in development
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Mount static files directory if it exists
+# The directory is looked up as "static" one level above this file's package
+static_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "static")
+if os.path.exists(static_dir):
+    app.mount("/static", StaticFiles(directory=static_dir), name="static")
+# Include routers
+app.include_router(vqa.router)
+# Health check endpoint
+@app.get("/health")
+async def health_check():
+    """Health check endpoint for monitoring the service"""
+    if not hasattr(app.state, "model_service") or not app.state.model_service.is_model_loaded():
+        raise HTTPException(status_code=503, detail="Model not loaded")
+    return {"status": "healthy", "model_loaded": True}
+# Direct execution: serve the app with uvicorn on all interfaces, port 8000,
+# with auto-reload enabled
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("app.main:app", host="0.0.0.0", port=8000, reload=True)

app/models/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@

+"""
+Package initialization for app.models
+"""

app/models/vqa_model.py ADDED Viewed

	@@ -0,0 +1,83 @@

+"""
+Model implementation for VQA
+"""
+import os
+import json
+import torch
+import torch.nn as nn
+from transformers import AutoTokenizer, AutoModel, AutoConfig, ViTImageProcessor, ViTModel
+class VQAModel(nn.Module):
+    """Vision-Language model for Visual Question Answering"""
+    def __init__(self, config, num_answers):
+        super(VQAModel, self).__init__()
+        self.config = config
+        self.num_answers = num_answers
+        # Vision encoder
+        self.vision_config = AutoConfig.from_pretrained(config['vision_model'])
+        self.vision_encoder = ViTModel.from_pretrained(config['vision_model'])
+        # Text encoder
+        self.text_config = AutoConfig.from_pretrained(config['text_model'])
+        self.text_encoder = AutoModel.from_pretrained(config['text_model'])
+        # Projection layers
+        self.vision_projection = nn.Linear(
+            self.vision_config.hidden_size, config['hidden_size']
+        )
+        self.text_projection = nn.Linear(
+            self.text_config.hidden_size, config['hidden_size']
+        )
+        # Multimodal fusion
+        self.fusion = nn.Sequential(
+            nn.Linear(2 * config['hidden_size'], config['hidden_size']),
+            nn.LayerNorm(config['hidden_size']),
+            nn.GELU(),
+            nn.Dropout(config['dropout'])
+        )
+        # Answer prediction
+        self.classifier = nn.Sequential(
+            nn.Linear(config['hidden_size'], config['hidden_size']),
+            nn.LayerNorm(config['hidden_size']),
+            nn.GELU(),
+            nn.Dropout(config['dropout']),
+            nn.Linear(config['hidden_size'], num_answers)
+        )
+        # Answerable prediction
+        self.answerable_classifier = nn.Sequential(
+            nn.Linear(config['hidden_size'], config['hidden_size'] // 2),
+            nn.LayerNorm(config['hidden_size'] // 2),
+            nn.GELU(),
+            nn.Dropout(config['dropout']),
+            nn.Linear(config['hidden_size'] // 2, 2)  # Binary classification
+        )
+    def forward(self, image_encodings, question_encodings):
+        """Forward pass of the model"""
+        # Process image
+        vision_outputs = self.vision_encoder(**image_encodings)
+        vision_embeds = vision_outputs.last_hidden_state[:, 0]  # CLS token
+        vision_embeds = self.vision_projection(vision_embeds)
+        # Process text
+        text_outputs = self.text_encoder(**question_encodings)
+        text_embeds = text_outputs.last_hidden_state[:, 0]  # CLS token
+        text_embeds = self.text_projection(text_embeds)
+        # Combine modalities
+        multimodal_features = torch.cat([vision_embeds, text_embeds], dim=1)
+        fused_features = self.fusion(multimodal_features)
+        # Predict answers and answerable
+        answer_logits = self.classifier(fused_features)
+        answerable_logits = self.answerable_classifier(fused_features)
+        return {
+            'answer_logits': answer_logits,
+            'answerable_logits': answerable_logits,
+            'fused_features': fused_features
+        }

app/routers/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@

+"""
+Package initialization for app.routers
+"""

app/routers/vqa.py ADDED Viewed

	@@ -0,0 +1,185 @@

+"""
+API router for VQA endpoints
+"""
+import logging
+from typing import List, Optional
+from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, BackgroundTasks, Request
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from app.services.session_service import SessionService
+from app.services.model_service import ModelService
+logger = logging.getLogger(__name__)
+# Initialize router
+router = APIRouter(
+    prefix="/api/vqa",
+    tags=["vqa"],
+)
+# Models for request/response
+class QuestionRequest(BaseModel):
+    """Request body for asking a question about a previously uploaded image"""
+    # ID of the session whose image the question refers to
+    session_id: str
+    # The question text passed to the model
+    question: str
+class AnswerResponse(BaseModel):
+    """Model for answer response (one prediction for one question)"""
+    # Predicted answer text
+    answer: str
+    # Score attached to the predicted answer
+    answer_confidence: float
+    # Whether the question was judged answerable
+    is_answerable: bool
+    # Score attached to the answerable/unanswerable decision
+    answerable_confidence: float
+class SessionHistoryItem(BaseModel):
+    """Model for session history item (one question/answer exchange)"""
+    # The question that was asked
+    question: str
+    # The answer that was returned for it
+    answer: AnswerResponse
+    # When the exchange was recorded, as a string
+    timestamp: str
+class SessionResponse(BaseModel):
+    """Model for session response (a session ID plus its question history)"""
+    # ID of the session being described
+    session_id: str
+    # Question/answer exchanges recorded for the session
+    history: List[SessionHistoryItem]
+# Dependency for services
+# A single SessionService instance, created when this module is imported and
+# used by every endpoint below
+session_service = SessionService()
+@router.post("/upload", response_model=dict)
+async def upload_image(
+    request: Request,
+    file: UploadFile = File(...),
+    background_tasks: BackgroundTasks = None
+):
+    """
+    Upload an image and create a new session
+    Args:
+        file (UploadFile): The image file to upload
+    Returns:
+        dict: The session ID
+    """
+    # Validate image file
+    if not file.content_type.startswith("image/"):
+        raise HTTPException(status_code=400, detail="File must be an image")
+    try:
+        # Create a new session
+        session_id = session_service.create_session(file)
+        return {"session_id": session_id}
+    except Exception as e:
+        logger.error(f"Error uploading image: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@router.post("/ask", response_model=AnswerResponse)
+async def ask_question(
+    request: Request,
+    question_request: QuestionRequest
+):
+    """
+    Ask a question about the uploaded image
+    Args:
+        question_request (QuestionRequest): The question request
+    Returns:
+        AnswerResponse: The answer
+    """
+    # Get the model service from app state
+    model_service = request.app.state.model_service
+    # Get the session
+    session = session_service.get_session(question_request.session_id)
+    if not session:
+        raise HTTPException(status_code=404, detail="Session not found or expired")
+    try:
+        # Make prediction
+        result = model_service.predict(session.image_path, question_request.question)
+        # Add to session history
+        session.add_question(question_request.question, result)
+        return result
+    except Exception as e:
+        logger.error(f"Error processing question: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@router.get("/session/{session_id}", response_model=SessionResponse)
+async def get_session(
+    request: Request,
+    session_id: str
+):
+    """
+    Get session information including question history
+    Args:
+        session_id (str): The session ID
+    Returns:
+        SessionResponse: The session information
+    """
+    # Get the session
+    session = session_service.get_session(session_id)
+    if not session:
+        raise HTTPException(status_code=404, detail="Session not found or expired")
+    return {
+        "session_id": session.session_id,
+        "history": session.questions
+    }
+@router.post("/session/{session_id}/complete")
+async def complete_session(
+    request: Request,
+    session_id: str
+):
+    """
+    Mark a session as complete and clean up resources
+    Args:
+        session_id (str): The session ID
+    Returns:
+        dict: Success message
+    """
+    # Check if session exists
+    session = session_service.get_session(session_id)
+    if not session:
+        raise HTTPException(status_code=404, detail="Session not found or expired")
+    # Complete the session (delete image but keep session data temporarily)
+    success = session_service.complete_session(session_id)
+    if success:
+        return {"message": "Session completed successfully, resources cleaned up"}
+    else:
+        raise HTTPException(status_code=500, detail="Failed to complete session")
+@router.delete("/session/{session_id}")
+async def reset_session(
+    request: Request,
+    session_id: str
+):
+    """
+    Reset (delete) a session to start fresh
+    Args:
+        session_id (str): The session ID
+    Returns:
+        dict: Success message
+    """
+    # Check if session exists
+    session = session_service.get_session(session_id)
+    if not session:
+        raise HTTPException(status_code=404, detail="Session not found or expired")
+    # Remove the session
+    session_service._remove_session(session_id)
+    return {"message": "Session reset successfully"}

app/services/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@

+"""
+Package initialization for app.services
+"""

app/services/model_service.py ADDED Viewed

	@@ -0,0 +1,177 @@

+"""
+Model service for handling VQA model operations
+"""
+import os
+import json
+import logging
+import torch
+from PIL import Image
+from transformers import AutoTokenizer, ViTImageProcessor
+from huggingface_hub import hf_hub_download, login
+from app.config import settings
+from app.models.vqa_model import VQAModel
+logger = logging.getLogger(__name__)
+class ModelService:
+    """Service for loading and running the VQA model"""
+    def __init__(self):
+        """Initialize the model service"""
+        self.model = None
+        self.processor = None
+        self.tokenizer = None
+        self.config = None
+        self.answer_vocab = None
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        logger.info(f"Using device: {self.device}")
+        # Try to login to Hugging Face if token is provided
+        if settings.HUGGINGFACE_TOKEN:
+            try:
+                login(token=settings.HUGGINGFACE_TOKEN)
+                logger.info("Successfully logged in to Hugging Face Hub")
+            except Exception as e:
+                logger.error(f"Error logging in to Hugging Face Hub: {e}")
+    def _check_model_exists(self):
+        """Check if the model file exists locally"""
+        return os.path.exists(settings.MODEL_PATH)
+    def _download_model_from_hub(self):
+        """Download the model from Hugging Face Hub if not present locally"""
+        try:
+            # Create the directory if it doesn't exist
+            os.makedirs(os.path.dirname(settings.MODEL_PATH), exist_ok=True)
+            logger.info(f"Downloading model from {settings.HF_MODEL_REPO} to {settings.MODEL_PATH}")
+            # Download the model file from Hugging Face
+            hf_hub_download(
+                repo_id=settings.HF_MODEL_REPO,
+                filename=settings.HF_MODEL_FILENAME,
+                local_dir=os.path.dirname(settings.MODEL_PATH),
+                local_dir_use_symlinks=False
+            )
+            # Rename the downloaded file to match the expected path if needed
+            downloaded_path = os.path.join(os.path.dirname(settings.MODEL_PATH), settings.HF_MODEL_FILENAME)
+            if downloaded_path != settings.MODEL_PATH:
+                os.rename(downloaded_path, settings.MODEL_PATH)
+            logger.info(f"Model downloaded successfully to {settings.MODEL_PATH}")
+            return True
+        except Exception as e:
+            logger.error(f"Error downloading model from Hugging Face Hub: {e}")
+            return False
+    def load_model(self):
+        """Load the VQA model from the specified path or download it if not present"""
+        try:
+            # Check if model exists locally
+            if not self._check_model_exists():
+                logger.info(f"Model not found at {settings.MODEL_PATH}")
+                # Download the model from Hugging Face Hub
+                if not self._download_model_from_hub():
+                    logger.error("Failed to download model from Hugging Face Hub")
+                    return False
+            logger.info(f"Loading model from {settings.MODEL_PATH}")
+            checkpoint = torch.load(settings.MODEL_PATH, map_location=self.device)
+            # Extract configuration
+            self.config = checkpoint['config']
+            # Get vocabulary
+            if 'answer_vocab' in checkpoint:
+                self.answer_vocab = checkpoint['answer_vocab']
+                logger.info("Using vocabulary from model checkpoint")
+            else:
+                logger.error("Error: No vocabulary found in model checkpoint")
+                raise ValueError("No vocabulary found in model checkpoint")
+            # Initialize model
+            self.model = VQAModel(self.config, len(self.answer_vocab['answer_to_idx']))
+            self.model.load_state_dict(checkpoint['model_state_dict'])
+            self.model.to(self.device)
+            self.model.eval()
+            # Initialize preprocessors
+            self.processor = ViTImageProcessor.from_pretrained(self.config['vision_model'])
+            self.tokenizer = AutoTokenizer.from_pretrained(self.config['text_model'])
+            logger.info("Model loaded successfully")
+            return True
+        except Exception as e:
+            logger.error(f"Error loading model: {e}")
+            return False
+    def is_model_loaded(self):
+        """Check if the model is loaded"""
+        return self.model is not None and self.processor is not None and self.tokenizer is not None
+    def predict(self, image_path, question):
+        """
+        Make a prediction for the given image and question
+        Args:
+            image_path (str): Path to the image file
+            question (str): Question about the image
+        Returns:
+            dict: Prediction results
+        """
+        if not self.is_model_loaded():
+            logger.error("Model not loaded")
+            raise RuntimeError("Model not loaded")
+        try:
+            # Preprocess image
+            image = Image.open(image_path).convert('RGB')
+            image_encoding = self.processor(images=image, return_tensors="pt")
+            image_encoding = {k: v.to(self.device) for k, v in image_encoding.items()}
+            # Preprocess question
+            question_encoding = self.tokenizer(
+                question,
+                padding='max_length',
+                truncation=True,
+                max_length=128,
+                return_tensors='pt'
+            )
+            question_encoding = {k: v.to(self.device) for k, v in question_encoding.items()}
+            # Get predictions
+            with torch.no_grad():
+                outputs = self.model(image_encoding, question_encoding)
+                answer_logits = outputs['answer_logits']
+                answerable_logits = outputs['answerable_logits']
+                answer_idx = torch.argmax(answer_logits, dim=1).item()
+                answerable_idx = torch.argmax(answerable_logits, dim=1).item()
+                # Convert string index to int for dictionary lookup
+                answer = self.answer_vocab['idx_to_answer'][str(answer_idx)]
+                is_answerable = bool(answerable_idx)
+                # Get confidence scores
+                answer_probs = torch.softmax(answer_logits, dim=1)[0]
+                answerable_probs = torch.softmax(answerable_logits, dim=1)[0]
+                answer_confidence = float(answer_probs[answer_idx].item())
+                answerable_confidence = float(answerable_probs[answerable_idx].item())
+            return {
+                'answer': answer,
+                'answer_confidence': answer_confidence,
+                'is_answerable': is_answerable,
+                'answerable_confidence': answerable_confidence
+            }
+        except Exception as e:
+            logger.error(f"Error during prediction: {e}")
+            raise

app/services/session_service.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import os
+import uuid
+import logging
+import time
+from datetime import datetime, timedelta
+from typing import Dict, Optional, Tuple, List
+from fastapi import UploadFile
+from pathlib import Path
+from app.config import settings
+logger = logging.getLogger(__name__)
+class Session:
+    """Object representing a user session"""
+    def __init__(self, session_id: str, image_path: str):
+        self.session_id = session_id
+        self.image_path = image_path
+        self.created_at = datetime.now()
+        self.last_accessed = datetime.now()
+        self.questions = []  # History of questions for this session
+    def is_expired(self) -> bool:
+        """Check if the session has expired"""
+        expiry_time = self.last_accessed + timedelta(seconds=settings.MAX_SESSION_AGE)
+        return datetime.now() > expiry_time
+    def update_access_time(self):
+        """Update the last accessed time"""
+        self.last_accessed = datetime.now()
+    def add_question(self, question: str, answer: Dict):
+        """Add a question and its answer to the session history"""
+        self.questions.append({
+            "question": question,
+            "answer": answer,
+            "timestamp": datetime.now().isoformat()
+        })
+        self.update_access_time()
+class SessionService:
+    """Service for managing user sessions"""
+    def __init__(self):
+        """Initialize the session service"""
+        self.sessions: Dict[str, Session] = {}
+        self.ensure_upload_dir()
+        # Start a background cleanup task
+        self._cleanup_sessions()
+    def ensure_upload_dir(self):
+        """Ensure the upload directory exists"""
+        os.makedirs(settings.UPLOAD_DIR, exist_ok=True)
+    def create_session(self, file: UploadFile) -> str:
+        """
+        Create a new session for the user
+        Args:
+            file (UploadFile): The uploaded image file
+        Returns:
+            str: The session ID
+        """
+        # Generate a unique session ID
+        session_id = str(uuid.uuid4())
+        # Create a unique filename
+        timestamp = int(time.time())
+        file_extension = Path(file.filename).suffix
+        filename = f"{timestamp}_{session_id}{file_extension}"
+        # Save the uploaded file
+        file_path = os.path.join(settings.UPLOAD_DIR, filename)
+        with open(file_path, "wb") as f:
+            f.write(file.file.read())
+        # Create and store the session
+        self.sessions[session_id] = Session(session_id, file_path)
+        logger.info(f"Created new session {session_id} with image {file_path}")
+        return session_id
+    def get_session(self, session_id: str) -> Optional[Session]:
+        """
+        Get a session by ID
+        Args:
+            session_id (str): The session ID
+        Returns:
+            Optional[Session]: The session, or None if not found or expired
+        """
+        session = self.sessions.get(session_id)
+        if session is None:
+            return None
+        if session.is_expired():
+            self._remove_session(session_id)
+            return None
+        session.update_access_time()
+        return session
+    def complete_session(self, session_id: str) -> bool:
+        """
+        Mark a session as complete and remove its resources
+        Args:
+            session_id (str): The session ID
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        session = self.sessions.get(session_id)
+        if not session:
+            logger.warning(f"Cannot complete nonexistent session: {session_id}")
+            return False
+        logger.info(f"Completing session {session_id}")
+        try:
+            # Remove the image file but keep session data temporarily for any final operations
+            if session.image_path and os.path.exists(session.image_path):
+                os.remove(session.image_path)
+                logger.info(f"Removed image file for completed session {session.image_path}")
+                # Set the image path to None to indicate it's been removed
+                session.image_path = None
+                return True
+            return True  # No image to remove or already removed
+        except Exception as e:
+            logger.error(f"Error removing image file during session completion: {e}")
+            return False
+    def _remove_session(self, session_id: str):
+        """
+        Remove a session and its associated file
+        Args:
+            session_id (str): The session ID
+        """
+        session = self.sessions.pop(session_id, None)
+        if session:
+            try:
+                # Remove the image file
+                if session.image_path and os.path.exists(session.image_path):
+                    os.remove(session.image_path)
+                    logger.info(f"Removed session file {session.image_path}")
+            except Exception as e:
+                logger.error(f"Error removing session file: {e}")
+    def _cleanup_sessions(self):
+        """Clean up expired sessions"""
+        expired_sessions = [
+            session_id for session_id, session in self.sessions.items()
+            if session.is_expired()
+        ]
+        for session_id in expired_sessions:
+            self._remove_session(session_id)
+        if expired_sessions:
+            logger.info(f"Cleaned up {len(expired_sessions)} expired sessions")

app/utils/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@

+"""
+Package initialization for app.utils
+"""

app/utils/image_utils.py ADDED Viewed

	@@ -0,0 +1,81 @@

+"""
+Utility functions for image processing
+"""
+import os
+import logging
+from PIL import Image
+import io
+import base64
+from typing import Tuple, Optional
+logger = logging.getLogger(__name__)
+def validate_image(image_path: str) -> bool:
+    """
+    Validate if a file is a valid image
+    Args:
+        image_path (str): Path to the image file
+    Returns:
+        bool: True if valid, False otherwise
+    """
+    try:
+        with Image.open(image_path) as img:
+            img.verify()
+        return True
+    except Exception as e:
+        logger.error(f"Image validation failed: {e}")
+        return False
+def resize_image(image_path: str, max_size: Tuple[int, int] = (1024, 1024)) -> Optional[str]:
+    """
+    Resize an image if it's larger than max_size
+    Args:
+        image_path (str): Path to the image file
+        max_size (Tuple[int, int]): Maximum width and height
+    Returns:
+        Optional[str]: Path to the resized image or None if failed
+    """
+    try:
+        with Image.open(image_path) as img:
+            # Only resize if the image is larger than max_size
+            if img.width > max_size[0] or img.height > max_size[1]:
+                # Calculate new size while maintaining aspect ratio
+                ratio = min(max_size[0] / img.width, max_size[1] / img.height)
+                new_size = (int(img.width * ratio), int(img.height * ratio))
+                # Resize the image
+                resized_img = img.resize(new_size, Image.LANCZOS)
+                # Save the resized image
+                resized_path = os.path.splitext(image_path)[0] + "_resized" + os.path.splitext(image_path)[1]
+                resized_img.save(resized_path)
+                return resized_path
+            # No need to resize
+            return image_path
+    except Exception as e:
+        logger.error(f"Image resizing failed: {e}")
+        return None
+def image_to_base64(image_path: str) -> Optional[str]:
+    """
+    Convert an image to base64 string
+    Args:
+        image_path (str): Path to the image file
+    Returns:
+        Optional[str]: Base64 encoded image string or None if failed
+    """
+    try:
+        with open(image_path, "rb") as image_file:
+            encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
+            return encoded_string
+    except Exception as e:
+        logger.error(f"Base64 conversion failed: {e}")
+        return None