from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from docx import Document
import mimetypes
import asyncio
import base64

load_dotenv()


async def preprocess_file(file_name: str) -> str:
    """Dispatch *file_name* to the right preprocessor based on its MIME type.

    Images and audio are handled by async helpers; everything else is
    treated as a document/text file and read in a worker thread so the
    event loop is not blocked by file I/O.

    Returns the extracted or generated text description of the file.
    """
    mime_type = mimetypes.guess_type(file_name)[0]
    if mime_type is None:
        # guess_type returns None for unknown extensions; the original code
        # crashed here ('"image" in None' -> TypeError). Fall back to the
        # plain-text reader, which already reports unsupported formats.
        return await asyncio.to_thread(preprocess_text, file_name, "")
    if "image" in mime_type:
        return await preprocess_image(file_name)
    if "video" in mime_type:
        # NOTE(review): the original branch only assigned an unused prompt
        # string and implicitly returned None — video preprocessing is not
        # implemented. Report that explicitly instead of returning None.
        return "[Video preprocessing not supported]"
    if "audio" in mime_type:
        return await preprocess_audio(file_name)
    return await asyncio.to_thread(preprocess_text, file_name, mime_type)


async def preprocess_audio(file_name: str) -> str:
    """Transcribe an audio file with Groq's Whisper model and return the text."""
    # Imported lazily to avoid a hard dependency at module import time.
    from agent.models import groq_client

    # Open inside a context manager so the handle is closed even if the API
    # call raises (the original leaked the file object returned by open()).
    with open(file_name, "rb") as audio_file:
        transcription = await groq_client.audio.transcriptions.create(
            model="whisper-large-v3-turbo",
            file=audio_file,
        )
    return transcription.text


async def preprocess_image(file_name: str) -> str:
    """Describe an image by sending it base64-encoded to the vision model.

    Returns the model's textual description of the image.
    """
    # Imported lazily to avoid a hard dependency at module import time.
    from agent.models import llm_image

    with open(file_name, "rb") as f:
        img_b64 = base64.b64encode(f.read()).decode("utf-8")
    # NOTE(review): the data URL always claims image/jpeg regardless of the
    # actual file type — most vision APIs tolerate this, but confirm.
    response = await llm_image.ainvoke(
        [
            HumanMessage(
                content=[
                    {"type": "text", "text": "Please analyze this image and give detailed description."},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
                    },
                ]
            )
        ]
    )
    return response.content


def preprocess_text(file_name: str, mime_type: str) -> str:
    """Extract plain text from a PDF, Word document, or UTF-8 text file.

    PDFs and Word documents are parsed with PyPDF2 / python-docx; anything
    else is read as UTF-8 text. Unreadable files yield a placeholder string
    rather than raising (best-effort contract relied on by preprocess_file).
    """
    if "pdf" in mime_type:
        reader = PdfReader(file_name)
        # extract_text() may return None for image-only pages; substitute "".
        return "\n".join((page.extract_text() or "") for page in reader.pages)
    if "document" in mime_type:
        doc = Document(file_name)
        return "\n".join(p.text for p in doc.paragraphs)
    try:
        with open(file_name, "r", encoding="utf-8") as file:
            return file.read()
    except Exception:
        # Deliberate best-effort: binary or non-UTF-8 content is reported,
        # not raised, so one bad file does not abort a batch.
        return "[Unsupported format]"