# MultiagentPersonalAssistant/agent/file_preprocessing.py
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from docx import Document
import mimetypes
import asyncio
import base64
load_dotenv()
async def preprocess_file(file_name: str):
    """Dispatch *file_name* to the matching preprocessor based on MIME type.

    Images are described by a vision model, audio is transcribed, and
    everything else (pdf, docx, plain text, unknown) is read as text in a
    worker thread. Returns a textual representation of the file's contents.
    """
    # guess_type() returns None for unrecognized extensions; fall back to a
    # generic binary type so the substring checks below cannot raise TypeError.
    mime_type = mimetypes.guess_type(file_name)[0] or "application/octet-stream"
    if "image" in mime_type:
        return await preprocess_image(file_name)
    elif "video" in mime_type:
        # NOTE(review): video preprocessing is not implemented. The original
        # code assigned an unused prompt and silently returned None; return an
        # explicit placeholder instead, matching preprocess_text's style.
        return "[Video preprocessing not supported]"
    elif "audio" in mime_type:
        return await preprocess_audio(file_name)
    else:
        # preprocess_text does blocking file I/O; run it off the event loop.
        return await asyncio.to_thread(preprocess_text, file_name, mime_type)
async def preprocess_audio(file_name):
    """Transcribe an audio file with Groq's Whisper endpoint.

    Returns the transcription text.
    """
    from agent.models import groq_client
    # Close the handle deterministically: the original passed an unclosed
    # open() result to the API call and leaked the file descriptor.
    with open(file_name, "rb") as audio_file:
        transcription = await groq_client.audio.transcriptions.create(
            model="whisper-large-v3-turbo",
            file=audio_file,
        )
    return transcription.text
async def preprocess_image(file_name: str):
    """Describe an image using the vision model.

    The image is inlined as a base64 data URL in a multimodal message.
    Returns the model's textual description.
    """
    from agent.models import llm_image
    with open(file_name, "rb") as f:
        img_b64 = base64.b64encode(f.read()).decode("utf-8")
    # Use the file's actual MIME type in the data URL instead of hard-coding
    # image/jpeg for every format; keep jpeg as the unknown-extension fallback.
    mime_type = mimetypes.guess_type(file_name)[0] or "image/jpeg"
    response = await llm_image.ainvoke([
        HumanMessage(
            content=[
                {"type": "text", "text": "Please analyze this image and give detailed description."},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime_type};base64,{img_b64}"},
                },
            ]
        )
    ])
    return response.content
def preprocess_text(file_name, mime_type: str) -> str:
    """Extract plain text from a document.

    PDFs go through PyPDF2 and Word documents through python-docx; any other
    MIME type — including ``None``, which ``mimetypes.guess_type`` yields for
    unknown extensions — is read as UTF-8 text. Returns a placeholder string
    when the file cannot be decoded.
    """
    # The caller may pass None (guess_type miss); normalize so the substring
    # checks below cannot raise TypeError and we fall through to plain text.
    mime_type = mime_type or ""
    if "pdf" in mime_type:
        reader = PdfReader(file_name)
        # extract_text() can return None for pages without a text layer.
        return "\n".join((page.extract_text() or "") for page in reader.pages)
    if "document" in mime_type:
        doc = Document(file_name)
        return "\n".join(paragraph.text for paragraph in doc.paragraphs)
    try:
        with open(file_name, "r", encoding="utf-8") as file:
            return file.read()
    except Exception:
        # Best effort: binary or non-UTF-8 content we cannot decode.
        return "[Unsupported format]"