|
from langchain_core.messages import HumanMessage |
|
from dotenv import load_dotenv |
|
from PyPDF2 import PdfReader |
|
from docx import Document |
|
import mimetypes |
|
import asyncio |
|
import base64 |
|
|
|
load_dotenv() |
|
|
|
async def preprocess_file(file_name: str):
    """Convert a file into text by dispatching on its guessed MIME type.

    Images go to ``preprocess_image``, audio goes to ``preprocess_audio``,
    and everything else is handed to ``preprocess_text`` in a worker thread
    so that blocking file parsing does not stall the event loop.

    Args:
        file_name: Path of the file to preprocess. The MIME type is guessed
            from this name only; the file content is not inspected here.

    Returns:
        The text produced by the selected handler, or ``None`` for video
        files, which are recognised but not processed.
    """
    # guess_type() yields None for unknown extensions; normalise to "" so the
    # substring checks below cannot raise TypeError and such files fall
    # through to the plain-text handler.
    mime_type = mimetypes.guess_type(file_name)[0] or ""

    if "image" in mime_type:
        return await preprocess_image(file_name)

    if "video" in mime_type:
        # TODO: video description is not implemented. Callers have always
        # received None for video input, so that result is kept explicit.
        return None

    if "audio" in mime_type:
        return await preprocess_audio(file_name)

    return await asyncio.to_thread(preprocess_text, file_name, mime_type)
|
|
|
|
|
async def preprocess_audio(file_name):
    """Transcribe an audio file to text.

    The file is sent through ``groq_client.audio.transcriptions.create``
    using the ``whisper-large-v3-turbo`` model.

    Args:
        file_name: Path of the audio file to transcribe.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    # Function-scope import kept from the original (presumably to avoid an
    # import cycle with agent.models -- confirm before hoisting).
    from agent.models import groq_client

    # Hold the handle open for the whole request and close it deterministically
    # afterwards; passing a bare open() leaked the descriptor on every call.
    with open(file_name, "rb") as audio_file:
        transcription = await groq_client.audio.transcriptions.create(
            model="whisper-large-v3-turbo",
            file=audio_file,
        )

    return transcription.text
|
|
|
|
|
def _image_data_url(file_name: str) -> str:
    """Read *file_name* and return its content as a base64 ``data:`` URL."""
    # Label the payload with the file's real image type; fall back to JPEG
    # (the previously hard-coded value) when the type cannot be guessed or is
    # not an image type.
    guessed = mimetypes.guess_type(file_name)[0]
    if guessed and guessed.startswith("image/"):
        mime_type = guessed
    else:
        mime_type = "image/jpeg"

    with open(file_name, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")

    return f"data:{mime_type};base64,{encoded}"


async def preprocess_image(file_name: str):
    """Ask the image model for a detailed description of an image file.

    Args:
        file_name: Path of the image to describe.

    Returns:
        The ``content`` attribute of the model response.
    """
    # Function-scope import kept from the original (presumably to avoid an
    # import cycle with agent.models -- confirm before hoisting).
    from agent.models import llm_image

    # Reading and encoding the file is blocking work, so run it off the
    # event loop.
    data_url = await asyncio.to_thread(_image_data_url, file_name)

    message = HumanMessage(
        content=[
            {
                "type": "text",
                "text": "Please analyze this image and give detailed description.",
            },
            {
                "type": "image_url",
                "image_url": {"url": data_url},
            },
        ]
    )
    response = await llm_image.ainvoke([message])
    return response.content
|
|
|
|
|
def preprocess_text(file_name, mime_type: str) -> str:
    """Extract plain text from a PDF, a Word (.docx) file, or a UTF-8 text file.

    Args:
        file_name: Path of the file to read.
        mime_type: MIME type guessed for the file. ``None`` or an empty
            string is tolerated and treated as "unknown", in which case the
            file is read as plain text.

    Returns:
        The extracted text, or ``"[Unsupported format]"`` when the file
        cannot be opened or decoded as UTF-8 text.
    """
    # Callers may pass the raw result of mimetypes.guess_type(), which is
    # None for unknown extensions.
    mime_type = mime_type or ""

    if "pdf" in mime_type:
        reader = PdfReader(file_name)
        # extract_text() can return a falsy value for a page; substitute ""
        # so join() always receives strings.
        return "\n".join((page.extract_text() or "") for page in reader.pages)

    # Match the Word document type specifically. A bare "document" test also
    # matched pptx/xlsx/odt MIME types ("officedocument", "opendocument"),
    # which python-docx cannot open.
    if "wordprocessingml.document" in mime_type:
        doc = Document(file_name)
        return "\n".join(paragraph.text for paragraph in doc.paragraphs)

    try:
        with open(file_name, "r", encoding="utf-8") as text_file:
            return text_file.read()
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError covers
        # UnicodeDecodeError for content that is not valid UTF-8.
        return "[Unsupported format]"