File size: 1,986 Bytes
5cf48c0 e6a90e9 5cf48c0 e6a90e9 5cf48c0 e6a90e9 a574ec0 e6a90e9 a574ec0 e6a90e9 c3adf17 5cf48c0 29082ec 5cf48c0 c3adf17 5cf48c0 c3adf17 e6a90e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from docx import Document
import mimetypes
import asyncio
import base64
load_dotenv()
async def preprocess_file(file_name: str):
    """Convert a file into text, picking a strategy from its guessed MIME type.

    The MIME type is guessed from the file name only (``mimetypes.guess_type``).

    Args:
        file_name: Path of the file to preprocess.

    Returns:
        The result of ``preprocess_image`` for images, of ``preprocess_audio``
        for audio, and of ``preprocess_text`` (run in a worker thread) for
        everything else. Video files currently yield ``None``.
    """
    # guess_type() returns None for unknown extensions; normalise to "" so the
    # substring checks below cannot raise TypeError and such files fall
    # through to the text handler.
    mime_type = mimetypes.guess_type(file_name)[0] or ""

    if "image" in mime_type:
        return await preprocess_image(file_name)
    if "video" in mime_type:
        # NOTE(review): no video handler exists in this module; the previous
        # code only built an unused prompt string and fell through, so the
        # effective result was None. Kept as-is, but made explicit.
        return None
    if "audio" in mime_type:
        return await preprocess_audio(file_name)

    # preprocess_text does blocking file I/O, so keep it off the event loop.
    return await asyncio.to_thread(preprocess_text, file_name, mime_type)
async def preprocess_audio(file_name):
    """Transcribe an audio file and return the transcription text.

    Args:
        file_name: Path of the audio file; it is opened in binary mode.

    Returns:
        The ``text`` attribute of the transcription returned by
        ``groq_client.audio.transcriptions.create``.
    """
    from agent.models import groq_client

    # Use a context manager so the file handle is closed once the request has
    # completed (or failed) instead of being leaked.
    with open(file_name, "rb") as audio_file:
        transcription = await groq_client.audio.transcriptions.create(
            model="whisper-large-v3-turbo",
            file=audio_file,
        )
    return transcription.text
async def preprocess_image(file_name: str):
    """Ask the image model for a detailed description of an image file.

    The file is read, base64-encoded and sent as a data URL alongside a text
    prompt in a single ``HumanMessage``.

    Args:
        file_name: Path of the image file.

    Returns:
        The ``content`` attribute of the response from ``llm_image.ainvoke``.
    """
    from agent.models import llm_image

    # Label the payload with the image's real MIME type (PNG, GIF, WebP, ...)
    # instead of always claiming JPEG; keep JPEG as the fallback when the
    # type cannot be guessed from the file name.
    guessed_type = mimetypes.guess_type(file_name)[0]
    if guessed_type and guessed_type.startswith("image/"):
        image_mime = guessed_type
    else:
        image_mime = "image/jpeg"

    with open(file_name, "rb") as f:
        img_b64 = base64.b64encode(f.read()).decode("utf-8")

    message = HumanMessage(
        content=[
            {"type": "text", "text": "Please analyze this image and give detailed description."},
            {
                "type": "image_url",
                "image_url": {"url": f"data:{image_mime};base64,{img_b64}"},
            },
        ]
    )
    response = await llm_image.ainvoke([message])
    return response.content
def preprocess_text(file_name, mime_type: str) -> str:
    """Extract text from a PDF, a .docx document, or a UTF-8 text file.

    Args:
        file_name: Path of the file to read.
        mime_type: MIME type used to pick the extractor. ``None`` is treated
            like an unknown type and handled as plain text.

    Returns:
        The extracted text, or ``"[Unsupported format]"`` when the file is
        neither a PDF nor a .docx and cannot be read as UTF-8 text.
    """
    # Tolerate a missing MIME type instead of raising TypeError on `in`.
    mime_type = mime_type or ""

    if "pdf" in mime_type:
        reader = PdfReader(file_name)
        # extract_text() may return None for a page; treat it as empty.
        return "\n".join((page.extract_text() or "") for page in reader.pages)

    # Match only the .docx MIME type. A bare "document" substring also matches
    # .xlsx/.pptx ("...officedocument...") and OpenDocument types, none of
    # which the docx Document loader can open.
    if "wordprocessingml.document" in mime_type:
        doc = Document(file_name)
        return "\n".join(paragraph.text for paragraph in doc.paragraphs)

    try:
        with open(file_name, "r", encoding="utf-8") as file:
            return file.read()
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError covers
        # UnicodeDecodeError, i.e. binary or non-UTF-8 content.
        return "[Unsupported format]"