File size: 1,986 Bytes
5cf48c0
e6a90e9
 
 
 
 
5cf48c0
e6a90e9
 
 
 
 
 
5cf48c0
e6a90e9
 
 
a574ec0
e6a90e9
 
 
 
a574ec0
 
 
 
 
 
 
e6a90e9
c3adf17
5cf48c0
29082ec
5cf48c0
 
 
 
 
 
 
 
 
 
 
 
c3adf17
5cf48c0
c3adf17
e6a90e9
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from docx import Document
import mimetypes
import asyncio
import base64

# Load variables from a .env file into the process environment at import time.
# NOTE(review): presumably this provides credentials for the clients imported
# from agent.models inside the functions below — confirm.
load_dotenv()

async def preprocess_file(file_name: str):
    """Turn a file into text by dispatching on its guessed MIME type.

    The MIME type is guessed from the file name alone
    (``mimetypes.guess_type``); the file content is not inspected here.

    Args:
        file_name: Path of the file to preprocess.

    Returns:
        The result of ``preprocess_image`` for image types, of
        ``preprocess_audio`` for audio types, and of ``preprocess_text``
        (run in a worker thread) for everything else. Video types yield
        the ``"[Unsupported format]"`` placeholder because this module has
        no video handler.
    """
    # guess_type() returns None for unknown extensions; normalising to ""
    # keeps the substring checks from raising TypeError and routes such
    # files to the plain-text fallback.
    mime_type = mimetypes.guess_type(file_name)[0] or ""
    if "image" in mime_type:
        return await preprocess_image(file_name)
    if "video" in mime_type:
        # The original branch only assigned an unused prompt and fell
        # through, returning None. Return the same placeholder the text
        # path uses so callers always receive a string.
        return "[Unsupported format]"
    if "audio" in mime_type:
        return await preprocess_audio(file_name)
    # preprocess_text is synchronous, so keep it off the event loop.
    return await asyncio.to_thread(preprocess_text, file_name, mime_type)


async def preprocess_audio(file_name):
    """Transcribe an audio file and return the transcription text.

    The file is sent to ``groq_client.audio.transcriptions.create`` with the
    ``whisper-large-v3-turbo`` model.

    Args:
        file_name: Path of the audio file to transcribe.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    from agent.models import groq_client

    # Use a context manager so the handle is closed even if the request
    # fails; the original left the file open.
    with open(file_name, "rb") as audio_file:
        transcription = await groq_client.audio.transcriptions.create(
            model="whisper-large-v3-turbo",
            file=audio_file,
        )
    return transcription.text


async def preprocess_image(file_name: str):
    """Ask the image model for a detailed description of an image file.

    The image is base64-encoded and sent to ``llm_image`` as a data URL
    together with a fixed text instruction.

    Args:
        file_name: Path of the image file to describe.

    Returns:
        The ``content`` of the model response.
    """
    from agent.models import llm_image

    # Label the data URL with the real image type (png, webp, ...) instead
    # of always claiming JPEG; fall back to the previous hard-coded value
    # when the extension is unknown or not an image type.
    guessed_type = mimetypes.guess_type(file_name)[0]
    if guessed_type and guessed_type.startswith("image/"):
        image_mime = guessed_type
    else:
        image_mime = "image/jpeg"

    with open(file_name, "rb") as f:
        img_b64 = base64.b64encode(f.read()).decode("utf-8")

    message = HumanMessage(
        content=[
            {"type": "text", "text": "Please analyze this image and give detailed description."},
            {
                "type": "image_url",
                "image_url": {"url": f"data:{image_mime};base64,{img_b64}"},
            },
        ]
    )
    response = await llm_image.ainvoke([message])
    return response.content


def preprocess_text(file_name, mime_type: str) -> str:
    """Extract text from a PDF, a word-processing document, or a text file.

    Args:
        file_name: Path of the file to read.
        mime_type: MIME type used to pick the extractor. ``None`` is
            accepted and treated as an unknown type.

    Returns:
        For a MIME type containing ``"pdf"``: the text of every page joined
        by newlines (pages with no extractable text contribute an empty
        string). For a MIME type containing ``"document"``: the paragraph
        texts joined by newlines. Otherwise the file content decoded as
        UTF-8, or ``"[Unsupported format]"`` if it cannot be opened or
        decoded.
    """
    # mimetypes.guess_type() yields None for unknown extensions; treat that
    # as "no type" instead of raising TypeError on the substring checks.
    mime_type = mime_type or ""
    if "pdf" in mime_type:
        reader = PdfReader(file_name)
        return "\n".join((p.extract_text() or "") for p in reader.pages)
    if "document" in mime_type:
        doc = Document(file_name)
        return "\n".join(p.text for p in doc.paragraphs)
    try:
        with open(file_name, "r", encoding="utf-8") as file:
            return file.read()
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError covers
        # UnicodeDecodeError for content that is not valid UTF-8.
        return "[Unsupported format]"