# Setup

In [None]:
import argparse
import os
from pathlib import Path
from typing import Annotated, List

from dotenv import load_dotenv
from langchain.tools import tool
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, START
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition
from typing_extensions import TypedDict

In [None]:
import os

from dotenv import load_dotenv

# Load variables from a local .env file before anything below reads API keys
# from the process environment.
load_dotenv()

# Opt in to LangSmith tracing for this session.
os.environ.update({"LANGSMITH_TRACING": "true"})

# Tools

In [None]:
def transcribe_audio(audio_path: str) -> str:
    """Transcribe the supplied audio file to text using the OpenAI Whisper API (``whisper-1``).

    Args:
        audio_path: The path to the audio file to transcribe.

    Returns:
        The transcribed text with surrounding whitespace removed, or a string
        starting with ``"Error:"`` when no file exists at ``audio_path``.

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is not set,
            the API call fails, or the response contains no text.
    """
    # A missing file is reported as a return value rather than an exception;
    # presumably so a calling agent can read the message and recover.
    if not Path(audio_path).exists():
        return f"Error: Audio file not found at {audio_path}"

    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY environment variable is not set.")

    # Only the client import, file access and API request are guarded, so the
    # "no text" error below is not re-wrapped as an API failure.
    try:
        # Imported lazily so the module loads without the ``openai`` package.
        from openai import OpenAI  # type: ignore

        client = OpenAI(api_key=api_key)
        with open(audio_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
            )
    except Exception as exc:
        raise RuntimeError(f"OpenAI transcription failed: {exc}") from exc

    text: str | None = getattr(transcription, "text", None)
    if not text:
        raise RuntimeError("Transcription response did not contain text.")
    return text.strip()


# Agent

In [None]:
class State(TypedDict):
 """State schema handed to ``StateGraph``; it declares a single key, ``messages``."""

 # Conversation history as a list of ``BaseMessage`` objects. The
 # ``add_messages`` annotation is the reducer for updates to this key;
 # presumably it merges new messages into the existing list instead of
 # replacing it (confirm against the LangGraph documentation).
 messages: Annotated[List[BaseMessage], add_messages]

In [None]:
# System prompt text, read once from a file in the current working directory.
# NOTE(review): the filename is spelled "system_promt.txt" — confirm this
# matches the file on disk before renaming anything.
system_prompt_path = Path("system_promt.txt")
default_system_prompt = system_prompt_path.read_text(encoding="utf-8")


In [None]:
# The chat model is reached through the ChatOpenAI client pointed at the
# Nebius endpoint via ``base_url``.
nebius_api_key = os.environ.get("NEBIUS_API_KEY")
if not nebius_api_key:
    # Fail fast: with ``api_key=None`` the underlying client may fall back to
    # ``OPENAI_API_KEY`` and send that key to the Nebius endpoint.
    raise RuntimeError("NEBIUS_API_KEY environment variable is not set.")

llm = ChatOpenAI(
    model="Qwen/Qwen3-14B",
    api_key=nebius_api_key,
    base_url="https://api.studio.nebius.com/v1/",
)

In [None]:
# Tools the model is allowed to call; the same list is handed to the graph's
# tool node further down.
tools = [transcribe_audio]
llm_with_tools = llm.bind_tools(tools=tools)

## Nodes

In [None]:
def assistant_node(state: State):
    """Run one assistant turn by calling the tool-enabled LLM on the conversation.

    The system prompt is prepended to the messages sent to the model whenever
    the conversation does not already start with a ``SystemMessage``. The
    prompt is added to a new list, so the list stored in the graph state is
    never mutated by this node.

    Args:
        state: Current graph state; only ``state["messages"]`` is read.

    Returns:
        A partial state update ``{"messages": [response]}`` holding the model's
        reply, which is either a final answer or a request to call a tool.
    """
    history = state["messages"]

    if history and isinstance(history[0], SystemMessage):
        prompt_messages = history
    else:
        # Build a fresh list instead of ``history.insert(0, ...)``: inserting
        # would modify the state's own list behind the graph's back.
        prompt_messages = [SystemMessage(content=default_system_prompt), *history]

    response = llm_with_tools.invoke(prompt_messages)
    return {"messages": [response]}


## Graph

In [None]:
# Two-node loop: the assistant node calls the model, the tool node runs the
# requested tools, and control then returns to the assistant.
graph_builder = StateGraph(State)

tool_node = ToolNode(tools)
graph_builder.add_node("assistant", assistant_node)
graph_builder.add_node("tools", tool_node)

# Start at the assistant; ``tools_condition`` picks the next step after each
# assistant turn, and every tool run feeds back into the assistant.
graph_builder.add_edge(START, "assistant")
graph_builder.add_conditional_edges("assistant", tools_condition)
graph_builder.add_edge("tools", "assistant")

graph = graph_builder.compile()


In [None]:
# Visualise the compiled graph inline (optional).
from IPython.display import Image, display

try:
    display(Image(graph.get_graph().draw_mermaid_png()))
except Exception as exc:
    # Rendering requires some extra dependencies and is optional, so a failure
    # stays non-fatal, but the reason is reported instead of being discarded.
    print(f"Skipping graph visualisation: {exc}")

In [None]:
question_text = "What are the filling ingredients in the audio file?"
audio_path = "questions/files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3"

# ``State`` declares only ``messages``, so a separate "audio_path" input key
# never reaches the model. The path is put into the user message itself so the
# assistant can pass it to the ``transcribe_audio`` tool.
user_message = HumanMessage(
    content=f"{question_text}\n\nAudio file path: {audio_path}"
)
answer = graph.invoke({"messages": [user_message]})


In [None]:
# Pretty-print every message of the finished run, in order.
for message in answer["messages"]:
    message.pretty_print()