{ "cells": [ { "cell_type": "markdown", "id": "0", "metadata": {}, "source": [ "# Setup" ] }, { "cell_type": "code", "execution_count": null, "id": "1", "metadata": {}, "outputs": [], "source": [ "import argparse\n", "import os\n", "from pathlib import Path\n", "from typing import Annotated, List\n", "\n", "from dotenv import load_dotenv\n", "from langchain.tools import tool\n", "from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage\n", "from langchain_openai import ChatOpenAI\n", "from langgraph.graph import StateGraph, START\n", "from langgraph.graph.message import add_messages\n", "from langgraph.prebuilt import ToolNode, tools_condition\n", "from typing_extensions import TypedDict" ] }, { "cell_type": "code", "execution_count": null, "id": "2", "metadata": {}, "outputs": [], "source": [ "from dotenv import load_dotenv\n", "load_dotenv()\n", "\n", "import os\n", "os.environ[\"LANGSMITH_TRACING\"] = \"true\"" ] }, { "cell_type": "markdown", "id": "3", "metadata": {}, "source": [ "# Tools" ] }, { "cell_type": "code", "execution_count": null, "id": "4", "metadata": {}, "outputs": [], "source": [ "def transcribe_audio(audio_path: str) -> str:\n", " \"\"\"Transcribe the supplied audio file to text using the OpenAI Whisper API (``whisper-1``).\n", "\n", " Args:\n", " audio_path: The path to the audio file to transcribe.\n", "\n", " Returns:\n", " The transcribed text.\n", "\n", " Raises:\n", " RuntimeError: If the ``OPENAI_API_KEY`` environment variable is not set or the API call fails.\n", " \"\"\"\n", " if not Path(audio_path).exists():\n", " return f\"Error: Audio file not found at {audio_path}\"\n", "\n", " api_key = os.getenv(\"OPENAI_API_KEY\")\n", " if not api_key:\n", " raise RuntimeError(\"OPENAI_API_KEY environment variable is not set.\")\n", "\n", " try:\n", " from openai import OpenAI # type: ignore\n", "\n", " client = OpenAI(api_key=api_key)\n", " with open(audio_path, \"rb\") as f:\n", " transcription = client.audio.transcriptions.create(\n", " model=\"whisper-1\",\n", " file=f,\n", " )\n", " text: str | None = getattr(transcription, \"text\", None)\n", " if text:\n", " return text.strip()\n", " raise RuntimeError(\"Transcription response did not contain text.\")\n", " except Exception as exc:\n", " raise RuntimeError(f\"OpenAI transcription failed: {exc}\") from exc\n" ] }, { "cell_type": "markdown", "id": "5", "metadata": {}, "source": [ "# Agent" ] }, { "cell_type": "code", "execution_count": null, "id": "6", "metadata": {}, "outputs": [], "source": [ "class State(TypedDict):\n", " messages: Annotated[List[BaseMessage], add_messages]" ] }, { "cell_type": "code", "execution_count": null, "id": "7", "metadata": {}, "outputs": [], "source": [ "default_system_prompt = Path(\"system_promt.txt\").read_text(encoding=\"utf-8\") \n" ] }, { "cell_type": "code", "execution_count": null, "id": "8", "metadata": {}, "outputs": [], "source": [ "nebius_api_key = os.environ.get(\"NEBIUS_API_KEY\")\n", "llm = ChatOpenAI(\n", " model=\"Qwen/Qwen3-14B\",\n", " api_key=nebius_api_key,\n", " base_url=\"https://api.studio.nebius.com/v1/\",\n", " )" ] }, { "cell_type": "code", "execution_count": null, "id": "9", "metadata": {}, "outputs": [], "source": [ "tools = [transcribe_audio]\n", "llm_with_tools = llm.bind_tools(tools)" ] }, { "cell_type": "markdown", "id": "10", "metadata": {}, "source": [ "## Nodes" ] }, { "cell_type": "code", "execution_count": null, "id": "11", "metadata": {}, "outputs": [], "source": [ "def assistant_node(state: State):\n", " \"\"\"The assistant node in the graph. It calls the LLM with the current state\n", " to decide the next action (respond or call a tool).\n", " \"\"\"\n", " messages = state[\"messages\"]\n", " system_message = SystemMessage(content=default_system_prompt)\n", "\n", " if not messages or not isinstance(messages[0], SystemMessage):\n", " messages.insert(0, system_message)\n", "\n", " response = llm_with_tools.invoke(messages)\n", " return {\"messages\": [response]}\n" ] }, { "cell_type": "markdown", "id": "12", "metadata": {}, "source": [ "## Graph" ] }, { "cell_type": "code", "execution_count": null, "id": "13", "metadata": {}, "outputs": [], "source": [ "graph_builder = StateGraph(State)\n", "graph_builder.add_node(\"assistant\", assistant_node)\n", "graph_builder.add_node(\"tools\", ToolNode(tools))\n", "\n", "graph_builder.add_edge(START, \"assistant\")\n", "graph_builder.add_conditional_edges(\"assistant\", tools_condition)\n", "graph_builder.add_edge(\"tools\", \"assistant\")\n", "\n", "graph = graph_builder.compile()\n" ] }, { "cell_type": "code", "execution_count": null, "id": "14", "metadata": {}, "outputs": [], "source": [ "# visualise\n", "from IPython.display import Image, display\n", "\n", "try:\n", " display(Image(graph.get_graph().draw_mermaid_png()))\n", "except Exception:\n", " # This requires some extra dependencies and is optional\n", " pass" ] }, { "cell_type": "code", "execution_count": null, "id": "15", "metadata": {}, "outputs": [], "source": [ "question_text = \"What are the filling ingredients in the audio file?\"\n", "audio_path = \"questions/files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3\"\n", "\n", "answer = graph.invoke({\"messages\": question_text, \"audio_path\": audio_path})\n" ] }, { "cell_type": "code", "execution_count": null, "id": "16", "metadata": {}, "outputs": [], "source": [ "# Show the messages\n", "for m in answer['messages']:\n", " m.pretty_print()" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 5 }