OpenGeminiAPI / examples /quickstarts /Get_started_LiveAPI_NativeAudio.py
HoneyTian's picture
update
0ec61d2
# -*- coding: utf-8 -*-
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
https://ai.google.dev/gemini-api/docs/live?hl=zh-cn#python_1
https://github.com/google-gemini/cookbook/blob/main/quickstarts/Get_started_LiveAPI_NativeAudio.py
## Setup
To install the dependencies for this script, run:
```
brew install portaudio
pip install -U google-genai pyaudio
```
## API key
Ensure the `GOOGLE_API_KEY` environment variable is set to the api-key
you obtained from Google AI Studio.
## Run
To run the script:
```
python Get_started_LiveAPI_NativeAudio.py
```
Start talking to Gemini
"""
import asyncio
import os
import sys
import traceback
import pyaudio
from google import genai
from project_settings import environment
if sys.version_info < (3, 11, 0):
import taskgroup, exceptiongroup
asyncio.TaskGroup = taskgroup.TaskGroup
asyncio.ExceptionGroup = exceptiongroup.ExceptionGroup
FORMAT = pyaudio.paInt16
CHANNELS = 1
SEND_SAMPLE_RATE = 16000
RECEIVE_SAMPLE_RATE = 24000
CHUNK_SIZE = 1024
pya = pyaudio.PyAudio()
GOOGLE_API_KEY = environment.get("GEMINI_API_KEY")
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
client = genai.Client() # GOOGLE_API_KEY must be set as env variable
MODEL = "gemini-2.5-flash-preview-native-audio-dialog"
CONFIG = {"response_modalities": ["AUDIO"]}
class AudioLoop:
def __init__(self):
self.audio_in_queue = None
self.out_queue = None
self.session = None
self.audio_stream = None
self.receive_audio_task = None
self.play_audio_task = None
async def listen_audio(self):
mic_info = pya.get_default_input_device_info()
self.audio_stream = await asyncio.to_thread(
pya.open,
format=FORMAT,
channels=CHANNELS,
rate=SEND_SAMPLE_RATE,
input=True,
input_device_index=mic_info["index"],
frames_per_buffer=CHUNK_SIZE,
)
if __debug__:
kwargs = {"exception_on_overflow": False}
else:
kwargs = {}
while True:
data = await asyncio.to_thread(self.audio_stream.read, CHUNK_SIZE, **kwargs)
await self.out_queue.put({"data": data, "mime_type": "audio/pcm"})
async def send_realtime(self):
while True:
msg = await self.out_queue.get()
await self.session.send_realtime_input(audio=msg)
async def receive_audio(self):
"Background task to reads from the websocket and write pcm chunks to the output queue"
while True:
turn = self.session.receive()
async for response in turn:
if data := response.data:
self.audio_in_queue.put_nowait(data)
continue
if text := response.text:
print(text, end="")
# If you interrupt the model, it sends a turn_complete.
# For interruptions to work, we need to stop playback.
# So empty out the audio queue because it may have loaded
# much more audio than has played yet.
while not self.audio_in_queue.empty():
self.audio_in_queue.get_nowait()
async def play_audio(self):
stream = await asyncio.to_thread(
pya.open,
format=FORMAT,
channels=CHANNELS,
rate=RECEIVE_SAMPLE_RATE,
output=True,
)
while True:
bytestream = await self.audio_in_queue.get()
await asyncio.to_thread(stream.write, bytestream)
async def run(self):
try:
async with (
client.aio.live.connect(model=MODEL, config=CONFIG) as session,
asyncio.TaskGroup() as tg,
):
self.session = session
self.audio_in_queue = asyncio.Queue()
self.out_queue = asyncio.Queue(maxsize=5)
tg.create_task(self.send_realtime())
tg.create_task(self.listen_audio())
tg.create_task(self.receive_audio())
tg.create_task(self.play_audio())
except asyncio.CancelledError:
pass
except ExceptionGroup as EG:
if self.audio_stream:
self.audio_stream.close()
traceback.print_exception(EG)
if __name__ == "__main__":
loop = AudioLoop()
asyncio.run(loop.run())