|
from fastapi import FastAPI, WebSocket |
|
from fastapi.responses import HTMLResponse |
|
from app.asr import create_recognizer, stream_audio |
|
|
|
import json |
|
from pydantic import BaseModel |
|
|
|
# ASGI application object; the route decorators in this module register on it.
app = FastAPI()
|
|
|
|
|
|
|
@app.get("/")
async def root():
    """Serve the contents of ``app/index.html`` as an HTML response.

    The path is relative, so it is resolved against the process's current
    working directory.

    Returns:
        HTMLResponse: the full text of ``app/index.html``.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # Decode explicitly as UTF-8: without ``encoding`` Python falls back to
    # the platform locale, which can mis-decode non-ASCII characters.
    with open("app/index.html", encoding="utf-8") as f:
        return HTMLResponse(f.read())
|
|
|
|
|
async def _process_audio_frame(websocket, raw_audio, stream, recognizer, orig_sr):
    """Run one binary audio frame through the recognizer and report to the client.

    Sends a ``{"partial": ..., "volume": ...}`` message for every frame.  When
    the recognizer reports an endpoint, additionally sends ``{"final": ...}``
    (only if the text is not blank) and resets the stream.

    Args:
        websocket: accepted WebSocket used to send the JSON messages.
        raw_audio: payload of the binary frame, passed to ``stream_audio`` as-is.
        stream: stream object created by ``recognizer.create_stream()``.
        recognizer: recognizer returned by ``create_recognizer()``.
        orig_sr: sample rate announced by the client's config message.
    """
    result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)

    # Cap the reported level so the client never sees a volume above 1.0.
    await websocket.send_json({"partial": result, "volume": min(rms, 1.0)})

    if recognizer.is_endpoint(stream):
        if result.strip():
            print(f"[DEBUG main] Emitting final: {result!r}")
            await websocket.send_json({"final": result})
        # Reset on every endpoint, even when the text is blank, so the next
        # frame starts a fresh utterance.
        recognizer.reset(stream)


@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Handle one streaming-recognition WebSocket session.

    Protocol, as implemented below:

    * A text frame containing a JSON object with ``"type": "config"`` and a
      ``"sampleRate"`` value sets the sample rate and (re)creates the
      recognizer and its stream.  Other text frames are ignored.
    * Binary frames are treated as audio.  They are dropped until a config
      message has created the recognizer.
    * The loop ends when the client disconnects or an unexpected error occurs;
      the socket is then closed on a best-effort basis.

    Args:
        websocket: the incoming connection supplied by FastAPI.
    """
    print("[DEBUG main] ▶ Attempting to accept WebSocket…")
    await websocket.accept()
    print("[DEBUG main] ▶ WebSocket.accept() returned → client is connected!")

    recognizer = None
    stream = None
    orig_sr = 48000  # used until the client sends a config message

    try:
        while True:
            data = await websocket.receive()
            kind = data.get("type")

            if kind == "websocket.disconnect":
                # Stop here: calling receive() again after a disconnect
                # raises instead of returning another message.
                print(f"[DEBUG main] Received control/frame: {data}")
                break

            if kind != "websocket.receive":
                print(f"[DEBUG main] Received control/frame: {data}")
                continue

            # A receive message may carry both keys with one of them set to
            # None, so test the values rather than key presence.
            raw_text = data.get("text")
            if raw_text is not None:
                try:
                    config_msg = json.loads(raw_text)
                except ValueError as e:  # json.JSONDecodeError is a ValueError
                    print(f"[ERROR main] JSON parse failed: {e}")
                    continue

                # Only JSON objects tagged as "config" are meaningful here.
                if not isinstance(config_msg, dict) or config_msg.get("type") != "config":
                    continue

                try:
                    orig_sr = int(config_msg["sampleRate"])
                except (KeyError, TypeError, ValueError) as e:
                    # A malformed config should not tear down the connection.
                    print(f"[ERROR main] Invalid config message: {e!r}")
                    continue
                print(f"[INFO main] Set original sample rate to {orig_sr}")

                recognizer = create_recognizer()
                stream = recognizer.create_stream()
                print("[INFO main] WebSocket connection accepted; created a streaming context.")
                continue

            raw_audio = data.get("bytes")
            if raw_audio is None or recognizer is None or stream is None:
                # Nothing to decode, or audio arrived before any config message.
                continue

            await _process_audio_frame(websocket, raw_audio, stream, recognizer, orig_sr)
    except Exception as e:
        # Top-level boundary for this connection: log and fall through to close.
        print(f"[ERROR main] Unexpected exception: {e}")

    try:
        await websocket.close()
    except Exception as e:
        # Best effort: the peer may already be gone, in which case close() fails.
        print(f"[DEBUG main] WebSocket close skipped: {e!r}")

    print("[INFO main] WebSocket closed, cleanup complete.")
|
|
|
|
|
class TranslationRequest(BaseModel):
    """Pydantic model with a single required string field, ``text``.

    NOTE(review): presumably the request body of a translation endpoint that
    is not visible in this part of the file — confirm against its caller.
    """

    text: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|