File size: 4,612 Bytes
e43b7f7 87359f8 f1801f1 e43b7f7 85dee70 e43b7f7 5ac8e9f e43b7f7 85dee70 e43b7f7 85dee70 f1801f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
from fastapi import FastAPI, WebSocket
from fastapi.responses import HTMLResponse
from app.asr import create_recognizer, stream_audio
# from app.translation import tranlate_ks
import json
from pydantic import BaseModel
# ASGI application object; the route decorators in this module register their handlers on it.
app = FastAPI()
# Static-file mount is currently disabled.
# NOTE(review): re-enabling it requires importing StaticFiles, which is not among the visible imports.
# app.mount("/static", StaticFiles(directory="app/static"), name="static")
@app.get("/")
async def root():
    """Return the contents of ``app/index.html`` as an HTML response.

    The path is relative to the process working directory, so the server
    must be started from the directory that contains ``app/``.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # Explicit encoding: without it open() uses the locale's preferred
    # encoding, which is not UTF-8 on every platform and can garble or
    # reject non-ASCII characters in the page.
    with open("app/index.html", encoding="utf-8") as f:
        return HTMLResponse(f.read())
async def _process_audio_chunk(websocket, raw_audio, stream, recognizer, orig_sr):
    """Run one binary audio frame through the recognizer and report the result.

    Always sends a ``{"partial": ..., "volume": ...}`` JSON message. If the
    recognizer then reports an endpoint, a ``{"final": ...}`` message is sent
    as well (only when the text is not blank) and ``recognizer.reset(stream)``
    is called.

    Args:
        websocket: accepted WebSocket used to send the JSON replies.
        raw_audio: payload of the binary frame, passed to ``stream_audio`` as is.
        stream: recognizer stream created by ``recognizer.create_stream()``.
        recognizer: recognizer returned by ``create_recognizer()``.
        orig_sr: sample rate announced by the client's config message.
    """
    result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)

    # Cap the reported volume at 1.0 before sending it to the client.
    await websocket.send_json({"partial": result, "volume": min(rms, 1.0)})

    if recognizer.is_endpoint(stream):
        if result.strip():
            print(f"[DEBUG main] Emitting final: {result!r}")
            await websocket.send_json({"final": result})
        recognizer.reset(stream)


@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Streaming speech-recognition endpoint.

    Protocol, as implemented here:

    * Text frames carry JSON. A ``{"type": "config", "sampleRate": N}``
      message sets the input sample rate and (re)creates the recognizer and
      its stream. Any other text frame is ignored.
    * Binary frames carry audio. They are ignored until a config message has
      been processed; afterwards each one is answered with a partial result
      and, on endpoint detection, a final result.

    The loop ends when the client disconnects or an unexpected error occurs;
    in both cases the socket is closed on a best-effort basis.
    """
    print("[DEBUG main] ▶ Attempting to accept WebSocket…")
    await websocket.accept()
    print("[DEBUG main] ▶ WebSocket.accept() returned → client is connected!")

    recognizer = None
    stream = None
    orig_sr = 48000  # used when a config message omits "sampleRate"

    try:
        while True:
            data = await websocket.receive()
            kind = data.get("type")

            # A disconnect must end the loop: calling receive() again after
            # it raises, which used to be logged as an unexpected error.
            if kind == "websocket.disconnect":
                print(f"[INFO main] Client disconnected: {data}")
                break

            # "websocket.receive" is the only ASGI event that carries frame
            # data; text and binary frames differ by which key is populated.
            if kind != "websocket.receive":
                print(f"[DEBUG main] Received control/frame: {data}")
                continue

            # Test the value, not key presence: a server may supply both
            # "text" and "bytes" with the unused one set to None.
            raw_text = data.get("text")
            if raw_text is not None:
                try:
                    config_msg = json.loads(raw_text)
                except ValueError as e:  # json.JSONDecodeError subclasses ValueError
                    print(f"[ERROR main] JSON parse failed: {e}")
                    continue

                # Valid JSON need not be an object; only config objects matter.
                if not isinstance(config_msg, dict) or config_msg.get("type") != "config":
                    continue

                # 1) sample rate
                try:
                    orig_sr = int(config_msg.get("sampleRate", orig_sr))
                except (TypeError, ValueError, OverflowError) as e:
                    # A malformed config should not tear down the connection.
                    print(f"[ERROR main] Invalid sampleRate in config: {e}")
                    continue
                print(f"[INFO main] Set original sample rate to {orig_sr}")

                # 2) create recognizer and its streaming context
                recognizer = create_recognizer()
                stream = recognizer.create_stream()
                print("[INFO main] WebSocket connection accepted; created a streaming context.")
                continue

            # Binary frame: don't process audio until after config.
            raw_audio = data.get("bytes")
            if raw_audio is None or recognizer is None or stream is None:
                continue

            await _process_audio_chunk(websocket, raw_audio, stream, recognizer, orig_sr)
    except Exception as e:
        print(f"[ERROR main] Unexpected exception: {e}")

    # Best-effort close: after a client-side disconnect the socket is already
    # gone and close() may raise, which is expected and must not propagate.
    try:
        await websocket.close()
    except Exception as e:
        print(f"[DEBUG main] close() skipped: {e}")
    print("[INFO main] WebSocket closed, cleanup complete.")
# Request model with a single required string field.
# In the visible source it is referenced only by the commented-out
# /translate endpoint, so it is currently unused at runtime.
class TranslationRequest(BaseModel):
    # Presumably the text to be translated — confirm against app.translation.
    text: str
# @app.post("/translate")
# async def translate_text(req: TranslationRequest):
# print('[Translation] Request received . . . ')
# return tranlate_ks(req.text)
|