File size: 4,612 Bytes
e43b7f7
 
87359f8
f1801f1
e43b7f7
85dee70
e43b7f7
 
 
 
 
 
 
5ac8e9f
e43b7f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85dee70
e43b7f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85dee70
 
 
 
 
 
f1801f1
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from fastapi import FastAPI, WebSocket
from fastapi.responses import HTMLResponse
from app.asr import create_recognizer, stream_audio
# from app.translation import tranlate_ks
import json
from pydantic import BaseModel

app = FastAPI()

# app.mount("/static", StaticFiles(directory="app/static"), name="static")

@app.get("/")
async def root():
    """Serve the client page stored at ``app/index.html``.

    Returns:
        HTMLResponse: the full contents of the HTML file.

    Raises:
        OSError: if the file cannot be opened (path is relative to the
            process working directory).
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so non-ASCII characters in the page survive on every host.
    with open("app/index.html", encoding="utf-8") as f:
        return HTMLResponse(f.read())


async def _process_audio_chunk(
    websocket: WebSocket,
    raw_audio: bytes,
    stream,
    recognizer,
    orig_sr: int,
) -> None:
    """Run one audio chunk through the recognizer and report to the client.

    Always sends ``{"partial": <text>, "volume": <rms clamped to 1.0>}``.
    When the recognizer reports an endpoint, additionally sends
    ``{"final": <text>}`` (only if the text is not blank) and resets the
    stream so the next utterance starts fresh.
    """
    result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)

    # 1) send the interim result together with the clamped volume
    await websocket.send_json({"partial": result, "volume": min(rms, 1.0)})

    # 2) on endpoint: emit the final text (if any) and reset the stream
    if recognizer.is_endpoint(stream):
        if result.strip():
            print(f"[DEBUG main] Emitting final: {result!r}")
            await websocket.send_json({"final": result})
        recognizer.reset(stream)


@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Streaming speech-recognition WebSocket endpoint.

    Protocol as implemented here:

    * The client first sends a JSON text frame
      ``{"type": "config", "sampleRate": <int>}``. This creates the
      recognizer and its stream; ``sampleRate`` falls back to 48000 when
      absent.
    * Every following binary frame is treated as an audio chunk and is
      answered with a ``partial``/``volume`` message, plus a ``final``
      message whenever the recognizer detects an endpoint.
    * Binary frames that arrive before a config message are ignored.

    The loop ends when the client disconnects or when an unexpected error
    occurs; in the latter case the socket is closed on a best-effort basis.
    """
    print("[DEBUG main] ▶ Attempting to accept WebSocket…")
    await websocket.accept()
    print("[DEBUG main] ▶ WebSocket.accept() returned → client is connected!")

    recognizer = None
    stream = None
    orig_sr = 48000  # default fallback

    try:
        while True:
            data = await websocket.receive()
            kind = data.get("type")

            # A disconnect must end the loop: calling receive() again after
            # it makes Starlette raise RuntimeError, which would otherwise be
            # reported as an "unexpected" failure.
            if kind == "websocket.disconnect":
                print(f"[DEBUG main] Received control/frame: {data}")
                break

            if kind not in ("websocket.receive", "websocket.receive_bytes"):
                print(f"[DEBUG main] Received control/frame: {data}")
                continue

            # ASGI allows both "text" and "bytes" keys to be present with the
            # unused one set to None, so test the value rather than the key.
            text = data.get("text")
            if text is not None:
                try:
                    config_msg = json.loads(text)
                except json.JSONDecodeError as e:
                    print(f"[ERROR main] JSON parse failed: {e}")
                    continue

                # Only JSON objects of type "config" are meaningful here.
                if isinstance(config_msg, dict) and config_msg.get("type") == "config":
                    # 1) sample rate (keep the current value if not supplied)
                    try:
                        orig_sr = int(config_msg.get("sampleRate", orig_sr))
                    except (TypeError, ValueError):
                        print(
                            "[ERROR main] Invalid sampleRate in config: "
                            f"{config_msg.get('sampleRate')!r}"
                        )
                        continue
                    print(f"[INFO main] Set original sample rate to {orig_sr}")

                    # 2) create recognizer and its streaming context
                    recognizer = create_recognizer()
                    stream = recognizer.create_stream()
                    print("[INFO main] WebSocket connection accepted; created a streaming context.")
                continue

            # Don't process audio until after config
            raw_audio = data.get("bytes")
            if raw_audio is None or recognizer is None or stream is None:
                continue

            await _process_audio_chunk(websocket, raw_audio, stream, recognizer, orig_sr)
    except Exception as e:
        print(f"[ERROR main] Unexpected exception: {e}")
        try:
            await websocket.close()
        except Exception as close_err:
            # Best effort only: the socket may already be closed. Catching
            # Exception (not a bare except) lets task cancellation propagate.
            print(f"[DEBUG main] WebSocket close skipped: {close_err}")

    print("[INFO main] WebSocket closed, cleanup complete.")


# Pydantic model with a single required string field. In the visible part of
# this file it is referenced only by the commented-out /translate endpoint.
class TranslationRequest(BaseModel):
    text: str  # presumably the text to be translated — inferred from the name; confirm


# @app.post("/translate")
# async def translate_text(req: TranslationRequest):
#     print('[Translation] Request recieved . . . ')
#     return tranlate_ks(req.text)