File size: 11,598 Bytes
f85af8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# espeak.py
import os
import json
import time
import gradio as gr
import speech_recognition as sr
import pyttsx3
import threading

from typing import Tuple

# Import OpenAI only when OPENAI_API_KEY is set; otherwise the app uses the local model fallback
USE_OPENAI = bool(os.getenv("OPENAI_API_KEY", "").strip())
if USE_OPENAI:
    import openai

# Local model fallback (T5-based)
try:
    import torch
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
    from happytransformer import HappyTextToText, TTSettings
    LOCAL_MODEL_AVAILABLE = True
except Exception:
    LOCAL_MODEL_AVAILABLE = False

# Optional: Levenshtein for better scoring
try:
    import Levenshtein
    _have_lev = True
except Exception:
    _have_lev = False

# Page title passed to gr.Blocks; the dash is a real em dash (it was previously
# stored as the mis-decoded sequence of an em dash's UTF-8 bytes).
APP_TITLE = "ESPeak — AI Grammar & Speech Assistant"

# ----------------------
# Utilities
# ----------------------
def levenshtein_distance(a: str, b: str) -> int:
    """Return the edit distance (insert/delete/substitute) between two strings.

    Delegates to the ``Levenshtein`` package when it was importable; otherwise
    uses a pure-Python dynamic programme, which is fine for short sentences.
    """
    if _have_lev:
        return Levenshtein.distance(a, b)

    # Turning an empty string into the other one costs one insert per character.
    if not a:
        return len(b)
    if not b:
        return len(a)

    # previous[j] holds the distance between the processed prefix of `a`
    # and the first j characters of `b`.
    previous = list(range(len(b) + 1))
    for row, char_a in enumerate(a, start=1):
        current = [row]
        for col, char_b in enumerate(b, start=1):
            deletion = previous[col] + 1
            insertion = current[col - 1] + 1
            substitution = previous[col - 1] + (0 if char_a == char_b else 1)
            current.append(min(deletion, insertion, substitution))
        previous = current
    return previous[-1]

def score_from_edit(orig: str, corrected: str) -> int:
    """Map the edit distance between ``orig`` and ``corrected`` to a 0-100 score.

    Fewer edits relative to the length of ``orig`` give a higher score.
    Blank or whitespace-only input always scores 0.
    """
    if orig.strip() == "":
        return 0

    edits = levenshtein_distance(orig, corrected)
    # Fraction of the original left untouched, relative to its length.
    similarity = 1.0 - edits / max(len(orig), 1)
    if similarity < 0.0:
        # More edits than characters: floor at zero rather than going negative.
        similarity = 0.0
    return int(round(similarity * 100))

# ----------------------
# Model loading
# ----------------------
# Handles for the local grammar model; they stay None unless the loader below runs.
tokenizer = model = happy_tt = None
# The local model is loaded at import time, and only when no OpenAI key is
# configured and the optional local-model imports succeeded.
if not USE_OPENAI and LOCAL_MODEL_AVAILABLE:
    def load_local_models():
        """Populate the module-level ``tokenizer``, ``model`` and ``happy_tt``.

        The same checkpoint is loaded twice: once through transformers
        (tokenizer + seq2seq model) and once wrapped in ``HappyTextToText``.
        Any failure is printed and then re-raised to the caller.
        """
        global tokenizer, model, happy_tt
        model_name = "prithivida/grammar_error_correcter_v1"
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
            happy_tt = HappyTextToText("T5", model_name)
        except Exception as e:
            print("Local model load failed:", e)
            # Re-raise: this call runs at module import, so the error propagates from there.
            raise
    load_local_models()

# ----------------------
# Speech transcription helper (speech_recognition)
# ----------------------
def transcribe_audio_file(audio_filepath: str) -> str:
    """Transcribe the audio file at ``audio_filepath`` to text.

    Returns the recognised text, an empty string when the recogniser could
    not understand the audio, or a string starting with
    ``[transcription_error]`` describing any other failure (callers check
    for that prefix instead of catching an exception).
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_filepath) as clip:
            recording = recognizer.record(clip)
        return recognizer.recognize_google(recording)
    except sr.UnknownValueError:
        # Speech was unintelligible: report "nothing heard", not an error.
        return ""
    except Exception as err:
        return "[transcription_error]: " + str(err)

# ----------------------
# LLM connectors
# ----------------------
# System prompt: fixes the assistant's role and the JSON-only reply contract.
# The dash is a real em dash (it was previously stored as mis-decoded bytes,
# which were sent verbatim to the model).
OPENAI_PROMPT_SYSTEM = (
    "You are ESPeak Assistant — expert grammar corrector. "
    "Return JSON only with keys: corrected_text (string), score (0-100 integer), explanation (short string)."
)

# User prompt template; ``{input_text}`` is filled with the sentence to correct.
OPENAI_USER_TEMPLATE = (
    "Correct this sentence for grammar, punctuation, and clarity while preserving tone:\n\n"
    "### INPUT\n{input_text}\n\n"
    "Return only JSON with corrected_text, score, and explanation."
)

_openai_chat_model = None  # cached result of _select_openai_model()


def _select_openai_model() -> str:
    """Return "gpt-4o-mini" when the account lists it, otherwise "gpt-4".

    The model listing is fetched at most once per process on success; a failed
    listing is not cached so a later request can retry it.
    """
    global _openai_chat_model
    if _openai_chat_model is not None:
        return _openai_chat_model
    try:
        # The listing response wraps the models in a "data" list; testing
        # membership on the response itself would only inspect its top-level keys.
        available = {entry["id"] for entry in openai.Model.list()["data"]}
    except Exception:
        return "gpt-4"
    _openai_chat_model = "gpt-4o-mini" if "gpt-4o-mini" in available else "gpt-4"
    return _openai_chat_model


def _parse_correction_payload(content: str):
    """Return the JSON object contained in ``content``, or None if there is none.

    Tries the whole reply first, then the outermost ``{...}`` substring so that
    replies wrapped in prose or code fences are still understood.
    """
    candidates = [content]
    start, end = content.find("{"), content.rfind("}")
    if 0 <= start < end:
        candidates.append(content[start:end + 1])
    for candidate in candidates:
        try:
            payload = json.loads(candidate)
        except ValueError:
            continue
        if isinstance(payload, dict):
            return payload
    return None


def _coerce_score(value, fallback: int) -> int:
    """Convert a model-supplied score to an int in [0, 100], else ``fallback``."""
    try:
        score = int(round(float(value)))
    except (TypeError, ValueError, OverflowError):
        return fallback
    return max(0, min(100, score))


def call_openai_correct(text: str) -> Tuple[str, int, str]:
    """Ask the OpenAI chat API to correct ``text``.

    Args:
        text: The sentence to correct.

    Returns:
        ``(corrected_text, score, explanation)``. When the reply contains a
        JSON object, its fields are used; a missing or unusable ``score`` is
        replaced by the edit-distance score. When no JSON object can be found,
        the raw reply is returned as the correction.

    Raises:
        Whatever the OpenAI client raises for request failures; the caller is
        expected to handle those.
    """
    # NOTE(review): this uses the module-level ChatCompletion/Model interface of
    # the pre-1.0 `openai` package — confirm the pinned version.
    messages = [
        {"role": "system", "content": OPENAI_PROMPT_SYSTEM},
        {"role": "user", "content": OPENAI_USER_TEMPLATE.format(input_text=text)},
    ]
    resp = openai.ChatCompletion.create(
        model=_select_openai_model(),
        messages=messages,
        temperature=0.0,
        max_tokens=300,
    )
    content = resp["choices"][0]["message"]["content"].strip()

    payload = _parse_correction_payload(content)
    if payload is None:
        # Last resort: treat the whole reply as the corrected sentence.
        return (
            content,
            score_from_edit(text, content),
            "Auto-correction from OpenAI; parsing fallback used.",
        )

    corrected = payload.get("corrected_text")
    corrected = "" if corrected is None else str(corrected)
    # A bad score must not discard an otherwise valid reply, so fall back to
    # the locally computed score instead of abandoning the parsed JSON.
    score = _coerce_score(payload.get("score"), score_from_edit(text, corrected))
    explanation = payload.get("explanation")
    explanation = "" if explanation is None else str(explanation)
    return corrected, score, explanation

def call_local_correct(text: str) -> Tuple[str, int, str]:
    """Correct ``text`` with the locally loaded grammar-correction model.

    HappyTransformer is tried first because its output is the one returned
    whenever it is non-empty; plain transformers generation is only run as a
    fallback, which avoids generating twice per request.

    Args:
        text: The sentence to correct.

    Returns:
        ``(corrected_text, score, explanation)`` where ``score`` is the
        edit-distance score of the correction against ``text``.

    Raises:
        RuntimeError: If neither generator produced any output (for example
            because the local models were never loaded). Raising lets the
            caller report a model error instead of presenting the unchanged
            input as a perfect sentence.
    """
    prompt = "gec: " + text  # task prefix used when prompting this checkpoint
    corrected = ""
    failures = []

    if happy_tt is not None:
        try:
            # max_length matches the transformers fallback below so longer
            # sentences are not cut short by the library default.
            settings = TTSettings(num_beams=4, min_length=1, max_length=256)
            corrected = happy_tt.generate_text(prompt, args=settings).text or ""
        except Exception as exc:
            failures.append(f"happytransformer: {exc}")

    if not corrected and tokenizer is not None and model is not None:
        try:
            inputs = tokenizer.encode(prompt, return_tensors="pt", max_length=256, truncation=True)
            with torch.no_grad():
                outputs = model.generate(inputs, max_length=256, num_beams=4)
            corrected = tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as exc:
            failures.append(f"transformers: {exc}")

    if not corrected:
        detail = "; ".join(failures) if failures else "local models are not loaded"
        raise RuntimeError(f"Local grammar model produced no output ({detail})")

    score = score_from_edit(text, corrected)
    # Basic explanation: only states whether anything changed.
    if text.strip() == corrected.strip():
        explanation = "No change needed."
    else:
        explanation = "Adjusted grammar/punctuation; minor wording edits to improve clarity."
    return corrected, score, explanation

# ----------------------
# Main processing function
# ----------------------
def _resolve_source_text(audio, typed_text):
    """Return ``(source_text, note)``: the text to correct and where it came from."""
    typed = typed_text or ""
    if not audio:
        return typed, "Typed input"

    transcribed = transcribe_audio_file(audio)
    if transcribed.startswith("[transcription_error]"):
        # Transcription failed: surface the error and fall back to typed text.
        return typed, transcribed
    if not transcribed.strip():
        # Nothing intelligible was heard; do not discard text the user typed.
        if typed.strip():
            return typed, "No speech recognized; used typed input"
        return "", "No speech recognized in the recording"
    return transcribed, f"Transcribed: {transcribed}"


def _speak_in_background(text):
    """Read ``text`` aloud with pyttsx3 on a daemon thread."""
    def _worker():
        # Failures happen inside the thread, so they must be handled here;
        # a try/except around Thread.start() would never see them.
        try:
            engine = pyttsx3.init()
            engine.say(text)
            engine.runAndWait()
        except Exception as exc:
            print("TTS playback failed:", exc)

    threading.Thread(target=_worker, daemon=True).start()


def process_input(audio, typed_text, use_tts=False, prefer_openai=False):
    """
    Correct spoken or typed input and return the values shown in the UI.

    audio: filepath from Gradio (or None)
    typed_text: str
    use_tts: bool -> read corrected text with local pyttsx3
    prefer_openai: triage flag to prefer OpenAI (if key available)

    Returns a 5-tuple: (corrected text, score, explanation, info line,
    JSON metadata string). Backend failures are reported in the explanation
    with a score of 0 rather than raised.
    """
    # 1) Transcribe audio if present, falling back to typed text
    source_text, trans_msg = _resolve_source_text(audio, typed_text)

    if not source_text.strip():
        return "No input detected.", 0, "No correction (empty input).", trans_msg, json.dumps({})

    # 2) Choose backend. OpenAI needs a key; the local backend is only an
    # option when its models were actually loaded. With both usable the
    # caller's preference decides; with only one usable that one is taken.
    local_ready = LOCAL_MODEL_AVAILABLE and (happy_tt is not None or tokenizer is not None)
    use_openai_backend = USE_OPENAI and (bool(prefer_openai) or not local_ready)

    try:
        if use_openai_backend:
            corrected, score, explanation = call_openai_correct(source_text)
        else:
            corrected, score, explanation = call_local_correct(source_text)
    except Exception as e:
        # Keep the UI responsive: echo the input and report the failure.
        corrected = source_text
        score = 0
        explanation = f"Model error: {e}"

    # 3) Optionally speak corrected text (pyttsx3)
    tts_msg = ""
    if use_tts:
        try:
            _speak_in_background(corrected)
            tts_msg = "Speaking corrected text..."
        except Exception as e:
            # Only reached if the thread itself could not be started.
            tts_msg = f"TTS failed: {e}"

    # 4) Build JSON metadata
    meta = {
        "original": source_text,
        "corrected": corrected,
        "score": score,
        "explanation": explanation,
        "backend": "openai" if use_openai_backend else "local",
        "transcription_note": trans_msg,
        "timestamp": int(time.time())
    }

    # NOTE(review): the separator below looks like a mis-encoded punctuation
    # character; it is kept as-is until the intended character is confirmed.
    info_line = trans_msg + (" β€’ " + tts_msg if tts_msg else "")
    return corrected, score, explanation, info_line, json.dumps(meta, ensure_ascii=False, indent=2)

# ----------------------
# Gradio UI
# ----------------------
def build_ui():
    """Build and return the Gradio Blocks app (not yet launched).

    Layout: a header row, an input column (microphone recording, text box,
    TTS / backend checkboxes, submit button) and an output column (corrected
    text, score, explanation, info line, JSON metadata). Clicking the button
    runs ``process_input`` on the four inputs and fills the five outputs.
    """
    with gr.Blocks(title=APP_TITLE, css="""
        .header {background: linear-gradient(90deg,#ff8fa3,#ff6aa3); padding: 18px; border-radius: 12px; color:white}
        .muted {color: #6b7280}
    """) as demo:
        # Header
        with gr.Row(elem_id="top-row"):
            with gr.Column(scale=3):
                # Reuse the page title so the header and the tab title cannot drift apart.
                gr.Markdown(f"## <div class='header'>{APP_TITLE}</div>")
                gr.Markdown("Speak or type a sentence — ESPeak will correct grammar, score it, and explain changes. Use OpenAI backend if you set `OPENAI_API_KEY` in environment.")
            with gr.Column(scale=1):
                gr.Markdown("**Quick tips**\n- Speak clearly (short sentences work best)\n- Toggle TTS to hear the corrected sentence\n- Use `Prefer OpenAI` to route to ChatGPT if available")
        gr.Markdown("---")

        with gr.Row():
            with gr.Column(scale=1):
                audio = gr.Audio(sources="microphone", type="filepath", label="Record (microphone)")
                typed = gr.Textbox(lines=3, placeholder="Or type your sentence here...", label="Text input")
                with gr.Row():
                    tts_checkbox = gr.Checkbox(label="Play corrected (TTS)", value=False)
                    prefer_openai = gr.Checkbox(label="Prefer OpenAI backend (if available)", value=True)
                run_btn = gr.Button("Check Grammar", variant="primary")
            with gr.Column(scale=2):
                corrected_out = gr.Textbox(label="Corrected Text", interactive=False)
                score_out = gr.Number(label="Grammar Score (0-100)", interactive=False)
                explanation_out = gr.Textbox(label="Explanation (what I changed)", interactive=False)
                trans_note = gr.Textbox(label="Transcription / Info", interactive=False)
                meta_out = gr.Code(label="JSON metadata (copyable)", language="json")

        # The input order matches process_input's positional parameters:
        # (audio, typed_text, use_tts, prefer_openai).
        run_btn.click(
            process_input,
            inputs=[audio, typed, tts_checkbox, prefer_openai],
            outputs=[corrected_out, score_out, explanation_out, trans_note, meta_out],
        )

        gr.Markdown("---")
        gr.Markdown("**ESPeak** · Built for quick grammar checking of spoken and typed English. Designed for demos and interview projects.")
    return demo

if __name__ == "__main__":
    # Script entry point: build the Blocks app and launch it with sharing
    # disabled and the in-browser option enabled.
    demo = build_ui()
    demo.launch(share=False, inbrowser=True)