File size: 2,692 Bytes
280ab37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80f6579
280ab37
80f6579
280ab37
 
 
 
 
 
 
 
80f6579
280ab37
 
 
 
 
 
 
 
 
 
9a35455
280ab37
80f6579
 
 
280ab37
80f6579
 
9a35455
80f6579
280ab37
9a35455
80f6579
 
 
 
9a35455
80f6579
280ab37
9a35455
280ab37
9a35455
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gradio as gr
from gtts import gTTS
import speech_recognition as sr
from difflib import SequenceMatcher
import tempfile
import os

def tts(word):
    """Synthesize English speech for ``word`` and save it as an MP3 file.

    Args:
        word: Text to be spoken by gTTS (language fixed to ``'en'``).

    Returns:
        Path of the newly written ``.mp3`` file, or ``None`` when ``word``
        is empty or contains only whitespace (nothing to synthesize).
    """
    if not word or not word.strip():
        # Nothing to speak. gTTS is expected to reject empty text, so skip
        # the synthesis instead of surfacing a low-level error to the caller.
        return None

    # Named ``speech`` so the local does not shadow this function's own name.
    speech = gTTS(text=word, lang='en')

    # tempfile.mktemp() only returns a *name*, leaving a window in which
    # another process can create that path first. NamedTemporaryFile creates
    # the file atomically; delete=False keeps it on disk after the handle is
    # closed because the path is returned for the caller to use.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
        temp_file_path = handle.name

    speech.save(temp_file_path)
    return temp_file_path

def recognize_speech_from_microphone(audio_path):
    """Transcribe the audio file at ``audio_path`` with Google recognition.

    Args:
        audio_path: Path handed to ``sr.AudioFile``.

    Returns:
        The recognized text on success. On failure a human-readable message
        is returned instead of raising: one fixed message when the speech
        could not be understood, one message embedding the error when the
        recognition request fails, and ``str(exception)`` for anything else.

    NOTE(review): failures come back as ordinary strings, so a caller can
    only tell them apart from a real transcript by inspecting the text.
    """
    engine = sr.Recognizer()
    try:
        # Load the complete recording into memory, then release the file
        # before the recognition request is made.
        with sr.AudioFile(audio_path) as clip:
            captured = engine.record(clip)
        return engine.recognize_google(captured)
    except sr.UnknownValueError:
        return "μŒμ„±μ„ 이해할 수 μ—†μ–΄μš”. λ‹€μ‹œ 말해 μ£Όμ„Έμš”! 🧐"
    except sr.RequestError as e:
        return f"μŒμ„± 인식 μ„œλΉ„μŠ€μ— μ—°κ²°ν•  수 μ—†μ–΄μš”. 😒 였λ₯˜: {e}"
    except Exception as e:
        # Any other problem is reported through its message text.
        return str(e)

def calculate_similarity(word, recognized_text):
    """Return the case-insensitive similarity of two strings as a percentage.

    Args:
        word: The target text.
        recognized_text: The text to compare against ``word``.

    Returns:
        ``SequenceMatcher.ratio()`` of the lower-cased inputs scaled to the
        range 0-100 (100 means the strings match exactly, ignoring case).
    """
    expected = word.lower()
    heard = recognized_text.lower()
    matcher = SequenceMatcher(None, expected, heard)
    return matcher.ratio() * 100

def process_audio(word, audio_path):
    """Transcribe a recording and score it against the target word.

    Args:
        word: The text the speaker was supposed to say.
        audio_path: Path of the recording to transcribe.

    Returns:
        A ``(text, score)`` tuple. ``text`` is whatever the recognizer
        returned. ``score`` is ``0.0`` when that text contains one of the
        failure markers below, otherwise the similarity percentage between
        ``word`` and the text.
    """
    transcript = recognize_speech_from_microphone(audio_path)

    # The recognizer reports failures as plain text; these substrings are
    # how such a message is told apart from a genuine transcript.
    failure_markers = ("였λ₯˜", "μ—†μ–΄μš”")
    if any(marker in transcript for marker in failure_markers):
        return transcript, 0.0

    return transcript, calculate_similarity(word, transcript)

def evaluate_pronunciation(word):
    """Return whatever ``tts`` produces for ``word``.

    Thin wrapper used as the click handler of the listen button; it simply
    forwards ``word`` to ``tts`` and hands back its result.
    """
    return tts(word)

def process_all(word, audio_path):
    """Evaluate a recording and echo its path back for playback.

    Args:
        word: The target text to compare the recording against.
        audio_path: Path of the recording.

    Returns:
        A ``(text, score, audio_path)`` tuple: the two values produced by
        ``process_audio`` followed by the unchanged ``audio_path``.
    """
    transcript, score = process_audio(word, audio_path)
    return transcript, score, audio_path

# Build the Gradio UI. The inline CSS sets the page background and font and
# styles the ``.title`` class used by the heading below.
with gr.Blocks(css="body {background-color: #FFFAF0; font-family: 'Comic Sans MS', cursive;} .title {font-size: 24px; text-align: center; color: #FF69B4;}") as demo:
    # Page heading, rendered as raw HTML so it can carry the ``title`` class.
    gr.Markdown("<h1 class='title'>🎀 μž¬λ―ΈμžˆλŠ” 발음 μ—°μŠ΅ 🎢</h1>")
    
    # First row: the text to practise and the button that requests its audio.
    with gr.Row():
        word_input = gr.Textbox(label="μ—°μŠ΅ν•  단어λ₯Ό μž…λ ₯ν•˜μ„Έμš”! ✏️")
        tts_button = gr.Button("πŸ‘‚ λ“£κΈ°")
    # Receives the file path returned by ``evaluate_pronunciation``.
    tts_audio = gr.Audio(label="πŸ”Š 원어민 발음", type="filepath", interactive=True)
    
    # Second row: the user's own audio (delivered as a file path) and the
    # button that triggers the evaluation.
    with gr.Row():
        mic_input = gr.Audio(label="πŸŽ™οΈ λ‚΄ 발음 λ…ΉμŒ", type="filepath", interactive=True)
        result_button = gr.Button("πŸ“Š ν‰κ°€ν•˜κΈ°")
    
    # Output widgets filled by ``process_all``: text, score and the recording.
    recognized_text = gr.Textbox(label="πŸ“– μΈμ‹λœ 단어")
    similarity_score = gr.Number(label="🎯 정확도 (%)")
    mic_audio_playback = gr.Audio(label="🎧 λ‚΄ 발음 λ‹€μ‹œ λ“£κΈ°", type="filepath", interactive=True)
    
    # Event wiring: each button passes the listed inputs to its handler and
    # routes the handler's return value(s) to the listed outputs, in order.
    tts_button.click(evaluate_pronunciation, inputs=word_input, outputs=tts_audio)
    result_button.click(process_all, inputs=[word_input, mic_input], outputs=[recognized_text, similarity_score, mic_audio_playback])

# Launched unconditionally at module level (there is no ``__main__`` guard),
# so importing this file also starts the app.
demo.launch()