Spaces:
Sleeping
Sleeping
File size: 2,692 Bytes
280ab37 80f6579 280ab37 80f6579 280ab37 80f6579 280ab37 9a35455 280ab37 80f6579 280ab37 80f6579 9a35455 80f6579 280ab37 9a35455 80f6579 9a35455 80f6579 280ab37 9a35455 280ab37 9a35455 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import gradio as gr
from gtts import gTTS
import speech_recognition as sr
from difflib import SequenceMatcher
import tempfile
import os
def tts(word):
    """Synthesize *word* as English speech and return the path of the MP3 file.

    Args:
        word: Text to speak; passed straight to gTTS with ``lang='en'``.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is not
        deleted here; the caller owns it.

    Raises:
        Whatever gTTS raises while building or saving the audio. The
        temporary file is removed before the exception propagates.
    """
    speech = gTTS(text=word, lang='en')
    # mkstemp creates the file atomically. tempfile.mktemp only returns a
    # name, is deprecated, and lets another process claim the path first.
    fd, temp_file_path = tempfile.mkstemp(suffix=".mp3")
    # gTTS reopens the path itself, so release our descriptor first.
    os.close(fd)
    try:
        speech.save(temp_file_path)
    except Exception:
        # Do not leave an empty temp file behind when synthesis fails.
        os.remove(temp_file_path)
        raise
    return temp_file_path
def recognize_speech_from_microphone(audio_path):
    """Transcribe the audio file at *audio_path* via ``recognize_google``.

    Failures are reported in-band: instead of raising, the function returns
    a message string. Every failure message contains one of the marker
    substrings that ``process_audio`` searches for, so a failure is never
    mistaken for a transcript.

    Args:
        audio_path: Path of the recorded audio file, opened with
            ``sr.AudioFile``.

    Returns:
        The recognized text on success, otherwise a failure message.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # The recognizer could not make sense of the audio.
        return "μμ±μ μ΄ν΄ν μ μμ΄μ. λ€μ λ§ν΄ μ£ΌμΈμ! π§"
    except sr.RequestError as e:
        # The request to the recognition service failed.
        return f"μμ± μΈμ μλΉμ€μ μ°κ²°ν μ μμ΄μ. π’ μ€λ₯: {e}"
    except Exception as e:
        # Any other failure (e.g. no recording, unreadable file). Prefix the
        # same error marker used above; returning the bare exception text
        # would be treated as a transcript and given a similarity score.
        return f"μ€λ₯: {e}"
def calculate_similarity(word, recognized_text):
    """Return how closely *recognized_text* matches *word*, as a percentage.

    Both strings are lower-cased and have their whitespace normalised
    (leading/trailing removed, internal runs collapsed) before comparison,
    so stray spaces typed into the textbox do not lower the score.

    Args:
        word: The target word or phrase.
        recognized_text: The transcript to compare against it.

    Returns:
        ``SequenceMatcher.ratio()`` of the normalised strings times 100,
        i.e. a float between 0.0 and 100.0.
    """
    def _normalize(text):
        # split()/join drops surrounding whitespace and collapses inner runs.
        return " ".join(text.lower().split())

    matcher = SequenceMatcher(None, _normalize(word), _normalize(recognized_text))
    return matcher.ratio() * 100
def process_audio(word, audio_path):
    """Transcribe *audio_path* and score the transcript against *word*.

    Returns:
        A ``(text, similarity)`` pair. When the text returned by
        ``recognize_speech_from_microphone`` contains one of the failure
        marker substrings, it is passed through unchanged with a score of
        ``0.0``; otherwise the score comes from ``calculate_similarity``.
    """
    transcript = recognize_speech_from_microphone(audio_path)
    # Substrings that appear in the recognizer's failure messages.
    failure_markers = ("μ€λ₯", "μμ΄μ")
    if any(marker in transcript for marker in failure_markers):
        return transcript, 0.0
    return transcript, calculate_similarity(word, transcript)
def evaluate_pronunciation(word):
    """Return the path of a spoken rendition of *word*, or ``None``.

    Args:
        word: Text from the word textbox.

    Returns:
        The file path returned by ``tts(word)``, or ``None`` when *word* is
        ``None``, empty, or only whitespace, so that blank input yields no
        audio instead of an exception from the speech synthesizer.
    """
    if not word or not word.strip():
        return None
    return tts(word)
def process_all(word, audio_path):
    """Score the recording and echo its path back for playback.

    Returns:
        ``(recognized_text, similarity, audio_path)``: the pair produced by
        ``process_audio`` followed by the unchanged input path.
    """
    outcome = process_audio(word, audio_path)
    transcript, score = outcome
    return transcript, score, audio_path
# Build the Gradio interface: a Blocks layout with custom CSS, bound to `demo`.
# NOTE(review): indentation of the nested `with` bodies is not visible in this
# copy of the file — confirm the nesting against the original.
with gr.Blocks(css="body {background-color: #FFFAF0; font-family: 'Comic Sans MS', cursive;} .title {font-size: 24px; text-align: center; color: #FF69B4;}") as demo:
# Page heading, styled through the `.title` CSS class defined above.
gr.Markdown("<h1 class='title'>π€ μ¬λ―Έμλ λ°μ μ°μ΅ πΆ</h1>")
# First row: word textbox, a button, and an audio component (type="filepath").
with gr.Row():
# NOTE(review): the label literal below is split across two lines in this
# copy — likely an extraction artifact; confirm against the original file.
word_input = gr.Textbox(label="μ°μ΅ν λ¨μ΄λ₯Ό μ
λ ₯νμΈμ! βοΈ")
tts_button = gr.Button("π λ£κΈ°")
tts_audio = gr.Audio(label="π μμ΄λ―Ό λ°μ", type="filepath", interactive=True)
# Second row: audio input (type="filepath") and the button that triggers scoring.
with gr.Row():
# NOTE(review): this label literal is also split across two lines here —
# confirm against the original file.
mic_input = gr.Audio(label="ποΈ λ΄ λ°μ λ
Ήμ", type="filepath", interactive=True)
result_button = gr.Button("π νκ°νκΈ°")
# Output widgets filled by process_all: text, numeric score, and audio.
recognized_text = gr.Textbox(label="π μΈμλ λ¨μ΄")
similarity_score = gr.Number(label="π― μ νλ (%)")
mic_audio_playback = gr.Audio(label="π§ λ΄ λ°μ λ€μ λ£κΈ°", type="filepath", interactive=True)
# tts_button: pass the textbox value to evaluate_pronunciation and route its
# return value to tts_audio.
tts_button.click(evaluate_pronunciation, inputs=word_input, outputs=tts_audio)
# result_button: pass the word and the mic audio to process_all; its three
# return values fill recognized_text, similarity_score and mic_audio_playback.
result_button.click(process_all, inputs=[word_input, mic_input], outputs=[recognized_text, similarity_score, mic_audio_playback])
# Start the app.
# NOTE(review): the trailing `|` on the next line looks like extraction
# residue rather than code — confirm.
demo.launch() |