Spaces:
Sleeping
Sleeping
| import torch | |
| import torchaudio | |
| import gradio as gr | |
| import torch.nn.functional as F | |
| from transformers import WavLMForXVector, Wav2Vec2FeatureExtractor | |
# Model setup: run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The feature extractor and the x-vector model are loaded from the same
# checkpoint; only the model is moved to the selected device.
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("microsoft/wavlm-base-sv")
model = WavLMForXVector.from_pretrained("microsoft/wavlm-base-sv").to(device)
# Audio loading helper
def preprocess(audio):
    """Load an audio file as a mono waveform sampled at 16 kHz.

    Args:
        audio: Path of the audio file to load, or ``None`` when no file
            was supplied.

    Returns:
        A 1-D tensor of samples at 16 kHz, or ``None`` if ``audio`` is
        ``None``.
    """
    if audio is None:
        return None

    waveform, sr = torchaudio.load(audio)

    # torchaudio.load yields a (channels, frames) tensor. Averaging over the
    # channel axis always produces a 1-D signal; a plain squeeze(0) would
    # leave multi-channel (e.g. stereo) recordings 2-D, and they would then
    # be mistaken for a batch of separate utterances downstream.
    waveform = waveform.mean(dim=0)

    if sr != 16000:
        waveform = torchaudio.functional.resample(waveform, sr, 16000)
    return waveform
# Compute an L2-normalized embedding
def get_embedding(waveform):
    """Return the L2-normalized model embedding for a waveform.

    Args:
        waveform: CPU tensor of audio samples; it is converted with
            ``.numpy()`` and passed to the feature extractor as 16 kHz
            audio.

    Returns:
        The model's ``embeddings`` output, normalized to unit L2 norm along
        dimension 1 (presumably shaped ``(batch, features)`` - confirm
        against the model documentation).
    """
    encoded = feature_extractor(
        waveform.numpy(),
        sampling_rate=16000,
        return_tensors="pt",
        padding=True,
    )

    # Move every extracted tensor to the device the model lives on.
    on_device = {}
    for name, tensor in encoded.items():
        on_device[name] = tensor.to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(**on_device)
        raw_embedding = output.embeddings

    return F.normalize(raw_embedding, p=2, dim=1)
# Main handler called by the UI
def compare_audio(native_audio, user_audio):
    """Score how closely the user's recording matches the native recording.

    Args:
        native_audio: File path of the reference recording, or ``None``.
        user_audio: File path of the user's imitation, or ``None``.

    Returns:
        A two-line message holding the similarity score (0-100) and a
        feedback sentence, or a prompt asking for both recordings when
        either one is missing.
    """
    missing_message = "請上傳兩段語音"

    # Reject missing uploads before decoding anything, so one file is not
    # loaded needlessly when the other is absent.
    if native_audio is None or user_audio is None:
        return missing_message

    native_wav = preprocess(native_audio)
    user_wav = preprocess(user_audio)
    # preprocess signals "no audio" with None; keep honoring that contract.
    if native_wav is None or user_wav is None:
        return missing_message

    emb1 = get_embedding(native_wav)
    emb2 = get_embedding(user_wav)
    similarity = F.cosine_similarity(emb1, emb2).item()

    # Cosine similarity lies in [-1, 1]; clamp it so the reported score
    # stays within the advertised 0-100 range instead of going negative.
    score = round(min(max(similarity, 0.0), 1.0) * 100, 2)

    # Feedback bands
    if score > 90:
        feedback = "非常接近!你模仿得很好 👏"
    elif score > 75:
        feedback = "不錯,再接再厲 👍"
    elif score > 60:
        feedback = "有些相似,但還有改進空間 🙂"
    else:
        feedback = "相似度不高,請再試一次 😅"
    return f"相似度分數:{score}/100\n{feedback}"
# Gradio UI: two audio uploads (passed to the handler as file paths) in,
# one text result out.
title = "🎤 語音模仿評分器"
description = "上傳 native speaker 的語音,以及你模仿的語音,系統會幫你評分你的發音相似度。"

# Order matters: the first component feeds `native_audio`, the second
# feeds `user_audio`.
_audio_inputs = [
    gr.Audio(type="filepath", label="📢 Native Speaker 語音"),
    gr.Audio(type="filepath", label="🗣️ 你的模仿錄音"),
]

demo = gr.Interface(
    fn=compare_audio,
    inputs=_audio_inputs,
    outputs="text",
    title=title,
    description=description,
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()