Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -10,14 +10,18 @@ model = Wav2Vec2ForCTC.from_pretrained("nguyenvulebinh/wav2vec2-base-vietnamese-
|
|
10 |
def transcribe(audio):
|
11 |
if audio is None:
|
12 |
return "Không có âm thanh."
|
13 |
-
|
14 |
-
# Gradio trả về (sample_rate, numpy_array)
|
15 |
sample_rate, audio_data = audio
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
#
|
20 |
-
inputs = processor(audio_data, sampling_rate=
|
21 |
with torch.no_grad():
|
22 |
logits = model(**inputs).logits
|
23 |
predicted_ids = torch.argmax(logits, dim=-1)
|
|
|
10 |
def transcribe(audio):
|
11 |
if audio is None:
|
12 |
return "Không có âm thanh."
|
13 |
+
|
|
|
14 |
sample_rate, audio_data = audio
|
15 |
+
target_rate = 16000
|
16 |
+
|
17 |
+
# Nếu sample rate khác 16kHz thì chuyển về
|
18 |
+
if sample_rate != target_rate:
|
19 |
+
duration = len(audio_data) / sample_rate
|
20 |
+
new_length = int(duration * target_rate)
|
21 |
+
audio_data = resample(audio_data, new_length)
|
22 |
|
23 |
+
# Dự đoán
|
24 |
+
inputs = processor(audio_data, sampling_rate=target_rate, return_tensors="pt", padding=True)
|
25 |
with torch.no_grad():
|
26 |
logits = model(**inputs).logits
|
27 |
predicted_ids = torch.argmax(logits, dim=-1)
|