from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
import torchaudio
import torch
# Load the processor and the Arabic model
processor = Wav2Vec2Processor.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-arabic")
model = Wav2Vec2ForCTC.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-arabic")
def speech_to_text(audio_path):
    if audio_path is None:
        raise ValueError("Audio not found")
    # Load the audio file
    waveform, sample_rate = torchaudio.load(audio_path)
    # If the audio is stereo, convert it to mono
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0).unsqueeze(0)
    # Resample to 16000 Hz if the sample rate is different
    if sample_rate != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        waveform = resampler(waveform)
    # Prepare the input for the model
    input_values = processor(waveform.squeeze().numpy(), return_tensors="pt", sampling_rate=16000).input_values
    # Pass the data through the model and get the results
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    # Convert the prediction to text
    transcription = processor.batch_decode(predicted_ids)
    return transcription[0]
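

# Example usage: a minimal sketch of calling the function on a local recording.
# "sample.wav" is a placeholder path and not part of the original file.
if __name__ == "__main__":
    print(speech_to_text("sample.wav"))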