| | import librosa, joblib, numpy as np, gradio as gr |
| | from scipy.interpolate import interp1d |
| | from pyAudioAnalysis import ShortTermFeatures |
| | from pydub.silence import detect_nonsilent |
| | from pydub import AudioSegment |
| |
|
| |
|
def smart_resize(arr, target_size):
    """Linearly resample a (1, N) feature array to shape (1, target_size).

    Parameters
    ----------
    arr : np.ndarray
        Array of shape (1, N) (anything squeezable to 1-D) holding features.
    target_size : int
        Desired number of columns in the output.

    Returns
    -------
    np.ndarray
        Array of shape (1, target_size) with linearly interpolated values.
    """
    current_size = arr.shape[1]

    # Sample positions of the existing values and of the requested grid.
    # Both grids span [0, current_size - 1], so no extrapolation can occur.
    current_idx = np.linspace(0, current_size - 1, current_size)
    target_idx = np.linspace(0, current_size - 1, target_size)

    # np.interp performs the same piecewise-linear interpolation as the
    # legacy scipy.interpolate.interp1d (which SciPy discourages for new
    # code), with no need for a fill_value workaround.
    resized_arr = np.interp(target_idx, current_idx, arr.squeeze())

    return resized_arr.reshape(1, target_size)
| |
|
def remove_silence(wav_file):
    """Trim leading/trailing silence from *wav_file* and save as 'audio.wav'.

    The clip is scanned for non-silent stretches (louder than -30 dBFS for
    at least 5 ms); everything before the first and after the last such
    stretch is dropped.  The result is always written to the hard-coded
    path 'audio.wav' in the current working directory, which the caller
    is expected to read back.
    """
    segment = AudioSegment.from_wav(wav_file)
    voiced = detect_nonsilent(segment, min_silence_len=5, silence_thresh=-30)

    if voiced:
        # Keep the span from the first voiced millisecond to the last.
        trimmed = segment[voiced[0][0]:voiced[-1][1]]
    else:
        # Nothing rose above the threshold: keep the clip untouched.
        trimmed = segment

    trimmed.export('audio.wav', format="wav")
| |
|
def transform_data(audio):
    """Turn a recorded audio file into a (1, 20) feature row.

    Silence is stripped first (remove_silence writes 'audio.wav' to the
    working directory), the trimmed clip is loaded with librosa, short-term
    features are extracted with a 50 ms window and 25 ms step, and the
    flattened feature matrix is resampled down to 20 values.
    """
    remove_silence(audio)
    signal, sample_rate = librosa.load('audio.wav')

    # 50 ms analysis window, 25 ms hop, both expressed in samples.
    features, _names = ShortTermFeatures.feature_extraction(
        signal, sample_rate, 0.050 * sample_rate, 0.025 * sample_rate)

    # Flatten all frames into one row, then shrink to a fixed length of 20.
    return smart_resize(features.reshape(1, -1), 20)
| |
|
def predict(newdf, loaded_model):
    """Run *loaded_model* on the single-row feature matrix *newdf*.

    Returns
    -------
    tuple
        (predicted class array, probability vector for the first sample).
    """
    predicted_class = loaded_model.predict(newdf)
    class_probabilities = loaded_model.predict_proba(newdf)

    # Only one sample is ever scored, so return just its probability row.
    return predicted_class, class_probabilities[0]
| | |
def get_label(newpred):
    """Map the numeric class to its Spanish label: 0 -> 'No', anything else -> 'Si'."""
    return 'No' if newpred == 0 else 'Si'
| |
|
def load_model():
    """Deserialize and return the pre-trained classifier.

    NOTE(review): the path is relative to the working directory — the app
    must be launched from the project root for 'models/sgd_90.pkl' to
    resolve.
    """
    return joblib.load('models/sgd_90.pkl')
| | |
def main(audio):
    """Gradio entry point: classify a recording as 'Si' or 'No'.

    Parameters
    ----------
    audio : str
        Filepath of the recorded clip (gr.Audio with type="filepath").

    Returns
    -------
    tuple
        (label string, mapping of class name to probability for gr.Label).
    """
    newdf = transform_data(audio)
    loaded_model = load_model()
    newpred, proba = predict(newdf, loaded_model)

    # predict() yields an array with one element; index it so get_label
    # compares a scalar instead of relying on 1-element-array truthiness
    # (which numpy deprecates in boolean contexts).
    final = get_label(newpred[0])

    return final, {'Si probability': proba[1],
                   'No probability': proba[0]}
| |
|
# Build the Gradio UI: one microphone input; outputs are the predicted
# label text plus a per-class probability breakdown.
demo = gr.Interface(
    title="Autoagent | YES or NO Classification - Layer7",
    description="""<h3>This model is useful to classify if the user says 'Si' or 'No'. 🎙️ </h3>
    <img src="https://huggingface.co/spaces/Adrian8as/imagen/resolve/main/output.png" width="350" height="350"/> <br>
    <b>Record your voice:</b>""",
    allow_flagging="never",
    fn=main,
    inputs=gr.Audio(
        sources=["microphone"],
        type="filepath",
    ),
    # Typo fix in the user-facing label: "Clasification" -> "Classification".
    outputs=[gr.Textbox(label="Classification"), "label"],
)

if __name__ == "__main__":
    demo.launch(show_api=False)