Spaces:
Running
Running
from functools import lru_cache

import librosa
import numpy as np
from scipy import signal
@lru_cache(maxsize=16)
def _mel_filterbank(sample_rate, n_fft, num_mels, fmin, fmax):
    """Build the mel filterbank for one parameter combination (memoised)."""
    return librosa.filters.mel(
        sr=sample_rate,
        n_fft=n_fft,
        n_mels=num_mels,
        fmin=fmin,
        fmax=fmax,
    )


def mel_basis(hp):
    """Return the mel filterbank matrix described by ``hp``.

    The filterbank only depends on ``hp.sample_rate``, ``hp.n_fft``,
    ``hp.num_mels``, ``hp.fmin`` and ``hp.fmax``, so it is computed once per
    distinct combination of those values and reused afterwards instead of
    being rebuilt on every call.

    Args:
        hp: Hyper-parameter object exposing the five attributes listed above.

    Returns:
        A freshly allocated array of shape (nmel, nfreq). A copy of the cached
        matrix is returned so that callers may modify the result without
        affecting later calls.

    Raises:
        AssertionError: If ``hp.fmax`` exceeds ``hp.sample_rate // 2``.
    """
    assert hp.fmax <= hp.sample_rate // 2, (
        f"fmax ({hp.fmax}) must not exceed sample_rate // 2 "
        f"({hp.sample_rate // 2})"
    )
    # Key the cache on the scalar values rather than on `hp` itself: `hp` may
    # be unhashable, and value-based keys stay correct if `hp` is mutated.
    cached = _mel_filterbank(
        hp.sample_rate, hp.n_fft, hp.num_mels, hp.fmin, hp.fmax
    )
    return cached.copy()  # -> (nmel, nfreq)
def preemphasis(wav, hp):
    """Apply a first-order pre-emphasis filter to ``wav`` and clip the result.

    Args:
        wav: Input samples, passed to ``scipy.signal.lfilter``.
        hp: Hyper-parameter object; ``hp.preemphasis`` is the filter
            coefficient and must be non-zero.

    Returns:
        The filtered signal, clipped to the range [-1, 1].

    Raises:
        AssertionError: If ``hp.preemphasis`` equals 0.
    """
    assert hp.preemphasis != 0
    # FIR filter y[n] = x[n] - preemphasis * x[n - 1]
    numerator = [1, -hp.preemphasis]
    denominator = [1]
    filtered = signal.lfilter(numerator, denominator, wav)
    # Filtering can push samples outside [-1, 1]; clamp them back.
    return np.clip(filtered, -1, 1)
def melspectrogram(wav, hp, pad=True):
    """Compute a mel spectrogram of ``wav`` according to the settings in ``hp``.

    Steps, in order: optional pre-emphasis, STFT, magnitude (optionally raised
    to ``hp.mel_power``), projection onto the mel basis, optional conversion
    to dB, and optional normalisation.

    Args:
        wav: Input waveform samples.
        hp: Hyper-parameter object. This function reads ``preemphasis``,
            ``mel_power``, ``mel_type``, ``normalized_mels`` and ``hop_size``;
            the helpers it calls read further attributes.
        pad: Forwarded to the STFT helper; when truthy, the number of output
            frames is additionally sanity-checked.

    Returns:
        The mel spectrogram, laid out as (M, T).

    Raises:
        AssertionError: If the pre-emphasised signal exceeds 1 in magnitude
            (beyond a 1e-07 tolerance), or if ``pad`` is truthy and the frame
            count differs from ``1 + len(wav) // hp.hop_size``.
    """
    # Pre-emphasis stage (skipped when the coefficient is not positive)
    if hp.preemphasis > 0:
        wav = preemphasis(wav, hp)
        peak = np.abs(wav).max()
        assert peak - 1 < 1e-07

    # STFT, then keep only the magnitudes
    magnitudes = np.abs(_stft(wav, hp, pad=pad))

    # Optionally raise the magnitudes to a power (done in place)
    if hp.mel_power != 1.0:
        magnitudes **= hp.mel_power

    # Project the linear-frequency magnitudes onto the mel filterbank
    filterbank = mel_basis(hp)
    mel = np.dot(filterbank, magnitudes)

    # Optional amplitude -> dB conversion
    if hp.mel_type == "db":
        mel = _amp_to_db(mel, hp)

    # Optional normalisation of the mel, stored as float32
    if hp.normalized_mels:
        mel = _normalize(mel, hp).astype(np.float32)

    # Sanity check on the frame count, only meaningful when padding is enabled
    if pad:
        assert mel.shape[1] == 1 + len(wav) // hp.hop_size
    return mel  # (M, T)
def _stft(y, hp, pad=True):
    """Run ``librosa.stft`` on ``y`` with the framing parameters from ``hp``.

    Args:
        y: Input signal handed to ``librosa.stft``.
        hp: Hyper-parameter object providing ``n_fft``, ``hop_size`` and
            ``win_size``.
        pad: Passed to ``librosa.stft`` as its ``center`` argument.

    Returns:
        Whatever ``librosa.stft`` returns for these arguments.
    """
    stft_kwargs = {
        "n_fft": hp.n_fft,
        "hop_length": hp.hop_size,
        "win_length": hp.win_size,
        "center": pad,
        # NOTE: after 0.8, pad mode defaults to constant; it is pinned to
        # reflect here for historical consistency and for consistency with
        # the streaming version.
        "pad_mode": "reflect",
    }
    return librosa.stft(y, **stft_kwargs)
def _amp_to_db(x, hp):
    """Convert amplitudes to decibels, flooring at ``hp.stft_magnitude_min``.

    Args:
        x: Amplitude value(s).
        hp: Hyper-parameter object providing ``stft_magnitude_min``, the lower
            bound applied to ``x`` before taking the logarithm.

    Returns:
        ``20 * log10`` of the floored amplitudes.
    """
    # Floor first so the logarithm never sees values below the minimum.
    floored = np.maximum(hp.stft_magnitude_min, x)
    return 20 * np.log10(floored)
def _db_to_amp(x):
    """Convert decibel value(s) ``x`` to amplitude, i.e. ``10 ** (x * 0.05)``."""
    exponent = x * 0.05
    return np.power(10.0, exponent)
def _normalize(s, hp, headroom_db=15):
    """Linearly rescale dB values so the floor maps to 0 and the headroom to 1.

    The floor is ``20 * log10(hp.stft_magnitude_min)``; an input equal to the
    floor becomes 0 and an input equal to ``headroom_db`` becomes 1. Values
    outside that interval are not clipped.

    Args:
        s: Value(s) in dB to rescale.
        hp: Hyper-parameter object providing ``stft_magnitude_min``.
        headroom_db: The dB level that is mapped to 1.

    Returns:
        The rescaled value(s).
    """
    floor_db = 20 * np.log10(hp.stft_magnitude_min)
    span_db = headroom_db - floor_db
    return (s - floor_db) / span_db