CNN-VAD / vad /transforms.py
abhishek-sehgal's picture
Add the code to run the spaces
6a9b2e3
import numpy as np
import math
# Constants
P_REF = -93.9794
class Transform:
def __init__(self, window_size, frames_per_second):
self.window_size = window_size
self.frames_per_second = frames_per_second
# Find the next power of 2
pow2_size = 1
while pow2_size < window_size:
pow2_size <<= 1
self.points = pow2_size
# Initialize arrays
self.real = np.zeros(pow2_size, dtype=float)
self.imaginary = np.zeros(pow2_size, dtype=float)
self.power = np.zeros(pow2_size, dtype=float)
self.sine = np.zeros(pow2_size // 2, dtype=float)
self.cosine = np.zeros(pow2_size // 2, dtype=float)
self.dbpower_buffer = np.zeros(frames_per_second, dtype=float)
self.dbpower = 0
# Precompute twiddle factors
for i in range(pow2_size // 2):
arg = (-2.0 * math.pi * i) / pow2_size
self.cosine[i] = math.cos(arg)
self.sine[i] = math.sin(arg)
# Create Hanning window
self.window = np.zeros(pow2_size, dtype=float)
for i in range(window_size):
# Hanning window
self.window[i] = (
1.0 - math.cos(2.0 * math.pi * (i + 1) / (window_size + 1))
) * 0.5
def new_transform(window_size, frames_per_second):
"""Create a new Transform object"""
return Transform(window_size, frames_per_second)
def forward_fft(fft, real_input):
"""Perform forward FFT calculation"""
k = fft.points
fft.total_power = 0
# Reset arrays
fft.real = np.zeros(k, dtype=float)
fft.imaginary = np.zeros(k, dtype=float)
# Apply window function to input
for i in range(fft.window_size):
fft.real[i] = real_input[i] * fft.window[i]
j = 0
m = k // 2
# Bit reversal
for i in range(1, k - 1):
L = m
while j >= L:
j = j - L
L = L // 2
j = j + L
if i < j:
temp_real = fft.real[i]
temp_imaginary = fft.imaginary[i]
fft.real[i] = fft.real[j]
fft.imaginary[i] = fft.imaginary[j]
fft.real[j] = temp_real
fft.imaginary[j] = temp_imaginary
L = 0
m = 1
n = k // 2
# Computation
i = k
while i > 1:
L = m
m = 2 * m
o = 0
for j in range(L):
cos = fft.cosine[o]
sin = fft.sine[o]
o = o + n
for p in range(j, k, m):
q = p + L
xt = cos * fft.real[q] - sin * fft.imaginary[q]
yt = sin * fft.real[q] + cos * fft.imaginary[q]
fft.real[q] = fft.real[p] - xt
fft.real[p] = fft.real[p] + xt
fft.imaginary[q] = fft.imaginary[p] - yt
fft.imaginary[p] = fft.imaginary[p] + yt
n = n >> 1
i = i >> 1
# Calculate power spectrum
fft.power = np.zeros(k, dtype=float)
for i in range(k):
fft.power[i] = math.sqrt(
fft.real[i] * fft.real[i] + fft.imaginary[i] * fft.imaginary[i]
)
fft.total_power += fft.power[i] / k
# Calculate dB SPL
fft.dBSPL = 10 * math.log10(fft.total_power + 1e-6) - P_REF
temp = fft.dBSPL
# Update running average
fft.dbpower = fft.dbpower + (temp - fft.dbpower_buffer[0]) / fft.frames_per_second
fft.dbpower_buffer = np.roll(fft.dbpower_buffer, -1)
fft.dbpower_buffer[-1] = temp
def inverse_fft(fft):
"""Perform inverse FFT calculation"""
k = fft.points
j = 0
m = k // 2
# Bit reversal
for i in range(1, k - 1):
L = m
while j >= L:
j = j - L
L = L // 2
j = j + L
if i < j:
temp_real = fft.real[i]
temp_imaginary = fft.imaginary[i]
fft.real[i] = fft.real[j]
fft.imaginary[i] = fft.imaginary[j]
fft.real[j] = temp_real
fft.imaginary[j] = temp_imaginary
L = 0
m = 1
n = k // 2
# Computation (note negative sine for inverse)
i = k
while i > 1:
L = m
m = 2 * m
o = 0
for j in range(L):
cos = fft.cosine[o]
sin = -fft.sine[o] # Negative for inverse
o = o + n
for p in range(j, k, m):
q = p + L
xt = cos * fft.real[q] - sin * fft.imaginary[q]
yt = sin * fft.real[q] + cos * fft.imaginary[q]
fft.real[q] = fft.real[p] - xt
fft.real[p] = fft.real[p] + xt
fft.imaginary[q] = fft.imaginary[p] - yt
fft.imaginary[p] = fft.imaginary[p] + yt
n = n >> 1
i = i >> 1
# Scale the result
fft.real = fft.real / k
def destroy_transform(transform):
"""Clean up resources (not necessary in Python due to garbage collection)"""
# In Python, we don't need to explicitly free memory
# This function is included for API compatibility
pass