# Hugging Face Space: audio feature extraction demo (Gradio app).
import gradio as gr
import numpy as np
import librosa

# NOTE(review): hardcoded secret committed to source and compared against
# user input in extract_audio_features — consider loading it from an
# environment variable instead.
API_KEY = "682d2362-894c-800c-af30-a4c56b7f074b"

# Analysis window size in samples, and hop between successive frames
# (frame_length // 4 gives 75% overlap between frames).
FRAME_LENGTH = 1024
HOP_LENGTH = FRAME_LENGTH//4

# Target sample rate all audio is resampled to on load.
sr = 22050
# Function to calculate amplitude envelope for each frame | |
def amplitude_envelope(audio, frame_length, hop_length):
    """Return the amplitude envelope: the peak sample value of each frame.

    Frames start every ``hop_length`` samples; trailing frames may be
    shorter than ``frame_length`` when the signal runs out.
    """
    frame_starts = range(0, len(audio), hop_length)
    peaks = [max(audio[start:start + frame_length]) for start in frame_starts]
    return np.array(peaks)
# Function to extract Short-Time Fourier Transform | |
def st_fourier_transform(audio, n_fft, hop_length, type="power"):
    """Compute a Short-Time Fourier Transform and convert it to decibels.

    Args:
        audio: 1-D signal array.
        n_fft: FFT window size.
        hop_length: hop (in samples) between successive frames.
        type: "power" (|STFT|^2 via power_to_db) or "amplitude"
            (|STFT| via amplitude_to_db). Name kept for backward
            compatibility even though it shadows the builtin.

    Returns:
        2-D array of dB values, referenced to the spectrogram maximum.

    Raises:
        ValueError: if ``type`` is neither "power" nor "amplitude".
    """
    # Validate up front so a bad mode fails before the expensive STFT.
    if type not in ("power", "amplitude"):
        raise ValueError("Error: type should be 'power' or 'amplitude'")
    stft = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length)
    # Magnitudes are already non-negative — the original wrapped them in a
    # second, redundant np.abs() before the dB conversion.
    magnitude = np.abs(stft)
    if type == "power":
        return librosa.power_to_db(magnitude ** 2, ref=np.max)
    return librosa.amplitude_to_db(magnitude, ref=np.max)
def extract_audio_features(file, key):
    """Extract a bundle of audio features from an uploaded audio file.

    Args:
        file: path to the audio file (Gradio passes a filepath string).
        key: caller-supplied API key; must equal the module-level API_KEY.

    Returns:
        dict mapping feature name -> nested list (JSON-serializable, so
        gr.JSON can render it), or an error string if extraction fails.

    Raises:
        gr.Error: when the API key is invalid.
    """
    if key != API_KEY:
        raise gr.Error("Invalid API key.")
    try:
        output = {}
        # Load the audio and resample to the module-level rate.
        signal, _ = librosa.load(file, sr=sr)
        # Amplitude envelope (per-frame peak value).
        ae = amplitude_envelope(signal, FRAME_LENGTH, HOP_LENGTH)
        output['amplitude_envelope'] = ae.tolist()
        # Root-mean-square energy per frame.
        rms = librosa.feature.rms(y=signal, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)[0]
        output['root_mean_square_energy'] = rms.tolist()
        # Zero-crossing rate per frame.
        zcr = librosa.feature.zero_crossing_rate(y=signal, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)[0]
        output['zero_crossing_rate'] = zcr.tolist()
        # Short-time Fourier transform in dB. The original computed this
        # and then discarded it — it is now included in the result.
        stft = st_fourier_transform(signal, FRAME_LENGTH, HOP_LENGTH)
        output['stft_db'] = stft.tolist()
        # First 13 MFCCs.
        mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH, n_mfcc=13)
        output['mfccs'] = mfccs.tolist()
        # First-order MFCC deltas (frame-to-frame change over time).
        delta_mfccs = librosa.feature.delta(mfccs)
        output['delta_mfccs'] = delta_mfccs.tolist()
        # Spectral centroid per frame.
        sc = librosa.feature.spectral_centroid(y=signal, sr=sr, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH)[0]
        output['spectral_centroid'] = sc.tolist()
        # Spectral bandwidth per frame.
        sban = librosa.feature.spectral_bandwidth(y=signal, sr=sr, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH)[0]
        output['spectral_bandwidth'] = sban.tolist()
    except Exception as e:
        # Best-effort boundary: surface the failure to the UI as a string
        # instead of crashing the Gradio event handler.
        print(f"An error occurred: {e}")
        output = f"An error occurred: {e}"
    return output
# Create a function to generate a vertically stacked interface | |
def create_transcription_interface(source):
    """Build a vertically stacked Blocks UI for one audio input source.

    Args:
        source: Gradio audio source, e.g. "microphone" or "upload".

    Returns:
        A gr.Blocks interface whose audio widget is wired to
        extract_audio_features.
    """
    with gr.Blocks() as blocks_ui:
        gr.Markdown("""
Use microphone, upload .wav file.
""")
        with gr.Column():
            audio_widget = gr.Audio(sources=source, type="filepath", label="Upload Audio")
            results_view = gr.JSON(label="Results")
            key_box = gr.Textbox(label="API Key", type="password")
        # Re-run feature extraction whenever the audio input changes.
        audio_widget.change(
            fn=extract_audio_features,
            inputs=[audio_widget, key_box],
            outputs=results_view,
        )
    return blocks_ui
# Build one interface per audio source: live microphone capture and
# file upload, each shown as its own tab.
mic_transcribe = create_transcription_interface("microphone")
file_transcribe = create_transcription_interface("upload")

# Combine the two interfaces into a single tabbed app and start it.
demo = gr.TabbedInterface(
    interface_list=[mic_transcribe, file_transcribe],
    tab_names=["Microphone Input", "Upload .wav file"],
    title="Audio Feature Extraction",
)
demo.launch()