# Source: music_recommedation/src/preprosecing.py
# Author: frorozcol — commit 5b4d0fb ("Values")
import os
import numpy as np
import librosa
import soundfile as sf
import statistics as st
from joblib import load
from pydub import AudioSegment
# Genre labels, ordered so the classifier's integer predictions index directly into this list.
CLASSES = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
class Features:
    """Extracts per-segment audio features for the genre classifier.

    The signal given to the constructor is split into 10 sub-sequences
    (3 seconds each for a 30-second clip); ``splits_3sec`` returns one
    feature row per sub-sequence, matching the layout the model was
    trained on.
    """

    def __init__(self, y, sr, hop_length=5000):
        """
        Initialize the class with audio signal, sr and hop_length
        :param y: audio signal
        :param sr: sample rate of audio signal
        :param hop_length: hop_length parameter used while calculating the chroma_stft feature
        """
        # array_split tolerates lengths not divisible by 10; np.split would
        # raise ValueError on clips shorter than the expected 30 seconds.
        self.y = np.array_split(y, 10)
        self.sr = sr
        self.hop_length = hop_length

    def get_mean_var(self, y):
        """
        Helper function to get mean and variance of feature
        :param y: audio feature
        :return: mean, variance
        """
        return y.mean(), y.var()

    def zero_crossing_rate(self, y):
        """
        Returns the zero-crossing rate of the audio signal
        :return: mean and variance of zero-crossing rate
        """
        # librosa >= 0.10 requires the audio to be passed by keyword.
        values = librosa.feature.zero_crossing_rate(y=y)
        return self.get_mean_var(values)

    def harmonic_and_per(self, y):
        """
        separates the harmonic and percussive components of the audio signal
        :return: harmonic and percussive components' mean and variance
        """
        y_harm, y_perc = librosa.effects.hpss(y)
        harm = self.get_mean_var(y_harm)
        perc = self.get_mean_var(y_perc)
        return harm, perc

    def tempo(self, y):
        """
        Extracts the tempo (beats per minute) of an audio signal.
        Parameters:
            y (ndarray): The audio signal represented as an numpy array.
        Returns:
            float: The tempo of the audio signal in beats per minute.
        """
        tempo = librosa.beat.tempo(y=y, sr=self.sr)
        # librosa returns a 1-element ndarray; unwrap it so features() can
        # place a plain float into its float32 vector.
        return float(np.asarray(tempo).flat[0])

    def centroid(self, y):
        """
        Extracts the spectral centroid of an audio signal.
        Parameters:
            y (ndarray): The audio signal represented as an numpy array.
        Returns:
            tuple: A tuple containing the mean and variance of the spectral centroid.
        """
        centroid = librosa.feature.spectral_centroid(y=y, sr=self.sr)
        return self.get_mean_var(centroid)

    def mfccs(self, y):
        """
        Extracts the Mel-Frequency Cepstral Coefficients (MFCCs) of an audio signal.
        Parameters:
            y (ndarray): The audio signal represented as an numpy array.
        Returns:
            ndarray: An array with mean and variance interleaved per coefficient
            ([mean_0, var_0, mean_1, var_1, ...]).
        """
        mfccs = librosa.feature.mfcc(y=y, sr=self.sr)
        mean = mfccs.mean(axis=1)
        var = mfccs.var(axis=1)
        # Interleave mean/var per coefficient to match the training layout.
        return np.stack([mean, var], axis=1).reshape(-1)

    def chroma_stft(self, y):
        """
        Extracts the chroma feature of an audio signal.
        Parameters:
            y (ndarray): The audio signal represented as an numpy array.
        Returns:
            tuple: A tuple containing the mean and variance of the chroma feature.
        """
        chroma = librosa.feature.chroma_stft(y=y, sr=self.sr, hop_length=self.hop_length)
        return self.get_mean_var(chroma)

    def spectral_bandwidth(self, y):
        """
        Extracts the spectral bandwidth of an audio signal.
        Parameters:
            y (ndarray): The audio signal represented as an numpy array.
        Returns:
            tuple: A tuple containing the mean and variance of the spectral bandwidth.
        """
        spd = librosa.feature.spectral_bandwidth(y=y, sr=self.sr)
        return self.get_mean_var(spd)

    def rollof(self, y):
        """
        Extracts the spectral rolloff of an audio signal.
        Parameters:
            y (ndarray): The audio signal represented as an numpy array.
        Returns:
            tuple: A tuple containing the mean and variance of the spectral rolloff.
        """
        rollof = librosa.feature.spectral_rolloff(y=y, sr=self.sr)[0]
        return self.get_mean_var(rollof)

    def rms(self, y):
        """
        Extracts the root mean square (RMS) of an audio signal.
        Parameters:
            y (ndarray): The audio signal represented as an numpy array.
        Returns:
            tuple: A tuple containing the mean and variance of the RMS.
        """
        rms = librosa.feature.rms(y=y)
        return self.get_mean_var(rms)

    def features(self, y):
        """
        Extracts various audio features from an audio signal.
        Parameters:
            y (ndarray): The audio signal represented as an numpy array.
        Returns:
            ndarray: A float32 array containing the extracted audio features
            (segment length, chroma, rms, centroid, bandwidth, rolloff, zcr,
            harmonic/percussive stats, tempo, then interleaved MFCC stats).
        """
        tempo = self.tempo(y)
        centroid_mean, centroid_var = self.centroid(y)
        chroma_mean, chroma_var = self.chroma_stft(y)
        zcr_mean, zcr_var = self.zero_crossing_rate(y)
        spd_mean, spd_var = self.spectral_bandwidth(y)
        rollof_mean, rollof_var = self.rollof(y)
        rsm_mean, rsm_var = self.rms(y)
        harm, perc = self.harmonic_and_per(y)
        harm_mean, harm_var = harm
        perc_mean, perc_var = perc
        mfccs = self.mfccs(y)
        features = np.array([y.shape[0],
                             chroma_mean, chroma_var,
                             rsm_mean, rsm_var,
                             centroid_mean, centroid_var,
                             spd_mean, spd_var,
                             rollof_mean, rollof_var,
                             zcr_mean, zcr_var,
                             harm_mean, harm_var,
                             perc_mean, perc_var,
                             tempo,
                             ],
                            dtype=np.float32)
        return np.concatenate([features, mfccs])

    def splits_3sec(self):
        """
        Splits an audio signal into 3-second sub-sequences and extracts audio features from each sub-sequence.
        Returns:
            ndarray: An array containing the extracted audio features for each 3-second sub-sequence.
        """
        features_split = [self.features(sub_sequence) for sub_sequence in self.y]
        return np.array(features_split)
def load_model():
    """Load the trained genre classifier bundled next to this module.

    Returns:
        The model deserialized from ``models/model.pkl`` (via joblib).
    """
    base_dir = os.path.dirname(__file__)
    model_path = os.path.join(base_dir, "models", "model.pkl")
    return load(model_path)
def predict(features):
    """Classify each feature row and aggregate to a single genre.

    Parameters:
        features: 2-D array with one feature vector per 3-second segment.

    Returns:
        tuple: (most frequent genre name, list of per-segment genre names)
    """
    classifier = load_model()
    labels = classifier.predict(features)
    majority = st.mode(labels)
    names = [CLASSES[label] for label in labels]
    return CLASSES[majority], names
def cuts_silence(audio):
    """Trim leading and trailing silence from the signal.

    Parameters:
        audio (ndarray): The raw audio signal.

    Returns:
        ndarray: The trimmed signal (librosa's default thresholds).
    """
    trimmed, _interval = librosa.effects.trim(audio)
    return trimmed
def convert_mp3_to_wav(music_file):
    """Decode an MP3 and write it as WAV in the current directory.

    Parameters:
        music_file: path or file-like object holding the MP3 data.

    Returns:
        str: The name of the WAV file that was written ("music_file.wav").
    """
    name_file = "music_file.wav"
    AudioSegment.from_mp3(music_file).export(name_file, format="wav")
    return name_file
def preprosecing(uploaded_file):
    """Convert an uploaded MP3 to WAV, trim silence, and predict its genre.

    Parameters:
        uploaded_file: path (or file-like object) of the uploaded MP3.

    Returns:
        tuple: (open binary handle to the processed WAV file,
                (majority genre name, list of per-segment genre names))
    """
    name_file = convert_mp3_to_wav(uploaded_file)
    y, sr = librosa.load(name_file)
    audio_file = cuts_silence(y)
    # Keep at most 30 seconds and force the length to a multiple of 10 so
    # that splitting into 10 sub-sequences in Features cannot fail when the
    # trimmed clip is shorter than 30 s.
    limit = min(len(audio_file), sr * 30)
    audio_file = audio_file[: limit - (limit % 10)]
    sf.write(file=name_file, data=audio_file, samplerate=sr)
    # NOTE(review): the handle is returned to the caller, who is responsible
    # for closing it once the WAV has been consumed.
    file = open(name_file, 'rb')
    features = Features(audio_file, sr).splits_3sec()
    prediction = predict(features)
    return file, prediction