File size: 7,808 Bytes
86a3a86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b4d0fb
86a3a86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
import os
import numpy as np
import librosa
import soundfile as sf


import statistics as st
from joblib import load
from pydub import AudioSegment

# Genre labels in the index order emitted by the trained model (GTZAN-style classes);
# model.predict() returns integer indices into this list.
CLASSES = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

class Features:
    """Extract a fixed-size feature vector for each tenth of an audio clip.

    The signal is split into 10 equal segments (for a 30-second clip this
    yields 3-second segments, hence ``splits_3sec``); each segment produces
    one row of spectral / rhythmic features suitable for the genre model.
    """

    def __init__(self, y, sr, hop_length=5000):
        """
        Initialize the class with audio signal, sr and hop_length
        :param y: audio signal (1-D numpy array)
        :param sr: sample rate of audio signal
        :param hop_length: hop_length parameter used while calculating the chroma_stft feature
        """
        # np.split requires len(y) to be an exact multiple of 10; drop the
        # trailing remainder samples so arbitrary-length input does not raise.
        remainder = y.shape[0] % 10
        if remainder:
            y = y[:-remainder]
        self.y = np.split(y, 10)
        self.sr = sr
        self.hop_length = hop_length

    def get_mean_var(self, y):
        """
        Helper function to get mean and variance of a feature array
        :param y: audio feature (numpy array)
        :return: (mean, variance)
        """
        return y.mean(), y.var()

    def zero_crossing_rate(self, y):
        """
        Returns the zero-crossing rate of the audio signal
        :param y: audio segment as a numpy array
        :return: mean and variance of zero-crossing rate
        """
        values = librosa.feature.zero_crossing_rate(y=y)
        return self.get_mean_var(values)

    def harmonic_and_per(self, y):
        """
        Separates the harmonic and percussive components of the audio signal
        :param y: audio segment as a numpy array
        :return: (harmonic (mean, var), percussive (mean, var))
        """
        y_harm, y_perc = librosa.effects.hpss(y=y)
        return self.get_mean_var(y_harm), self.get_mean_var(y_perc)

    def tempo(self, y):
        """
        Extracts the tempo (beats per minute) of an audio signal.

        :param y: audio segment as a numpy array
        :return: 1-element ndarray holding the estimated tempo in BPM
        """
        try:
            # librosa < 0.10
            return librosa.beat.tempo(y=y, sr=self.sr)
        except AttributeError:
            # librosa >= 0.10 moved the estimator to feature.rhythm
            return librosa.feature.rhythm.tempo(y=y, sr=self.sr)

    def centroid(self, y):
        """
        Extracts the spectral centroid of an audio signal.

        :param y: audio segment as a numpy array
        :return: (mean, variance) of the spectral centroid
        """
        centroid = librosa.feature.spectral_centroid(y=y, sr=self.sr)
        return self.get_mean_var(centroid)

    def mfccs(self, y):
        """
        Extracts the Mel-Frequency Cepstral Coefficients (MFCCs) of an audio signal.

        :param y: audio segment as a numpy array
        :return: flat ndarray of interleaved [mean_i, var_i] per coefficient
        """
        mfccs = librosa.feature.mfcc(y=y, sr=self.sr)
        mean = mfccs.mean(axis=1)
        var = mfccs.var(axis=1)
        # interleave as [mean_0, var_0, mean_1, var_1, ...]
        return np.stack([mean, var], axis=1).reshape(-1)

    def chroma_stft(self, y):
        """
        Extracts the chroma feature of an audio signal.

        :param y: audio segment as a numpy array
        :return: (mean, variance) of the chroma feature
        """
        chroma = librosa.feature.chroma_stft(y=y, sr=self.sr, hop_length=self.hop_length)
        return self.get_mean_var(chroma)

    def spectral_bandwidth(self, y):
        """
        Extracts the spectral bandwidth of an audio signal.

        :param y: audio segment as a numpy array
        :return: (mean, variance) of the spectral bandwidth
        """
        spd = librosa.feature.spectral_bandwidth(y=y, sr=self.sr)
        return self.get_mean_var(spd)

    def rollof(self, y):
        """
        Extracts the spectral rolloff of an audio signal.
        (Name kept as-is — "rollof" — for backward compatibility.)

        :param y: audio segment as a numpy array
        :return: (mean, variance) of the spectral rolloff
        """
        rollof = librosa.feature.spectral_rolloff(y=y, sr=self.sr)[0]
        return self.get_mean_var(rollof)

    def rms(self, y):
        """
        Extracts the root mean square (RMS) of an audio signal.

        :param y: audio segment as a numpy array
        :return: (mean, variance) of the RMS
        """
        rms = librosa.feature.rms(y=y)
        return self.get_mean_var(rms)

    def features(self, y):
        """
        Extracts all audio features from one segment.

        :param y: audio segment as a numpy array
        :return: 1-D float32 ndarray: [length, chroma, rms, centroid,
                 bandwidth, rolloff, zcr, harmonic, percussive (mean/var
                 pairs), tempo] followed by the MFCC mean/var pairs
        """
        tempo = self.tempo(y)
        centroid_mean, centroid_var = self.centroid(y)
        chroma_mean, chroma_var = self.chroma_stft(y)
        zcr_mean, zcr_var = self.zero_crossing_rate(y)
        spd_mean, spd_var = self.spectral_bandwidth(y)
        rollof_mean, rollof_var = self.rollof(y)
        rsm_mean, rsm_var = self.rms(y)
        harm, perc = self.harmonic_and_per(y)
        harm_mean, harm_var = harm
        perc_mean, perc_var = perc
        mfccs = self.mfccs(y)

        # tempo is a 1-element ndarray; embed it as a scalar so the list is
        # homogeneous (a ragged element raises ValueError on NumPy >= 1.24).
        bpm = float(np.asarray(tempo).ravel()[0])

        features = np.array([y.shape[0],
                             chroma_mean, chroma_var,
                             rsm_mean, rsm_var,
                             centroid_mean, centroid_var,
                             spd_mean, spd_var,
                             rollof_mean, rollof_var,
                             zcr_mean, zcr_var,
                             harm_mean, harm_var,
                             perc_mean, perc_var,
                             bpm,
                             ],
                            dtype=np.float32)
        return np.concatenate([features, mfccs])

    def splits_3sec(self):
        """
        Extracts the feature vector of every segment produced in __init__.

        :return: 2-D ndarray, one row of features per segment (10 rows)
        """
        return np.array([self.features(sub_sequence) for sub_sequence in self.y])


def load_model():
    """Load the serialized genre-classification model shipped with this module.

    :return: the estimator unpickled from ``models/model.pkl`` next to this file
    """
    model_path = os.path.join(os.path.dirname(__file__), 'models', "model.pkl")
    return load(model_path)

def predict(features):
    """Classify per-segment feature rows and aggregate to a single genre.

    :param features: 2-D array of feature rows (one per audio segment)
    :return: (overall genre name chosen by majority vote, list of per-segment genre names)
    """
    segment_labels = load_model().predict(features)
    majority = st.mode(segment_labels)
    per_segment = [CLASSES[label] for label in segment_labels]
    return CLASSES[majority], per_segment

def cuts_silence(audio):
    """Strip leading and trailing silence from an audio signal.

    :param audio: 1-D audio signal array
    :return: the trimmed signal (trim interval from librosa is discarded)
    """
    trimmed, _interval = librosa.effects.trim(audio)
    return trimmed

def convert_mp3_to_wav(music_file):
    """Re-encode an MP3 file as WAV.

    Writes "music_file.wav" into the current working directory,
    overwriting any previous conversion.

    :param music_file: path or file-like object of the source MP3
    :return: the name of the written WAV file
    """
    name_file = "music_file.wav"
    AudioSegment.from_mp3(music_file).export(name_file, format="wav")
    return name_file


def preprosecing(uploaded_file):
    """Run the full pipeline: convert an uploaded MP3 to WAV, trim silence,
    keep the first 30 seconds, extract per-segment features and classify.

    (Function name typo kept for backward compatibility with callers.)

    :param uploaded_file: path or file-like object of the uploaded MP3
    :return: (open binary handle of the processed WAV, (genre, per-segment labels))
    """
    wav_name = convert_mp3_to_wav(uploaded_file)
    signal, sample_rate = librosa.load(wav_name)
    trimmed = cuts_silence(signal)
    trimmed = trimmed[:sample_rate * 30]  # cap at the first 30 seconds
    sf.write(file=wav_name, data=trimmed, samplerate=sample_rate)
    # NOTE(review): the handle is returned open — the caller is responsible
    # for closing it.
    wav_handle = open(wav_name, 'rb')
    segment_features = Features(trimmed, sample_rate).splits_3sec()
    prediction = predict(segment_features)
    return wav_handle, prediction