#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Rescale a mono waveform to a target loudness chosen by a 0-100 volume knob."""
import argparse
from typing import List

import librosa
import numpy as np
from pydub import AudioSegment
from scipy.io import wavfile

from project_settings import project_path


def score_transform(x: float, stages: List[float], scores: List[float], ndigits: int = 4) -> float:
    """Map ``x`` to a score by piecewise-linear interpolation.

    ``stages`` and ``scores`` are parallel lists sorted in decreasing stage
    order; ``(stages[i], scores[i])`` are the knots of the curve. ``x`` is
    matched against the first segment whose lower stage it reaches, so a
    value above ``stages[0]`` is extrapolated along the first segment.

    :param x: value to transform.
    :param stages: knot positions, decreasing.
    :param scores: score at each knot, same length as ``stages``.
    :param ndigits: number of decimals the result is rounded to.
    :return: the interpolated score, rounded to ``ndigits``.
    :raises ValueError: if ``stages`` is empty, the two lists differ in
        length, or ``x`` is below every stage.
    """
    # The original lists are left untouched so the error message below can
    # report them in full.
    if stages and len(stages) == len(scores):
        upper_stage, upper_score = stages[0], scores[0]
        for stage, score in zip(stages[1:], scores[1:]):
            if x >= stage:
                # 1e-7 keeps the division defined when two consecutive
                # stages are equal.
                result = score + (x - stage) / (upper_stage - stage + 1e-7) * (upper_score - score)
                return round(result, ndigits)
            upper_stage, upper_score = stage, score

    raise ValueError(f"values of x, stages and scores should between 0 and 1, "
                     f"stages and scores should be same length and decreased. "
                     f"x: {x}, stages: {stages}, scores: {scores}")


def set_volume(waveform: np.ndarray, sample_rate: int = 8000, volume: int = 0) -> np.ndarray:
    """Return ``waveform`` with its level moved to the dBFS chosen by ``volume``.

    ``volume`` is translated to a target dBFS through the piecewise-linear
    table 0 -> -150, 10 -> -60, 50 -> -35, 100 -> -20. Values above 100 are
    extrapolated by ``score_transform``; values below 0 make it raise
    ``ValueError``.

    :param waveform: mono samples as floats within [-1, 1].
    :param sample_rate: sample rate of ``waveform`` in Hz.
    :param volume: loudness knob, nominally 0-100.
    :return: float32 samples, 16-bit quantised and scaled back to [-1, 1).
    :raises AssertionError: if any sample lies outside [-1, 1].
    """
    if waveform.size == 0:
        # Nothing to scale; np.min/np.max below would fail on an empty array.
        return np.zeros(0, dtype=np.float32)

    if np.min(waveform) < -1 or np.max(waveform) > 1:
        raise AssertionError(f"waveform type: {type(waveform)}, dtype: {waveform.dtype}")

    # A sample of exactly +1.0 scales to 32768, which does not fit in int16,
    # so clip to the representable range before casting.
    full_scale_16 = 1 << 15
    pcm = np.clip(waveform * full_scale_16, -full_scale_16, full_scale_16 - 1).astype(np.int16)

    audio_segment = AudioSegment(
        data=pcm.tobytes(),
        sample_width=2,
        frame_rate=sample_rate,
        channels=1
    )

    map_list = [
        [0, -150],
        [10, -60],
        [50, -35],
        [100, -20],
    ]
    stages = [a for a, b in map_list]
    scores = [b for a, b in map_list]

    # Compute the target dBFS; score_transform expects decreasing stages.
    target_db = score_transform(
        x=volume,
        stages=list(reversed(stages)),
        scores=list(reversed(scores)),
    )

    # pydub reports -inf dBFS for a segment with zero RMS; the gain would then
    # be +inf, so leave such (silent) audio untouched.
    current_db = audio_segment.dBFS
    if np.isfinite(current_db):
        audio_segment = audio_segment.apply_gain(target_db - current_db)

    samples = np.array(audio_segment.get_array_of_samples())

    sample_width = audio_segment.sample_width
    if sample_width not in (2, 3, 4):
        raise AssertionError(f"unsupported sample width: {sample_width}")

    # Divide by the magnitude of the most negative integer of that width.
    samples = samples.astype(np.float32) / (1 << (sample_width * 8 - 1))
    return samples


def get_args():
    """Parse the command-line arguments of the demo script."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--filename",
        default=(project_path / "data/examples/ai_agent/chinese-1.wav").as_posix(),
        type=str
    )
    args = parser.parse_args()
    return args


def main():
    """Load ``--filename`` at 8 kHz, set its volume to 10 and write ``temp.wav``."""
    args = get_args()

    waveform, sample_rate = librosa.load(args.filename, sr=8000)

    waveform = set_volume(
        waveform=waveform,
        sample_rate=sample_rate,
        volume=10
    )

    # set_volume returns values in [-1, 1), so this scaling fits in int16.
    waveform = np.array(waveform * (1 << 15), dtype=np.int16)

    wavfile.write(
        "temp.wav",
        rate=sample_rate,
        data=waveform,
    )


if __name__ == "__main__":
    main()