|
|
|
|
|
import argparse |
|
from typing import List |
|
|
|
import librosa |
|
import numpy as np |
|
from pydub import AudioSegment |
|
from scipy.io import wavfile |
|
|
|
from project_settings import project_path |
|
|
|
|
|
def _score_transform_error(x, stages, scores) -> ValueError:
    """Build the ValueError raised by ``score_transform`` for unusable input."""
    return ValueError(f"values of x, stages and scores should between 0 and 1, "
                      f"stages and scores should be same length and decreased. "
                      f"x: {x}, stages: {stages}, scores: {scores}")


def score_transform(x: float, stages: List[float], scores: List[float], ndigits: int = 4):
    """
    Map ``x`` onto a score by piecewise-linear interpolation between breakpoints.

    ``stages`` and ``scores`` are parallel lists of breakpoints, ordered from
    the highest stage down to the lowest. The first segment whose lower stage
    is ``<= x`` is used, so an ``x`` above ``stages[0]`` is extrapolated along
    the first segment rather than rejected.

    Args:
        x: value to transform.
        stages: breakpoint positions, in decreasing order.
        scores: score at each breakpoint; must have the same length as ``stages``.
        ndigits: number of decimal digits the result is rounded to.

    Returns:
        The interpolated score, rounded to ``ndigits`` digits.

    Raises:
        ValueError: if ``stages`` and ``scores`` are empty or differ in length,
            or if ``x`` is below every stage after the first (which includes
            the case of a single breakpoint).
    """
    # Validate up front: zip() below would otherwise silently drop the surplus
    # breakpoints of the longer list, and an empty list would raise IndexError.
    if len(stages) == 0 or len(stages) != len(scores):
        raise _score_transform_error(x, stages, scores)

    upper_stage, upper_score = stages[0], scores[0]
    # Iterate over slices instead of rebinding the parameters, so the error
    # below can report the complete lists the caller passed in.
    for stage, score in zip(stages[1:], scores[1:]):
        if x >= stage:
            # The 1e-7 keeps the division finite when two adjacent stages are equal.
            result = score + (x - stage) / (upper_stage - stage + 1e-7) * (upper_score - score)
            return round(result, ndigits)
        upper_stage, upper_score = stage, score

    raise _score_transform_error(x, stages, scores)
|
|
|
|
|
def set_volume(waveform: np.ndarray, sample_rate: int = 8000, volume: int = 0):
    """
    Rescale a mono waveform so its loudness matches a 0-100 volume setting.

    The volume setting is converted to a target dBFS level by interpolating
    between the points 0 -> -150, 10 -> -60, 50 -> -35 and 100 -> -20, and the
    signal is then gained by the difference between that target and its
    current dBFS as reported by pydub.

    Args:
        waveform: samples with every value inside [-1, 1].
        sample_rate: frame rate handed to the pydub ``AudioSegment``.
        volume: requested volume; mapped to dBFS by ``score_transform``.

    Returns:
        The gain-adjusted samples as a float32 array, scaled back by the
        segment's full-scale integer value.

    Raises:
        AssertionError: if any sample lies outside [-1, 1], or if the segment
            reports a sample width other than 2, 3 or 4 bytes.
        ValueError: propagated from ``score_transform`` when ``volume`` is
            below the lowest mapped stage.
    """
    if np.min(waveform) < -1 or np.max(waveform) > 1:
        raise AssertionError(f"waveform type: {type(waveform)}, dtype: {waveform.dtype}")

    # A sample of exactly +1.0 scales to 32768, one past the int16 maximum;
    # clip before the cast so it saturates instead of overflowing.
    full_scale = 1 << 15
    pcm = np.clip(waveform * full_scale, -full_scale, full_scale - 1).astype(np.int16)

    audio_segment = AudioSegment(
        data=pcm.tobytes(),
        sample_width=2,
        frame_rate=sample_rate,
        channels=1
    )

    # (volume, dBFS) breakpoints in ascending volume order.
    map_list = [
        [0, -150],
        [10, -60],
        [50, -35],
        [100, -20],
    ]
    stages = [a for a, b in map_list]
    scores = [b for a, b in map_list]

    # score_transform expects its breakpoints in decreasing order.
    target_db = score_transform(
        x=volume,
        stages=list(reversed(stages)),
        scores=list(reversed(scores)),
    )

    # pydub reports -inf dBFS for an all-zero signal, which would turn the
    # requested gain into +inf; silence has nothing to amplify, so skip it.
    current_db = audio_segment.dBFS
    if np.isfinite(current_db):
        audio_segment = audio_segment.apply_gain(target_db - current_db)

    samples = np.array(audio_segment.get_array_of_samples())

    sample_width = audio_segment.sample_width
    if sample_width not in (2, 3, 4):
        raise AssertionError
    # Divide by the full-scale value of a signed integer of that byte width.
    return samples.astype(np.float32) / (1 << (sample_width * 8 - 1))
|
|
|
|
|
def get_args():
    """
    Parse the command-line options of this script.

    Returns:
        The parsed namespace; ``filename`` is the audio file to process and
        defaults to an example wav located under ``project_path``.
    """
    default_filename = (project_path / "data/examples/ai_agent/chinese-1.wav").as_posix()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--filename", default=default_filename, type=str)
    return arg_parser.parse_args()
|
|
|
|
|
def main():
    """
    Load the input file at 8 kHz, set its volume to 10 and write ``temp.wav``.

    The adjusted waveform is converted to 16-bit PCM before being written to
    the current working directory.
    """
    args = get_args()

    # One rate for both loading and writing.
    target_rate = 8000
    audio, sample_rate = librosa.load(args.filename, sr=target_rate)

    leveled = set_volume(waveform=audio, sample_rate=sample_rate, volume=10)

    # Scale the floating-point samples to 16-bit integers for the wav writer.
    pcm16 = np.array(leveled * (1 << 15), dtype=np.int16)
    wavfile.write("temp.wav", rate=target_rate, data=pcm16)
|
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|