Spaces:

qgyd2021
/

cc_vad

Running

App Files Files Community

cc_vad / toolbox /pydub /volume.py

HoneyTian

update

6efeebe about 13 hours ago

raw

history blame contribute delete

2.88 kB

	#!/usr/bin/python3
	# -*- coding: utf-8 -*-
	import argparse
	from typing import List

	import librosa
	import numpy as np
	from pydub import AudioSegment
	from scipy.io import wavfile

	from project_settings import project_path


	def score_transform(x: float, stages: List[float], scores: List[float], ndigits: int = 4):
	    """
	    Map ``x`` onto a score by piecewise-linear interpolation between breakpoints.

	    ``stages`` lists the breakpoints in decreasing order and ``scores`` gives the
	    value at each one. The first adjacent pair whose lower stage is ``<= x`` is
	    used, so an ``x`` above ``stages[0]`` is extrapolated along the first segment.

	    :param x: value to transform.
	    :param stages: breakpoints, ordered from largest to smallest.
	    :param scores: score at each breakpoint, paired with ``stages`` by position.
	    :param ndigits: number of decimal places the result is rounded to.
	    :return: the interpolated score, rounded to ``ndigits`` places.
	    :raises ValueError: if ``x`` is smaller than every stage after the first,
	        which includes the case of fewer than two breakpoints.
	    """
	    # Walk adjacent (upper, lower) breakpoint pairs without rebinding the
	    # arguments, so the error message below reports exactly what the caller passed.
	    segments = zip(stages, scores, stages[1:], scores[1:])
	    for upper_stage, upper_score, lower_stage, lower_score in segments:
	        if x >= lower_stage:
	            # The 1e-7 keeps the division defined when two adjacent stages coincide.
	            fraction = (x - lower_stage) / (upper_stage - lower_stage + 1e-7)
	            result = lower_score + fraction * (upper_score - lower_score)
	            return round(result, ndigits)
	    raise ValueError(f"values of x, stages and scores should between 0 and 1, "
	                     f"stages and scores should be same length and decreased. "
	                     f"x: {x}, stages: {stages}, scores: {scores}")


	def set_volume(waveform: np.ndarray, sample_rate: int = 8000, volume: int = 0):
	    """
	    Re-gain a mono float waveform so its loudness matches a volume setting.

	    The samples are quantized to 16-bit PCM, wrapped in a pydub ``AudioSegment``,
	    gained so that the segment's dBFS equals the level mapped from ``volume``,
	    and converted back to float32.

	    :param waveform: mono samples with every value inside [-1, 1].
	    :param sample_rate: sampling rate, passed to pydub as the frame rate.
	    :param volume: loudness setting; 0, 10, 50 and 100 map to -150, -60, -35 and
	        -20 dBFS, with linear interpolation in between.
	    :return: float32 array of the re-gained samples divided by the segment's
	        integer full-scale value; an empty float32 array for empty input.
	    :raises AssertionError: if any sample lies outside [-1, 1].
	    :raises ValueError: if ``volume`` is below 0 (raised by ``score_transform``).
	    """
	    if waveform.size == 0:
	        # Nothing to measure or scale; np.min / np.max would raise on an empty array.
	        return np.zeros(0, dtype=np.float32)
	    if np.min(waveform) < -1 or np.max(waveform) > 1:
	        raise AssertionError(f"waveform type: {type(waveform)}, dtype: {waveform.dtype}")

	    # A sample of exactly +1.0 scales to 32768, one past the int16 maximum, so
	    # clamp to the representable range before casting instead of overflowing.
	    full_scale = 1 << 15
	    pcm = np.clip(waveform * full_scale, -full_scale, full_scale - 1).astype(np.int16)

	    audio_segment = AudioSegment(
	        data=pcm.tobytes(),
	        sample_width=2,
	        frame_rate=sample_rate,
	        channels=1,
	    )

	    # (volume, dBFS) breakpoints, listed from the loudest setting down because
	    # score_transform walks its stages in decreasing order.
	    breakpoints = [(100, -20), (50, -35), (10, -60), (0, -150)]

	    # Compute the target dBFS.
	    target_db = score_transform(
	        x=volume,
	        stages=[level for level, _ in breakpoints],
	        scores=[db for _, db in breakpoints],
	    )

	    current_db = audio_segment.dBFS
	    # pydub reports -inf dBFS for a segment whose RMS is zero; the gain needed to
	    # reach the target would then be infinite, so silence is left untouched.
	    if np.isfinite(current_db):
	        audio_segment = audio_segment.apply_gain(target_db - current_db)

	    samples = np.array(audio_segment.get_array_of_samples())

	    sample_width = audio_segment.sample_width
	    if sample_width not in (2, 3, 4):
	        raise AssertionError(f"unsupported sample width: {sample_width}")
	    # Divide by the signed full-scale value for this sample width.
	    return samples.astype(np.float32) / (1 << (8 * sample_width - 1))


	def get_args():
	    """
	    Parse the command-line options of this script.

	    :return: the parsed namespace; ``filename`` is the audio file to process and
	        defaults to an example recording under ``project_path``.
	    """
	    default_wav = project_path / "data/examples/ai_agent/chinese-1.wav"

	    arg_parser = argparse.ArgumentParser()
	    arg_parser.add_argument("--filename", type=str, default=default_wav.as_posix())
	    return arg_parser.parse_args()


	def main():
	    """
	    Load the file named by ``--filename`` at 8 kHz, set its volume to 10 and
	    write the result to ``temp.wav`` as 16-bit PCM.
	    """
	    target_rate = 8000
	    options = get_args()

	    audio, sample_rate = librosa.load(options.filename, sr=target_rate)
	    quiet = set_volume(waveform=audio, sample_rate=sample_rate, volume=10)

	    # Scale the float samples to 16-bit integers before writing the WAV file.
	    pcm = np.array(quiet * (1 << 15), dtype=np.int16)
	    wavfile.write("temp.wav", rate=target_rate, data=pcm)


	# Run the demo only when the file is executed as a script.
	if __name__ == "__main__":
	    main()