Spaces:

OpenSound
/

CapSpeech-TTS

Running on Zero

Upload 518 files

dd9600d verified 5 days ago

950 Bytes

	from pyannote.audio import Model
	from pathlib import Path
	from brouhaha.pipeline import RegressiveActivityDetectionPipeline
	import torch
	from huggingface_hub import hf_hub_download
	import numpy as np

	# Sample rate passed to the pipeline alongside the waveform.
	_SAMPLING_RATE = 16000
	# Inference is pinned to the CPU, as in the original implementation.
	_DEVICE = "cpu"
	# Lazily-built pipeline, shared across calls so the checkpoint is loaded once.
	_BROUHAHA_PIPELINE = None


	def _get_brouhaha_pipeline():
	    """Build the brouhaha activity-detection pipeline on first use and reuse it."""
	    global _BROUHAHA_PIPELINE
	    if _BROUHAHA_PIPELINE is None:
	        checkpoint = Path(
	            hf_hub_download(repo_id="ylacombe/brouhaha-best", filename="best.ckpt")
	        )
	        model = Model.from_pretrained(checkpoint, strict=False)
	        model.to(_DEVICE)

	        pipeline = RegressiveActivityDetectionPipeline(
	            segmentation=model, batch_size=1
	        )
	        pipeline.to(torch.device(_DEVICE))
	        _BROUHAHA_PIPELINE = pipeline
	    return _BROUHAHA_PIPELINE


	def speed_apply(waveform):
	    """Return the total duration of speech detected in ``waveform``.

	    Despite its name, this function does not compute a speaking rate: it
	    runs the brouhaha ``RegressiveActivityDetectionPipeline`` on the audio
	    and sums the ``duration`` of every segment in the resulting
	    ``annotation`` (pyannote segments express durations in seconds).

	    Args:
	        waveform: Audio samples convertible to a float tensor (list, NumPy
	            array or tensor). Presumably a 1-D mono signal sampled at
	            16 kHz, since a leading channel axis is added and the sample
	            rate is fixed at 16000 -- TODO confirm against callers.

	    Returns:
	        The summed duration of all detected speech segments; ``0.0`` when
	        ``waveform`` contains no samples.
	    """
	    # torch.as_tensor replaces the legacy torch.Tensor(...) constructor, which
	    # interprets an integer argument as a size rather than as data.
	    samples = torch.as_tensor(waveform, dtype=torch.float32)
	    if samples.numel() == 0:
	        # Nothing to analyse: skip model loading and inference entirely.
	        return 0.0

	    pipeline = _get_brouhaha_pipeline()

	    # The pipeline is given a (channel, time) tensor on the same device it
	    # was moved to when it was built.
	    res = pipeline(
	        {
	            "sample_rate": _SAMPLING_RATE,
	            "waveform": samples.unsqueeze(0).to(torch.device(_DEVICE)),
	        }
	    )

	    # Each item yielded by itertracks() starts with the segment.
	    speech_duration = sum(
	        track[0].duration for track in res["annotation"].itertracks()
	    )

	    return speech_duration