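"""Gradio demo: automatic audio captioning with the DCASE2024 Task 6 baseline."""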
import gradio as gr
import librosa
import torch

from dcase24t6.nn.hub import baseline_pipeline

# Instantiate the baseline captioning pipeline once at startup, so every
# request reuses the same loaded model.
model = baseline_pipeline()
|
|
def dcase_inference(mic=None, file=None):
    # Both Audio components use type="filepath", so Gradio passes the path of
    # a temporary audio file, or None when the component was left empty. The
    # microphone input is therefore a path too and must be loaded, not used
    # directly as a waveform.
    if mic is not None:
        audio, sr = librosa.load(mic, sr=None)
        audio = torch.from_numpy(audio)
        gr.Info(f"Loaded microphone recording (sr={sr} Hz)")
    elif file is not None:
        audio, sr = librosa.load(file, sr=None)
        audio = torch.from_numpy(audio)
        gr.Info(f"Loaded file {file} (sr={sr} Hz)")
    else:
        return "You must provide either a microphone recording or an audio file."

    item = {"audio": audio, "sr": sr}
    outputs = model(item)
    # Keep the first candidate caption produced by the model.
    candidate = outputs["candidates"][0]
    return candidate
|
|
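# Example of calling the inference function directly, outside the UI
# (the file path is hypothetical):
#   caption = dcase_inference(file="some_audio.wav")
#   print(caption)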
|
def create_app():
    with gr.Blocks() as demo:
        gr.Markdown(
            """
            # DCASE demo for automatic audio captioning
            """
        )
        gr.Interface(
            fn=dcase_inference,
            inputs=[
                gr.Audio(sources=["microphone"], type="filepath"),
                gr.Audio(sources=["upload"], type="filepath"),
            ],
            outputs="text",
        )
    return demo
|
|
def main():
    app = create_app()
    # debug=True keeps the server in the foreground and prints errors to the
    # console, which is convenient while developing the demo.
    app.launch(debug=True)


if __name__ == "__main__":
    main()