# ShukaNote / app.py
# helvekami's picture
# Updated app.py with email (placeholder) and transcript download functionality
# a9b9492 verified
# raw
# history blame
# 2.49 kB
import transformers
import gradio as gr
import librosa
import torch
import spaces
import numpy as np
import tempfile
@spaces.GPU(duration=60)
def transcribe_and_respond(audio_file, email):
    """Run the Shuka pipeline on an audio file and package the result for download.

    The audio is loaded at 16 kHz mono, passed to the ``sarvamai/shuka_v1``
    pipeline together with a system prompt asking for key takeaways and
    action steps, and the stringified model output is written to a
    temporary ``.txt`` file.

    Args:
        audio_file: Filesystem path of the uploaded or recorded audio.
            A falsy value (nothing submitted) yields an error result.
        email: Optional e-mail address. When it contains non-whitespace
            characters it is prepended to the returned text.

    Returns:
        A ``(text, file_path)`` tuple. On success ``text`` is the model
        output (optionally prefixed with the e-mail line) and ``file_path``
        is the path of a temporary text file holding the same content.
        On failure ``text`` starts with ``"Error: "`` and ``file_path`` is
        ``None`` so the file output is left empty.
    """
    # Fail fast, before the expensive model load, when no audio was supplied.
    if not audio_file:
        return "Error: no audio provided. Please upload or record audio.", None

    try:
        # NOTE(review): the pipeline is rebuilt on every call; consider caching
        # it if the hosting environment allows that.
        pipe = transformers.pipeline(
            model='sarvamai/shuka_v1',
            trust_remote_code=True,
            device=0,
            torch_dtype=torch.bfloat16,
        )

        # Load at 16 kHz. mono=True (librosa's default, made explicit here)
        # already down-mixes multi-channel input, so no manual channel
        # averaging is needed afterwards.
        audio, sr = librosa.load(audio_file, sr=16000, mono=True)
        # Convert the audio to a contiguous float32 array
        audio = np.ascontiguousarray(audio, dtype=np.float32)

        # Debug: Print audio properties
        print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")

        # Set up the prompt to get key takeaways
        turns = [
            {'role': 'system', 'content': 'Share the Key Take Aways and Action Steps'},
            {'role': 'user', 'content': '<|audio|>'},
        ]
        print(f"Initial turns: {turns}")

        # Run the model inference (this call is synchronous)
        output = pipe(
            {'audio': audio, 'turns': turns, 'sampling_rate': sr},
            max_new_tokens=10000,
        )
        print(f"Model output: {output}")

        # Use the string form of whatever the pipeline returned as the text
        transcript = str(output)
        if email and email.strip():
            transcript = f"Email provided: {email}\n\n{transcript}"

        # Write the text to a temporary file for download. UTF-8 is forced so
        # non-ASCII output does not depend on the host locale's default encoding.
        with tempfile.NamedTemporaryFile(
            delete=False, mode='w', suffix='.txt', encoding='utf-8'
        ) as tmp:
            tmp.write(transcript)
            transcript_file = tmp.name

        # Return transcript text and file download path
        return transcript, transcript_file
    except Exception as e:
        # Top-level UI boundary: log the failure and report it to the user
        # instead of crashing the request. None (not "") means "no file".
        print(f"transcribe_and_respond failed: {e!r}")
        return f"Error: {str(e)}", None
# Widgets feeding transcribe_and_respond(audio_file, email), in argument order.
input_components = [
    gr.Audio(sources=["upload", "microphone"], type="filepath"),
    gr.Textbox(label="Email", placeholder="Enter your email address (optional)"),
]

# Widgets receiving the (text, file path) pair the handler returns.
output_components = [
    gr.Textbox(label="Transcript"),
    gr.File(label="Download Transcript"),
]

# Assemble the Gradio interface around the handler.
iface = gr.Interface(
    fn=transcribe_and_respond,
    inputs=input_components,
    outputs=output_components,
    title="ShukaNotesApp",
    description="Upload or record your meeting audio, optionally provide your email, and download the transcript.",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()