import gradio as gr
import os
from transformers import pipeline

# Load ASR (Speech-to-Text) pipeline with timestamp handling
asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")

# Load Summarization model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


# Function to transcribe and summarize audio
def transcribe_and_summarize(audio_file):
    if audio_file is None:
        return "Error: No audio file provided.", ""
    try:
        # Transcribe audio (handling long-form audio)
        transcription_result = asr(audio_file, return_timestamps=True)

        # Extract transcribed text from the timestamped chunks
        transcribed_text = " ".join([segment['text'] for segment in transcription_result['chunks']])

        # Ensure the transcribed text isn't too short for summarization
        if len(transcribed_text.split()) < 50:
            summarized_text = "Text too short to summarize."
        else:
            # Summarize the transcribed text
            summary_result = summarizer(transcribed_text, max_length=100, min_length=30, do_sample=False)
            summarized_text = summary_result[0]['summary_text']

        return transcribed_text, summarized_text
    except Exception as e:
        return f"Error: {str(e)}", ""


# Create Gradio interface
iface = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=gr.Audio(type="filepath"),  # Accepts an audio file
    outputs=[
        gr.Textbox(label="Transcribed Text"),
        gr.Textbox(label="Summarized Text")
    ]
)

# Get port safely (default to 7860 if not set)
port = int(os.environ.get('PORT1', 7860))

# Launch Gradio app
iface.launch(share=True, server_port=port)