parakeet-tdt-0.6b-v2

Running on Zero

App Files Files Community

fdaudens HF Staff committed on May 1

Commit

5346a3c

verified ·

1 Parent(s): 20e5fd0

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -0

app.py CHANGED Viewed

@@ -18,6 +18,20 @@ model = ASRModel.from_pretrained(model_name=MODEL_NAME)
 model.eval()
 def get_audio_segment(audio_path, start_second, end_second):
     if not audio_path or not Path(audio_path).exists():
         print(f"Warning: Audio path '{audio_path}' not found or invalid for clipping.")
         return None
@@ -56,6 +70,24 @@ def get_audio_segment(audio_path, start_second, end_second):
 @spaces.GPU
 def get_transcripts_and_raw_times(audio_path):
     if not audio_path:
         gr.Error("No audio file path provided for transcription.", duration=None)
         # Return an update to hide the button
@@ -200,6 +232,21 @@ def get_transcripts_and_raw_times(audio_path):
                 print(f"Error removing temporary audio file {processed_audio_path}: {e}")
 def play_segment(evt: gr.SelectData, raw_ts_list, current_audio_path):
     if not isinstance(raw_ts_list, list):
         print(f"Warning: raw_ts_list is not a list ({type(raw_ts_list)}). Cannot play segment.")
         return gr.Audio(value=None, label="Selected Segment")

 model.eval()
 def get_audio_segment(audio_path, start_second, end_second):
+    """
+    Extract a segment of audio from a given audio file.
+    Parameters:
+        audio_path (str): Path to the audio file to process
+        start_second (float): Start time of the segment in seconds
+        end_second (float): End time of the segment in seconds
+    Returns:
+        tuple or None: A tuple containing (frame_rate, samples) where:
+            - frame_rate (int): The sample rate of the audio
+            - samples (numpy.ndarray): The audio samples as a numpy array
+            Returns None if there's an error processing the audio
+    """
     if not audio_path or not Path(audio_path).exists():
         print(f"Warning: Audio path '{audio_path}' not found or invalid for clipping.")
         return None
 @spaces.GPU
 def get_transcripts_and_raw_times(audio_path):
+    """
+    Transcribe an audio file and generate timestamps for each segment.
+    Parameters:
+        audio_path (str): Path to the audio file to transcribe
+    Returns:
+        tuple: A tuple containing:
+            - vis_data (list): List of [start, end, text] for visualization
+            - raw_times_data (list): List of [start, end] timestamps
+            - audio_path (str): Path to the processed audio file
+            - button_update (gr.DownloadButton): Gradio button component for CSV download
+    Notes:
+        - Automatically handles audio preprocessing (resampling to 16kHz, mono conversion)
+        - Uses NVIDIA's Parakeet TDT model for transcription
+        - Generates a CSV file with transcription results
+    """
     if not audio_path:
         gr.Error("No audio file path provided for transcription.", duration=None)
         # Return an update to hide the button
                 print(f"Error removing temporary audio file {processed_audio_path}: {e}")
 def play_segment(evt: gr.SelectData, raw_ts_list, current_audio_path):
+    """
+    Play a selected segment from the transcription results.
+    Parameters:
+        evt (gr.SelectData): Gradio select event containing the index of selected segment
+        raw_ts_list (list): List of [start, end] timestamps for all segments
+        current_audio_path (str): Path to the current audio file being processed
+    Returns:
+        gr.Audio: Gradio Audio component containing the selected segment for playback
+    Notes:
+        - Extracts and plays the audio segment corresponding to the selected transcription
+        - Returns a gr.Audio with value=None (no playable clip) if segment extraction fails or inputs are invalid
+    """
     if not isinstance(raw_ts_list, list):
         print(f"Warning: raw_ts_list is not a list ({type(raw_ts_list)}). Cannot play segment.")
         return gr.Audio(value=None, label="Selected Segment")