Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,20 @@ model = ASRModel.from_pretrained(model_name=MODEL_NAME)
|
|
18 |
model.eval()
|
19 |
|
20 |
def get_audio_segment(audio_path, start_second, end_second):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
if not audio_path or not Path(audio_path).exists():
|
22 |
print(f"Warning: Audio path '{audio_path}' not found or invalid for clipping.")
|
23 |
return None
|
@@ -56,6 +70,24 @@ def get_audio_segment(audio_path, start_second, end_second):
|
|
56 |
|
57 |
@spaces.GPU
|
58 |
def get_transcripts_and_raw_times(audio_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
if not audio_path:
|
60 |
gr.Error("No audio file path provided for transcription.", duration=None)
|
61 |
# Return an update to hide the button
|
@@ -200,6 +232,21 @@ def get_transcripts_and_raw_times(audio_path):
|
|
200 |
print(f"Error removing temporary audio file {processed_audio_path}: {e}")
|
201 |
|
202 |
def play_segment(evt: gr.SelectData, raw_ts_list, current_audio_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
if not isinstance(raw_ts_list, list):
|
204 |
print(f"Warning: raw_ts_list is not a list ({type(raw_ts_list)}). Cannot play segment.")
|
205 |
return gr.Audio(value=None, label="Selected Segment")
|
|
|
18 |
model.eval()
|
19 |
|
20 |
def get_audio_segment(audio_path, start_second, end_second):
|
21 |
+
"""
|
22 |
+
Extract a segment of audio from a given audio file.
|
23 |
+
|
24 |
+
Parameters:
|
25 |
+
audio_path (str): Path to the audio file to process
|
26 |
+
start_second (float): Start time of the segment in seconds
|
27 |
+
end_second (float): End time of the segment in seconds
|
28 |
+
|
29 |
+
Returns:
|
30 |
+
tuple or None: A tuple containing (frame_rate, samples) where:
|
31 |
+
- frame_rate (int): The sample rate of the audio
|
32 |
+
- samples (numpy.ndarray): The audio samples as a numpy array
|
33 |
+
Returns None if there's an error processing the audio
|
34 |
+
"""
|
35 |
if not audio_path or not Path(audio_path).exists():
|
36 |
print(f"Warning: Audio path '{audio_path}' not found or invalid for clipping.")
|
37 |
return None
|
|
|
70 |
|
71 |
@spaces.GPU
|
72 |
def get_transcripts_and_raw_times(audio_path):
|
73 |
+
"""
|
74 |
+
Transcribe an audio file and generate timestamps for each segment.
|
75 |
+
|
76 |
+
Parameters:
|
77 |
+
audio_path (str): Path to the audio file to transcribe
|
78 |
+
|
79 |
+
Returns:
|
80 |
+
tuple: A tuple containing:
|
81 |
+
- vis_data (list): List of [start, end, text] for visualization
|
82 |
+
- raw_times_data (list): List of [start, end] timestamps
|
83 |
+
- audio_path (str): Path to the processed audio file
|
84 |
+
- button_update (gr.DownloadButton): Gradio button component for CSV download
|
85 |
+
|
86 |
+
Notes:
|
87 |
+
- Automatically handles audio preprocessing (resampling to 16kHz, mono conversion)
|
88 |
+
- Uses NVIDIA's Parakeet TDT model for transcription
|
89 |
+
- Generates a CSV file with transcription results
|
90 |
+
"""
|
91 |
if not audio_path:
|
92 |
gr.Error("No audio file path provided for transcription.", duration=None)
|
93 |
# Return an update to hide the button
|
|
|
232 |
print(f"Error removing temporary audio file {processed_audio_path}: {e}")
|
233 |
|
234 |
def play_segment(evt: gr.SelectData, raw_ts_list, current_audio_path):
|
235 |
+
"""
|
236 |
+
Play a selected segment from the transcription results.
|
237 |
+
|
238 |
+
Parameters:
|
239 |
+
evt (gr.SelectData): Gradio select event containing the index of selected segment
|
240 |
+
raw_ts_list (list): List of [start, end] timestamps for all segments
|
241 |
+
current_audio_path (str): Path to the current audio file being processed
|
242 |
+
|
243 |
+
Returns:
|
244 |
+
gr.Audio: Gradio Audio component containing the selected segment for playback
|
245 |
+
|
246 |
+
Notes:
|
247 |
+
- Extracts and plays the audio segment corresponding to the selected transcription
|
248 |
+
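- Returns an empty gr.Audio component (value=None), not a bare None, when inputs are invalid (e.g. raw_ts_list is not a list); presumably the same when segment extraction fails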
|
249 |
+
"""
|
250 |
if not isinstance(raw_ts_list, list):
|
251 |
print(f"Warning: raw_ts_list is not a list ({type(raw_ts_list)}). Cannot play segment.")
|
252 |
return gr.Audio(value=None, label="Selected Segment")
|