Spaces:
Running
Running
solved ffmpeg-related error
Browse files
- requirements.txt +3 -1
- src/app.py +1 -15
requirements.txt
CHANGED
@@ -12,8 +12,10 @@ llama-index-llms-llama-cpp
|
|
12 |
sentence-transformers>=2.2.0
|
13 |
|
14 |
# Audio processing
|
15 |
-
ffmpeg-python
|
|
|
16 |
librosa>=0.10.1
|
|
|
17 |
|
18 |
# System utilities
|
19 |
psutil
|
|
|
12 |
sentence-transformers>=2.2.0
|
13 |
|
14 |
# Audio processing
|
15 |
+
ffmpeg-python>=0.2.0
|
16 |
+
pydub>=0.25.1
|
17 |
librosa>=0.10.1
|
18 |
+
soundfile>=0.12.1
|
19 |
|
20 |
# System utilities
|
21 |
psutil
|
src/app.py
CHANGED
@@ -264,7 +264,7 @@ def process_speech(audio_data, history):
|
|
264 |
sample_rate = 16000
|
265 |
|
266 |
# Transcribe with error handling
|
267 |
-
|
268 |
# Format dictionary correctly with required keys
|
269 |
input_features = {
|
270 |
"raw": audio_array,
|
@@ -304,9 +304,6 @@ def process_speech(audio_data, history):
|
|
304 |
})}
|
305 |
]
|
306 |
|
307 |
-
except Exception as e:
|
308 |
-
print(f"Transcription error: {str(e)}")
|
309 |
-
return []
|
310 |
else:
|
311 |
print(f"Invalid audio format: {type(audio_data)}")
|
312 |
return []
|
@@ -530,7 +527,6 @@ with gr.Blocks(
|
|
530 |
if not audio or not isinstance(audio, tuple):
|
531 |
return ""
|
532 |
|
533 |
-
try:
|
534 |
sample_rate, audio_array = audio
|
535 |
features = process_audio(audio_array, sample_rate)
|
536 |
|
@@ -543,10 +539,6 @@ with gr.Blocks(
|
|
543 |
elif isinstance(result, str):
|
544 |
return result.strip()
|
545 |
return ""
|
546 |
-
|
547 |
-
except Exception as e:
|
548 |
-
print(f"Transcription error: {str(e)}")
|
549 |
-
return ""
|
550 |
|
551 |
microphone.stream(
|
552 |
fn=update_live_transcription,
|
@@ -574,7 +566,6 @@ with gr.Blocks(
|
|
574 |
if not text:
|
575 |
return history
|
576 |
|
577 |
-
try:
|
578 |
# Limit input length
|
579 |
if len(text) > 500:
|
580 |
text = text[:500] + "..."
|
@@ -600,11 +591,6 @@ with gr.Blocks(
|
|
600 |
})}
|
601 |
]
|
602 |
|
603 |
-
except Exception as e:
|
604 |
-
print(f"Text processing error: {str(e)}")
|
605 |
-
cleanup_memory()
|
606 |
-
return history
|
607 |
-
|
608 |
submit_btn.click(
|
609 |
fn=process_text_input,
|
610 |
inputs=[text_input, chatbot],
|
|
|
264 |
sample_rate = 16000
|
265 |
|
266 |
# Transcribe with error handling
|
267 |
+
|
268 |
# Format dictionary correctly with required keys
|
269 |
input_features = {
|
270 |
"raw": audio_array,
|
|
|
304 |
})}
|
305 |
]
|
306 |
|
|
|
|
|
|
|
307 |
else:
|
308 |
print(f"Invalid audio format: {type(audio_data)}")
|
309 |
return []
|
|
|
527 |
if not audio or not isinstance(audio, tuple):
|
528 |
return ""
|
529 |
|
|
|
530 |
sample_rate, audio_array = audio
|
531 |
features = process_audio(audio_array, sample_rate)
|
532 |
|
|
|
539 |
elif isinstance(result, str):
|
540 |
return result.strip()
|
541 |
return ""
|
|
|
|
|
|
|
|
|
542 |
|
543 |
microphone.stream(
|
544 |
fn=update_live_transcription,
|
|
|
566 |
if not text:
|
567 |
return history
|
568 |
|
|
|
569 |
# Limit input length
|
570 |
if len(text) > 500:
|
571 |
text = text[:500] + "..."
|
|
|
591 |
})}
|
592 |
]
|
593 |
|
|
|
|
|
|
|
|
|
|
|
594 |
submit_btn.click(
|
595 |
fn=process_text_input,
|
596 |
inputs=[text_input, chatbot],
|