gpaasch committed on
Commit
4b8e08c
·
1 Parent(s): 9ae574b

solved ffmpeg-related error

Browse files
Files changed (2) hide show
  1. requirements.txt +3 -1
  2. src/app.py +1 -15
requirements.txt CHANGED
@@ -12,8 +12,10 @@ llama-index-llms-llama-cpp
12
  sentence-transformers>=2.2.0
13
 
14
  # Audio processing
15
- ffmpeg-python
 
16
  librosa>=0.10.1
 
17
 
18
  # System utilities
19
  psutil
 
12
  sentence-transformers>=2.2.0
13
 
14
  # Audio processing
15
+ ffmpeg-python>=0.2.0
16
+ pydub>=0.25.1
17
  librosa>=0.10.1
18
+ soundfile>=0.12.1
19
 
20
  # System utilities
21
  psutil
src/app.py CHANGED
@@ -264,7 +264,7 @@ def process_speech(audio_data, history):
264
  sample_rate = 16000
265
 
266
  # Transcribe with error handling
267
- try:
268
  # Format dictionary correctly with required keys
269
  input_features = {
270
  "raw": audio_array,
@@ -304,9 +304,6 @@ def process_speech(audio_data, history):
304
  })}
305
  ]
306
 
307
- except Exception as e:
308
- print(f"Transcription error: {str(e)}")
309
- return []
310
  else:
311
  print(f"Invalid audio format: {type(audio_data)}")
312
  return []
@@ -530,7 +527,6 @@ with gr.Blocks(
530
  if not audio or not isinstance(audio, tuple):
531
  return ""
532
 
533
- try:
534
  sample_rate, audio_array = audio
535
  features = process_audio(audio_array, sample_rate)
536
 
@@ -543,10 +539,6 @@ with gr.Blocks(
543
  elif isinstance(result, str):
544
  return result.strip()
545
  return ""
546
-
547
- except Exception as e:
548
- print(f"Transcription error: {str(e)}")
549
- return ""
550
 
551
  microphone.stream(
552
  fn=update_live_transcription,
@@ -574,7 +566,6 @@ with gr.Blocks(
574
  if not text:
575
  return history
576
 
577
- try:
578
  # Limit input length
579
  if len(text) > 500:
580
  text = text[:500] + "..."
@@ -600,11 +591,6 @@ with gr.Blocks(
600
  })}
601
  ]
602
 
603
- except Exception as e:
604
- print(f"Text processing error: {str(e)}")
605
- cleanup_memory()
606
- return history
607
-
608
  submit_btn.click(
609
  fn=process_text_input,
610
  inputs=[text_input, chatbot],
 
264
  sample_rate = 16000
265
 
266
  # Transcribe with error handling
267
+
268
  # Format dictionary correctly with required keys
269
  input_features = {
270
  "raw": audio_array,
 
304
  })}
305
  ]
306
 
 
 
 
307
  else:
308
  print(f"Invalid audio format: {type(audio_data)}")
309
  return []
 
527
  if not audio or not isinstance(audio, tuple):
528
  return ""
529
 
 
530
  sample_rate, audio_array = audio
531
  features = process_audio(audio_array, sample_rate)
532
 
 
539
  elif isinstance(result, str):
540
  return result.strip()
541
  return ""
 
 
 
 
542
 
543
  microphone.stream(
544
  fn=update_live_transcription,
 
566
  if not text:
567
  return history
568
 
 
569
  # Limit input length
570
  if len(text) > 500:
571
  text = text[:500] + "..."
 
591
  })}
592
  ]
593
 
 
 
 
 
 
594
  submit_btn.click(
595
  fn=process_text_input,
596
  inputs=[text_input, chatbot],