baoyin2024 committed (verified)
Commit 3215c20 · Parent: 0666a2d

Update app.py

Files changed (1)
  1. app.py +62 -41
app.py CHANGED
@@ -10,14 +10,15 @@ from werkzeug.utils import secure_filename
  import tempfile
  from moviepy.editor import VideoFileClip
  import logging
- import torchaudio # Import torchaudio
+ import torchaudio
+ import ffmpeg  # ffmpeg-python

- # Configure logging
+ # Configure logging
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

  app = Flask(__name__)

- # Configuration
+ # Configuration
  MAX_CONCURRENT_REQUESTS = 2
  MAX_FILE_DURATION = 60 * 30
  TEMPORARY_FOLDER = tempfile.gettempdir()
@@ -25,15 +26,15 @@ ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac', 'aac', 'wma', 'o
  ALLOWED_VIDEO_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv', 'mpeg', 'mpg', '3gp'}
  ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS.union(ALLOWED_VIDEO_EXTENSIONS)

- API_KEY = os.environ.get("API_KEY") # Load API key from environment
- MODEL_NAME = os.environ.get("WHISPER_MODEL", "guillaumekln/faster-whisper-large-v2") # Configurable model
+ API_KEY = os.environ.get("API_KEY")
+ MODEL_NAME = os.environ.get("WHISPER_MODEL", "guillaumekln/faster-whisper-large-v2")

- # Device check for faster-whisper
+ # Device check
  device = "cuda" if torch.cuda.is_available() else "cpu"
  compute_type = "float16" if device == "cuda" else "int8"
- logging.info(f"Using device: {device} with compute_type: {compute_type}")
+ logging.info(f"Using device: {device}, compute_type: {compute_type}")

- # Faster Whisper setup
+ # Faster Whisper setup
  beamsize = 2
  try:
      wmodel = WhisperModel(
@@ -42,12 +43,12 @@ try:
          compute_type=compute_type,
          download_root="./model_cache"
      )
-     logging.info(f"Model {MODEL_NAME} loaded successfully.")
+     logging.info(f"Model {MODEL_NAME} loaded successfully.")
  except Exception as e:
-     logging.error(f"Failed to load model {MODEL_NAME}: {e}")
+     logging.error(f"Failed to load model {MODEL_NAME}: {e}")
      wmodel = None

- # Concurrency control
+ # Concurrency control
  request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)
  active_requests = 0

@@ -70,29 +71,35 @@ def cleanup_temp_files(*file_paths):
          try:
              if file_path and os.path.exists(file_path):
                  os.remove(file_path)
-                 logging.info(f"Deleted temporary file: {file_path}")
+                 logging.info(f"Deleted temporary file: {file_path}")
          except Exception as e:
-             logging.error(f"Error cleaning up temp file {file_path}: {str(e)}")
+             logging.error(f"Error deleting temporary file {file_path}: {str(e)}")


  def extract_audio_from_video(video_path, output_audio_path):
      try:
-         video = VideoFileClip(video_path)
+         # Call FFmpeg via ffmpeg-python
+         ffmpeg.input(video_path).output(output_audio_path, acodec='pcm_s16le').run(capture_stdout=True, capture_stderr=True)
+         # Or, with more options:
+         # ffmpeg.input(video_path).output(output_audio_path, acodec='pcm_s16le', ar=44100, ac=2).run(capture_stdout=True, capture_stderr=True)
+
+         # Check the video duration
+         video = VideoFileClip(video_path)  # moviepy
          if video.duration > MAX_FILE_DURATION:
              video.close()
-             raise ValueError(f"Video duration exceeds {MAX_FILE_DURATION} seconds")
-         video.audio.write_audiofile(output_audio_path, codec='pcm_s16le') # Specify codec
+             raise ValueError(f"Video duration exceeds {MAX_FILE_DURATION} seconds")
          video.close()
+
          return output_audio_path
      except Exception as e:
-         logging.exception("Error extracting audio from video")
-         raise Exception(f"Failed to extract audio from video: {str(e)}")
+         logging.exception("Error extracting audio from video")
+         raise Exception(f"Failed to extract audio from video: {str(e)}")


  @app.route("/health", methods=["GET"])
  def health_check():
      return jsonify({
-         'status': 'API is running',
+         'status': 'API is running',
          'timestamp': datetime.datetime.now().isoformat(),
          'device': device,
          'compute_type': compute_type,
@@ -118,10 +125,10 @@ def transcribe():
      global active_requests

      if not validate_api_key(request):
-         return jsonify({'error': 'Invalid API key'}), 401
+         return jsonify({'error': 'Invalid API key'}), 401

      if not request_semaphore.acquire(blocking=False):
-         return jsonify({'error': 'Server busy'}), 503
+         return jsonify({'error': 'Server busy'}), 503

      active_requests += 1
      start_time = time.time()
@@ -130,20 +137,20 @@ def transcribe():

      try:
          if wmodel is None:
-             return jsonify({'error': 'Model failed to load. Check server logs.'}), 500
+             return jsonify({'error': 'Model failed to load. Check the server logs.'}), 500

          if 'file' not in request.files:
-             return jsonify({'error': 'No file provided'}), 400
+             return jsonify({'error': 'No file provided'}), 400

          file = request.files['file']
          if not (file and allowed_file(file.filename)):
-             return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400
+             return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400

-         # Save uploaded file to temporary location
+         # Save the uploaded file to a temporary location
          temp_file_path = os.path.join(TEMPORARY_FOLDER, secure_filename(file.filename))
          file.save(temp_file_path)

-         # Check if file is a video and extract audio if necessary
+         # If the file is a video, extract the audio
          file_extension = file.filename.rsplit('.', 1)[1].lower()
          is_video = file_extension in ALLOWED_VIDEO_EXTENSIONS

@@ -153,19 +160,32 @@ def transcribe():
              transcription_file = temp_audio_path
          else:
              transcription_file = temp_file_path
-         # Check audio file duration directly
+
+         # Check the audio file duration
          try:
-             info = torchaudio.info(transcription_file)
-             duration = info.num_frames / info.sample_rate
+             # Load the audio with torchaudio.load, specifying the format
+             waveform, sample_rate = torchaudio.load(transcription_file, format=file_extension)
+             duration = waveform.size(1) / sample_rate
              if duration > MAX_FILE_DURATION:
-                 raise ValueError(f"Audio duration exceeds {MAX_FILE_DURATION} seconds")
-         except Exception as duration_err:
-             logging.exception(f"Error getting/checking audio duration for {transcription_file}")
-             return jsonify({'error': f'Error getting/checking audio duration: {str(duration_err)}'}), 400
-
-
-
-         # Transcribe the audio file
+                 raise ValueError(f"Audio duration exceeds {MAX_FILE_DURATION} seconds")
+         except Exception as load_err:
+             logging.exception(f"Error loading audio file with torchaudio.load: {transcription_file}")
+             try:
+                 # Try the soundfile backend instead (disable sox_io)
+                 torchaudio.set_audio_backend("soundfile")  # Force the soundfile backend
+                 waveform, sample_rate = torchaudio.load(transcription_file)  # Do not pass the file extension
+                 duration = waveform.size(1) / sample_rate
+                 if duration > MAX_FILE_DURATION:
+                     raise ValueError(f"Audio duration exceeds {MAX_FILE_DURATION} seconds")
+
+             except Exception as soundfile_err:
+                 logging.exception(f"Error loading audio file with the soundfile backend: {transcription_file}")
+                 return jsonify({'error': f'Failed to load the audio file with both backends: {str(soundfile_err)}'}), 400
+
+             finally:
+                 torchaudio.set_audio_backend("default")  # Restore the default audio backend
+
+         # Transcribe the audio file
          segments, _ = wmodel.transcribe(
              transcription_file,
              beam_size=beamsize,
@@ -182,21 +202,22 @@ def transcribe():
          }), 200

      except Exception as e:
-         logging.exception("Exception during transcription process")
+         logging.exception("Exception during the transcription process")
          return jsonify({'error': str(e)}), 500

      finally:
          cleanup_temp_files(temp_file_path, temp_audio_path)
          active_requests -= 1
          request_semaphore.release()
-         print(f"Processed in {time.time() - start_time:.2f}s (Active: {active_requests})")
+         print(f"Processed in {time.time() - start_time:.2f}s (active requests: {active_requests})")


  if __name__ == "__main__":
-     # Create temporary folder if it doesn't exist
+     # Create the temporary folder if it does not exist
      if not os.path.exists(TEMPORARY_FOLDER):
          os.makedirs(TEMPORARY_FOLDER)
-         logging.info(f"Created temporary folder: {TEMPORARY_FOLDER}")
+         logging.info(f"Created temporary folder: {TEMPORARY_FOLDER}")

      app.run(host="0.0.0.0", port=7860, threaded=True)
+
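
A note on the torchaudio fallback added in transcribe(): torchaudio.set_audio_backend has historically accepted only "sox_io", "soundfile", or None, so restoring with the string "default" is likely to raise a ValueError on most torchaudio versions (and the get/set backend API is deprecated in recent releases). A minimal sketch of the same fallback that records and restores whatever backend was active, assuming a torchaudio version that still exposes get_audio_backend/set_audio_backend; the helper name is illustrative, not part of this commit:

import torchaudio

def load_audio_with_fallback(path, file_extension):
    """Try the current torchaudio backend first, then fall back to soundfile."""
    try:
        # Same call as in app.py: pass the extension as an explicit format hint.
        return torchaudio.load(path, format=file_extension)
    except Exception:
        previous_backend = torchaudio.get_audio_backend()  # may be None
        try:
            torchaudio.set_audio_backend("soundfile")
            return torchaudio.load(path)  # let the soundfile backend sniff the format
        finally:
            # Restore whatever backend was active before the fallback.
            torchaudio.set_audio_backend(previous_backend)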
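The new extract_audio_from_video runs FFmpeg for the extraction but still opens the clip with moviepy only to read its duration. Not part of this commit, but since ffmpeg-python is now a dependency, the duration could also be read from ffprobe metadata; a minimal sketch using ffmpeg.probe (the helper name is illustrative, and it assumes the container reports a format-level duration):

import ffmpeg

MAX_FILE_DURATION = 60 * 30  # seconds, as configured in app.py

def media_duration_seconds(path):
    """Return the container duration reported by ffprobe, in seconds."""
    info = ffmpeg.probe(path)  # runs ffprobe and parses its JSON output
    return float(info["format"]["duration"])

# Example: enforce the same limit as app.py before extracting audio.
# if media_duration_seconds(video_path) > MAX_FILE_DURATION:
#     raise ValueError(f"Video duration exceeds {MAX_FILE_DURATION} seconds")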
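For quick manual testing of the updated endpoint, a sketch of a client call is below. The multipart field name "file" and port 7860 come from app.py; the /transcribe path and the X-API-Key header name are assumptions, since the route decorator and validate_api_key() are not part of this diff.

import requests

API_URL = "http://localhost:7860/transcribe"  # assumed route path

with open("sample.mp4", "rb") as f:
    response = requests.post(
        API_URL,
        headers={"X-API-Key": "your-api-key"},  # assumed header; check validate_api_key()
        files={"file": ("sample.mp4", f)},      # matches request.files['file'] in app.py
        timeout=600,                            # transcription can take a while
    )

print(response.status_code)
print(response.json())  # success payload on 200, or {'error': ...} on failure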