openfree committed on
Commit
fe788c4
·
verified ·
1 Parent(s): f2f0f20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -1
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import os
3
  import asyncio
@@ -55,7 +56,12 @@ except:
55
 
56
  # MeloTTS imports (for local mode)
57
  try:
58
- os.system("python -m unidic download")
 
 
 
 
 
59
  from melo.api import TTS as MeloTTS
60
  MELO_AVAILABLE = True
61
  except:
@@ -92,6 +98,7 @@ class UnifiedAudioConverter:
92
  """Initialize API mode with Together API (now fallback)"""
93
  self.llm_client = OpenAI(api_key=api_key, base_url="https://api.together.xyz/v1")
94
 
 
95
  def initialize_local_mode(self):
96
  """Initialize new local mode with Llama CPP"""
97
  if not LLAMA_CPP_AVAILABLE:
@@ -126,6 +133,7 @@ class UnifiedAudioConverter:
126
  print(f"Failed to initialize local LLM: {e}")
127
  raise RuntimeError(f"Failed to initialize local LLM: {e}")
128
 
 
129
  def initialize_legacy_local_mode(self):
130
  """Initialize legacy local mode with Hugging Face model (fallback)"""
131
  if self.legacy_local_model is None:
@@ -168,6 +176,7 @@ class UnifiedAudioConverter:
168
  if not os.path.exists("cli/inference.py"):
169
  print("Warning: Spark-TTS CLI not found. Please clone the Spark-TTS repository.")
170
 
 
171
  def initialize_melo_tts(self):
172
  """Initialize MeloTTS models"""
173
  if MELO_AVAILABLE and self.melo_models is None:
@@ -240,6 +249,7 @@ class UnifiedAudioConverter:
240
  {"role": "user", "content": self._build_prompt(text, language)}
241
  ]
242
 
 
243
  def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
244
  """Extract conversation using new local LLM (primary method)"""
245
  try:
@@ -294,6 +304,7 @@ class UnifiedAudioConverter:
294
  print(f"Local LLM failed: {e}, falling back to legacy local method")
295
  return self.extract_conversation_legacy_local(text, language, progress)
296
 
 
297
  def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None) -> Dict:
298
  """Extract conversation using legacy local model (fallback)"""
299
  try:
@@ -465,6 +476,7 @@ class UnifiedAudioConverter:
465
 
466
  return tmp_path
467
 
 
468
  def text_to_speech_spark(self, conversation_json: Dict, language: str = "English", progress=None) -> Tuple[str, str]:
469
  """Convert text to speech using Spark TTS CLI"""
470
  if not SPARK_AVAILABLE or not self.spark_model_dir:
@@ -557,6 +569,7 @@ class UnifiedAudioConverter:
557
  except Exception as e:
558
  raise RuntimeError(f"Failed to convert text to speech with Spark TTS: {e}")
559
 
 
560
  def text_to_speech_melo(self, conversation_json: Dict, progress=None) -> Tuple[str, str]:
561
  """Convert text to speech using MeloTTS"""
562
  if not MELO_AVAILABLE or not self.melo_models:
@@ -751,6 +764,19 @@ def update_tts_engine_for_korean(language):
751
  )
752
 
753
 
 
 
 
 
 
 
 
 
 
 
 
 
 
754
  # Gradio Interface
755
  with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
756
  gr.Markdown("# 🎙️ URL to Podcast Converter")
 
1
+ import spaces # 추가
2
  import gradio as gr
3
  import os
4
  import asyncio
 
56
 
57
  # MeloTTS imports (for local mode)
58
  try:
59
+ # unidic 다운로드를 조건부로 처리
60
+ if not os.path.exists("/usr/local/lib/python3.10/site-packages/unidic"):
61
+ try:
62
+ os.system("python -m unidic download")
63
+ except:
64
+ pass
65
  from melo.api import TTS as MeloTTS
66
  MELO_AVAILABLE = True
67
  except:
 
98
  """Initialize API mode with Together API (now fallback)"""
99
  self.llm_client = OpenAI(api_key=api_key, base_url="https://api.together.xyz/v1")
100
 
101
+ @spaces.GPU(duration=120)
102
  def initialize_local_mode(self):
103
  """Initialize new local mode with Llama CPP"""
104
  if not LLAMA_CPP_AVAILABLE:
 
133
  print(f"Failed to initialize local LLM: {e}")
134
  raise RuntimeError(f"Failed to initialize local LLM: {e}")
135
 
136
+ @spaces.GPU(duration=60)
137
  def initialize_legacy_local_mode(self):
138
  """Initialize legacy local mode with Hugging Face model (fallback)"""
139
  if self.legacy_local_model is None:
 
176
  if not os.path.exists("cli/inference.py"):
177
  print("Warning: Spark-TTS CLI not found. Please clone the Spark-TTS repository.")
178
 
179
+ @spaces.GPU(duration=60)
180
  def initialize_melo_tts(self):
181
  """Initialize MeloTTS models"""
182
  if MELO_AVAILABLE and self.melo_models is None:
 
249
  {"role": "user", "content": self._build_prompt(text, language)}
250
  ]
251
 
252
+ @spaces.GPU(duration=120)
253
  def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
254
  """Extract conversation using new local LLM (primary method)"""
255
  try:
 
304
  print(f"Local LLM failed: {e}, falling back to legacy local method")
305
  return self.extract_conversation_legacy_local(text, language, progress)
306
 
307
+ @spaces.GPU(duration=120)
308
  def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None) -> Dict:
309
  """Extract conversation using legacy local model (fallback)"""
310
  try:
 
476
 
477
  return tmp_path
478
 
479
+ @spaces.GPU(duration=60)
480
  def text_to_speech_spark(self, conversation_json: Dict, language: str = "English", progress=None) -> Tuple[str, str]:
481
  """Convert text to speech using Spark TTS CLI"""
482
  if not SPARK_AVAILABLE or not self.spark_model_dir:
 
569
  except Exception as e:
570
  raise RuntimeError(f"Failed to convert text to speech with Spark TTS: {e}")
571
 
572
+ @spaces.GPU(duration=60)
573
  def text_to_speech_melo(self, conversation_json: Dict, progress=None) -> Tuple[str, str]:
574
  """Convert text to speech using MeloTTS"""
575
  if not MELO_AVAILABLE or not self.melo_models:
 
764
  )
765
 
766
 
767
+ # 모델 초기화 (앱 시작 시)
768
+ if LLAMA_CPP_AVAILABLE:
769
+ try:
770
+ model_path = hf_hub_download(
771
+ repo_id=converter.config.local_model_repo,
772
+ filename=converter.config.local_model_name,
773
+ local_dir="./models"
774
+ )
775
+ print(f"Model downloaded to: {model_path}")
776
+ except Exception as e:
777
+ print(f"Failed to download model at startup: {e}")
778
+
779
+
780
  # Gradio Interface
781
  with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
782
  gr.Markdown("# 🎙️ URL to Podcast Converter")