openfree committed on
Commit
023a9e4
·
verified ·
1 Parent(s): b25907a

Update app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +103 -23
app-backup.py CHANGED
@@ -20,6 +20,9 @@ from pathlib import Path
20
  from threading import Thread
21
  from dotenv import load_dotenv
22
 
 
 
 
23
  # Edge TTS imports
24
  import edge_tts
25
  from pydub import AudioSegment
@@ -198,6 +201,35 @@ class UnifiedAudioConverter:
198
  except httpx.HTTPError as e:
199
  raise RuntimeError(f"Failed to fetch URL: {e}")
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  def _get_messages_formatter_type(self, model_name):
202
  """Get appropriate message formatter for the model"""
203
  if "Mistral" in model_name or "BitSix" in model_name:
@@ -431,8 +463,9 @@ class UnifiedAudioConverter:
431
  # 언어별 음성 설정 (voice settings per language)
432
  if language == "Korean":
433
  voices = [
434
- "ko-KR-SunHiNeural", # ์—ฌ์„ฑ ์Œ์„ฑ (์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด)
435
- "ko-KR-HyunsuNeural" # ๋‚จ์„ฑ ์Œ์„ฑ (์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด)
 
436
  ]
437
  else:
438
  voices = [
@@ -489,7 +522,7 @@ class UnifiedAudioConverter:
489
  # Create different voice characteristics for different speakers
490
  if language == "Korean":
491
  voice_configs = [
492
- {"prompt_text": "์•ˆ๋…•ํ•˜์„ธ์š”, ์˜ค๋Š˜ ํŒŸ์บ์ŠคํŠธ ์ง„ํ–‰์„ ๋งก์€ ์ง„ํ–‰์ž์ž…๋‹ˆ๋‹ค.", "gender": "female"},
493
  {"prompt_text": "์•ˆ๋…•ํ•˜์„ธ์š”, ์˜ค๋Š˜ ๊ฒŒ์ŠคํŠธ๋กœ ์ฐธ์—ฌํ•˜๊ฒŒ ๋˜์–ด ๊ธฐ์ฉ๋‹ˆ๋‹ค.", "gender": "male"}
494
  ]
495
  else:
@@ -643,14 +676,19 @@ class UnifiedAudioConverter:
643
  converter = UnifiedAudioConverter(ConversationConfig())
644
 
645
 
646
- async def synthesize(article_url: str, mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
647
- """Main synthesis function - Local is now primary, API is fallback"""
648
- if not article_url:
649
- return "Please provide a valid URL.", None
650
-
651
  try:
652
- # Fetch text from URL
653
- text = converter.fetch_text(article_url)
 
 
 
 
 
 
 
 
654
 
655
  # Limit text to max words
656
  words = text.split()
@@ -734,9 +772,9 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS",
734
  return f"Error generating audio: {str(e)}", None
735
 
736
 
737
- def synthesize_sync(article_url: str, mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
738
  """Synchronous wrapper for async synthesis"""
739
- return asyncio.run(synthesize(article_url, mode, tts_engine, language))
740
 
741
 
742
  def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
@@ -764,6 +802,14 @@ def update_tts_engine_for_korean(language):
764
  )
765
 
766
 
 
 
 
 
 
 
 
 
767
  # ๋ชจ๋ธ ์ดˆ๊ธฐํ™” (์•ฑ ์‹œ์ž‘ ์‹œ)
768
  if LLAMA_CPP_AVAILABLE:
769
  try:
@@ -778,9 +824,9 @@ if LLAMA_CPP_AVAILABLE:
778
 
779
 
780
  # Gradio Interface
781
- with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
782
- gr.Markdown("# ๐ŸŽ™๏ธ URL to Podcast Converter")
783
- gr.Markdown("Convert any article, blog, or news into an engaging podcast conversation!")
784
 
785
  # ์ƒ๋‹จ์— ๋กœ์ปฌ LLM ์ƒํƒœ ํ‘œ์‹œ
786
  with gr.Row():
@@ -793,11 +839,29 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
793
 
794
  with gr.Row():
795
  with gr.Column(scale=3):
 
 
 
 
 
 
 
 
 
796
  url_input = gr.Textbox(
797
  label="Article URL",
798
  placeholder="Enter the article URL here...",
799
- value=""
 
 
 
 
 
 
 
 
800
  )
 
801
  with gr.Column(scale=1):
802
  # 언어 선택 추가 (language selection added)
803
  language_selector = gr.Radio(
@@ -871,16 +935,23 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
871
 
872
  gr.Examples(
873
  examples=[
874
- ["https://huggingface.co/blog/openfree/cycle-navigator", "Local", "Edge-TTS", "English"],
875
- ["https://www.bbc.com/news/technology-67988517", "Local", "Spark-TTS", "English"],
876
- ["https://huggingface.co/papers/2505.14810", "Local", "Edge-TTS", "Korean"],
877
  ],
878
- inputs=[url_input, mode_selector, tts_selector, language_selector],
879
  outputs=[conversation_output, status_output],
880
  fn=synthesize_sync,
881
  cache_examples=False,
882
  )
883
 
 
 
 
 
 
 
 
884
  # 언어 변경 시 TTS 엔진 옵션 업데이트 (update TTS engine options when the language changes)
885
  language_selector.change(
886
  fn=update_tts_engine_for_korean,
@@ -888,10 +959,19 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
888
  outputs=[tts_selector]
889
  )
890
 
891
- # 이벤트 연결 (event wiring)
 
 
 
 
 
 
 
892
  convert_btn.click(
893
- fn=synthesize_sync,
894
- inputs=[url_input, mode_selector, tts_selector, language_selector],
 
 
895
  outputs=[conversation_output, status_output]
896
  )
897
 
 
20
  from threading import Thread
21
  from dotenv import load_dotenv
22
 
23
+ # PDF processing imports
24
+ from langchain_community.document_loaders import PyPDFLoader
25
+
26
  # Edge TTS imports
27
  import edge_tts
28
  from pydub import AudioSegment
 
201
  except httpx.HTTPError as e:
202
  raise RuntimeError(f"Failed to fetch URL: {e}")
203
 
204
def extract_text_from_pdf(self, pdf_file) -> str:
    """Extract the text content of a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``) to
            the PDF, or an object exposing ``read()`` that returns the PDF
            bytes. In the latter case the bytes are spooled to a temporary
            ``.pdf`` file because ``PyPDFLoader`` is given a path.

    Returns:
        The ``page_content`` of every loaded page, joined with newlines.

    Raises:
        RuntimeError: If reading, spooling, or parsing fails. The original
            exception is attached as ``__cause__``.
    """
    is_path = isinstance(pdf_file, (str, os.PathLike))
    # Only set for the file-object branch; doubles as the "needs cleanup" flag.
    tmp_path = None
    try:
        if is_path:
            pdf_path = os.fspath(pdf_file)
        else:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                # Record the name before writing so a failed read()/write()
                # still gets cleaned up in the finally clause.
                tmp_path = tmp_file.name
                tmp_file.write(pdf_file.read())
            pdf_path = tmp_path

        # Load the PDF and combine the text of all pages.
        pages = PyPDFLoader(pdf_path).load()
        return "\n".join(page.page_content for page in pages)
    except Exception as e:
        raise RuntimeError(f"Failed to extract text from PDF: {e}") from e
    finally:
        # Remove the spooled copy on success AND on failure; never touch a
        # path that the caller handed in.
        if tmp_path is not None and os.path.exists(tmp_path):
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: do not mask the real outcome.
                pass
230
+
231
+
232
+
233
  def _get_messages_formatter_type(self, model_name):
234
  """Get appropriate message formatter for the model"""
235
  if "Mistral" in model_name or "BitSix" in model_name:
 
463
  # 언어별 음성 설정 (voice settings per language)
464
  if language == "Korean":
465
  voices = [
466
+ "ko-KR-HyunsuNeural", # ๋‚จ์„ฑ ์Œ์„ฑ (์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด)
467
+ "ko-KR-InJoonNeural" # ๋‚จ๋‚จ์„ฑ ์Œ์„ฑ (์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด)
468
+
469
  ]
470
  else:
471
  voices = [
 
522
  # Create different voice characteristics for different speakers
523
  if language == "Korean":
524
  voice_configs = [
525
+ {"prompt_text": "์•ˆ๋…•ํ•˜์„ธ์š”, ์˜ค๋Š˜ ํŒŸ์บ์ŠคํŠธ ์ง„ํ–‰์„ ๋งก์€ ์ง„ํ–‰์ž์ž…๋‹ˆ๋‹ค.", "gender": "male"},
526
  {"prompt_text": "์•ˆ๋…•ํ•˜์„ธ์š”, ์˜ค๋Š˜ ๊ฒŒ์ŠคํŠธ๋กœ ์ฐธ์—ฌํ•˜๊ฒŒ ๋˜์–ด ๊ธฐ์ฉ๋‹ˆ๋‹ค.", "gender": "male"}
527
  ]
528
  else:
 
676
  converter = UnifiedAudioConverter(ConversationConfig())
677
 
678
 
679
+ async def synthesize(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
680
+ """Main synthesis function - handles both URL and PDF inputs"""
 
 
 
681
  try:
682
+ # Extract text based on input type
683
+ if input_type == "URL":
684
+ if not article_input or not isinstance(article_input, str):
685
+ return "Please provide a valid URL.", None
686
+ text = converter.fetch_text(article_input)
687
+ else: # PDF
688
+ if not article_input:
689
+ return "Please upload a PDF file.", None
690
+ # Gradio returns the file path as a string
691
+ text = converter.extract_text_from_pdf(article_input)
692
 
693
  # Limit text to max words
694
  words = text.split()
 
772
  return f"Error generating audio: {str(e)}", None
773
 
774
 
775
def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
    """Blocking entry point for the async ``synthesize`` coroutine.

    Forwards all five arguments positionally to ``synthesize`` and drives
    the resulting coroutine to completion with ``asyncio.run``, returning
    whatever the coroutine returns.
    """
    pending = synthesize(article_input, input_type, mode, tts_engine, language)
    return asyncio.run(pending)
778
 
779
 
780
  def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
 
802
  )
803
 
804
 
805
def toggle_input_visibility(input_type):
    """Return visibility updates for the URL box and the PDF uploader.

    Produces a pair of ``gr.update(visible=...)`` values: the first is
    visible only when ``input_type`` is ``"URL"``, the second only when it
    is anything else (i.e. PDF).
    """
    show_url = input_type == "URL"
    return gr.update(visible=show_url), gr.update(visible=not show_url)
811
+
812
+
813
  # ๋ชจ๋ธ ์ดˆ๊ธฐํ™” (์•ฑ ์‹œ์ž‘ ์‹œ)
814
  if LLAMA_CPP_AVAILABLE:
815
  try:
 
824
 
825
 
826
  # Gradio Interface
827
+ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
828
+ gr.Markdown("# ๐ŸŽ™๏ธ URL/PDF to Podcast Converter")
829
+ gr.Markdown("Convert any article, blog, news, or PDF document into an engaging podcast conversation!")
830
 
831
  # ์ƒ๋‹จ์— ๋กœ์ปฌ LLM ์ƒํƒœ ํ‘œ์‹œ
832
  with gr.Row():
 
839
 
840
  with gr.Row():
841
  with gr.Column(scale=3):
842
+ # Input type selector
843
+ input_type_selector = gr.Radio(
844
+ choices=["URL", "PDF"],
845
+ value="URL",
846
+ label="Input Type",
847
+ info="Choose between URL or PDF file upload"
848
+ )
849
+
850
+ # URL input
851
  url_input = gr.Textbox(
852
  label="Article URL",
853
  placeholder="Enter the article URL here...",
854
+ value="",
855
+ visible=True
856
+ )
857
+
858
+ # PDF upload
859
+ pdf_input = gr.File(
860
+ label="Upload PDF",
861
+ file_types=[".pdf"],
862
+ visible=False
863
  )
864
+
865
  with gr.Column(scale=1):
866
  # 언어 선택 추가 (language selection added)
867
  language_selector = gr.Radio(
 
935
 
936
  gr.Examples(
937
  examples=[
938
+ ["https://huggingface.co/blog/openfree/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
939
+ ["https://www.bbc.com/news/technology-67988517", "URL", "Local", "Spark-TTS", "English"],
940
+ ["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Korean"],
941
  ],
942
+ inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
943
  outputs=[conversation_output, status_output],
944
  fn=synthesize_sync,
945
  cache_examples=False,
946
  )
947
 
948
+ # Input type change handler
949
+ input_type_selector.change(
950
+ fn=toggle_input_visibility,
951
+ inputs=[input_type_selector],
952
+ outputs=[url_input, pdf_input]
953
+ )
954
+
955
  # 언어 변경 시 TTS 엔진 옵션 업데이트 (update TTS engine options when the language changes)
956
  language_selector.change(
957
  fn=update_tts_engine_for_korean,
 
959
  outputs=[tts_selector]
960
  )
961
 
962
+ # 이벤트 연결 - 수정된 부분 (event wiring - modified section)
963
def get_article_input(input_type, url_input, pdf_input):
    """Pick the value to synthesize from, based on the selected input type.

    Args:
        input_type: ``"URL"`` selects ``url_input``; any other value selects
            ``pdf_input``.
        url_input: Current value of the URL field.
        pdf_input: Current value of the PDF upload field.

    Returns:
        ``pdf_input`` unchanged for non-URL input types. For ``"URL"``, the
        URL with surrounding whitespace removed when it is a string
        (so a whitespace-only entry becomes ``""``), otherwise ``url_input``
        unchanged.
    """
    if input_type != "URL":
        return pdf_input
    if isinstance(url_input, str):
        # Pasted URLs often carry stray spaces/newlines; trimming also lets
        # a blank entry be recognised as empty by downstream validation.
        return url_input.strip()
    return url_input
969
+
970
  convert_btn.click(
971
+ fn=lambda input_type, url_input, pdf_input, mode, tts, lang: synthesize_sync(
972
+ get_article_input(input_type, url_input, pdf_input), input_type, mode, tts, lang
973
+ ),
974
+ inputs=[input_type_selector, url_input, pdf_input, mode_selector, tts_selector, language_selector],
975
  outputs=[conversation_output, status_output]
976
  )
977