Spaces:
Running
on
Zero
Running
on
Zero
Update app-backup.py
Browse files- app-backup.py +103 -23
app-backup.py
CHANGED
@@ -20,6 +20,9 @@ from pathlib import Path
|
|
20 |
from threading import Thread
|
21 |
from dotenv import load_dotenv
|
22 |
|
|
|
|
|
|
|
23 |
# Edge TTS imports
|
24 |
import edge_tts
|
25 |
from pydub import AudioSegment
|
@@ -198,6 +201,35 @@ class UnifiedAudioConverter:
|
|
198 |
except httpx.HTTPError as e:
|
199 |
raise RuntimeError(f"Failed to fetch URL: {e}")
|
200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
def _get_messages_formatter_type(self, model_name):
|
202 |
"""Get appropriate message formatter for the model"""
|
203 |
if "Mistral" in model_name or "BitSix" in model_name:
|
@@ -431,8 +463,9 @@ class UnifiedAudioConverter:
|
|
431 |
# Per-language voice settings
|
432 |
if language == "Korean":
|
433 |
voices = [
|
434 |
-
"ko-KR-
|
435 |
-
"ko-KR-
|
|
|
436 |
]
|
437 |
else:
|
438 |
voices = [
|
@@ -489,7 +522,7 @@ class UnifiedAudioConverter:
|
|
489 |
# Create different voice characteristics for different speakers
|
490 |
if language == "Korean":
|
491 |
voice_configs = [
|
492 |
-
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ํ์บ์คํธ ์งํ์ ๋งก์ ์งํ์์
๋๋ค.", "gender": "
|
493 |
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ๊ฒ์คํธ๋ก ์ฐธ์ฌํ๊ฒ ๋์ด ๊ธฐ์ฉ๋๋ค.", "gender": "male"}
|
494 |
]
|
495 |
else:
|
@@ -643,14 +676,19 @@ class UnifiedAudioConverter:
|
|
643 |
converter = UnifiedAudioConverter(ConversationConfig())
|
644 |
|
645 |
|
646 |
-
async def synthesize(
|
647 |
-
"""Main synthesis function -
|
648 |
-
if not article_url:
|
649 |
-
return "Please provide a valid URL.", None
|
650 |
-
|
651 |
try:
|
652 |
-
#
|
653 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
654 |
|
655 |
# Limit text to max words
|
656 |
words = text.split()
|
@@ -734,9 +772,9 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS",
|
|
734 |
return f"Error generating audio: {str(e)}", None
|
735 |
|
736 |
|
737 |
-
def synthesize_sync(
|
738 |
"""Synchronous wrapper for async synthesis"""
|
739 |
-
return asyncio.run(synthesize(
|
740 |
|
741 |
|
742 |
def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
|
@@ -764,6 +802,14 @@ def update_tts_engine_for_korean(language):
|
|
764 |
)
|
765 |
|
766 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
767 |
# Model initialization (at app startup)
|
768 |
if LLAMA_CPP_AVAILABLE:
|
769 |
try:
|
@@ -778,9 +824,9 @@ if LLAMA_CPP_AVAILABLE:
|
|
778 |
|
779 |
|
780 |
# Gradio Interface
|
781 |
-
with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
782 |
-
gr.Markdown("# ๐๏ธ URL to Podcast Converter")
|
783 |
-
gr.Markdown("Convert any article, blog, or
|
784 |
|
785 |
# Show the local LLM status at the top
|
786 |
with gr.Row():
|
@@ -793,11 +839,29 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
793 |
|
794 |
with gr.Row():
|
795 |
with gr.Column(scale=3):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
796 |
url_input = gr.Textbox(
|
797 |
label="Article URL",
|
798 |
placeholder="Enter the article URL here...",
|
799 |
-
value=""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
800 |
)
|
|
|
801 |
with gr.Column(scale=1):
|
802 |
# Add language selection
|
803 |
language_selector = gr.Radio(
|
@@ -871,16 +935,23 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
871 |
|
872 |
gr.Examples(
|
873 |
examples=[
|
874 |
-
["https://huggingface.co/blog/openfree/cycle-navigator", "Local", "Edge-TTS", "English"],
|
875 |
-
["https://www.bbc.com/news/technology-67988517", "Local", "Spark-TTS", "English"],
|
876 |
-
["https://huggingface.co/papers/2505.14810", "Local", "Edge-TTS", "Korean"],
|
877 |
],
|
878 |
-
inputs=[url_input, mode_selector, tts_selector, language_selector],
|
879 |
outputs=[conversation_output, status_output],
|
880 |
fn=synthesize_sync,
|
881 |
cache_examples=False,
|
882 |
)
|
883 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
884 |
# Update the TTS engine options when the language changes
|
885 |
language_selector.change(
|
886 |
fn=update_tts_engine_for_korean,
|
@@ -888,10 +959,19 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
888 |
outputs=[tts_selector]
|
889 |
)
|
890 |
|
891 |
-
# Event wiring
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
892 |
convert_btn.click(
|
893 |
-
fn=synthesize_sync
|
894 |
-
|
|
|
|
|
895 |
outputs=[conversation_output, status_output]
|
896 |
)
|
897 |
|
|
|
20 |
from threading import Thread
|
21 |
from dotenv import load_dotenv
|
22 |
|
23 |
+
# PDF processing imports
|
24 |
+
from langchain_community.document_loaders import PyPDFLoader
|
25 |
+
|
26 |
# Edge TTS imports
|
27 |
import edge_tts
|
28 |
from pydub import AudioSegment
|
|
|
201 |
except httpx.HTTPError as e:
|
202 |
raise RuntimeError(f"Failed to fetch URL: {e}")
|
203 |
|
204 |
+
def extract_text_from_pdf(self, pdf_file) -> str:
    """Extract the text content of a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``) or a
            binary file-like object exposing ``read()``. File-like input is
            copied to a temporary ``.pdf`` file because the loader is given
            a path.

    Returns:
        The ``page_content`` of every loaded page, joined with newlines.

    Raises:
        RuntimeError: If the PDF cannot be read or parsed. The underlying
            exception is chained as ``__cause__``.
    """
    # Only a temp file created here is ours to delete; a caller-supplied
    # path is never removed.
    temp_path = None
    try:
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                # Record the name before writing so a failing read() still
                # gets cleaned up in the finally clause.
                temp_path = tmp_file.name
                tmp_file.write(pdf_file.read())
            pdf_path = temp_path

        # Load the PDF and combine the text of all pages.
        pages = PyPDFLoader(pdf_path).load()
        return "\n".join(page.page_content for page in pages)
    except Exception as e:
        raise RuntimeError(f"Failed to extract text from PDF: {e}") from e
    finally:
        # Runs on success and on failure, so the temp copy never leaks.
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
|
230 |
+
|
231 |
+
|
232 |
+
|
233 |
def _get_messages_formatter_type(self, model_name):
|
234 |
"""Get appropriate message formatter for the model"""
|
235 |
if "Mistral" in model_name or "BitSix" in model_name:
|
|
|
463 |
# Per-language voice settings
|
464 |
if language == "Korean":
|
465 |
voices = [
|
466 |
+
"ko-KR-HyunsuNeural", # ๋จ์ฑ ์์ฑ (์์ฐ์ค๋ฌ์ด ํ๊ตญ์ด)
|
467 |
+
"ko-KR-InJoonNeural" # ๋จ๋จ์ฑ ์์ฑ (์์ฐ์ค๋ฌ์ด ํ๊ตญ์ด)
|
468 |
+
|
469 |
]
|
470 |
else:
|
471 |
voices = [
|
|
|
522 |
# Create different voice characteristics for different speakers
|
523 |
if language == "Korean":
|
524 |
voice_configs = [
|
525 |
+
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ํ์บ์คํธ ์งํ์ ๋งก์ ์งํ์์
๋๋ค.", "gender": "male"},
|
526 |
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ๊ฒ์คํธ๋ก ์ฐธ์ฌํ๊ฒ ๋์ด ๊ธฐ์ฉ๋๋ค.", "gender": "male"}
|
527 |
]
|
528 |
else:
|
|
|
676 |
converter = UnifiedAudioConverter(ConversationConfig())
|
677 |
|
678 |
|
679 |
+
async def synthesize(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
|
680 |
+
"""Main synthesis function - handles both URL and PDF inputs"""
|
|
|
|
|
|
|
681 |
try:
|
682 |
+
# Extract text based on input type
|
683 |
+
if input_type == "URL":
|
684 |
+
if not article_input or not isinstance(article_input, str):
|
685 |
+
return "Please provide a valid URL.", None
|
686 |
+
text = converter.fetch_text(article_input)
|
687 |
+
else: # PDF
|
688 |
+
if not article_input:
|
689 |
+
return "Please upload a PDF file.", None
|
690 |
+
# Gradio returns the file path as a string
|
691 |
+
text = converter.extract_text_from_pdf(article_input)
|
692 |
|
693 |
# Limit text to max words
|
694 |
words = text.split()
|
|
|
772 |
return f"Error generating audio: {str(e)}", None
|
773 |
|
774 |
|
775 |
+
def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
    """Blocking entry point: run the async ``synthesize`` coroutine to completion and return its result."""
    coroutine = synthesize(
        article_input,
        input_type,
        mode,
        tts_engine,
        language,
    )
    return asyncio.run(coroutine)
|
778 |
|
779 |
|
780 |
def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
|
|
|
802 |
)
|
803 |
|
804 |
|
805 |
+
def toggle_input_visibility(input_type):
    """Show the widget matching the chosen input type and hide the other one.

    Returns a pair of ``gr.update`` objects: the first is visible only when
    ``input_type`` is ``"URL"``, the second only for any other value (PDF).
    """
    show_url = input_type == "URL"
    return gr.update(visible=show_url), gr.update(visible=not show_url)
|
811 |
+
|
812 |
+
|
813 |
# Model initialization (at app startup)
|
814 |
if LLAMA_CPP_AVAILABLE:
|
815 |
try:
|
|
|
824 |
|
825 |
|
826 |
# Gradio Interface
|
827 |
+
with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
|
828 |
+
gr.Markdown("# ๐๏ธ URL/PDF to Podcast Converter")
|
829 |
+
gr.Markdown("Convert any article, blog, news, or PDF document into an engaging podcast conversation!")
|
830 |
|
831 |
# Show the local LLM status at the top
|
832 |
with gr.Row():
|
|
|
839 |
|
840 |
with gr.Row():
|
841 |
with gr.Column(scale=3):
|
842 |
+
# Input type selector
|
843 |
+
input_type_selector = gr.Radio(
|
844 |
+
choices=["URL", "PDF"],
|
845 |
+
value="URL",
|
846 |
+
label="Input Type",
|
847 |
+
info="Choose between URL or PDF file upload"
|
848 |
+
)
|
849 |
+
|
850 |
+
# URL input
|
851 |
url_input = gr.Textbox(
|
852 |
label="Article URL",
|
853 |
placeholder="Enter the article URL here...",
|
854 |
+
value="",
|
855 |
+
visible=True
|
856 |
+
)
|
857 |
+
|
858 |
+
# PDF upload
|
859 |
+
pdf_input = gr.File(
|
860 |
+
label="Upload PDF",
|
861 |
+
file_types=[".pdf"],
|
862 |
+
visible=False
|
863 |
)
|
864 |
+
|
865 |
with gr.Column(scale=1):
|
866 |
# Add language selection
|
867 |
language_selector = gr.Radio(
|
|
|
935 |
|
936 |
gr.Examples(
|
937 |
examples=[
|
938 |
+
["https://huggingface.co/blog/openfree/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
|
939 |
+
["https://www.bbc.com/news/technology-67988517", "URL", "Local", "Spark-TTS", "English"],
|
940 |
+
["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Korean"],
|
941 |
],
|
942 |
+
inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
|
943 |
outputs=[conversation_output, status_output],
|
944 |
fn=synthesize_sync,
|
945 |
cache_examples=False,
|
946 |
)
|
947 |
|
948 |
+
# Input type change handler
|
949 |
+
input_type_selector.change(
|
950 |
+
fn=toggle_input_visibility,
|
951 |
+
inputs=[input_type_selector],
|
952 |
+
outputs=[url_input, pdf_input]
|
953 |
+
)
|
954 |
+
|
955 |
# Update the TTS engine options when the language changes
|
956 |
language_selector.change(
|
957 |
fn=update_tts_engine_for_korean,
|
|
|
959 |
outputs=[tts_selector]
|
960 |
)
|
961 |
|
962 |
+
# Event wiring - revised section
|
963 |
+
def get_article_input(input_type, url_input, pdf_input):
    """Pick the value that matches the selected input type.

    Returns ``url_input`` when ``input_type`` is ``"URL"``; any other value
    selects ``pdf_input``.
    """
    return url_input if input_type == "URL" else pdf_input
|
969 |
+
|
970 |
convert_btn.click(
|
971 |
+
fn=lambda input_type, url_input, pdf_input, mode, tts, lang: synthesize_sync(
|
972 |
+
get_article_input(input_type, url_input, pdf_input), input_type, mode, tts, lang
|
973 |
+
),
|
974 |
+
inputs=[input_type_selector, url_input, pdf_input, mode_selector, tts_selector, language_selector],
|
975 |
outputs=[conversation_output, status_output]
|
976 |
)
|
977 |
|