"""Gradio app that summarizes a web article and reads the summary aloud.

Pipeline: fetch the page -> keep the long ``<p>`` paragraphs -> summarize with
a FLAN-T5 text2text pipeline -> synthesize speech with the YourTTS model ->
optionally transcode the WAV to MP3 with ``ffmpeg``.  Everything runs on CPU.
"""

import os
import subprocess
import tempfile

import gradio as gr
import requests
from bs4 import BeautifulSoup
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFacePipeline
from transformers import pipeline
from TTS.api import TTS

# Paragraphs at or below this length are skipped when extracting the article.
MIN_PARAGRAPH_CHARS = 60
# The article is cut to this many characters before being summarized.
MAX_ARTICLE_CHARS = 1500
# Seconds to wait for the article's web server.
REQUEST_TIMEOUT_SECONDS = 10
# Prefix of the string extract_main_content() returns on failure.  Callers
# match on the full prefix so an article that merely starts with the word
# "Error" is not mistaken for a failure.
EXTRACTION_ERROR_PREFIX = "Error extracting article content:"

# Setup summarization LLM (device=-1 selects the CPU).
summary_pipe = pipeline("text2text-generation", model="google/flan-t5-base", device=-1)
llm = HuggingFacePipeline(pipeline=summary_pipe)

# Prompt for more engaging summary
summary_prompt = PromptTemplate.from_template("""
Summarize the following article content in a clear, warm, and motivational tone like a preacher speaking to an audience:

{text}

Summary:
""")

summary_chain = summary_prompt | llm

# TTS model setup
tts_model = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
    gpu=False,
)
# ``speakers`` may be None; normalize to a list so the dropdown always
# receives a valid ``choices`` value.
SPEAKER_LIST = tts_model.speakers or []
DEFAULT_SPEAKER = SPEAKER_LIST[0] if SPEAKER_LIST else None


def extract_main_content(url):
    """Download *url* and return the text of its substantial paragraphs.

    Navigation, header, footer, aside, script, style and noscript elements are
    removed first; the remaining ``<p>`` elements longer than
    ``MIN_PARAGRAPH_CHARS`` characters are joined with newlines.

    Returns:
        The stripped article text (possibly empty), or a string starting with
        ``EXTRACTION_ERROR_PREFIX`` if the page could not be fetched or parsed.
    """
    try:
        response = requests.get((url or "").strip(), timeout=REQUEST_TIMEOUT_SECONDS)
        # Without this, a 404/500 error page would be summarized as an article.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        for tag in soup(["nav", "header", "footer", "aside", "script", "style", "noscript"]):
            tag.decompose()
        paragraph_texts = (p.get_text() for p in soup.find_all("p"))
        content = "\n".join(
            text for text in paragraph_texts if len(text) > MIN_PARAGRAPH_CHARS
        )
        return content.strip()
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any failure is
        # reported to the user as text instead of crashing the app.
        return f"{EXTRACTION_ERROR_PREFIX} {str(e)}"


def _pick_tts_language(preferred="en"):
    """Return the language to synthesize in, or None for single-language models."""
    if not getattr(tts_model, "is_multi_lingual", False):
        return None
    languages = getattr(tts_model, "languages", None) or []
    if preferred in languages or not languages:
        return preferred
    return languages[0]


def _convert_wav_to_mp3(wav_path, mp3_path):
    """Transcode *wav_path* to *mp3_path* with ffmpeg; return the MP3 path or None."""
    # Argument list (no shell) so paths containing spaces or shell
    # metacharacters are passed through verbatim.
    command = [
        "ffmpeg", "-y",
        "-i", wav_path,
        "-codec:a", "libmp3lame",
        "-qscale:a", "4",
        mp3_path,
    ]
    try:
        result = subprocess.run(command, check=False)
    except OSError as e:
        # Typically ffmpeg is not installed; the WAV is still usable.
        print(f"FFMPEG ERROR: {e}")
        return None
    if result.returncode != 0 or not os.path.exists(mp3_path):
        print(f"FFMPEG ERROR: conversion failed with exit code {result.returncode}")
        return None
    return mp3_path


def generate_human_like_audio(text, speaker):
    """Synthesize *text* with the given *speaker* voice.

    Each call writes into its own fresh temporary directory so concurrent
    requests cannot overwrite each other's files and a leftover file from an
    earlier run is never reported as the result of this one.  The files are
    left on disk because Gradio serves them after this function returns.

    Returns:
        ``(wav_path, mp3_path)``.  ``mp3_path`` is None when the ffmpeg
        conversion is unavailable or fails; both are None when synthesis fails.
    """
    try:
        out_dir = tempfile.mkdtemp(prefix="summary_audio_")
        wav_path = os.path.join(out_dir, "summary.wav")
        mp3_path = os.path.join(out_dir, "summary.mp3")
        tts_model.tts_to_file(
            text=text,
            speaker=speaker,
            # Multilingual models reject calls that do not name a language.
            language=_pick_tts_language(),
            file_path=wav_path,
        )
    except Exception as e:
        print(f"TTS ERROR: {e}")
        return None, None

    if not os.path.exists(wav_path):
        return None, None
    return wav_path, _convert_wav_to_mp3(wav_path, mp3_path)


def url_to_audio_summary(url, speaker):
    """Summarize the article at *url* and synthesize the summary as audio.

    Returns:
        ``(summary_or_error_message, wav_path, mp3_path)``; the two paths are
        None whenever no audio could be produced.
    """
    article_text = extract_main_content(url)
    if article_text.startswith(EXTRACTION_ERROR_PREFIX):
        return article_text, None, None
    if not article_text:
        # Nothing to summarize: avoid feeding an empty prompt to the model.
        return f"{EXTRACTION_ERROR_PREFIX} no article text found at that URL.", None, None

    # Truncate so the prompt stays within what the small model handles.
    if len(article_text) > MAX_ARTICLE_CHARS:
        article_text = article_text[:MAX_ARTICLE_CHARS] + "..."

    # Newer chains expose ``invoke``; older ones are called directly.
    run_chain = summary_chain.invoke if hasattr(summary_chain, "invoke") else summary_chain
    summary = run_chain({"text": article_text})
    if isinstance(summary, dict):
        summary = summary.get("text")
    summary = (summary or "").strip()
    if not summary:
        return "Error: the model returned an empty summary.", None, None

    wav_path, mp3_path = generate_human_like_audio(summary, speaker)
    return summary, wav_path, mp3_path


def interface_wrapper(url, speaker):
    """Gradio callback: forward the form inputs to url_to_audio_summary()."""
    summary, wav_path, mp3_path = url_to_audio_summary(url, speaker)
    return summary, wav_path, mp3_path


iface = gr.Interface(
    fn=interface_wrapper,
    inputs=[
        gr.Textbox(label="Article URL", placeholder="Paste a news/blog URL here..."),
        gr.Dropdown(choices=SPEAKER_LIST, value=DEFAULT_SPEAKER, label="Select Speaker"),
    ],
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Audio(label="Preacher-style Audio Summary", type="filepath"),
        gr.File(label="Download MP3"),
    ],
    title="Preaching-Style URL to Audio Agent",
    description="Summarizes article content and reads it aloud in a warm, preacher-style voice using YourTTS. CPU-only.",
)

if __name__ == "__main__":
    iface.launch()