import streamlit as st from asr import transcribe_file from summarization import summarize_transcript from podcast import search_podcast_series, fetch_episodes, download_podcast_audio, fetch_audio from utils import model_names, available_gguf_llms import base64 import time from datetime import datetime # ✅ Correct import import html as html_lib # Required for text escaping in transcripts import json # ✅ Added for passing data to JS # Session state init if "transcript" not in st.session_state: st.session_state.transcript = "" if "summary" not in st.session_state: st.session_state.summary = "" if "status" not in st.session_state: st.session_state.status = "Ready" if "audio_path" not in st.session_state: st.session_state.audio_path = None if "utterances" not in st.session_state: st.session_state.utterances = [] if "audio_base64" not in st.session_state: st.session_state.audio_base64 = None if "prev_audio_path" not in st.session_state: st.session_state.prev_audio_path = None if "transcribing" not in st.session_state: st.session_state.transcribing = False st.set_page_config(page_title="🎙️ Moonshine ASR + LLM", layout="wide") st.title("🎙️ Speech Summarization with Moonshine ASR & LLM") with st.sidebar: st.header("⚙️ Settings") vad_threshold = st.slider("VAD Threshold", 0.1, 0.9, 0.5) model_name = st.selectbox("Moonshine Model", model_names.keys()) llm_model = st.selectbox("LLM for Summarization", list(available_gguf_llms.keys())) prompt_input = st.text_area("Custom Prompt", value="Summarize the transcript below.") tab1, tab2, tab3 = st.tabs(["📻 Podcast", "🎵 Audio Input", "📄 Results"]) with tab1: st.subheader("Search Podcast") query = st.text_input("Enter podcast name") if st.button("Search Series"): series_list = search_podcast_series(query) st.session_state.series_list = series_list if "series_list" in st.session_state: series_titles = [f"{s['title']} by {s['artist']}" for s in st.session_state.series_list] selected_title = st.selectbox("Select Series", series_titles) series = next((s for s in st.session_state.series_list if f"{s['title']} by {s['artist']}" == selected_title), None) if series: st.image(series["thumbnail"], width=150) st.text_area("Series Info", value=f"Title: {series['title']}\nArtist: {series['artist']}\nEpisodes: {series['episode_count']}", disabled=True) if st.button("Load Episodes"): episodes = fetch_episodes(series["feed_url"]) st.session_state.episodes = episodes if "episodes" in st.session_state: episode_titles = [e["title"] for e in st.session_state.episodes] selected_episode = st.selectbox("Select Episode", episode_titles) episode = next((e for e in st.session_state.episodes if e["title"] == selected_episode), None) if episode: st.text_area("Episode Info", value=f"Title: {episode['title']}\nPublished: {episode['published']}\nDuration: {episode['duration']}", disabled=True) if st.button("Download Episode"): audio_path, status = download_podcast_audio(episode["audio_url"], episode["title"], st.session_state.status) st.session_state.audio_path = audio_path st.session_state.status = status with tab2: st.subheader("Upload or Fetch Audio") youtube_url = st.text_input("YouTube URL") if st.button("Fetch from YouTube"): audio_path, status = fetch_audio(youtube_url, st.session_state.status) st.session_state.audio_path = audio_path st.session_state.audio_base64 = None # ✅ Clear base64 st.session_state.status = status uploaded_file = st.file_uploader("Upload Audio", type=["mp3", "wav"]) if uploaded_file: # FIX: Write to /tmp directory instead of current directory temp_audio_path = "/tmp/temp_audio.mp3" with open(temp_audio_path, "wb") as f: f.write(uploaded_file.getbuffer()) st.session_state.audio_path = temp_audio_path st.session_state.audio_base64 = None # ✅ Clear base64 with tab3: if st.session_state.audio_path and st.session_state.get("prev_audio_path") != st.session_state.audio_path: st.session_state.audio_base64 = None st.session_state.prev_audio_path = st.session_state.audio_path st.subheader("🎤 Transcription & Summary") st.markdown("---") status_placeholder = st.empty() summary_container = st.container() # ===== Audio Player and Transcript Logic ===== # If we have an audio path, prepare the base64 encoding if st.session_state.audio_path and not st.session_state.audio_base64: try: with open(st.session_state.audio_path, "rb") as f: audio_bytes = f.read() st.session_state.audio_base64 = base64.b64encode(audio_bytes).decode('utf-8') except Exception as e: st.error(f"Audio loading error: {str(e)}") def create_interactive_player(audio_base64, utterances): """ Generates a single, self-contained HTML component for the audio player and the interactive transcript. Why this works: - All HTML (player, transcript) and JavaScript logic live in the SAME context. - No more complex, failing postMessage communication between different iframes. - Highlighting is handled instantly in the browser, not by slow Python reruns. - Clicking to seek is also instant, as the JS has direct access to the player. """ # Pass utterances data to JavaScript safely utterances_json = json.dumps(utterances) html_content = f"""