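# Streamlit app: search and download podcast audio (or fetch it from YouTube /
# upload a file), transcribe it with Moonshine ASR, and summarize the transcript
# with a local GGUF LLM.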
import streamlit as st
import streamlit.components.v1 as components  # Renders raw HTML/JS in an iframe
from asr import transcribe_file
from summarization import summarize_transcript
from podcast import search_podcast_series, fetch_episodes, download_podcast_audio, fetch_audio
from utils import model_names, available_gguf_llms
import base64
import os
import mimetypes
import time
from datetime import datetime  # ✅ Correct import
import html as html_lib  # Required for text escaping in transcripts
import json  # ✅ Added for passing data to JS
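
# Streamlit reruns this script top to bottom on every widget interaction;
# st.session_state is the store that keeps results alive across those reruns.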
# Session state init
if "transcript" not in st.session_state:
    st.session_state.transcript = ""
if "summary" not in st.session_state:
    st.session_state.summary = ""
if "status" not in st.session_state:
    st.session_state.status = "Ready"
if "audio_path" not in st.session_state:
    st.session_state.audio_path = None
if "utterances" not in st.session_state:
    st.session_state.utterances = []
if "audio_base64" not in st.session_state:
    st.session_state.audio_base64 = None
if "prev_audio_path" not in st.session_state:
    st.session_state.prev_audio_path = None
if "transcribing" not in st.session_state:
    st.session_state.transcribing = False
st.set_page_config(page_title="🎙️ Moonshine ASR + LLM", layout="wide")
st.title("🎙️ Speech Summarization with Moonshine ASR & LLM")
with st.sidebar:
    st.header("⚙️ Settings")
    vad_threshold = st.slider("VAD Threshold", 0.1, 0.9, 0.5)
    model_name = st.selectbox("Moonshine Model", list(model_names.keys()))
    llm_model = st.selectbox("LLM for Summarization", list(available_gguf_llms.keys()))
    prompt_input = st.text_area("Custom Prompt", value="Summarize the transcript below.")
tab1, tab2, tab3 = st.tabs(["📻 Podcast", "🎵 Audio Input", "📊 Results"])
with tab1:
    st.subheader("Search Podcast")
    query = st.text_input("Enter podcast name")
    if st.button("Search Series"):
        series_list = search_podcast_series(query)
        st.session_state.series_list = series_list
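    # Search results live in session_state so they survive the rerun the button triggers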
| if "series_list" in st.session_state: | |
| series_titles = [f"{s['title']} by {s['artist']}" for s in st.session_state.series_list] | |
| selected_title = st.selectbox("Select Series", series_titles) | |
| series = next((s for s in st.session_state.series_list if f"{s['title']} by {s['artist']}" == selected_title), None) | |
| if series: | |
| st.image(series["thumbnail"], width=150) | |
| st.text_area("Series Info", value=f"Title: {series['title']}\nArtist: {series['artist']}\nEpisodes: {series['episode_count']}", disabled=True) | |
| if st.button("Load Episodes"): | |
| episodes = fetch_episodes(series["feed_url"]) | |
| st.session_state.episodes = episodes | |
| if "episodes" in st.session_state: | |
| episode_titles = [e["title"] for e in st.session_state.episodes] | |
| selected_episode = st.selectbox("Select Episode", episode_titles) | |
| episode = next((e for e in st.session_state.episodes if e["title"] == selected_episode), None) | |
| if episode: | |
| st.text_area("Episode Info", value=f"Title: {episode['title']}\nPublished: {episode['published']}\nDuration: {episode['duration']}", disabled=True) | |
| if st.button("Download Episode"): | |
| audio_path, status = download_podcast_audio(episode["audio_url"], episode["title"], st.session_state.status) | |
| st.session_state.audio_path = audio_path | |
| st.session_state.status = status | |
with tab2:
    st.subheader("Upload or Fetch Audio")
    youtube_url = st.text_input("YouTube URL")
    if st.button("Fetch from YouTube"):
        audio_path, status = fetch_audio(youtube_url, st.session_state.status)
        st.session_state.audio_path = audio_path
        st.session_state.audio_base64 = None  # ✅ Clear base64
        st.session_state.status = status
    uploaded_file = st.file_uploader("Upload Audio", type=["mp3", "wav"])
    if uploaded_file:
        # FIX: Write to /tmp directory instead of current directory,
        # keeping the original extension so the MIME type can be guessed later
        suffix = os.path.splitext(uploaded_file.name)[1].lower() or ".mp3"
        temp_audio_path = f"/tmp/temp_audio{suffix}"
        with open(temp_audio_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.session_state.audio_path = temp_audio_path
        st.session_state.audio_base64 = None  # ✅ Clear base64
with tab3:
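    # Invalidate the cached base64 audio whenever a different file is selected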
    if st.session_state.audio_path and st.session_state.get("prev_audio_path") != st.session_state.audio_path:
        st.session_state.audio_base64 = None
        st.session_state.prev_audio_path = st.session_state.audio_path
| st.subheader("π€ Transcription & Summary") | |
| st.markdown("---") | |
| status_placeholder = st.empty() | |
| summary_container = st.container() | |
| # ===== Audio Player and Transcript Logic ===== | |
| # If we have an audio path, prepare the base64 encoding | |
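    # Caching the result in session_state means the file is read and encoded
    # once, not on every Streamlit rerun.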
    if st.session_state.audio_path and not st.session_state.audio_base64:
        try:
            with open(st.session_state.audio_path, "rb") as f:
                audio_bytes = f.read()
            st.session_state.audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
        except Exception as e:
            st.error(f"Audio loading error: {str(e)}")
    def create_interactive_player(audio_base64, utterances, mime_type="audio/mpeg"):
        """
        Generates a single, self-contained HTML component for the audio player
        and the interactive transcript.

        Why this works:
        - All HTML (player, transcript) and JavaScript logic live in the SAME context.
        - No more complex, failing postMessage communication between different iframes.
        - Highlighting is handled instantly in the browser, not by slow Python reruns.
        - Clicking to seek is also instant, as the JS has direct access to the player.
        """
        # Pass utterances data to JavaScript safely: json.dumps handles JS-string
        # escaping, and html_lib.escape keeps the text inert when injected via innerHTML
        utterances_json = json.dumps(
            [[start, end, html_lib.escape(text)] for start, end, text in utterances]
        )
| html_content = f""" | |
| <!DOCTYPE html> | |
| <html> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <title>Interactive Player</title> | |
| <style> | |
| body {{ font-family: sans-serif; }} | |
| .utterance {{ | |
| padding: 10px; margin: 5px 0; border-radius: 8px; | |
| cursor: pointer; transition: all 0.2s ease-in-out; | |
| border: 1px solid #e0e0e0; line-height: 1.6; | |
| }} | |
| .utterance:hover {{ | |
| background-color: #f5f5f5; | |
| transform: translateX(4px); | |
| }} | |
| .current-utterance {{ | |
| background-color: #fff3e0 !important; | |
| border-left: 5px solid #ff9800; | |
| font-weight: 600; | |
| }} | |
| #transcript-container {{ | |
| max-height: 500px; | |
| overflow-y: auto; | |
| padding-right: 10px; | |
| }} | |
| audio {{ | |
| width: 100%; | |
| margin-bottom: 20px; | |
| }} | |
| </style> | |
| </head> | |
| <body> | |
| <audio id="audioPlayer" controls> | |
| <source src="data:audio/mp3;base64,{audio_base64}" type="audio/mp3"> | |
| Your browser does not support the audio element. | |
| </audio> | |
| <div id="transcript-container"></div> | |
| <script> | |
| const player = document.getElementById('audioPlayer'); | |
| const transcriptContainer = document.getElementById('transcript-container'); | |
| const utterances = {utterances_json}; | |
| let currentHighlight = null; | |
| // 1. Function to build the transcript from data | |
| function buildTranscript() {{ | |
| utterances.forEach((utt, index) => {{ | |
| if (utt.length !== 3) return; // Skip malformed utterances | |
| const [start, end, text] = utt; | |
| const utteranceDiv = document.createElement('div'); | |
| utteranceDiv.className = 'utterance'; | |
| utteranceDiv.dataset.start = start; | |
| utteranceDiv.dataset.end = end; | |
| utteranceDiv.dataset.index = index; | |
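                        // Render the start time as MM:SS (characters 14-18 of the ISO timestamp)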
                        const startTime = new Date(start * 1000).toISOString().substring(14, 19);
                        utteranceDiv.innerHTML = `<b>[${{startTime}}]</b> ${{text}}`;
                        // ✅ FIX: CLICK TO SEEK
                        // Add click event listener to seek the audio player
                        utteranceDiv.addEventListener('click', () => {{
                            console.log(`Clicked utterance. Seeking to: ${{start}}`);
                            player.currentTime = start;
                            player.play();
                        }});
                        transcriptContainer.appendChild(utteranceDiv);
                    }});
                }}
                // 2. Handle highlighting based on the current audio time
                // ✅ FIX: HIGHLIGHTING AS AUDIO PLAYS
                function updateHighlight() {{
                    const currentTime = player.currentTime;
                    let activeIndex = -1;
                    for (let i = 0; i < utterances.length; i++) {{
                        const [start, end] = utterances[i];
                        if (currentTime >= start && currentTime < end) {{
                            activeIndex = i;
                            break;
                        }}
                    }}
                    // Look up the div for the active utterance by its index
                    const activeDiv = activeIndex >= 0
                        ? transcriptContainer.querySelector(`[data-index="${{activeIndex}}"]`)
                        : null;
                    if (activeDiv !== currentHighlight) {{
                        // Remove highlight from the previous element
                        if (currentHighlight) {{
                            currentHighlight.classList.remove('current-utterance');
                        }}
                        // Add highlight to the new element
                        if (activeDiv) {{
                            activeDiv.classList.add('current-utterance');
                            // Auto-scroll into view
                            activeDiv.scrollIntoView({{ behavior: 'smooth', block: 'center' }});
                        }}
                        currentHighlight = activeDiv;
                    }}
                }}
                // 3. Attach listeners
                buildTranscript();
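                // 'timeupdate' fires a few times per second during playback,
                // which is enough for utterance-level highlighting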
                player.addEventListener('timeupdate', updateHighlight);
            </script>
        </body>
        </html>
        """
        return html_content
    # Placeholder for transcript display (either streaming text or interactive player)
    transcript_display = st.empty()
    # ===== Transcription Process =====
    if st.button("🎙️ Transcribe Audio", key="transcribe_button_tab3"):
        if st.session_state.audio_path:
            status_placeholder.info("🔄 Transcribing audio... Please wait.")
            # Reset previous results
            st.session_state.utterances = []
            st.session_state.transcript = ""
            st.session_state.transcribing = True
            # Set up live streaming display
            with transcript_display.container():
                st.markdown("### 📝 Live Transcript (Streaming)")
                live_placeholder = st.empty()
            try:
                transcription_gen = transcribe_file(
                    st.session_state.audio_path,
                    vad_threshold,
                    model_names[model_name]
                )
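                # transcribe_file is assumed to yield (latest_segment, all_utterances_so_far)
                # pairs; only the cumulative list is needed here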
                for _, all_utts in transcription_gen:
                    st.session_state.utterances = list(all_utts) if all_utts else []
                    st.session_state.transcript = "\n".join(
                        text for start, end, text in st.session_state.utterances
                    )
                    live_placeholder.markdown(st.session_state.transcript)
            except Exception as e:
                status_placeholder.error(f"Transcription error: {str(e)}")
                st.session_state.transcribing = False
            else:
                # st.rerun() works by raising an internal control-flow exception, so it
                # lives in this else clause where the except above cannot swallow it
                st.session_state.transcribing = False
                status_placeholder.success("✅ Transcription completed! The interactive player is now active.")
                st.rerun()
        else:
            status_placeholder.warning("⚠️ No audio file available")
    # ===== Summarization Process =====
    if st.button("📝 Generate Summary", key="summarize_button_tab3"):
        if st.session_state.transcript:
            status_placeholder.info("🧠 Generating summary...")
            st.session_state.summary = ""
            summary_container.empty()  # Clear old summary
            live_summary_area = st.empty()
            with live_summary_area.container():
                st.markdown("### 📝 Live Summary (In Progress)")
                progress_placeholder = st.empty()
            summary_gen = summarize_transcript(st.session_state.transcript, llm_model, prompt_input)
            # Accumulate the summary in session_state
            for accumulated_summary in summary_gen:
                st.session_state.summary = accumulated_summary
                progress_placeholder.markdown(accumulated_summary)
            # Clear the "Live Summary" placeholder
            live_summary_area.empty()
        else:
            status_placeholder.warning("⚠️ No transcript available")
    # Display the interactive player if transcription is complete
    if st.session_state.get("audio_base64") and st.session_state.get("utterances") and not st.session_state.transcribing:
        # Guess the MIME type from the file extension so WAV uploads get a correct data URI
        mime_type = mimetypes.guess_type(st.session_state.audio_path or "")[0] or "audio/mpeg"
        component_html = create_interactive_player(st.session_state.audio_base64, st.session_state.utterances, mime_type)
        # Calculate a dynamic height for the component
        estimated_height = min(600, max(200, len(st.session_state.utterances) * 50 + 100))
        with transcript_display.container():
            components.html(component_html, height=estimated_height, scrolling=True)
    elif not st.session_state.utterances and not st.session_state.transcribing:
        with transcript_display.container():
            st.info("No transcript available. Click 'Transcribe Audio' to generate one.")

    # ✅ THIS BLOCK NOW HANDLES ALL DISPLAYING
    # Display the final summary if it exists in the session state
    if st.session_state.summary:
        with summary_container:
            # Title changed for consistency
            st.markdown("### 📋 Final Summary")
            st.markdown(st.session_state.summary)