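"""Streamlit app: transcribe podcast, YouTube, or uploaded audio with Moonshine
ASR and summarize the transcript with a local GGUF LLM, with an interactive
click-to-seek transcript player."""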
import streamlit as st
from asr import transcribe_file
from summarization import summarize_transcript
from podcast import search_podcast_series, fetch_episodes, download_podcast_audio, fetch_audio
from utils import model_names, available_gguf_llms
import base64
import mimetypes
import os
import time
from datetime import datetime  # ✅ Correct import
import html as html_lib  # Required for text escaping in transcripts
import json  # ✅ Added for passing data to JS
# Session state init
if "transcript" not in st.session_state:
st.session_state.transcript = ""
if "summary" not in st.session_state:
st.session_state.summary = ""
if "status" not in st.session_state:
st.session_state.status = "Ready"
if "audio_path" not in st.session_state:
st.session_state.audio_path = None
if "utterances" not in st.session_state:
st.session_state.utterances = []
if "audio_base64" not in st.session_state:
st.session_state.audio_base64 = None
if "prev_audio_path" not in st.session_state:
st.session_state.prev_audio_path = None
if "transcribing" not in st.session_state:
st.session_state.transcribing = False
st.set_page_config(page_title="🎙️ Moonshine ASR + LLM", layout="wide")
st.title("🎙️ Speech Summarization with Moonshine ASR & LLM")
with st.sidebar:
    st.header("⚙️ Settings")
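    # The VAD threshold is passed straight through to transcribe_file; assuming a
    # Silero-style VAD, it is the speech-probability cutoff (higher = stricter).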
vad_threshold = st.slider("VAD Threshold", 0.1, 0.9, 0.5)
    model_name = st.selectbox("Moonshine Model", list(model_names.keys()))
llm_model = st.selectbox("LLM for Summarization", list(available_gguf_llms.keys()))
prompt_input = st.text_area("Custom Prompt", value="Summarize the transcript below.")
tab1, tab2, tab3 = st.tabs(["📻 Podcast", "🎵 Audio Input", "📊 Results"])
with tab1:
st.subheader("Search Podcast")
query = st.text_input("Enter podcast name")
if st.button("Search Series"):
series_list = search_podcast_series(query)
st.session_state.series_list = series_list
if "series_list" in st.session_state:
series_titles = [f"{s['title']} by {s['artist']}" for s in st.session_state.series_list]
selected_title = st.selectbox("Select Series", series_titles)
series = next((s for s in st.session_state.series_list if f"{s['title']} by {s['artist']}" == selected_title), None)
if series:
st.image(series["thumbnail"], width=150)
st.text_area("Series Info", value=f"Title: {series['title']}\nArtist: {series['artist']}\nEpisodes: {series['episode_count']}", disabled=True)
if st.button("Load Episodes"):
episodes = fetch_episodes(series["feed_url"])
st.session_state.episodes = episodes
if "episodes" in st.session_state:
episode_titles = [e["title"] for e in st.session_state.episodes]
selected_episode = st.selectbox("Select Episode", episode_titles)
episode = next((e for e in st.session_state.episodes if e["title"] == selected_episode), None)
if episode:
st.text_area("Episode Info", value=f"Title: {episode['title']}\nPublished: {episode['published']}\nDuration: {episode['duration']}", disabled=True)
if st.button("Download Episode"):
audio_path, status = download_podcast_audio(episode["audio_url"], episode["title"], st.session_state.status)
st.session_state.audio_path = audio_path
st.session_state.status = status
with tab2:
st.subheader("Upload or Fetch Audio")
youtube_url = st.text_input("YouTube URL")
if st.button("Fetch from YouTube"):
audio_path, status = fetch_audio(youtube_url, st.session_state.status)
st.session_state.audio_path = audio_path
        st.session_state.audio_base64 = None  # ✅ Clear base64
st.session_state.status = status
uploaded_file = st.file_uploader("Upload Audio", type=["mp3", "wav"])
if uploaded_file:
        # FIX: write to /tmp (writable on Spaces) and keep the upload's own extension
        suffix = os.path.splitext(uploaded_file.name)[1].lower() or ".mp3"
        temp_audio_path = f"/tmp/temp_audio{suffix}"
        with open(temp_audio_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.session_state.audio_path = temp_audio_path
        st.session_state.audio_base64 = None  # ✅ Clear base64
with tab3:
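    # Invalidate the cached base64 audio whenever a different file was selected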
if st.session_state.audio_path and st.session_state.get("prev_audio_path") != st.session_state.audio_path:
st.session_state.audio_base64 = None
st.session_state.prev_audio_path = st.session_state.audio_path
st.subheader("π€ Transcription & Summary")
st.markdown("---")
status_placeholder = st.empty()
summary_container = st.container()
# ===== Audio Player and Transcript Logic =====
# If we have an audio path, prepare the base64 encoding
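    # The player runs inside a components.html iframe, which cannot read local
    # file paths, so the audio is inlined as a base64 data URL (roughly 4/3 the
    # file size, so very long episodes make the page heavier).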
if st.session_state.audio_path and not st.session_state.audio_base64:
try:
with open(st.session_state.audio_path, "rb") as f:
audio_bytes = f.read()
st.session_state.audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
except Exception as e:
st.error(f"Audio loading error: {str(e)}")
    def create_interactive_player(audio_base64, utterances, mime_type="audio/mpeg"):
"""
Generates a single, self-contained HTML component for the audio player
and the interactive transcript.
Why this works:
- All HTML (player, transcript) and JavaScript logic live in the SAME context.
- No more complex, failing postMessage communication between different iframes.
- Highlighting is handled instantly in the browser, not by slow Python reruns.
- Clicking to seek is also instant, as the JS has direct access to the player.
"""
        # Escape utterance text before embedding so stray HTML in a transcript
        # can't break the component, then serialize it for the JS below
        utterances_json = json.dumps(
            [(start, end, html_lib.escape(text)) for start, end, text in utterances]
        )
html_content = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Interactive Player</title>
<style>
body {{ font-family: sans-serif; }}
.utterance {{
padding: 10px; margin: 5px 0; border-radius: 8px;
cursor: pointer; transition: all 0.2s ease-in-out;
border: 1px solid #e0e0e0; line-height: 1.6;
}}
.utterance:hover {{
background-color: #f5f5f5;
transform: translateX(4px);
}}
.current-utterance {{
background-color: #fff3e0 !important;
border-left: 5px solid #ff9800;
font-weight: 600;
}}
#transcript-container {{
max-height: 500px;
overflow-y: auto;
padding-right: 10px;
}}
audio {{
width: 100%;
margin-bottom: 20px;
}}
</style>
</head>
<body>
    <audio id="audioPlayer" controls>
        <source src="data:{mime_type};base64,{audio_base64}" type="{mime_type}">
        Your browser does not support the audio element.
    </audio>
<div id="transcript-container"></div>
<script>
const player = document.getElementById('audioPlayer');
const transcriptContainer = document.getElementById('transcript-container');
const utterances = {utterances_json};
let currentHighlight = null;
// 1. Function to build the transcript from data
function buildTranscript() {{
utterances.forEach((utt, index) => {{
if (utt.length !== 3) return; // Skip malformed utterances
const [start, end, text] = utt;
const utteranceDiv = document.createElement('div');
utteranceDiv.className = 'utterance';
utteranceDiv.dataset.start = start;
utteranceDiv.dataset.end = end;
utteranceDiv.dataset.index = index;
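                // Format the start offset as MM:SS (wraps for clips over an hour)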
const startTime = new Date(start * 1000).toISOString().substr(14, 5);
utteranceDiv.innerHTML = `<b>[${{startTime}}]</b> ${{text}}`;
                // ✅ FIX: CLICK TO SEEK
// Add click event listener to seek the audio player
utteranceDiv.addEventListener('click', () => {{
console.log(`Clicked utterance. Seeking to: ${{start}}`);
player.currentTime = start;
player.play();
}});
transcriptContainer.appendChild(utteranceDiv);
}});
}}
// 2. Function to handle highlighting based on audio time
        // ✅ FIX: HIGHLIGHTING AS AUDIO PLAYS
function updateHighlight() {{
const currentTime = player.currentTime;
let activeUtterance = null;
for (const utt of utterances) {{
const [start, end, text] = utt;
if (currentTime >= start && currentTime < end) {{
activeUtterance = utt;
break;
}}
}}
            // Find the div corresponding to the active utterance
let activeDiv = null;
if (activeUtterance) {{
activeDiv = transcriptContainer.querySelector(`[data-start="${{activeUtterance[0]}}"]`);
}}
if (activeDiv !== currentHighlight) {{
// Remove highlight from the previous element
if (currentHighlight) {{
currentHighlight.classList.remove('current-utterance');
}}
// Add highlight to the new element
if (activeDiv) {{
activeDiv.classList.add('current-utterance');
// Auto-scroll into view
activeDiv.scrollIntoView({{ behavior: 'smooth', block: 'center' }});
}}
currentHighlight = activeDiv;
}}
}}
// 3. Attach listeners
buildTranscript();
player.addEventListener('timeupdate', updateHighlight);
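        // 'timeupdate' fires a few times per second, which is plenty for
        // utterance-level highlighting without any manual polling.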
</script>
</body>
</html>
"""
return html_content
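    # (The component itself is rendered near the bottom of this tab once a
    # transcript exists - see the display block after the action buttons.)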
# Placeholder for transcript display (either streaming text or interactive player)
transcript_display = st.empty()
# ===== Transcription Process =====
if st.button("ποΈ Transcribe Audio", key="transcribe_button_tab3"):
if st.session_state.audio_path:
status_placeholder.info("π Transcribing audio... Please wait.")
# Reset previous results
st.session_state.utterances = []
st.session_state.transcript = ""
st.session_state.transcribing = True
# Set up live streaming display
with transcript_display.container():
st.markdown("### π Live Transcript (Streaming)")
live_placeholder = st.empty()
try:
transcription_gen = transcribe_file(
st.session_state.audio_path,
vad_threshold,
model_names[model_name]
)
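                # transcribe_file appears to yield (latest_segment, all_utterances_so_far);
                # only the running list of utterances is used below.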
for _, all_utts in transcription_gen:
st.session_state.utterances = list(all_utts) if all_utts else []
                    st.session_state.transcript = "\n".join(
                        text for _, _, text in st.session_state.utterances
                    )
live_placeholder.markdown(st.session_state.transcript)
                st.session_state.transcribing = False
                status_placeholder.success("✅ Transcription completed! The interactive player is now active.")
                # Rerun so the streaming placeholder is replaced by the interactive player
                st.rerun()
except Exception as e:
status_placeholder.error(f"Transcription error: {str(e)}")
st.session_state.transcribing = False
else:
            status_placeholder.warning("⚠️ No audio file available")
# ===== Summarization Process =====
if st.button("π Generate Summary", key="summarize_button_tab3"):
if st.session_state.transcript:
status_placeholder.info("π§ Generating summary...")
st.session_state.summary = ""
summary_container.empty() # Clear old summary
live_summary_area = st.empty()
with live_summary_area.container():
st.markdown("### π Live Summary (In Progress)")
progress_placeholder = st.empty()
summary_gen = summarize_transcript(st.session_state.transcript, llm_model, prompt_input)
# Accumulate the summary in session_state
for accumulated_summary in summary_gen:
st.session_state.summary = accumulated_summary
progress_placeholder.markdown(accumulated_summary)
# Clear the "Live Summary" placeholder
live_summary_area.empty()
else:
            status_placeholder.warning("⚠️ No transcript available")
# Display the interactive player if transcription is complete
if st.session_state.get("audio_base64") and st.session_state.get("utterances") and not st.session_state.transcribing:
        mime_type = mimetypes.guess_type(st.session_state.audio_path or "")[0] or "audio/mpeg"
        component_html = create_interactive_player(st.session_state.audio_base64, st.session_state.utterances, mime_type)
# Calculate a dynamic height for the component
estimated_height = min(600, max(200, len(st.session_state.utterances) * 50 + 100))
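        # Rough heuristic: ~50 px per utterance plus player chrome, clamped to 200-600 px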
with transcript_display.container():
st.components.v1.html(component_html, height=estimated_height, scrolling=True)
elif not st.session_state.utterances and not st.session_state.transcribing:
with transcript_display.container():
st.info("No transcript available. Click 'Transcribe Audio' to generate one.")
    # ✅ THIS BLOCK NOW HANDLES ALL DISPLAYING
# Display the final summary if it exists in the session state
if st.session_state.summary:
with summary_container:
# Title changed for consistency
st.markdown("### π Final Summary")
st.markdown(st.session_state.summary) |