<!--
  VoxSum / frontend/index.html
  Luigi's picture
  feat: add custom audio player with visual timeline
  2ba9463
-->
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <!-- Responsive viewport and the page title. -->
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>VoxSum Studio</title>

  <!-- Application stylesheet, referenced with a root-relative path. -->
  <link rel="stylesheet" href="/styles.css" />

  <!--
    The "marked" package, loaded from the jsDelivr CDN.
    NOTE(review): the URL names no version and the tag has no integrity
    attribute, so the file served can change over time. Consider pinning a
    version and adding SRI once the version expected by /app.js is confirmed.
  -->
  <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
</head>
<body>
<!-- Page banner: product name, tagline, and a notice about processing time. -->
<header class="app-header">
  <h1>VoxSum Studio</h1>
  <p class="tagline">Transform Audio into Insightful Summaries</p>
  <div class="performance-notice">
    <small>⚡ Running on limited CPU - operations may take 2-5 minutes for large files</small>
  </div>
</header>
<div class="app-shell">
<aside class="sidebar">
<!--
  ASR settings panel. The backend and model <select> elements ship empty;
  presumably /app.js fills them at runtime (confirm there).
-->
<section class="panel">
  <h2>ASR Settings</h2>

  <label for="backend-select">Backend</label>
  <select id="backend-select"></select>

  <label for="model-select">Model</label>
  <select id="model-select"></select>

  <!-- Starts with the "hidden" class; presumably revealed by script when relevant. -->
  <div class="conditional hidden" id="sensevoice-options">
    <label for="sensevoice-language">Language</label>
    <select id="sensevoice-language">
      <option value="auto">Auto</option>
      <option value="zh">Chinese</option>
      <option value="en">English</option>
      <option value="ja">Japanese</option>
      <option value="ko">Korean</option>
      <option value="yue">Cantonese</option>
    </select>

    <!--
      This caption is not attached to a single control, so on its own it names
      nothing. role="group" plus aria-labelledby ties it to the radio set so
      assistive technology announces "Text Normalization" for both options.
      The element type and classes are unchanged to keep existing styling.
    -->
    <label id="textnorm-label">Text Normalization</label>
    <div class="radio-group" role="group" aria-labelledby="textnorm-label">
      <label><input type="radio" name="textnorm" value="withitn" checked /> With ITN</label>
      <label><input type="radio" name="textnorm" value="noitn" /> Raw</label>
    </div>
  </div>

  <label for="vad-threshold">VAD Threshold</label>
  <input id="vad-threshold" type="range" min="0.1" max="0.9" step="0.05" value="0.5" />
  <!-- Textual readout next to the slider; initial text matches the slider's default value. -->
  <span class="hint" id="vad-value">0.50</span>
</section>
<!-- Speaker diarization options. The settings group starts with the "hidden" class. -->
<section class="panel">
  <h2>Diarization</h2>

  <label class="toggle">
    <input id="diarization-toggle" type="checkbox" /> Enable speaker diarization
  </label>

  <div class="conditional hidden" id="diarization-settings">
    <label for="num-speakers">Number of speakers (-1 = auto)</label>
    <input id="num-speakers" type="number" min="-1" max="10" value="-1" />

    <label for="cluster-threshold">Cluster threshold</label>
    <input id="cluster-threshold" type="range" min="0.1" max="1" step="0.05" value="0.5" />
    <!-- Textual readout next to the slider; initial text matches the slider's default value. -->
    <span class="hint" id="cluster-value">0.50</span>
  </div>
</section>
<!-- Language model picker. The <select> ships without options. -->
<section class="panel">
  <h2>Language Model</h2>

  <label for="llm-select">LLM Model</label>
  <select id="llm-select"></select>
</section>
<!-- Summarization prompt editor, pre-filled with a default prompt. -->
<section class="panel">
  <h2>Summarization</h2>

  <label for="prompt-input">Custom Prompt</label>
  <!-- Kept on one line: whitespace inside a textarea becomes part of its value. -->
  <textarea id="prompt-input" rows="4">Summarize the transcript below.</textarea>
</section>
</aside>
<main class="content">
<!--
  Top-level tab switcher. Each button's data-target holds the id of one of the
  .tab-panel sections; the first button carries "active" on load.
  type="button" is explicit so these never act as submit buttons if the
  markup is later wrapped in a form.
-->
<nav class="tabs">
  <button class="tab active" data-target="podcast-tab" type="button">🎙️ Podcast</button>
  <button class="tab" data-target="audio-tab" type="button">🎵 Audio Input</button>
  <button class="tab" data-target="results-tab" type="button">📄 Results</button>
</nav>
<!--
  Podcast search tab. Carries "active" in the markup, matching the first tab
  button. Both result lists ship empty.
-->
<section class="tab-panel active" id="podcast-tab">
  <div class="panel">
    <h2>Search Podcasts</h2>
    <div class="form-row">
      <!--
        aria-label gives the field an accessible name; a placeholder alone is
        not a label. The text mirrors the placeholder so the layout is unchanged.
      -->
      <input id="podcast-query" type="text" placeholder="Podcast title" aria-label="Podcast title" />
      <button id="podcast-search" type="button">Search</button>
    </div>
    <div class="list-grid">
      <section class="list-section">
        <header class="list-section-header">
          <h3>Podcast Channels</h3>
          <p class="list-hint">Pick a show to reveal recent episodes.</p>
        </header>
        <div class="list" id="podcast-results"></div>
      </section>
      <section class="list-section">
        <header class="list-section-header">
          <h3>Episodes</h3>
          <p class="list-hint">Episodes for the selected podcast appear here.</p>
        </header>
        <div class="list" id="episode-results"></div>
      </section>
    </div>
  </div>
</section>
<!--
  Audio input tab: fetch audio from a YouTube URL or upload a local file.
  Both inputs get an aria-label because neither has a <label>; the headings
  and placeholder are not programmatically associated with them.
-->
<section class="tab-panel" id="audio-tab">
  <div class="panel">
    <h2>YouTube</h2>
    <div class="form-row">
      <input id="youtube-url" type="url" placeholder="https://youtube.com/..." aria-label="YouTube URL" />
      <button id="youtube-fetch" type="button">Fetch Audio</button>
    </div>
  </div>
  <div class="panel">
    <h2>Upload Audio</h2>
    <!-- accept="audio/*" limits the file picker to audio types. -->
    <input id="file-input" type="file" accept="audio/*" aria-label="Upload audio file" />
  </div>
</section>
<section id="results-tab" class="tab-panel">
<!--
  Primary action bar for the results tab. The cancel and speaker-name buttons
  start with the "hidden" class.
-->
<div class="actions">
  <button class="primary" id="transcribe-btn" type="button">Transcribe Audio</button>
  <button class="danger hidden" id="cancel-transcribe-btn" type="button">Cancel Transcription</button>
  <button class="secondary hidden" id="detect-speaker-names-btn" type="button">Detect Speaker Names</button>
  <button class="secondary" id="summary-btn" type="button">Generate Summary</button>
  <button class="danger hidden" id="cancel-summary-btn" type="button">Cancel Summary</button>
  <!--
    role="status" makes this a polite live region, so text written into it
    later is announced by screen readers without moving focus. The region is
    present in the initial markup, which live regions require.
  -->
  <span class="status-text" id="status-text" role="status">Ready</span>
</div>
<!--
  Progress indicator; starts with the "hidden" class.
  NOTE(review): no progressbar role is declared here. Adding one would also
  need script-maintained aria-valuenow, so it is left as plain markup.
-->
<div class="progress-container hidden" id="progress-container">
  <div class="progress-bar">
    <div class="progress-fill" id="progress-fill"></div>
  </div>
</div>
<!--
  Custom audio player. The <audio> element has no "controls" attribute, so
  the controls below are the only visible playback UI.
-->
<section class="panel audio-player-panel">
  <h2>Audio Player</h2>
  <div class="custom-audio-player">
    <audio id="audio-player" preload="auto"></audio>
    <!-- Custom Controls -->
    <div class="player-controls">
      <!--
        Icon-only button: aria-label supplies a dependable accessible name
        (title alone is not reliably exposed). The icon spans are empty and
        decorative, so they are hidden from assistive technology.
        NOTE(review): if script rewrites the title per state, update
        aria-label alongside it.
      -->
      <button class="play-pause-btn" id="play-pause-btn" type="button" title="Play/Pause" aria-label="Play/Pause">
        <span class="play-icon" aria-hidden="true"></span>
        <span class="pause-icon hidden" aria-hidden="true"></span>
      </button>
      <span class="time-display" id="current-time">0:00</span>
      <!--
        NOTE(review): nothing in this markup makes the timeline focusable or
        keyboard-operable. No slider role is added here because that would
        promise keyboard handling this file cannot show.
      -->
      <div class="timeline-container">
        <canvas class="waveform-canvas" id="waveform-canvas"></canvas>
        <div class="timeline-bar" id="timeline-bar">
          <div class="timeline-progress" id="timeline-progress"></div>
          <div class="timeline-segments" id="timeline-segments"></div>
          <div class="timeline-handle" id="timeline-handle"></div>
        </div>
      </div>
      <span class="time-display" id="duration-time">0:00</span>
      <div class="volume-control">
        <!-- aria-label replaces the emoji's default spoken name with the action. -->
        <button class="volume-btn" id="volume-btn" type="button" title="Mute/Unmute" aria-label="Mute/Unmute">🔊</button>
        <!-- The slider had no label of any kind; aria-label names it. -->
        <input class="volume-slider" id="volume-slider" type="range" min="0" max="100" value="100" aria-label="Volume" />
      </div>
    </div>
  </div>
</section>
<!--
  Transcript panel. The list and the count hint ship empty; list items
  presumably come from the utterance template via script (confirm in /app.js).
-->
<section class="panel">
  <div class="panel-header">
    <h2>Transcript</h2>
    <span class="hint" id="utterance-count"></span>
  </div>
  <div id="transcript-container">
    <ul id="transcript-list"></ul>
  </div>
</section>
<!-- Speaker analysis panel; starts with the "hidden" class and two empty containers. -->
<section class="panel hidden" id="diarization-summary">
  <h2>Speaker Analysis</h2>
  <div id="diarization-metrics"></div>
  <div id="speaker-breakdown"></div>
</section>
<!-- Document title panel; the output container ships empty. -->
<section class="panel">
  <h2>Document Title</h2>
  <div class="title-display" id="title-output"></div>
</section>
<!-- Summary panel; the output container ships empty. -->
<section class="panel">
  <h2>Summary</h2>
  <div class="summary" id="summary-output"></div>
</section>
<!--
  Export panel. Both format <select> elements ship without options. The
  export buttons declare type="button" explicitly so they never act as submit
  buttons if the markup is later wrapped in a form.
-->
<section class="panel">
  <h2>Export</h2>
  <div class="export-grid">
    <div>
      <label for="transcript-format">Transcript format</label>
      <select id="transcript-format"></select>
    </div>
    <div>
      <label class="toggle">
        <input id="include-timestamps" type="checkbox" checked /> Include timestamps
      </label>
    </div>
    <button id="export-transcript" type="button">Export Transcript</button>
    <div>
      <label for="summary-format">Summary format</label>
      <select id="summary-format"></select>
    </div>
    <button id="export-summary" type="button">Export Summary</button>
  </div>
</section>
</section>
</main>
</div>
<!--
  Inert template for one transcript entry: a header with timestamp, speaker
  tag, and edit button, the utterance text, and an edit area that starts with
  the "hidden" class. Presumably cloned by /app.js per utterance (confirm there).
  Accessibility changes: the emoji-only edit button and the unlabeled textarea
  get aria-label so each has a dependable accessible name, and every button
  declares type="button".
-->
<template id="utterance-template">
  <li class="utterance-item">
    <div class="utterance-header">
      <span class="timestamp"></span>
      <span class="speaker-tag hidden"></span>
      <div class="utterance-actions">
        <button class="edit-btn" type="button" title="Edit" aria-label="Edit">✏️</button>
      </div>
    </div>
    <div class="utterance-text"></div>
    <div class="edit-area hidden">
      <textarea rows="3" aria-label="Edit utterance text"></textarea>
      <div class="edit-controls">
        <button class="save-edit" type="button">Save</button>
        <button class="cancel-edit" type="button">Cancel</button>
      </div>
    </div>
  </li>
</template>
<script src="/app.js"></script>
</body>
</html>