<!-- VoxFactory — docs/diagrams.html — "adds docs" (commit a3a3978), author: Joseph Pollack -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Voxtral ASR Fine-tuning - Architecture Diagrams</title>
<!-- Mermaid is loaded as an ES module from the jsDelivr CDN, pinned to 10.6.1. -->
<script type="module">
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@10.6.1/dist/mermaid.esm.min.mjs';
// startOnLoad: true makes Mermaid render every element with class="mermaid"
// on the window "load" event, so no explicit mermaid.run() call is needed.
mermaid.initialize({
startOnLoad: true,
theme: 'base',
// Custom palette matching the page CSS (blue primary, orange/pink accents).
themeVariables: {
primaryColor: '#e3f2fd',
primaryTextColor: '#1976d2',
primaryBorderColor: '#01579b',
lineColor: '#424242',
secondaryColor: '#fff3e0',
tertiaryColor: '#fce4ec',
background: '#ffffff',
mainBkg: '#ffffff',
secondBkg: '#f5f5f5',
textColor: '#333333'
},
// useMaxWidth lets diagrams scale down to the container width.
flowchart: {
useMaxWidth: true,
htmlLabels: true,
curve: 'basis'
},
sequence: {
useMaxWidth: true
}
});
</script>
<style>
/* Single centred column on a light grey canvas. */
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.6;
color: #333;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
background: #f8f9fa;
}
/* Page banner card. */
.header {
text-align: center;
margin-bottom: 40px;
padding: 20px;
background: white;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
/* White card wrapping each diagram, its title and description. */
.diagram-container {
background: white;
margin: 20px 0;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.diagram-title {
font-size: 1.5em;
font-weight: bold;
margin-bottom: 15px;
color: #1976d2;
border-bottom: 2px solid #e3f2fd;
padding-bottom: 10px;
}
.diagram-description {
margin-bottom: 20px;
color: #666;
font-style: italic;
}
/* Quick-nav panel pinned to the top-right corner of the viewport. */
.navigation {
position: fixed;
top: 20px;
right: 20px;
background: white;
padding: 15px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
max-width: 200px;
}
/* FIX: below ~1680px (1200px column + page padding + 200px panel + gaps)
the fixed panel overlapped the content column, so drop it into normal
document flow on narrower viewports. */
@media (max-width: 1680px) {
.navigation {
position: static;
max-width: none;
margin-bottom: 20px;
}
}
.nav-link {
display: block;
padding: 8px 0;
color: #1976d2;
text-decoration: none;
border-bottom: 1px solid #eee;
}
.nav-link:hover {
color: #01579b;
text-decoration: underline;
}
.nav-link:last-child {
border-bottom: none;
}
/* "Show Mermaid Code" toggle button; injected by the script at the bottom of the page. */
.code-toggle {
background: #f5f5f5;
border: 1px solid #ddd;
padding: 10px;
margin: 10px 0;
border-radius: 4px;
cursor: pointer;
font-size: 0.9em;
}
/* Raw mermaid source block; hidden until revealed via .code-toggle. */
.mermaid-code {
display: none;
background: #f8f9fa;
border: 1px solid #dee2e6;
border-radius: 4px;
padding: 15px;
margin: 10px 0;
font-family: 'Courier New', monospace;
font-size: 0.85em;
white-space: pre-wrap;
overflow-x: auto;
}
.download-btn {
background: #1976d2;
color: white;
border: none;
padding: 8px 16px;
border-radius: 4px;
cursor: pointer;
font-size: 0.9em;
margin: 10px 5px 10px 0;
}
.download-btn:hover {
background: #01579b;
}
/* Printed output: hide interactive chrome and avoid splitting a diagram across pages. */
@media print {
.navigation, .code-toggle, .download-btn {
display: none;
}
.diagram-container {
break-inside: avoid;
margin: 10px 0;
}
}
</style>
</head>
<body>
<!-- Page banner. FIX: repaired mis-encoded emoji (was UTF-8 read as Latin-1)
and escaped the bare ampersand. -->
<div class="header">
<h1>🎯 Voxtral ASR Fine-tuning</h1>
<h2>Architecture &amp; Workflow Diagrams</h2>
<p>Interactive documentation with Mermaid diagrams</p>
</div>
<!-- Quick-navigation panel; each link targets a .diagram-container id below. -->
<nav class="navigation">
<strong>Quick Navigation</strong>
<a href="#overview" class="nav-link">Overview</a>
<a href="#architecture" class="nav-link">Architecture</a>
<a href="#interface" class="nav-link">Interface Workflow</a>
<a href="#training" class="nav-link">Training Pipeline</a>
<a href="#deployment" class="nav-link">Deployment Pipeline</a>
<a href="#dataflow" class="nav-link">Data Flow</a>
</nav>
<div id="overview" class="diagram-container">
<div class="diagram-title">πŸ“‹ Documentation Overview</div>
<div class="diagram-description">
High-level overview of the Voxtral ASR Fine-tuning application and its documentation structure.
</div>
<div class="mermaid">
graph TD
START(["Voxtral ASR Fine-tuning App"]) --> OVERVIEW{Choose Documentation}
OVERVIEW --> ARCH["Architecture Overview"]
OVERVIEW --> WORKFLOW["Interface Workflow"]
OVERVIEW --> TRAINING["Training Pipeline"]
OVERVIEW --> DEPLOYMENT["Deployment Pipeline"]
OVERVIEW --> DATAFLOW["Data Flow"]
ARCH --> ARCH_DIAG["High-level Architecture<br/>System Components & Layers"]
WORKFLOW --> WORKFLOW_DIAG["User Journey<br/>Recording β†’ Training β†’ Demo"]
TRAINING --> TRAINING_DIAG["Training Scripts<br/>Data β†’ Model β†’ Results"]
DEPLOYMENT --> DEPLOYMENT_DIAG["Publishing & Demo<br/>Model β†’ Hub β†’ Space"]
DATAFLOW --> DATAFLOW_DIAG["Complete Data Journey<br/>Input β†’ Processing β†’ Output"]
subgraph "Core Components"
INTERFACE["interface.py<br/>Gradio Web UI"]
TRAIN_SCRIPTS["scripts/train*.py<br/>Training Scripts"]
DEPLOY_SCRIPT["scripts/deploy_demo_space.py<br/>Demo Deployment"]
PUSH_SCRIPT["scripts/push_to_huggingface.py<br/>Model Publishing"]
end
subgraph "Key Data Formats"
JSONL["JSONL Dataset<br/>{'audio_path': '...', 'text': '...'}"]
HFDATA["HF Hub Models<br/>username/model-name"]
SPACES["HF Spaces<br/>Interactive Demos"]
end
INTERFACE --> WORKFLOW
TRAIN_SCRIPTS --> TRAINING
DEPLOY_SCRIPT --> DEPLOYMENT
PUSH_SCRIPT --> DEPLOYMENT
JSONL --> DATAFLOW
HFDATA --> DEPLOYMENT
SPACES --> DEPLOYMENT
classDef entry fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
classDef category fill:#fff3e0,stroke:#f57c00,stroke-width:2px
classDef diagram fill:#e8f5e8,stroke:#388e3c,stroke-width:2px
classDef component fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
classDef data fill:#e1f5fe,stroke:#0277bd,stroke-width:2px
class START entry
class OVERVIEW,ARCH,WORKFLOW,TRAINING,DEPLOYMENT,DATAFLOW category
class ARCH_DIAG,WORKFLOW_DIAG,TRAINING_DIAG,DEPLOYMENT_DIAG,DATAFLOW_DIAG diagram
class INTERFACE,TRAIN_SCRIPTS,DEPLOY_SCRIPT,PUSH_SCRIPT component
class JSONL,HFDATA,SPACES data
</div>
</div>
<div id="architecture" class="diagram-container">
<div class="diagram-title">System Architecture</div>
<div class="diagram-description">
High-level architecture showing the main components and their relationships in the Voxtral ASR Fine-tuning application.
</div>
<div class="mermaid">
graph TB
subgraph "User Interface"
UI["Gradio Web Interface<br/>interface.py"]
REC["Audio Recording<br/>Microphone Input"]
UP["File Upload<br/>WAV/FLAC files"]
end
subgraph "Data Processing"
DP["Data Processing<br/>Audio resampling<br/>JSONL creation"]
DS["Dataset Management<br/>NVIDIA Granary<br/>Local datasets"]
end
subgraph "Training Pipeline"
TF["Full Fine-tuning<br/>scripts/train.py"]
TL["LoRA Fine-tuning<br/>scripts/train_lora.py"]
TI["Trackio Integration<br/>Experiment Tracking"]
end
subgraph "Model Management"
MM["Model Management<br/>Hugging Face Hub<br/>Local storage"]
MC["Model Card Generation<br/>scripts/generate_model_card.py"]
end
subgraph "Deployment &amp; Demo"
DEP["Demo Space Deployment<br/>scripts/deploy_demo_space.py"]
HF["HF Spaces<br/>Interactive Demo"]
end
subgraph "External Services"
HFH["Hugging Face Hub<br/>Models & Datasets"]
GRAN["NVIDIA Granary<br/>Multilingual ASR Dataset"]
TRACK["Trackio Spaces<br/>Experiment Tracking"]
end
UI --> DP
REC --> DP
UP --> DP
DP --> DS
DS --> TF
DS --> TL
TF --> TI
TL --> TI
TF --> MM
TL --> MM
MM --> MC
MM --> DEP
DEP --> HF
DS -.-> HFH
MM -.-> HFH
TI -.-> TRACK
DS -.-> GRAN
classDef interface fill:#e1f5fe,stroke:#01579b,stroke-width:2px
classDef processing fill:#f3e5f5,stroke:#4a148c,stroke-width:2px
classDef training fill:#e8f5e8,stroke:#1b5e20,stroke-width:2px
classDef management fill:#fff3e0,stroke:#e65100,stroke-width:2px
classDef deployment fill:#fce4ec,stroke:#880e4f,stroke-width:2px
classDef external fill:#f5f5f5,stroke:#424242,stroke-width:2px
class UI,REC,UP interface
class DP,DS processing
class TF,TL,TI training
class MM,MC management
class DEP,HF deployment
class HFH,GRAN,TRACK external
</div>
</div>
<div id="interface" class="diagram-container">
<div class="diagram-title">Interface Workflow</div>
<div class="diagram-description">
Complete user journey through the Voxtral ASR Fine-tuning interface, from language selection to demo deployment.
</div>
<div class="mermaid">
flowchart TD
START(["User Opens Interface"]) --> LANG["Language Selection<br/>Choose from 25+ languages"]
LANG --> PHRASES["Load Phrases<br/>From NVIDIA Granary"]
PHRASES --> RECORD["Recording Interface<br/>Display phrases + audio recording"]
RECORD --> |User Records| PROCESS_REC["Process Recordings<br/>Save WAV files + transcripts"]
RECORD --> |Upload Files| PROCESS_UPLOAD["Process Uploads<br/>Handle existing files + transcripts"]
PROCESS_REC --> JSONL["Create JSONL Dataset<br/>{'audio_path': '...', 'text': '...'}"]
PROCESS_UPLOAD --> JSONL
JSONL --> CONFIG["Training Configuration<br/>Model, LoRA/full, hyperparameters"]
CONFIG --> TRAIN["Training Process<br/>Execute train.py or train_lora.py"]
TRAIN --> PUSH["Push to Hub<br/>Model + metadata to HF Hub"]
TRAIN --> CARD["Generate Model Card<br/>Automated documentation"]
PUSH --> DEPLOY["Deploy Demo Space<br/>Interactive demo on HF Spaces"]
DEPLOY --> END(["Demo Ready<br/>Interactive ASR Demo"])
PUSH -.-> END
CARD -.-> END
classDef start fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
classDef process fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
classDef decision fill:#fff3e0,stroke:#f57c00,stroke-width:2px
classDef terminal fill:#e8f5e8,stroke:#388e3c,stroke-width:3px
class START start
class END terminal
class LANG,PHRASES,RECORD,PROCESS_REC,PROCESS_UPLOAD,JSONL,CONFIG,TRAIN,PUSH,CARD,DEPLOY process
</div>
</div>
<div id="training" class="diagram-container">
<div class="diagram-title">Training Pipeline</div>
<div class="diagram-description">
Detailed training pipeline showing how data flows through training scripts and supporting infrastructure.
</div>
<div class="mermaid">
graph TB
subgraph "Data Sources"
JSONL["JSONL Dataset<br/>{'audio_path': '...', 'text': '...'}"]
GRANARY["NVIDIA Granary Dataset<br/>Multilingual ASR Data"]
HFDATA["HF Hub Datasets<br/>Community Datasets"]
end
subgraph "Data Processing"
LOADER["Dataset Loader<br/>_load_jsonl_dataset()"]
CASTER["Audio Casting<br/>16kHz resampling"]
COLLATOR["VoxtralDataCollator<br/>Audio + Text Processing"]
end
subgraph "Training Scripts"
TRAIN_FULL["Full Fine-tuning<br/>scripts/train.py"]
TRAIN_LORA["LoRA Fine-tuning<br/>scripts/train_lora.py"]
subgraph "Training Components"
MODEL_INIT["Model Initialization<br/>VoxtralForConditionalGeneration"]
LORA_CONFIG["LoRA Configuration<br/>LoraConfig + get_peft_model"]
PROCESSOR_INIT["Processor Initialization<br/>VoxtralProcessor"]
end
end
subgraph "Training Infrastructure"
TRACKIO_INIT["Trackio Integration<br/>Experiment Tracking"]
HF_TRAINER["Hugging Face Trainer<br/>TrainingArguments + Trainer"]
TORCH_DEVICE["Torch Device Setup<br/>GPU/CPU Detection"]
end
subgraph "Training Process"
FORWARD_PASS["Forward Pass<br/>Audio Processing + Generation"]
LOSS_CALC["Loss Calculation<br/>Masked Language Modeling"]
BACKWARD_PASS["Backward Pass<br/>Gradient Computation"]
OPTIMIZER_STEP["Optimizer Step<br/>Parameter Updates"]
LOGGING["Metrics Logging<br/>Loss, Perplexity, etc."]
end
subgraph "Model Management"
CHECKPOINT_SAVING["Checkpoint Saving<br/>Model snapshots"]
MODEL_SAVING["Final Model Saving<br/>Processor + Model"]
LOCAL_STORAGE["Local Storage<br/>outputs/ directory"]
end
LOADER --> CASTER
CASTER --> COLLATOR
COLLATOR --> TRAIN_FULL
COLLATOR --> TRAIN_LORA
TRAIN_FULL --> MODEL_INIT
TRAIN_LORA --> MODEL_INIT
TRAIN_LORA --> LORA_CONFIG
MODEL_INIT --> PROCESSOR_INIT
LORA_CONFIG --> PROCESSOR_INIT
PROCESSOR_INIT --> TRACKIO_INIT
PROCESSOR_INIT --> HF_TRAINER
PROCESSOR_INIT --> TORCH_DEVICE
TRACKIO_INIT --> HF_TRAINER
TORCH_DEVICE --> HF_TRAINER
HF_TRAINER --> FORWARD_PASS
FORWARD_PASS --> LOSS_CALC
LOSS_CALC --> BACKWARD_PASS
BACKWARD_PASS --> OPTIMIZER_STEP
OPTIMIZER_STEP --> LOGGING
LOGGING --> CHECKPOINT_SAVING
LOGGING --> TRACKIO_INIT
HF_TRAINER --> MODEL_SAVING
MODEL_SAVING --> LOCAL_STORAGE
JSONL --> LOADER
GRANARY --> LOADER
HFDATA --> LOADER
classDef input fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
classDef processing fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
classDef training fill:#e8f5e8,stroke:#388e3c,stroke-width:2px
classDef infrastructure fill:#fff3e0,stroke:#f57c00,stroke-width:2px
classDef execution fill:#fce4ec,stroke:#c2185b,stroke-width:2px
classDef output fill:#f5f5f5,stroke:#424242,stroke-width:2px
class JSONL,GRANARY,HFDATA input
class LOADER,CASTER,COLLATOR processing
class TRAIN_FULL,TRAIN_LORA,MODEL_INIT,LORA_CONFIG,PROCESSOR_INIT training
class TRACKIO_INIT,HF_TRAINER,TORCH_DEVICE infrastructure
class FORWARD_PASS,LOSS_CALC,BACKWARD_PASS,OPTIMIZER_STEP,LOGGING execution
class CHECKPOINT_SAVING,MODEL_SAVING,LOCAL_STORAGE output
</div>
</div>
<div id="deployment" class="diagram-container">
<div class="diagram-title">Deployment Pipeline</div>
<div class="diagram-description">
Model publishing and demo deployment process from trained model to live interactive demo.
</div>
<div class="mermaid">
graph TB
subgraph "Inputs"
TRAINED_MODEL["Trained Model<br/>Local directory"]
TRAINING_CONFIG["Training Config<br/>JSON/YAML"]
TRAINING_RESULTS["Training Results<br/>Metrics & logs"]
MODEL_METADATA["Model Metadata<br/>Name, description, etc."]
end
subgraph "Model Publishing"
PUSH_SCRIPT["push_to_huggingface.py<br/>Model Publisher"]
subgraph "Publishing Steps"
REPO_CREATION["Repository Creation<br/>HF Hub API"]
FILE_UPLOAD["File Upload<br/>Model files to HF"]
METADATA_UPLOAD["Metadata Upload<br/>Config & results"]
end
end
subgraph "Model Card Generation"
CARD_SCRIPT["generate_model_card.py<br/>Card Generator"]
subgraph "Card Components"
TEMPLATE_LOAD["Template Loading<br/>model_card.md"]
VARIABLE_REPLACEMENT["Variable Replacement<br/>Config injection"]
CONDITIONAL_PROCESSING["Conditional Sections<br/>Quantized models, etc."]
end
end
subgraph "Demo Space Deployment"
DEPLOY_SCRIPT["deploy_demo_space.py<br/>Space Deployer"]
subgraph "Space Setup"
SPACE_CREATION["Space Repository<br/>Create HF Space"]
TEMPLATE_COPY["Template Copying<br/>demo_voxtral/ files"]
ENV_INJECTION["Environment Setup<br/>Model config injection"]
SECRET_SETUP["Secret Configuration<br/>HF_TOKEN, model vars"]
end
end
subgraph "Space Building"
BUILD_TRIGGER[Build Trigger<br/>Automatic build start]
DEPENDENCY_INSTALL[Dependency Installation<br/>requirements.txt]
MODEL_DOWNLOAD[Model Download<br/>From HF Hub]
APP_INITIALIZATION[App Initialization<br/>Gradio app setup]
end
subgraph "Live Demo Space"
GRADIO_INTERFACE[Gradio Interface<br/>Interactive demo]
MODEL_INFERENCE[Model Inference<br/>Real-time ASR]
USER_INTERACTION[User Interaction<br/>Audio upload/playback]
end
subgraph "External Services"
HF_HUB[Hugging Face Hub<br/>Model & Space hosting]
HF_SPACES[HF Spaces Platform<br/>Demo hosting]
end
TRAINED_MODEL --> PUSH_SCRIPT
TRAINING_CONFIG --> PUSH_SCRIPT
TRAINING_RESULTS --> PUSH_SCRIPT
MODEL_METADATA --> PUSH_SCRIPT
PUSH_SCRIPT --> REPO_CREATION
REPO_CREATION --> FILE_UPLOAD
FILE_UPLOAD --> METADATA_UPLOAD
METADATA_UPLOAD --> CARD_SCRIPT
TRAINING_CONFIG --> CARD_SCRIPT
TRAINING_RESULTS --> CARD_SCRIPT
CARD_SCRIPT --> TEMPLATE_LOAD
TEMPLATE_LOAD --> VARIABLE_REPLACEMENT
VARIABLE_REPLACEMENT --> CONDITIONAL_PROCESSING
CONDITIONAL_PROCESSING --> DEPLOY_SCRIPT
METADATA_UPLOAD --> DEPLOY_SCRIPT
DEPLOY_SCRIPT --> SPACE_CREATION
SPACE_CREATION --> TEMPLATE_COPY
TEMPLATE_COPY --> ENV_INJECTION
ENV_INJECTION --> SECRET_SETUP
SECRET_SETUP --> BUILD_TRIGGER
BUILD_TRIGGER --> DEPENDENCY_INSTALL
DEPENDENCY_INSTALL --> MODEL_DOWNLOAD
MODEL_DOWNLOAD --> APP_INITIALIZATION
APP_INITIALIZATION --> GRADIO_INTERFACE
GRADIO_INTERFACE --> MODEL_INFERENCE
MODEL_INFERENCE --> USER_INTERACTION
HF_HUB --> MODEL_DOWNLOAD
HF_SPACES --> GRADIO_INTERFACE
classDef input fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
classDef publishing fill:#e8f5e8,stroke:#388e3c,stroke-width:2px
classDef generation fill:#fff3e0,stroke:#f57c00,stroke-width:2px
classDef deployment fill:#fce4ec,stroke:#c2185b,stroke-width:2px
classDef building fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
classDef demo fill:#e1f5fe,stroke:#0277bd,stroke-width:2px
classDef external fill:#f5f5f5,stroke:#424242,stroke-width:2px
class TRAINED_MODEL,TRAINING_CONFIG,TRAINING_RESULTS,MODEL_METADATA input
class PUSH_SCRIPT,REPO_CREATION,FILE_UPLOAD,METADATA_UPLOAD publishing
class CARD_SCRIPT,TEMPLATE_LOAD,VARIABLE_REPLACEMENT,CONDITIONAL_PROCESSING generation
class DEPLOY_SCRIPT,SPACE_CREATION,TEMPLATE_COPY,ENV_INJECTION,SECRET_SETUP deployment
class BUILD_TRIGGER,DEPENDENCY_INSTALL,MODEL_DOWNLOAD,APP_INITIALIZATION building
class GRADIO_INTERFACE,MODEL_INFERENCE,USER_INTERACTION demo
class HF_HUB,HF_SPACES external
</div>
</div>
<div id="dataflow" class="diagram-container">
<div class="diagram-title">Data Flow</div>
<div class="diagram-description">
Complete data journey through the Voxtral ASR Fine-tuning application from user input to deployed demo.
</div>
<div class="mermaid">
flowchart TD
subgraph "User Input"
MIC["Microphone Recording<br/>Raw audio + timestamps"]
FILE["File Upload<br/>WAV/FLAC files"]
TEXT["Manual Transcripts<br/>Text input"]
LANG["Language Selection<br/>25+ languages"]
end
subgraph "Data Processing"
AUDIO_PROC["Audio Processing<br/>Resampling to 16kHz<br/>Format conversion"]
TEXT_PROC["Text Processing<br/>Transcript validation<br/>Cleaning & formatting"]
JSONL_CONV["JSONL Conversion<br/>{'audio_path': '...', 'text': '...'}"]
end
subgraph "Dataset Storage"
LOCAL_DS["Local Dataset<br/>datasets/voxtral_user/<br/>data.jsonl + wavs/"]
HF_DS["HF Hub Dataset<br/>username/dataset-name<br/>Public sharing"]
end
subgraph "Training Data Pipeline"
DS_LOADER["Dataset Loader<br/>_load_jsonl_dataset()<br/>or load_dataset()"]
AUDIO_CAST["Audio Casting<br/>Audio(sampling_rate=16000)"]
TRAIN_SPLIT["Train Split<br/>train_dataset"]
EVAL_SPLIT["Eval Split<br/>eval_dataset"]
end
subgraph "Model Training"
COLLATOR["VoxtralDataCollator<br/>Audio + Text batching<br/>Prompt construction"]
FORWARD["Forward Pass<br/>Audio β†’ Features β†’ Text"]
LOSS["Loss Calculation<br/>Masked LM loss"]
BACKWARD["Backward Pass<br/>Gradient computation"]
OPTIMIZE["Parameter Updates<br/>LoRA or full fine-tuning"]
end
subgraph "Training Outputs"
MODEL_FILES["Model Files<br/>model.safetensors<br/>config.json<br/>tokenizer.json"]
TRAINING_LOGS["Training Logs<br/>train_results.json<br/>training_config.json<br/>loss curves"]
CHECKPOINTS["Checkpoints<br/>Intermediate models<br/>best model tracking"]
end
subgraph "Publishing Pipeline"
HF_REPO["HF Repository<br/>username/model-name<br/>Model hosting"]
MODEL_CARD["Model Card<br/>README.md<br/>Training details<br/>Usage examples"]
METADATA["Training Metadata<br/>Config + results<br/>Performance metrics"]
end
subgraph "Demo Deployment"
SPACE_REPO["HF Space Repository<br/>username/model-name-demo<br/>Demo hosting"]
DEMO_APP["Demo Application<br/>Gradio interface<br/>Real-time inference"]
ENV_VARS["Environment Config<br/>HF_MODEL_ID<br/>MODEL_NAME<br/>secrets"]
end
MIC --> AUDIO_PROC
FILE --> AUDIO_PROC
TEXT --> TEXT_PROC
LANG --> TEXT_PROC
AUDIO_PROC --> JSONL_CONV
TEXT_PROC --> JSONL_CONV
JSONL_CONV --> LOCAL_DS
LOCAL_DS --> HF_DS
LOCAL_DS --> DS_LOADER
HF_DS --> DS_LOADER
DS_LOADER --> AUDIO_CAST
AUDIO_CAST --> TRAIN_SPLIT
AUDIO_CAST --> EVAL_SPLIT
TRAIN_SPLIT --> COLLATOR
EVAL_SPLIT --> COLLATOR
COLLATOR --> FORWARD
FORWARD --> LOSS
LOSS --> BACKWARD
BACKWARD --> OPTIMIZE
OPTIMIZE --> MODEL_FILES
OPTIMIZE --> TRAINING_LOGS
OPTIMIZE --> CHECKPOINTS
MODEL_FILES --> HF_REPO
TRAINING_LOGS --> HF_REPO
CHECKPOINTS --> HF_REPO
HF_REPO --> MODEL_CARD
TRAINING_LOGS --> MODEL_CARD
MODEL_CARD --> SPACE_REPO
HF_REPO --> SPACE_REPO
ENV_VARS --> SPACE_REPO
SPACE_REPO --> DEMO_APP
classDef input fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
classDef processing fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
classDef storage fill:#fff3e0,stroke:#f57c00,stroke-width:2px
classDef training fill:#e8f5e8,stroke:#388e3c,stroke-width:2px
classDef output fill:#fce4ec,stroke:#c2185b,stroke-width:2px
classDef publishing fill:#e1f5fe,stroke:#0277bd,stroke-width:2px
classDef deployment fill:#f5f5f5,stroke:#424242,stroke-width:2px
class MIC,FILE,TEXT,LANG input
class AUDIO_PROC,TEXT_PROC,JSONL_CONV processing
class LOCAL_DS,HF_DS storage
class DS_LOADER,AUDIO_CAST,TRAIN_SPLIT,EVAL_SPLIT,COLLATOR,FORWARD,LOSS,BACKWARD,OPTIMIZE training
class MODEL_FILES,TRAINING_LOGS,CHECKPOINTS output
class HF_REPO,MODEL_CARD,METADATA publishing
class SPACE_REPO,DEMO_APP,ENV_VARS deployment
</div>
</div>
<script>
// Show/hide the raw Mermaid source for the diagram container with the given
// id, keeping the trigger button's label and aria-expanded state in sync.
// FIX: the label previously always read "Show Mermaid Code" even while the
// code was visible, and a missing code block caused a TypeError.
function toggleCode(diagramId) {
const container = document.getElementById(diagramId);
const codeBlock = container ? container.querySelector('.mermaid-code') : null;
if (!codeBlock) return; // guard: container may not have a captured code block
const willShow = codeBlock.style.display === 'none' || codeBlock.style.display === '';
codeBlock.style.display = willShow ? 'block' : 'none';
const toggleBtn = container.querySelector('.code-toggle');
if (toggleBtn) {
toggleBtn.textContent = willShow ? '🔍 Hide Mermaid Code' : '🔍 Show Mermaid Code';
toggleBtn.setAttribute('aria-expanded', String(willShow));
}
}
// For each diagram container, capture the raw Mermaid text into a hidden
// <pre> and insert a toggle button above the diagram. This runs on
// DOMContentLoaded, before Mermaid's startOnLoad pass (which fires on the
// window "load" event), so textContent is still source text, not rendered SVG.
document.addEventListener('DOMContentLoaded', function() {
document.querySelectorAll('.diagram-container').forEach((diagram) => {
const diagramId = diagram.id;
const mermaidDiv = diagram.querySelector('.mermaid');
if (!mermaidDiv) return;
const toggleBtn = document.createElement('button');
toggleBtn.type = 'button'; // explicit: never a form-submit button
toggleBtn.className = 'code-toggle';
toggleBtn.textContent = '🔍 Show Mermaid Code';
toggleBtn.setAttribute('aria-expanded', 'false');
toggleBtn.onclick = () => toggleCode(diagramId);
const codeBlock = document.createElement('pre');
codeBlock.className = 'mermaid-code';
codeBlock.textContent = mermaidDiv.textContent.trim();
toggleBtn.onclick = () => toggleCode(diagramId);
mermaidDiv.parentNode.insertBefore(toggleBtn, mermaidDiv);
mermaidDiv.parentNode.insertBefore(codeBlock, mermaidDiv.nextSibling);
});
});
// Print the page; the @media print CSS hides the nav and toggle buttons.
// NOTE(review): not referenced by any markup — kept for console/manual use.
function printDiagrams() {
window.print();
}
</script>
</body>
</html>