Spaces:

Tonic
/

VoxFactory

Running

VoxFactory / docs /diagrams.html

Joseph Pollack

adds docs

a3a3978 unverified 9 days ago

25.7 kB

	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>Voxtral ASR Fine-tuning - Architecture Diagrams</title>
	<script type="module">
	import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@10.6.1/dist/mermaid.esm.min.mjs';

	// Colour overrides applied on top of Mermaid's customisable 'base' theme.
	const palette = {
	  primaryColor: '#e3f2fd',
	  primaryTextColor: '#1976d2',
	  primaryBorderColor: '#01579b',
	  lineColor: '#424242',
	  secondaryColor: '#fff3e0',
	  tertiaryColor: '#fce4ec',
	  background: '#ffffff',
	  mainBkg: '#ffffff',
	  secondBkg: '#f5f5f5',
	  textColor: '#333333'
	};

	// Flowcharts: scale to the container, render labels as HTML, smooth the edges.
	const flowchartOptions = {
	  useMaxWidth: true,
	  htmlLabels: true,
	  curve: 'basis'
	};

	// Sequence diagrams: scale to the container as well.
	const sequenceOptions = {
	  useMaxWidth: true
	};

	// Single configuration call; startOnLoad asks Mermaid to render the
	// diagrams itself once the page has loaded.
	mermaid.initialize({
	  startOnLoad: true,
	  theme: 'base',
	  themeVariables: palette,
	  flowchart: flowchartOptions,
	  sequence: sequenceOptions
	});
	</script>
	<style>
	/* ---------- Page shell ---------- */

	/* Single centred column, capped at 1200px, on a light grey backdrop. */
	body {
	  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
	  line-height: 1.6;
	  color: #333;
	  max-width: 1200px;
	  margin: 0 auto;
	  padding: 20px;
	  background: #f8f9fa;
	}

	/* ---------- Cards ---------- */

	/* Centred banner card at the top of the page. */
	.header {
	  text-align: center;
	  margin-bottom: 40px;
	  padding: 20px;
	  background: white;
	  border-radius: 8px;
	  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
	}

	/* One white card per diagram. */
	.diagram-container {
	  background: white;
	  margin: 20px 0;
	  padding: 20px;
	  border-radius: 8px;
	  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
	}

	/* Card heading with a pale blue underline. */
	.diagram-title {
	  font-size: 1.5em;
	  font-weight: bold;
	  margin-bottom: 15px;
	  color: #1976d2;
	  border-bottom: 2px solid #e3f2fd;
	  padding-bottom: 10px;
	}

	/* Short italic blurb shown under the card heading. */
	.diagram-description {
	  margin-bottom: 20px;
	  color: #666;
	  font-style: italic;
	}

	/* ---------- Quick navigation ---------- */

	/* Panel pinned to the top-right corner of the viewport. */
	.navigation {
	  position: fixed;
	  top: 20px;
	  right: 20px;
	  background: white;
	  padding: 15px;
	  border-radius: 8px;
	  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
	  max-width: 200px;
	}

	/* Each link takes its own row, separated by a hairline. */
	.nav-link {
	  display: block;
	  padding: 8px 0;
	  color: #1976d2;
	  text-decoration: none;
	  border-bottom: 1px solid #eee;
	}

	.nav-link:hover {
	  color: #01579b;
	  text-decoration: underline;
	}

	/* No separator under the final link. */
	.nav-link:last-child {
	  border-bottom: none;
	}

	/*
	 * The pinned panel (up to 230px wide plus its 20px offset) only fits beside
	 * the 1240px-wide page column when the viewport is wide enough to leave that
	 * much room on the right. Below that width the panel would sit on top of the
	 * cards, so it is returned to normal flow and spans the column instead.
	 */
	@media (max-width: 1760px) {
	  .navigation {
	    position: static;
	    max-width: none;
	    margin: 20px 0;
	  }
	}

	/* ---------- Source listing controls ---------- */

	/* Button that reveals or hides the Mermaid source of a diagram. */
	.code-toggle {
	  background: #f5f5f5;
	  border: 1px solid #ddd;
	  padding: 10px;
	  margin: 10px 0;
	  border-radius: 4px;
	  cursor: pointer;
	  font-size: 0.9em;
	}

	/* Source listing; hidden by default and shown by script via inline style. */
	.mermaid-code {
	  display: none;
	  background: #f8f9fa;
	  border: 1px solid #dee2e6;
	  border-radius: 4px;
	  padding: 15px;
	  margin: 10px 0;
	  font-family: 'Courier New', monospace;
	  font-size: 0.85em;
	  white-space: pre-wrap;
	  overflow-x: auto;
	}

	/* Solid blue action button. */
	.download-btn {
	  background: #1976d2;
	  color: white;
	  border: none;
	  padding: 8px 16px;
	  border-radius: 4px;
	  cursor: pointer;
	  font-size: 0.9em;
	  margin: 10px 5px 10px 0;
	}

	.download-btn:hover {
	  background: #01579b;
	}

	/* ---------- Print ---------- */

	/* Drop interactive chrome on paper and keep each card on a single page. */
	@media print {
	  .navigation, .code-toggle, .download-btn {
	    display: none;
	  }
	  .diagram-container {
	    break-inside: avoid;
	    margin: 10px 0;
	  }
	}
	</style>
	</head>
	<body>
	<!-- Page banner: title, subtitle and a one-line description. Uses the
	     header landmark element; the "header" class still carries the styling. -->
	<header class="header">
	<h1>🎯 Voxtral ASR Fine-tuning</h1>
	<h2>Architecture &amp; Workflow Diagrams</h2>
	<p>Interactive documentation with Mermaid diagrams</p>
	</header>

	<!-- In-page table of contents. Each link targets the id of one diagram card.
	     The aria-label gives the navigation landmark an accessible name. -->
	<nav class="navigation" aria-label="Quick navigation">
	<strong>Quick Navigation</strong>
	<a href="#overview" class="nav-link">Overview</a>
	<a href="#architecture" class="nav-link">Architecture</a>
	<a href="#interface" class="nav-link">Interface Workflow</a>
	<a href="#training" class="nav-link">Training Pipeline</a>
	<a href="#deployment" class="nav-link">Deployment Pipeline</a>
	<a href="#dataflow" class="nav-link">Data Flow</a>
	</nav>

	<!-- Overview card (target of the "#overview" navigation link).
	     The text inside the "mermaid" element is Mermaid flowchart source for a
	     top-down graph: an entry node fans out to five documentation topics, each
	     topic points at a node describing its diagram, and two subgraphs list the
	     core scripts and key data formats that link back to those topics.
	     The classDef/class lines at the end only assign colours to node groups. -->
	<div id="overview" class="diagram-container">
	<div class="diagram-title">📋 Documentation Overview</div>
	<div class="diagram-description">
	High-level overview of the Voxtral ASR Fine-tuning application and its documentation structure.
	</div>
	<div class="mermaid">
	graph TD
	START(["Voxtral ASR Fine-tuning App"]) --> OVERVIEW{Choose Documentation}

	OVERVIEW --> ARCH["Architecture Overview"]
	OVERVIEW --> WORKFLOW["Interface Workflow"]
	OVERVIEW --> TRAINING["Training Pipeline"]
	OVERVIEW --> DEPLOYMENT["Deployment Pipeline"]
	OVERVIEW --> DATAFLOW["Data Flow"]

	ARCH --> ARCH_DIAG["High-level Architecture<br/>System Components & Layers"]
	WORKFLOW --> WORKFLOW_DIAG["User Journey<br/>Recording → Training → Demo"]
	TRAINING --> TRAINING_DIAG["Training Scripts<br/>Data → Model → Results"]
	DEPLOYMENT --> DEPLOYMENT_DIAG["Publishing & Demo<br/>Model → Hub → Space"]
	DATAFLOW --> DATAFLOW_DIAG["Complete Data Journey<br/>Input → Processing → Output"]

	subgraph "Core Components"
	INTERFACE["interface.py<br/>Gradio Web UI"]
	TRAIN_SCRIPTS["scripts/train*.py<br/>Training Scripts"]
	DEPLOY_SCRIPT["scripts/deploy_demo_space.py<br/>Demo Deployment"]
	PUSH_SCRIPT["scripts/push_to_huggingface.py<br/>Model Publishing"]
	end

	subgraph "Key Data Formats"
	JSONL["JSONL Dataset<br/>{'audio_path': '...', 'text': '...'}"]
	HFDATA["HF Hub Models<br/>username/model-name"]
	SPACES["HF Spaces<br/>Interactive Demos"]
	end

	INTERFACE --> WORKFLOW
	TRAIN_SCRIPTS --> TRAINING
	DEPLOY_SCRIPT --> DEPLOYMENT
	PUSH_SCRIPT --> DEPLOYMENT

	JSONL --> DATAFLOW
	HFDATA --> DEPLOYMENT
	SPACES --> DEPLOYMENT

	classDef entry fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
	classDef category fill:#fff3e0,stroke:#f57c00,stroke-width:2px
	classDef diagram fill:#e8f5e8,stroke:#388e3c,stroke-width:2px
	classDef component fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
	classDef data fill:#e1f5fe,stroke:#0277bd,stroke-width:2px

	class START entry
	class OVERVIEW,ARCH,WORKFLOW,TRAINING,DEPLOYMENT,DATAFLOW category
	class ARCH_DIAG,WORKFLOW_DIAG,TRAINING_DIAG,DEPLOYMENT_DIAG,DATAFLOW_DIAG diagram
	class INTERFACE,TRAIN_SCRIPTS,DEPLOY_SCRIPT,PUSH_SCRIPT component
	class JSONL,HFDATA,SPACES data
	</div>
	</div>

	<!-- Architecture card (target of the "#architecture" navigation link).
	     Mermaid top-to-bottom graph with six subgraphs: User Interface, Data
	     Processing, Training Pipeline, Model Management, Deployment & Demo and
	     External Services. Solid arrows link the first five groups; every dotted
	     arrow ends on a node of the External Services subgraph.
	     The classDef/class lines at the end assign one colour per subgraph. -->
	<div id="architecture" class="diagram-container">
	<div class="diagram-title">System Architecture</div>
	<div class="diagram-description">
	High-level architecture showing the main components and their relationships in the Voxtral ASR Fine-tuning application.
	</div>
	<div class="mermaid">
	graph TB
	subgraph "User Interface"
	UI["Gradio Web Interface<br/>interface.py"]
	REC["Audio Recording<br/>Microphone Input"]
	UP["File Upload<br/>WAV/FLAC files"]
	end

	subgraph "Data Processing"
	DP["Data Processing<br/>Audio resampling<br/>JSONL creation"]
	DS["Dataset Management<br/>NVIDIA Granary<br/>Local datasets"]
	end

	subgraph "Training Pipeline"
	TF["Full Fine-tuning<br/>scripts/train.py"]
	TL["LoRA Fine-tuning<br/>scripts/train_lora.py"]
	TI["Trackio Integration<br/>Experiment Tracking"]
	end

	subgraph "Model Management"
	MM["Model Management<br/>Hugging Face Hub<br/>Local storage"]
	MC["Model Card Generation<br/>scripts/generate_model_card.py"]
	end

	subgraph "Deployment & Demo"
	DEP["Demo Space Deployment<br/>scripts/deploy_demo_space.py"]
	HF["HF Spaces<br/>Interactive Demo"]
	end

	subgraph "External Services"
	HFH["Hugging Face Hub<br/>Models & Datasets"]
	GRAN["NVIDIA Granary<br/>Multilingual ASR Dataset"]
	TRACK["Trackio Spaces<br/>Experiment Tracking"]
	end

	UI --> DP
	REC --> DP
	UP --> DP
	DP --> DS

	DS --> TF
	DS --> TL
	TF --> TI
	TL --> TI

	TF --> MM
	TL --> MM
	MM --> MC

	MM --> DEP
	DEP --> HF

	DS -.-> HFH
	MM -.-> HFH
	TI -.-> TRACK
	DS -.-> GRAN

	classDef interface fill:#e1f5fe,stroke:#01579b,stroke-width:2px
	classDef processing fill:#f3e5f5,stroke:#4a148c,stroke-width:2px
	classDef training fill:#e8f5e8,stroke:#1b5e20,stroke-width:2px
	classDef management fill:#fff3e0,stroke:#e65100,stroke-width:2px
	classDef deployment fill:#fce4ec,stroke:#880e4f,stroke-width:2px
	classDef external fill:#f5f5f5,stroke:#424242,stroke-width:2px

	class UI,REC,UP interface
	class DP,DS processing
	class TF,TL,TI training
	class MM,MC management
	class DEP,HF deployment
	class HFH,GRAN,TRACK external
	</div>
	</div>

	<!-- Interface workflow card (target of the "#interface" navigation link).
	     Mermaid top-down flowchart of the user journey. The RECORD node has two
	     labelled outgoing edges ("User Records" and "Upload Files") that merge
	     again at the JSONL node; dotted edges from PUSH and CARD also reach END.
	     Edge labels use Mermaid's pipe syntax: an arrow, then the label between
	     two pipe characters, then the target node.
	     The classDef/class lines at the end only assign colours. -->
	<div id="interface" class="diagram-container">
	<div class="diagram-title">Interface Workflow</div>
	<div class="diagram-description">
	Complete user journey through the Voxtral ASR Fine-tuning interface, from language selection to demo deployment.
	</div>
	<div class="mermaid">
	flowchart TD
	START(["User Opens Interface"]) --> LANG["Language Selection<br/>Choose from 25+ languages"]
	LANG --> PHRASES["Load Phrases<br/>From NVIDIA Granary"]
	PHRASES --> RECORD["Recording Interface<br/>Display phrases + audio recording"]

	RECORD -->|User Records| PROCESS_REC["Process Recordings<br/>Save WAV files + transcripts"]
	RECORD -->|Upload Files| PROCESS_UPLOAD["Process Uploads<br/>Handle existing files + transcripts"]

	PROCESS_REC --> JSONL["Create JSONL Dataset<br/>{'audio_path': '...', 'text': '...'}"]
	PROCESS_UPLOAD --> JSONL

	JSONL --> CONFIG["Training Configuration<br/>Model, LoRA/full, hyperparameters"]
	CONFIG --> TRAIN["Training Process<br/>Execute train.py or train_lora.py"]

	TRAIN --> PUSH["Push to Hub<br/>Model + metadata to HF Hub"]
	TRAIN --> CARD["Generate Model Card<br/>Automated documentation"]
	PUSH --> DEPLOY["Deploy Demo Space<br/>Interactive demo on HF Spaces"]

	DEPLOY --> END(["Demo Ready<br/>Interactive ASR Demo"])

	PUSH -.-> END
	CARD -.-> END

	classDef start fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
	classDef process fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
	classDef decision fill:#fff3e0,stroke:#f57c00,stroke-width:2px
	classDef terminal fill:#e8f5e8,stroke:#388e3c,stroke-width:3px

	class START start
	class END terminal
	class LANG,PHRASES,RECORD,PROCESS_REC,PROCESS_UPLOAD,JSONL,CONFIG,TRAIN,PUSH,CARD,DEPLOY process
	</div>
	</div>

	<!-- Training pipeline card (target of the "#training" navigation link).
	     Mermaid top-to-bottom graph with six subgraphs: Data Sources, Data
	     Processing, Training Scripts (which nests a Training Components
	     subgraph), Training Infrastructure, Training Process and Model
	     Management. The edge from LOGGING to TRACKIO_INIT points back up into the
	     infrastructure group, so the graph is not a pure top-down chain.
	     The classDef/class lines at the end assign one colour per node group. -->
	<div id="training" class="diagram-container">
	<div class="diagram-title">Training Pipeline</div>
	<div class="diagram-description">
	Detailed training pipeline showing how data flows through training scripts and supporting infrastructure.
	</div>
	<div class="mermaid">
	graph TB
	subgraph "Data Sources"
	JSONL["JSONL Dataset<br/>{'audio_path': '...', 'text': '...'}"]
	GRANARY["NVIDIA Granary Dataset<br/>Multilingual ASR Data"]
	HFDATA["HF Hub Datasets<br/>Community Datasets"]
	end

	subgraph "Data Processing"
	LOADER["Dataset Loader<br/>_load_jsonl_dataset()"]
	CASTER["Audio Casting<br/>16kHz resampling"]
	COLLATOR["VoxtralDataCollator<br/>Audio + Text Processing"]
	end

	subgraph "Training Scripts"
	TRAIN_FULL["Full Fine-tuning<br/>scripts/train.py"]
	TRAIN_LORA["LoRA Fine-tuning<br/>scripts/train_lora.py"]

	subgraph "Training Components"
	MODEL_INIT["Model Initialization<br/>VoxtralForConditionalGeneration"]
	LORA_CONFIG["LoRA Configuration<br/>LoraConfig + get_peft_model"]
	PROCESSOR_INIT["Processor Initialization<br/>VoxtralProcessor"]
	end
	end

	subgraph "Training Infrastructure"
	TRACKIO_INIT["Trackio Integration<br/>Experiment Tracking"]
	HF_TRAINER["Hugging Face Trainer<br/>TrainingArguments + Trainer"]
	TORCH_DEVICE["Torch Device Setup<br/>GPU/CPU Detection"]
	end

	subgraph "Training Process"
	FORWARD_PASS["Forward Pass<br/>Audio Processing + Generation"]
	LOSS_CALC["Loss Calculation<br/>Masked Language Modeling"]
	BACKWARD_PASS["Backward Pass<br/>Gradient Computation"]
	OPTIMIZER_STEP["Optimizer Step<br/>Parameter Updates"]
	LOGGING["Metrics Logging<br/>Loss, Perplexity, etc."]
	end

	subgraph "Model Management"
	CHECKPOINT_SAVING["Checkpoint Saving<br/>Model snapshots"]
	MODEL_SAVING["Final Model Saving<br/>Processor + Model"]
	LOCAL_STORAGE["Local Storage<br/>outputs/ directory"]
	end

	LOADER --> CASTER
	CASTER --> COLLATOR

	COLLATOR --> TRAIN_FULL
	COLLATOR --> TRAIN_LORA

	TRAIN_FULL --> MODEL_INIT
	TRAIN_LORA --> MODEL_INIT
	TRAIN_LORA --> LORA_CONFIG

	MODEL_INIT --> PROCESSOR_INIT
	LORA_CONFIG --> PROCESSOR_INIT

	PROCESSOR_INIT --> TRACKIO_INIT
	PROCESSOR_INIT --> HF_TRAINER
	PROCESSOR_INIT --> TORCH_DEVICE

	TRACKIO_INIT --> HF_TRAINER
	TORCH_DEVICE --> HF_TRAINER

	HF_TRAINER --> FORWARD_PASS
	FORWARD_PASS --> LOSS_CALC
	LOSS_CALC --> BACKWARD_PASS
	BACKWARD_PASS --> OPTIMIZER_STEP
	OPTIMIZER_STEP --> LOGGING

	LOGGING --> CHECKPOINT_SAVING
	LOGGING --> TRACKIO_INIT

	HF_TRAINER --> MODEL_SAVING
	MODEL_SAVING --> LOCAL_STORAGE

	JSONL --> LOADER
	GRANARY --> LOADER
	HFDATA --> LOADER

	classDef input fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
	classDef processing fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
	classDef training fill:#e8f5e8,stroke:#388e3c,stroke-width:2px
	classDef infrastructure fill:#fff3e0,stroke:#f57c00,stroke-width:2px
	classDef execution fill:#fce4ec,stroke:#c2185b,stroke-width:2px
	classDef output fill:#f5f5f5,stroke:#424242,stroke-width:2px

	class JSONL,GRANARY,HFDATA input
	class LOADER,CASTER,COLLATOR processing
	class TRAIN_FULL,TRAIN_LORA,MODEL_INIT,LORA_CONFIG,PROCESSOR_INIT training
	class TRACKIO_INIT,HF_TRAINER,TORCH_DEVICE infrastructure
	class FORWARD_PASS,LOSS_CALC,BACKWARD_PASS,OPTIMIZER_STEP,LOGGING execution
	class CHECKPOINT_SAVING,MODEL_SAVING,LOCAL_STORAGE output
	</div>
	</div>

	<!-- Deployment pipeline card (target of the "#deployment" navigation link).
	     Mermaid top-to-bottom graph with seven top-level subgraphs: Inputs, Model
	     Publishing, Model Card Generation, Demo Space Deployment, Space Building,
	     Live Demo Space and External Services. The three script subgraphs each
	     nest one subgraph listing that script's steps.
	     Every node label is written in double quotes, matching the other
	     diagrams on this page, because the labels contain markup and punctuation.
	     The classDef/class lines at the end assign one colour per node group. -->
	<div id="deployment" class="diagram-container">
	<div class="diagram-title">Deployment Pipeline</div>
	<div class="diagram-description">
	Model publishing and demo deployment process from trained model to live interactive demo.
	</div>
	<div class="mermaid">
	graph TB
	subgraph "Inputs"
	TRAINED_MODEL["Trained Model<br/>Local directory"]
	TRAINING_CONFIG["Training Config<br/>JSON/YAML"]
	TRAINING_RESULTS["Training Results<br/>Metrics & logs"]
	MODEL_METADATA["Model Metadata<br/>Name, description, etc."]
	end

	subgraph "Model Publishing"
	PUSH_SCRIPT["push_to_huggingface.py<br/>Model Publisher"]

	subgraph "Publishing Steps"
	REPO_CREATION["Repository Creation<br/>HF Hub API"]
	FILE_UPLOAD["File Upload<br/>Model files to HF"]
	METADATA_UPLOAD["Metadata Upload<br/>Config & results"]
	end
	end

	subgraph "Model Card Generation"
	CARD_SCRIPT["generate_model_card.py<br/>Card Generator"]

	subgraph "Card Components"
	TEMPLATE_LOAD["Template Loading<br/>model_card.md"]
	VARIABLE_REPLACEMENT["Variable Replacement<br/>Config injection"]
	CONDITIONAL_PROCESSING["Conditional Sections<br/>Quantized models, etc."]
	end
	end

	subgraph "Demo Space Deployment"
	DEPLOY_SCRIPT["deploy_demo_space.py<br/>Space Deployer"]

	subgraph "Space Setup"
	SPACE_CREATION["Space Repository<br/>Create HF Space"]
	TEMPLATE_COPY["Template Copying<br/>demo_voxtral/ files"]
	ENV_INJECTION["Environment Setup<br/>Model config injection"]
	SECRET_SETUP["Secret Configuration<br/>HF_TOKEN, model vars"]
	end
	end

	subgraph "Space Building"
	BUILD_TRIGGER["Build Trigger<br/>Automatic build start"]
	DEPENDENCY_INSTALL["Dependency Installation<br/>requirements.txt"]
	MODEL_DOWNLOAD["Model Download<br/>From HF Hub"]
	APP_INITIALIZATION["App Initialization<br/>Gradio app setup"]
	end

	subgraph "Live Demo Space"
	GRADIO_INTERFACE["Gradio Interface<br/>Interactive demo"]
	MODEL_INFERENCE["Model Inference<br/>Real-time ASR"]
	USER_INTERACTION["User Interaction<br/>Audio upload/playback"]
	end

	subgraph "External Services"
	HF_HUB["Hugging Face Hub<br/>Model & Space hosting"]
	HF_SPACES["HF Spaces Platform<br/>Demo hosting"]
	end

	TRAINED_MODEL --> PUSH_SCRIPT
	TRAINING_CONFIG --> PUSH_SCRIPT
	TRAINING_RESULTS --> PUSH_SCRIPT
	MODEL_METADATA --> PUSH_SCRIPT

	PUSH_SCRIPT --> REPO_CREATION
	REPO_CREATION --> FILE_UPLOAD
	FILE_UPLOAD --> METADATA_UPLOAD

	METADATA_UPLOAD --> CARD_SCRIPT
	TRAINING_CONFIG --> CARD_SCRIPT
	TRAINING_RESULTS --> CARD_SCRIPT

	CARD_SCRIPT --> TEMPLATE_LOAD
	TEMPLATE_LOAD --> VARIABLE_REPLACEMENT
	VARIABLE_REPLACEMENT --> CONDITIONAL_PROCESSING

	CONDITIONAL_PROCESSING --> DEPLOY_SCRIPT
	METADATA_UPLOAD --> DEPLOY_SCRIPT

	DEPLOY_SCRIPT --> SPACE_CREATION
	SPACE_CREATION --> TEMPLATE_COPY
	TEMPLATE_COPY --> ENV_INJECTION
	ENV_INJECTION --> SECRET_SETUP

	SECRET_SETUP --> BUILD_TRIGGER
	BUILD_TRIGGER --> DEPENDENCY_INSTALL
	DEPENDENCY_INSTALL --> MODEL_DOWNLOAD
	MODEL_DOWNLOAD --> APP_INITIALIZATION

	APP_INITIALIZATION --> GRADIO_INTERFACE
	GRADIO_INTERFACE --> MODEL_INFERENCE
	MODEL_INFERENCE --> USER_INTERACTION

	HF_HUB --> MODEL_DOWNLOAD
	HF_SPACES --> GRADIO_INTERFACE

	classDef input fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
	classDef publishing fill:#e8f5e8,stroke:#388e3c,stroke-width:2px
	classDef generation fill:#fff3e0,stroke:#f57c00,stroke-width:2px
	classDef deployment fill:#fce4ec,stroke:#c2185b,stroke-width:2px
	classDef building fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
	classDef demo fill:#e1f5fe,stroke:#0277bd,stroke-width:2px
	classDef external fill:#f5f5f5,stroke:#424242,stroke-width:2px

	class TRAINED_MODEL,TRAINING_CONFIG,TRAINING_RESULTS,MODEL_METADATA input
	class PUSH_SCRIPT,REPO_CREATION,FILE_UPLOAD,METADATA_UPLOAD publishing
	class CARD_SCRIPT,TEMPLATE_LOAD,VARIABLE_REPLACEMENT,CONDITIONAL_PROCESSING generation
	class DEPLOY_SCRIPT,SPACE_CREATION,TEMPLATE_COPY,ENV_INJECTION,SECRET_SETUP deployment
	class BUILD_TRIGGER,DEPENDENCY_INSTALL,MODEL_DOWNLOAD,APP_INITIALIZATION building
	class GRADIO_INTERFACE,MODEL_INFERENCE,USER_INTERACTION demo
	class HF_HUB,HF_SPACES external
	</div>
	</div>

	<!-- Data flow card (target of the "#dataflow" navigation link).
	     Mermaid top-down flowchart with eight subgraphs: User Input, Data
	     Processing, Dataset Storage, Training Data Pipeline, Model Training,
	     Training Outputs, Publishing Pipeline and Demo Deployment.
	     NOTE(review): the METADATA node is declared in the Publishing Pipeline
	     subgraph and given the "publishing" class, but no edge references it, so
	     it is drawn unconnected. Confirm whether an edge is missing.
	     The classDef/class lines at the end assign one colour per node group. -->
	<div id="dataflow" class="diagram-container">
	<div class="diagram-title">Data Flow</div>
	<div class="diagram-description">
	Complete data journey through the Voxtral ASR Fine-tuning application from user input to deployed demo.
	</div>
	<div class="mermaid">
	flowchart TD
	subgraph "User Input"
	MIC["Microphone Recording<br/>Raw audio + timestamps"]
	FILE["File Upload<br/>WAV/FLAC files"]
	TEXT["Manual Transcripts<br/>Text input"]
	LANG["Language Selection<br/>25+ languages"]
	end

	subgraph "Data Processing"
	AUDIO_PROC["Audio Processing<br/>Resampling to 16kHz<br/>Format conversion"]
	TEXT_PROC["Text Processing<br/>Transcript validation<br/>Cleaning & formatting"]
	JSONL_CONV["JSONL Conversion<br/>{'audio_path': '...', 'text': '...'}"]
	end

	subgraph "Dataset Storage"
	LOCAL_DS["Local Dataset<br/>datasets/voxtral_user/<br/>data.jsonl + wavs/"]
	HF_DS["HF Hub Dataset<br/>username/dataset-name<br/>Public sharing"]
	end

	subgraph "Training Data Pipeline"
	DS_LOADER["Dataset Loader<br/>_load_jsonl_dataset()<br/>or load_dataset()"]
	AUDIO_CAST["Audio Casting<br/>Audio(sampling_rate=16000)"]
	TRAIN_SPLIT["Train Split<br/>train_dataset"]
	EVAL_SPLIT["Eval Split<br/>eval_dataset"]
	end

	subgraph "Model Training"
	COLLATOR["VoxtralDataCollator<br/>Audio + Text batching<br/>Prompt construction"]
	FORWARD["Forward Pass<br/>Audio → Features → Text"]
	LOSS["Loss Calculation<br/>Masked LM loss"]
	BACKWARD["Backward Pass<br/>Gradient computation"]
	OPTIMIZE["Parameter Updates<br/>LoRA or full fine-tuning"]
	end

	subgraph "Training Outputs"
	MODEL_FILES["Model Files<br/>model.safetensors<br/>config.json<br/>tokenizer.json"]
	TRAINING_LOGS["Training Logs<br/>train_results.json<br/>training_config.json<br/>loss curves"]
	CHECKPOINTS["Checkpoints<br/>Intermediate models<br/>best model tracking"]
	end

	subgraph "Publishing Pipeline"
	HF_REPO["HF Repository<br/>username/model-name<br/>Model hosting"]
	MODEL_CARD["Model Card<br/>README.md<br/>Training details<br/>Usage examples"]
	METADATA["Training Metadata<br/>Config + results<br/>Performance metrics"]
	end

	subgraph "Demo Deployment"
	SPACE_REPO["HF Space Repository<br/>username/model-name-demo<br/>Demo hosting"]
	DEMO_APP["Demo Application<br/>Gradio interface<br/>Real-time inference"]
	ENV_VARS["Environment Config<br/>HF_MODEL_ID<br/>MODEL_NAME<br/>secrets"]
	end

	MIC --> AUDIO_PROC
	FILE --> AUDIO_PROC
	TEXT --> TEXT_PROC
	LANG --> TEXT_PROC

	AUDIO_PROC --> JSONL_CONV
	TEXT_PROC --> JSONL_CONV

	JSONL_CONV --> LOCAL_DS
	LOCAL_DS --> HF_DS

	LOCAL_DS --> DS_LOADER
	HF_DS --> DS_LOADER

	DS_LOADER --> AUDIO_CAST
	AUDIO_CAST --> TRAIN_SPLIT
	AUDIO_CAST --> EVAL_SPLIT

	TRAIN_SPLIT --> COLLATOR
	EVAL_SPLIT --> COLLATOR

	COLLATOR --> FORWARD
	FORWARD --> LOSS
	LOSS --> BACKWARD
	BACKWARD --> OPTIMIZE

	OPTIMIZE --> MODEL_FILES
	OPTIMIZE --> TRAINING_LOGS
	OPTIMIZE --> CHECKPOINTS

	MODEL_FILES --> HF_REPO
	TRAINING_LOGS --> HF_REPO
	CHECKPOINTS --> HF_REPO

	HF_REPO --> MODEL_CARD
	TRAINING_LOGS --> MODEL_CARD

	MODEL_CARD --> SPACE_REPO
	HF_REPO --> SPACE_REPO
	ENV_VARS --> SPACE_REPO

	SPACE_REPO --> DEMO_APP

	classDef input fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
	classDef processing fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
	classDef storage fill:#fff3e0,stroke:#f57c00,stroke-width:2px
	classDef training fill:#e8f5e8,stroke:#388e3c,stroke-width:2px
	classDef output fill:#fce4ec,stroke:#c2185b,stroke-width:2px
	classDef publishing fill:#e1f5fe,stroke:#0277bd,stroke-width:2px
	classDef deployment fill:#f5f5f5,stroke:#424242,stroke-width:2px

	class MIC,FILE,TEXT,LANG input
	class AUDIO_PROC,TEXT_PROC,JSONL_CONV processing
	class LOCAL_DS,HF_DS storage
	class DS_LOADER,AUDIO_CAST,TRAIN_SPLIT,EVAL_SPLIT,COLLATOR,FORWARD,LOSS,BACKWARD,OPTIMIZE training
	class MODEL_FILES,TRAINING_LOGS,CHECKPOINTS output
	class HF_REPO,MODEL_CARD,METADATA publishing
	class SPACE_REPO,DEMO_APP,ENV_VARS deployment
	</div>
	</div>

	<script>
	/**
	 * Toggle the visibility of the Mermaid source listing inside one diagram card.
	 *
	 * The listing is hidden by the `.mermaid-code` stylesheet rule rather than by
	 * an inline style, so an empty inline `display` value counts as hidden.
	 * Does nothing when the card or its listing cannot be found.
	 *
	 * @param {string} diagramId - id attribute of the diagram card element.
	 */
	function toggleCode(diagramId) {
	  // Look the card up by id instead of splicing the id into a selector string:
	  // an empty or unusual id would make the selector invalid and throw.
	  const container = document.getElementById(diagramId);
	  const codeBlock = container ? container.querySelector('.mermaid-code') : null;
	  if (!codeBlock) {
	    return;
	  }

	  const isHidden = codeBlock.style.display === 'none' || codeBlock.style.display === '';
	  codeBlock.style.display = isHidden ? 'block' : 'none';

	  // Keep the button wording and its expanded state in step with the listing.
	  const toggleBtn = container.querySelector('.code-toggle');
	  if (toggleBtn) {
	    toggleBtn.textContent = isHidden ? '🔍 Hide Mermaid Code' : '🔍 Show Mermaid Code';
	    toggleBtn.setAttribute('aria-expanded', String(isHidden));
	  }
	}

	// Once the document is parsed, give every diagram card a "show source" button
	// and a hidden listing holding the diagram's Mermaid source.
	// NOTE(review): this relies on the .mermaid elements still containing their
	// source text when DOMContentLoaded fires, i.e. on Mermaid rendering later.
	document.addEventListener('DOMContentLoaded', function() {
	  document.querySelectorAll('.diagram-container').forEach((diagram) => {
	    const mermaidDiv = diagram.querySelector('.mermaid');
	    if (!mermaidDiv) {
	      return;
	    }
	    const diagramId = diagram.id;

	    // Button placed above the diagram; toggleCode does the show/hide work.
	    const toggleBtn = document.createElement('button');
	    toggleBtn.type = 'button';
	    toggleBtn.className = 'code-toggle';
	    toggleBtn.textContent = '🔍 Show Mermaid Code';
	    toggleBtn.addEventListener('click', () => toggleCode(diagramId));

	    // The HTML parser turned each <br/> in the diagram source into an element,
	    // so textContent would silently drop the line breaks from the labels.
	    // Read the serialised markup instead and decode its entities by passing
	    // it through a detached textarea, whose content is parsed as plain text.
	    const decoder = document.createElement('textarea');
	    decoder.innerHTML = mermaidDiv.innerHTML;
	    const sourceText = decoder.value.replace(/<br\s*\/?>/gi, '<br/>').trim();

	    // Listing placed below the diagram; hidden by the stylesheet until toggled.
	    const codeBlock = document.createElement('pre');
	    codeBlock.className = 'mermaid-code';
	    codeBlock.textContent = sourceText;

	    const parent = mermaidDiv.parentNode;
	    parent.insertBefore(toggleBtn, mermaidDiv);
	    parent.insertBefore(codeBlock, mermaidDiv.nextSibling);
	  });
	});

	/**
	 * Open the browser's print dialog for the current page.
	 *
	 * NOTE(review): nothing in this file calls this function; confirm whether a
	 * print button was meant to be wired to it.
	 */
	function printDiagrams() {
	window.print();
	}
	</script>
	</body>
	</html>