import {
  AutoProcessor,
  AutoModelForImageTextToText,
  load_image,
  TextStreamer,
} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2';

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

// Hugging Face repository that the processor and model are loaded from.
const MODEL_ID = 'onnx-community/FastVLM-0.5B-ONNX';

// Prompt used when the custom prompt field is empty.
const DEFAULT_PROMPT = 'Describe this image in detail.';

// Placeholder that marks where the image goes in the user message. The model
// card's usage example places it directly before the text of the prompt.
const IMAGE_TOKEN = '<image>';

// Progress-bar width shown for each known loading message; any other message
// falls back to DEFAULT_PROGRESS.
const LOADING_PROGRESS = Object.freeze({
  'Loading processor...': '30%',
  'Loading model...': '60%',
  'Loading image...': '70%',
  'Processing inputs...': '80%',
  'Generating caption...': '90%',
});
const DEFAULT_PROGRESS = '50%';

// ---------------------------------------------------------------------------
// Module state
// ---------------------------------------------------------------------------

let processor = null;
let model = null;
let isModelLoaded = false;
let currentDevice = 'cpu';
// True while generateCaption() is running; blocks overlapping runs that would
// otherwise interleave their streamed text in the output element.
let isGenerating = false;

// DOM elements used by this script, looked up once at module load.
const elements = {
  imageUrl: document.getElementById('image-url'),
  loadUrlBtn: document.getElementById('load-url-btn'),
  fileInput: document.getElementById('file-input'),
  uploadArea: document.getElementById('upload-area'),
  uploadBtn: document.getElementById('upload-btn'),
  previewSection: document.getElementById('preview-section'),
  previewImage: document.getElementById('preview-image'),
  customPrompt: document.getElementById('custom-prompt'),
  loadingSection: document.getElementById('loading-section'),
  loadingText: document.getElementById('loading-text'),
  progressFill: document.getElementById('progress-fill'),
  outputSection: document.getElementById('output-section'),
  outputContent: document.getElementById('output-content'),
  copyBtn: document.getElementById('copy-btn'),
  errorSection: document.getElementById('error-section'),
  errorMessage: document.getElementById('error-message'),
  tabBtns: document.querySelectorAll('.tab-btn'),
  tabContents: document.querySelectorAll('.tab-content'),
  deviceRadios: document.querySelectorAll('input[name="device"]'),
};

// ---------------------------------------------------------------------------
// Model loading and caption generation
// ---------------------------------------------------------------------------

/**
 * Loads the processor and model for the currently selected device.
 *
 * Does nothing when a model is already loaded for that device. On success the
 * loading indicator is hidden and the module state (`processor`, `model`,
 * `currentDevice`, `isModelLoaded`) is updated together, so a failed load
 * never leaves `currentDevice` pointing at a device that has no model.
 *
 * @returns {Promise<void>}
 * @throws Rethrows the underlying load error after logging it and showing
 *   'Failed to load model. Please try again.' to the user.
 */
async function initializeModel() {
  const requestedDevice = getSelectedDevice();
  if (isModelLoaded && currentDevice === requestedDevice) {
    return;
  }

  try {
    showLoading('Loading model...');

    // Release the previously loaded model (if any) before loading another
    // copy, so two sets of weights are not held at the same time.
    if (model !== null) {
      const previousModel = model;
      model = null;
      isModelLoaded = false;
      try {
        await previousModel.dispose();
      } catch (disposeError) {
        // Best effort: failing to free the old model must not block the reload.
        console.warn('Failed to dispose previous model:', disposeError);
      }
    }

    const modelOptions = {
      dtype: {
        embed_tokens: 'fp16',
        vision_encoder: 'q4',
        decoder_model_merged: 'q4',
      },
    };
    // Only 'webgpu' is passed explicitly; any other selection uses the
    // library's default device.
    if (requestedDevice === 'webgpu') {
      modelOptions.device = 'webgpu';
    }

    updateLoadingText('Loading processor...');
    const loadedProcessor = await AutoProcessor.from_pretrained(MODEL_ID);

    updateLoadingText('Loading model...');
    const loadedModel = await AutoModelForImageTextToText.from_pretrained(
      MODEL_ID,
      modelOptions,
    );

    // Commit state only after both loads succeeded.
    processor = loadedProcessor;
    model = loadedModel;
    currentDevice = requestedDevice;
    isModelLoaded = true;
    hideLoading();
  } catch (error) {
    console.error('Model initialization error:', error);
    showError('Failed to load model. Please try again.');
    hideLoading();
    throw error;
  }
}

/**
 * Generates a caption for the image at `imageUrl` and streams it into the
 * output element, replacing it with the fully decoded text when done.
 *
 * Errors are logged and reported through the error section; this function
 * does not throw. A call made while another generation is in progress is
 * ignored.
 *
 * @param {string} imageUrl - URL (remote or object URL) of the image to caption.
 * @returns {Promise<void>}
 */
async function generateCaption(imageUrl) {
  if (isGenerating) {
    return;
  }
  isGenerating = true;

  try {
    hideError();
    showLoading('Processing image...');

    // Always go through initializeModel(): it returns immediately when the
    // loaded model already matches the selected device, and reloads otherwise.
    try {
      await initializeModel();
    } catch {
      // initializeModel() has already logged the error and shown its own
      // message; returning here keeps that message from being overwritten.
      return;
    }

    // initializeModel() hides the loading indicator when it finishes a load,
    // so show it again for the remaining steps.
    showLoading('Processing image...');

    // Prepare prompt
    const customPromptText = elements.customPrompt.value.trim();
    const promptText = customPromptText || DEFAULT_PROMPT;
    const promptContent = promptText.includes(IMAGE_TOKEN)
      ? promptText
      : `${IMAGE_TOKEN}${promptText}`;
    const messages = [
      {
        role: 'user',
        content: promptContent,
      },
    ];
    const prompt = processor.apply_chat_template(messages, {
      add_generation_prompt: true,
    });

    updateLoadingText('Loading image...');
    const image = await load_image(imageUrl);

    updateLoadingText('Processing inputs...');
    const inputs = await processor(image, prompt, {
      add_special_tokens: false,
    });

    updateLoadingText('Generating caption...');
    elements.outputContent.textContent = '';
    showOutput();

    const outputs = await model.generate({
      ...inputs,
      max_new_tokens: 512,
      do_sample: false,
      streamer: new TextStreamer(processor.tokenizer, {
        skip_prompt: true,
        skip_special_tokens: false,
        callback_function: (text) => {
          elements.outputContent.textContent += text;
        },
      }),
    });

    // Decode only the newly generated tokens (everything after the prompt)
    // and replace the streamed text, which may contain special tokens.
    const promptLength = inputs.input_ids.dims.at(-1);
    const decoded = processor.batch_decode(
      outputs.slice(null, [promptLength, null]),
      { skip_special_tokens: true },
    );
    elements.outputContent.textContent = decoded[0];
  } catch (error) {
    console.error('Caption generation error:', error);
    showError('Failed to generate caption. Please check your image URL and try again.');
  } finally {
    hideLoading();
    isGenerating = false;
  }
}

// ---------------------------------------------------------------------------
// Helper functions
// ---------------------------------------------------------------------------

/**
 * Returns the value of the checked `device` radio button, or 'cpu' when none
 * is checked.
 *
 * @returns {string}
 */
function getSelectedDevice() {
  const selected = document.querySelector('input[name="device"]:checked');
  return selected ? selected.value : 'cpu';
}

/**
 * Shows the loading section with `text` and resets the progress bar to 50%.
 *
 * @param {string} text - Message to display.
 */
function showLoading(text) {
  elements.loadingSection.style.display = 'block';
  elements.loadingText.textContent = text;
  elements.progressFill.style.width = DEFAULT_PROGRESS;
}

/**
 * Updates the loading message and sets the progress bar to the width mapped
 * to that message (50% for messages without a mapping).
 *
 * @param {string} text - Message to display.
 */
function updateLoadingText(text) {
  elements.loadingText.textContent = text;
  elements.progressFill.style.width = LOADING_PROGRESS[text] ?? DEFAULT_PROGRESS;
}

/** Hides the loading section and resets the progress bar to 0%. */
function hideLoading() {
  elements.loadingSection.style.display = 'none';
  elements.progressFill.style.width = '0%';
}

/** Makes the output section visible. */
function showOutput() {
  elements.outputSection.style.display = 'block';
}

/** Hides the output section. */
function hideOutput() {
  elements.outputSection.style.display = 'none';
}

/**
 * Shows the error section with `message`.
 *
 * @param {string} message - Text to display to the user.
 */
function showError(message) {
  elements.errorSection.style.display = 'block';
  elements.errorMessage.textContent = message;
}

/** Hides the error section. */
function hideError() {
  elements.errorSection.style.display = 'none';
}

/**
 * Points the preview image at `url` and reveals the preview section.
 *
 * @param {string} url - Image URL to preview.
 */
function showPreview(url) {
  elements.previewImage.src = url;
  elements.previewSection.style.display = 'block';
}

/**
 * Handles files chosen via the file input or dropped on the upload area.
 *
 * Only the first file is used. It must have an `image/*` MIME type; it is then
 * previewed through an object URL, which is stored on the upload button for
 * the later "generate" click.
 *
 * @param {FileList} files - Files from the input or the drop event.
 */
function handleFiles(files) {
  if (!files || files.length === 0) {
    return;
  }

  const file = files[0];
  if (!file.type.startsWith('image/')) {
    showError('Please select a valid image file');
    return;
  }

  // A valid file was chosen, so clear any earlier "invalid file" message.
  hideError();

  const previousUrl = elements.uploadBtn.dataset.imageUrl;
  const url = URL.createObjectURL(file);
  showPreview(url);
  elements.uploadBtn.disabled = false;
  elements.uploadBtn.dataset.imageUrl = url;

  // Free the object URL of the file being replaced. Skipped while a caption is
  // being generated, because that run may not have fetched its image yet.
  if (previousUrl && !isGenerating) {
    URL.revokeObjectURL(previousUrl);
  }
}

// ---------------------------------------------------------------------------
// Event listeners
// ---------------------------------------------------------------------------

elements.loadUrlBtn.addEventListener('click', async () => {
  const url = elements.imageUrl.value.trim();
  if (!url) {
    showError('Please enter a valid image URL');
    return;
  }

  showPreview(url);
  await generateCaption(url);
});

elements.uploadArea.addEventListener('click', () => {
  elements.fileInput.click();
});

elements.uploadArea.addEventListener('dragover', (e) => {
  // preventDefault() is required for the element to accept a drop.
  e.preventDefault();
  elements.uploadArea.classList.add('dragover');
});

elements.uploadArea.addEventListener('dragleave', () => {
  elements.uploadArea.classList.remove('dragover');
});

elements.uploadArea.addEventListener('drop', (e) => {
  e.preventDefault();
  elements.uploadArea.classList.remove('dragover');
  handleFiles(e.dataTransfer.files);
});

elements.fileInput.addEventListener('change', (e) => {
  handleFiles(e.target.files);
});

elements.uploadBtn.addEventListener('click', async () => {
  const url = elements.uploadBtn.dataset.imageUrl;
  if (url) {
    await generateCaption(url);
  }
});

elements.copyBtn.addEventListener('click', async () => {
  const text = elements.outputContent.textContent;

  try {
    await navigator.clipboard.writeText(text);
  } catch (error) {
    // Covers both a rejected write (e.g. permission denied) and
    // navigator.clipboard being unavailable.
    console.error('Clipboard write error:', error);
    showError('Failed to copy caption to clipboard.');
    return;
  }

  elements.copyBtn.textContent = 'Copied!';
  setTimeout(() => {
    elements.copyBtn.innerHTML = ` Copy Caption `;
  }, 2000);
});

// Tab switching
elements.tabBtns.forEach((btn) => {
  btn.addEventListener('click', () => {
    const targetTab = btn.dataset.tab;

    elements.tabBtns.forEach((b) => b.classList.remove('active'));
    elements.tabContents.forEach((c) => c.classList.remove('active'));

    btn.classList.add('active');
    document.getElementById(`${targetTab}-tab`).classList.add('active');
  });
});

// Device selection: mark the model as stale when the selected device no longer
// matches the loaded one, so the next caption request reloads it.
elements.deviceRadios.forEach((radio) => {
  radio.addEventListener('change', () => {
    if (isModelLoaded && currentDevice !== getSelectedDevice()) {
      isModelLoaded = false;
    }
  });
});