import { useEffect, useState } from 'react';
import { Database, ExternalLink, Shuffle, Lock } from 'lucide-react';
import DatasetValidator from '../components/validators/DatasetValidator';
import { DATASETS } from '../constants/datasets';
import type { JobConfig } from '../types';

// NOTE(review): this file looks extraction-damaged. Generic type arguments
// (e.g. `useState>>`, `Promise {`, `Record>`) and the JSX element tags in the
// render section appear to have been stripped. The code below is left
// token-for-token as found; restore the missing pieces from version control
// before relying on it.

/** Props accepted by {@link DatasetConfigPage}. */
type Props = { onNext: () => void; };

/**
 * Draft shape for extra settings.
 * Not referenced anywhere else in the visible portion of this file.
 */
type ExtrasDraft = {
  datasetLimit?: number; // Set on the Model page; this page leaves it untouched.
};

/**
 * Dataset configuration page.
 *
 * Holds the job configuration draft in local state, and for the selected
 * dataset requests `/dataset/meta`, `/dataset/fields` and
 * `/dataset/field-stats` from the backend (under `/api`) to populate the
 * config/split lists and the domain/category statistics.
 *
 * The dataset id `'example'` is a preset: it is resolved to
 * `'AmazonScience/bold'` for backend requests, sets the counterfactual count
 * to 20, and auto-selects the `American_actors` / `American_actresses`
 * categories when they are present in the statistics.
 *
 * @param onNext - Callback supplied by the parent. It is not referenced in the
 *   visible (tag-stripped) markup; presumably it is wired to the "Next"
 *   control — TODO confirm.
 */
export default function DatasetConfigPage({ onNext }: Props) {
  // Job configuration draft with its initial defaults. A stored draft from
  // localStorage ('cfgDraft') is merged over these in the first effect below.
  const [cfg, setCfg] = useState({
    dataset: '',
    languageModel: '',
    scorerModel: '',
    k: 5,
    numCounterfactuals: 3,
    metrictarget: 0.5,
    tau: 0.1,
    iterations: 1000,
    seed: 42,
    enableFineTuning: false,
    counterfactual: false,
  });
  // Text typed into the custom-dataset input, and whether that input is shown.
  const [customDataset, setCustomDataset] = useState('');
  const [showCustomDatasetInput, setShowCustomDatasetInput] = useState(false);
  // Nested counts, iterated in the render as domain -> category -> count.
  // NOTE(review): the type argument of this useState call appears stripped.
  const [fieldStats, setFieldStats] = useState>>({});
  // NOTE(review): the counterfactual count lives both here and in
  // `cfg.numCounterfactuals`; nothing visible keeps the two in sync — confirm
  // which one is the source of truth.
  const [numCounterfactuals, setNumCounterfactuals] = useState(3);
  // Selected counterfactual field keys, formatted as `${domain}/${category}`.
  const [selectedCfFields, setSelectedCfFields] = useState([]);
  // Loading flag and last error message for the fields/stats requests.
  const [isLoadingFields, setIsLoadingFields] = useState(false);
  const [fieldsError, setFieldsError] = useState(null);
  // Configs and splits reported by `/dataset/meta`, plus the current choice.
  const [metaConfigs, setMetaConfigs] = useState([]);
  const [metaSplits, setMetaSplits] = useState([]);
  const [selectedConfig, setSelectedConfig] = useState(null);
  const [selectedSplit, setSelectedSplit] = useState('train');

  // Updates a single key of `cfg`, leaving the other keys unchanged.
  // NOTE(review): `K` is not declared here; its generic parameter list
  // (presumably `<K extends keyof JobConfig>`) appears stripped — confirm.
  const setField = (k: K, v: JobConfig[K]) => setCfg((prev) => ({ ...prev, [k]: v }));

  // Shared Tailwind class strings. Only `fieldInput` is referenced in the
  // visible markup residue; the others are presumably used by stripped tags.
  const card = 'group relative rounded-2xl p-8 border border-white/30 bg-white/60 backdrop-blur-xl ' + 'shadow-[0_15px_40px_-20px_rgba(30,41,59,0.35)] transition-all duration-300 ' + 'hover:shadow-[0_20px_50px_-20px_rgba(79,70,229,0.45)] hover:-translate-y-0.5';
  const sectionTitle = 'text-xl font-bold tracking-tight text-slate-900';
  const subtext = 'text-sm text-slate-600';
  const fieldInput = 'w-full rounded-xl border-2 border-slate-200/70 bg-white/70 px-4 py-3 ' + 'focus:outline-none focus:border-indigo-500 focus:ring-4 focus:ring-indigo-500/20 transition-all';
  const selectInput = 'w-full rounded-xl border-2 border-slate-200/70 bg-white/70 px-3 py-2.5 ' + 'focus:outline-none focus:border-indigo-500 focus:ring-4 focus:ring-indigo-500/20 transition-all';
  const choiceRow = 'flex items-start gap-4 cursor-pointer p-4 rounded-xl border transition-colors ' + 'bg-white/60 hover:bg-white/80 border-slate-200/60 hover:border-indigo-300';

  // Prefix applied by fetchJSON to every URL that does not start with 'http'.
  const API_BASE = '/api';

  // Map the 'example' preset to the actual Hugging Face dataset to read.
  // Falsy ids (null / undefined / '') are returned unchanged.
  function resolveDatasetId(id: string | null | undefined) {
    if (!id) return id;
    return id === 'example' ? 'AmazonScience/bold' : id;
  }

  // Builds the relative `/dataset/fields` URL for a dataset. `id` is always
  // set (after resolving the 'example' preset); `config` and `split` are only
  // added when they are non-blank.
  function buildFieldsURL(datasetId: string, config: string | null, split: string): string {
    const realId = resolveDatasetId(datasetId)!;
    const params = new URLSearchParams();
    params.set('id', realId);
    if (config && config.trim() !== '') params.set('config', config);
    if (split && split.trim() !== '') params.set('split', split);
    return `/dataset/fields?${params.toString()}`;
  }

  // GETs a URL and returns the parsed JSON body. URLs starting with 'http' are
  // used as-is; anything else is prefixed with API_BASE. Throws an Error
  // carrying "<status> <statusText>" when the response is not ok.
  // NOTE(review): `T` is not declared and the return type reads `Promise`
  // with no argument; the generic parameter appears stripped — confirm. The
  // parsed body is cast to `T` without any runtime validation.
  async function fetchJSON(url: string, signal?: AbortSignal): Promise {
    const fullURL = url.startsWith('http') ? url : `${API_BASE}${url}`;
    const res = await fetch(fullURL, { signal });
    if (!res.ok) throw new Error(`${res.status} ${res.statusText}`);
    return (await res.json()) as T;
  }

  // If localStorage holds an earlier draft, load it (keeps state when the user
  // navigates back to the Dataset page).
  useEffect(() => {
    try {
      const draft = localStorage.getItem('cfgDraft');
      if (draft) {
        const parsed = JSON.parse(draft);
        // The parsed draft is merged over the defaults without validation.
        setCfg((prev) => ({ ...prev, ...parsed }));
        if (parsed.numCounterfactuals) setNumCounterfactuals(parsed.numCounterfactuals);
        if (parsed.selectedCfFields) setSelectedCfFields(parsed.selectedCfFields);
      }
      // NOTE(review): storage/parse failures are swallowed silently here.
    } catch {}
  }, []);

  // When the dataset changes, re-fetch its meta information and fields.
  useEffect(() => {
    // Clear the previous selection and error before anything else.
    setSelectedCfFields([]);
    setFieldsError(null);
    // No dataset yet, or the literal 'custom' id: nothing to fetch.
    if (!cfg.dataset || cfg.dataset === 'custom') return;
    const ac = new AbortController();
    const realId = resolveDatasetId(cfg.dataset)!;
    const run = async () => {
      try {
        const metaURL = `/dataset/meta?id=${encodeURIComponent(realId)}`; // uses the resolved id
        const meta = await fetchJSON<{ datasetId: string; configs: string[]; splits: string[]; }>(metaURL, ac.signal);
        setMetaConfigs(meta.configs || []);
        setMetaSplits(meta.splits || []);
        // Defaults: the first config (or null); the 'train' split when it is
        // listed, otherwise the first split, otherwise 'train'.
        const defaultConfig = meta.configs?.length ? meta.configs[0] : null;
        const defaultSplit = meta.splits?.length ? meta.splits.includes('train') ? 'train' : meta.splits[0] : 'train';
        setSelectedConfig(defaultConfig);
        setSelectedSplit(defaultSplit);
        setIsLoadingFields(true);
        const fieldsURL = buildFieldsURL(cfg.dataset, defaultConfig, defaultSplit); // resolves the id internally
        // NOTE(review): the response is awaited but its value is discarded; the
        // call only surfaces failures through the catch below.
        await fetchJSON<{ fields: string[] }>(fieldsURL, ac.signal);
        setFieldsError(null);
      } catch (err: any) {
        // NOTE(review): this catch does not distinguish a request cancelled by
        // the cleanup's `ac.abort()` from a real failure. If an aborted fetch
        // rejects, a stale run presumably resets state and reports an error
        // after a newer run has started — confirm and consider skipping
        // aborted runs.
        setMetaConfigs([]);
        setMetaSplits([]);
        setSelectedConfig(null);
        setSelectedSplit('train');
        // The URL in the message is rebuilt with null / 'train', so it may
        // differ from the URL that actually failed.
        const fieldsURL = buildFieldsURL(cfg.dataset, null, 'train');
        setFieldsError(`(${fieldsURL}) → ${err?.message || '欄位讀取失敗'}`);
      } finally {
        setIsLoadingFields(false);
      }
    };
    run();
    // Cancel in-flight requests when the dataset changes or on unmount.
    return () => ac.abort();
  }, [cfg.dataset]);

  // When the config/split changes, fetch the fields and the statistics.
  // This effect also lists cfg.dataset as a dependency, so a dataset change
  // issues the `/dataset/fields` request from here as well as from the effect
  // above.
  useEffect(() => {
    if (!cfg.dataset || cfg.dataset === 'custom') return;
    const ac = new AbortController();
    const realId = resolveDatasetId(cfg.dataset)!;
    const run = async () => {
      try {
        setIsLoadingFields(true);
        // Re-fetch the fields (the response value is not used).
        const fieldsURL = buildFieldsURL(cfg.dataset, selectedConfig, selectedSplit);
        await fetchJSON<{ fields: string[] }>(fieldsURL, ac.signal);
        // Use domain/category statistics. The field names are hard-coded and
        // the selected config/split are not sent with this request.
        const statsURL = `/dataset/field-stats?id=${encodeURIComponent(realId)}&field=domain&subfield=category`;
        // NOTE(review): `Record>` looks like a stripped nested Record type.
        const statsData = await fetchJSON<{ counts: Record> }>(statsURL, ac.signal);
        setFieldStats(statsData.counts || {});
        setFieldsError(null);
        // For the example preset, auto-select the two categories.
        if (cfg.dataset === 'example') {
          const keys: string[] = [];
          const cats = statsData?.counts?.domain || {};
          if ('American_actors' in cats) keys.push('domain/American_actors');
          if ('American_actresses' in cats) keys.push('domain/American_actresses');
          setSelectedCfFields(keys);
        } else {
          setSelectedCfFields([]);
        }
      } catch (err: any) {
        // NOTE(review): as in the dataset effect, aborted requests are not
        // told apart from real failures here — confirm.
        const fieldsURL = buildFieldsURL(cfg.dataset, selectedConfig, selectedSplit);
        setFieldStats({});
        setFieldsError(`(${fieldsURL}) → ${err?.message || 'Field Read Failed'}`);
      } finally {
        setIsLoadingFields(false);
      }
    };
    run();
    return () => ac.abort();
  }, [cfg.dataset, selectedConfig, selectedSplit]);

  // Example preset: automatically set the number of counterfactuals.
  useEffect(() => {
    if (cfg.dataset === 'example') {
      setNumCounterfactuals(20);
    }
  }, [cfg.dataset]);

  // (Kept) When fieldStats updates and the example preset is active, fill in
  // the two keys automatically. Unlike the effect above, this scans every
  // domain rather than only `domain`, and leaves the selection untouched when
  // no target category is found.
  useEffect(() => {
    if (cfg.dataset !== 'example') return;
    const targets = new Set(['American_actors', 'American_actresses']);
    const keys: string[] = [];
    Object.entries(fieldStats).forEach(([domain, categories]) => {
      Object.keys(categories).forEach((cat) => {
        if (targets.has(cat)) keys.push(`${domain}/${cat}`);
      });
    });
    if (keys.length > 0) setSelectedCfFields(keys);
  }, [cfg.dataset, fieldStats]);

  // `canNext` is true once any dataset id is set; `isExample` flags the preset.
  const canNext = !!cfg.dataset;
  const isExample = cfg.dataset === 'example';

  // NOTE(review): the markup below is missing its JSX element tags (only text
  // and `{…}` expressions remain) and is reproduced as found — restore it from
  // the original file.
  return (
{/* Dataset selection */}

Dataset Selection

{DATASETS.map((dataset) => ( ))} {/* Example dataset (preconfigured) */} {/* Custom dataset */} {showCustomDatasetInput && (
{ setCustomDataset(e.target.value); setField('dataset', e.target.value); }} className={fieldInput} /> {customDataset && customDataset.includes('/') && ( )}
)} {cfg.dataset === 'AmazonScience/bold' && !showCustomDatasetInput && ( )}
{/* Counterfactual */}

Counterfactual Setting

{ if (isExample) return; // Ignore edits while the Example preset is active.
const v = parseInt(e.target.value || '3', 10); setNumCounterfactuals(Number.isFinite(v) ? Math.max(1, Math.min(20, v)) : 3); }} disabled={isExample} className={fieldInput + (isExample ? ' cursor-not-allowed opacity-80' : '')} /> {isExample && (
Locked to 20 for the Example preset.
)}
{(metaConfigs.length > 0 || metaSplits.length > 0) && (
{metaConfigs.length > 0 && (
)} {metaSplits.length > 0 && (
)}
)}
Selected Dataset {cfg.dataset || 'Not Selected Yet'} {selectedConfig && / {selectedConfig}} {selectedSplit && / {selectedSplit}}
{/* Optional fields (domain/category) */}
Optional fields
{isExample && ( Locked by Example )} {isLoadingFields && Loading}
{!!fieldsError && (
{fieldsError}
)}
{Object.entries(fieldStats).map(([domain, categories]) => (
{domain}
{Object.entries(categories).map(([category, count]) => { const fieldKey = `${domain}/${category}`; const checked = selectedCfFields.includes(fieldKey); const locked = isExample && (category === 'American_actors' || category === 'American_actresses'); return ( ); })}
))}
{/* Next */}
); }