import { FileType } from "@/types";
/**
 * Heuristic file-type detection plus a few display helpers (icon, Monaco
 * language id, category grouping) keyed by `FileType`.
 *
 * Detection order used by `detectFileType`:
 *   1. the file extension, resolved through `EXTENSION_MAP`;
 *   2. content signatures (regular expressions scored per type);
 *   3. well-known file names;
 *   4. `'txt'` as the final fallback.
 */
export class FileTypeDetector {
  /**
   * YAML content signatures, shared by the `yaml` and `yml` entries below.
   * None of the patterns is global or sticky, so sharing the instances is safe.
   */
  private static readonly YAML_SIGNATURES: RegExp[] = [
    /^[a-zA-Z_][a-zA-Z0-9_]*:\s*.+$/m,
    /^-\s+[a-zA-Z_]/m,
    /^\s*#.*$/m,
    /---\s*$/m,
    /\.\.\.\s*$/m
  ];

  /**
   * Content signatures per file type. Each matching pattern adds one point to
   * that type's score in `analyzeContent`.
   *
   * The declaration order matters: on equal scores the type declared LATER
   * wins, so do not reorder entries casually.
   */
  private static readonly FILE_SIGNATURES: Record<FileType, RegExp[]> = {
    // HTML
    html: [
      /<!DOCTYPE\s+html/i,
      /<html[^>]*>/i,
      /<head[^>]*>/i,
      /<body[^>]*>/i,
      /<meta[^>]*>/i,
      /<title[^>]*>/i
    ],
    // CSS
    // The hyphen is placed last in the character classes so it is a literal
    // hyphen; written as `Z-_` it would be a range that excludes `-`.
    css: [
      /@import\s+/i,
      /@media\s+/i,
      /@keyframes\s+/i,
      /\.[a-zA-Z_-]+\s*\{[^}]*\}/i,
      /#[a-zA-Z_-]+\s*\{[^}]*\}/i,
      /[a-zA-Z_-]+\s*\{[^}]*color\s*:/i
    ],
    // JavaScript
    js: [
      /function\s+[a-zA-Z_$][a-zA-Z0-9_$]*\s*\(/i,
      /const\s+[a-zA-Z_$][a-zA-Z0-9_$]*\s*=/i,
      /let\s+[a-zA-Z_$][a-zA-Z0-9_$]*\s*=/i,
      /var\s+[a-zA-Z_$][a-zA-Z0-9_$]*\s*=/i,
      /=>\s*\{/i,
      /console\.(log|error|warn|info)/i,
      /document\.(getElementById|querySelector)/i,
      /window\./i
    ],
    // TypeScript
    ts: [
      /interface\s+[A-Z][a-zA-Z0-9_]*\s*\{/i,
      /type\s+[A-Z][a-zA-Z0-9_]*\s*=/i,
      /enum\s+[A-Z][a-zA-Z0-9_]*\s*\{/i,
      /:\s*(string|number|boolean|object|any)\s*[;,}]/i,
      /import.*from\s+['"][^'"]+['"];?/i,
      /export\s+(interface|type|enum|class)/i
    ],
    // React JSX
    jsx: [
      // Case-sensitive on purpose: a component tag starts with an uppercase
      // letter. With the `i` flag this pattern matched every HTML/XML tag.
      /<[A-Z][a-zA-Z0-9]*[^>]*>/,
      /React\.(Component|useState|useEffect|Fragment)/i,
      /import\s+React/i,
      /export\s+default\s+function\s+[A-Z]/i,
      /className\s*=/i,
      /onClick\s*=/i,
      /useState\s*\(/i,
      /useEffect\s*\(/i
    ],
    // TypeScript JSX
    tsx: [
      // Case-sensitive for the same reason as the JSX component-tag pattern.
      /<[A-Z][a-zA-Z0-9]*[^>]*>/,
      /React\.(Component|FC|FunctionComponent)/i,
      /interface\s+.*Props/i,
      /:\s*React\.(FC|FunctionComponent)/i,
      /useState<.*>/i,
      /useEffect\s*\(/i
    ],
    // Vue
    vue: [
      /<template[^>]*>/i,
      /<script[^>]*>/i,
      /<style[^>]*>/i,
      /export\s+default\s*\{/i,
      /v-if\s*=/i,
      /v-for\s*=/i,
      /@click\s*=/i
    ],
    // JSON
    json: [
      /^\s*\{[\s\S]*\}\s*$/,
      /^\s*\[[\s\S]*\]\s*$/,
      /"[^"]*"\s*:\s*"[^"]*"/,
      /"[^"]*"\s*:\s*\d+/,
      /"[^"]*"\s*:\s*(true|false|null)/
    ],
    // Markdown
    md: [
      /^#{1,6}\s+.+$/m,
      /\*\*[^*]+\*\*/,
      /\*[^*]+\*/,
      /\[[^\]]+\]\([^)]+\)/,
      /```[a-zA-Z]*\n[\s\S]*?\n```/,
      /^\s*[-*+]\s+/m,
      /^\s*\d+\.\s+/m
    ],
    // Python
    py: [
      /def\s+[a-zA-Z_][a-zA-Z0-9_]*\s*\(/i,
      /class\s+[A-Z][a-zA-Z0-9_]*\s*[\(:]?/i,
      /import\s+[a-zA-Z_][a-zA-Z0-9_.]*$/m,
      /from\s+[a-zA-Z_][a-zA-Z0-9_.]*\s+import/i,
      /if\s+__name__\s*==\s*['"]__main__['"]/i,
      /print\s*\(/i
    ],
    // PHP
    php: [
      /<\?php/i,
      /\$[a-zA-Z_][a-zA-Z0-9_]*/,
      /function\s+[a-zA-Z_][a-zA-Z0-9_]*\s*\(/i,
      /class\s+[A-Z][a-zA-Z0-9_]*\s*\{/i,
      /echo\s+/i,
      /\?>/
    ],
    // XML
    xml: [
      /<\?xml\s+version/i,
      /<[a-zA-Z][a-zA-Z0-9:_-]*[^>]*\/>/,
      /<[a-zA-Z][a-zA-Z0-9:_-]*[^>]*>[\s\S]*<\/[a-zA-Z][a-zA-Z0-9:_-]*>/
    ],
    // SVG
    svg: [
      /<svg[^>]*>/i,
      /<path[^>]*d\s*=/i,
      /<circle[^>]*>/i,
      /<rect[^>]*>/i,
      /<g[^>]*>/i,
      /xmlns\s*=\s*['"]http:\/\/www\.w3\.org\/2000\/svg['"]/i
    ],
    // YAML
    yaml: FileTypeDetector.YAML_SIGNATURES,
    // YML (same as YAML)
    yml: FileTypeDetector.YAML_SIGNATURES,
    // Text files: catch-all that matches any content, so `txt` always scores 1
    txt: [
      /^[\s\S]*$/
    ],
    // Image files (binary detection would be needed for actual content)
    png: [],
    jpg: [],
    jpeg: [],
    gif: [],
    ico: [],
    webp: [],
    pdf: [
      /^%PDF-/
    ]
  };

  /** Lower-case file extension (without the dot) -> canonical file type. */
  private static readonly EXTENSION_MAP: Record<string, FileType> = {
    // Web files
    'html': 'html',
    'htm': 'html',
    'css': 'css',
    'js': 'js',
    'mjs': 'js',
    'ts': 'ts',
    'jsx': 'jsx',
    'tsx': 'tsx',
    'vue': 'vue',
    // Data files
    'json': 'json',
    'md': 'md',
    'markdown': 'md',
    'txt': 'txt',
    'text': 'txt',
    // Programming languages
    'py': 'py',
    'python': 'py',
    'php': 'php',
    'xml': 'xml',
    'svg': 'svg',
    'yaml': 'yaml',
    'yml': 'yml',
    // Images
    'png': 'png',
    'jpg': 'jpg',
    'jpeg': 'jpeg',
    'gif': 'gif',
    'ico': 'ico',
    'webp': 'webp',
    // Documents
    'pdf': 'pdf'
  };

  /**
   * Display icon per file type.
   *
   * NOTE(review): the previous literals were mis-encoded (UTF-8 emoji bytes
   * read through a single-byte code page). `css`, `js`, `svg` and the image
   * types could be reconstructed from the surviving bytes; the remaining
   * glyphs are best-effort choices — confirm against the intended design.
   */
  private static readonly FILE_ICONS: Record<FileType, string> = {
    html: '🌐',
    css: '🎨',
    js: '⚡',
    ts: '🔷',
    jsx: '⚛️',
    tsx: '⚛️',
    vue: '💚',
    json: '📋',
    md: '📝',
    txt: '📄',
    py: '🐍',
    php: '🐘',
    xml: '📰',
    svg: '🖼️',
    yaml: '⚙️',
    yml: '⚙️',
    png: '🖼️',
    jpg: '🖼️',
    jpeg: '🖼️',
    gif: '🖼️',
    ico: '🖼️',
    webp: '🖼️',
    pdf: '📕'
  };

  /** Icon returned by `getFileIcon` when a type has no entry in `FILE_ICONS`. */
  private static readonly DEFAULT_ICON = '📄';

  /**
   * Well-known file names (lower-case) and the type they imply. Checked in
   * order with a substring match by `detectByFileName`.
   */
  private static readonly FILENAME_HINTS: ReadonlyArray<readonly [string, FileType]> = [
    ['package.json', 'json'],
    ['tsconfig.json', 'json'],
    ['webpack.config.js', 'js'],
    ['babel.config.js', 'js'],
    ['readme.md', 'md'],
    ['dockerfile', 'txt'],
    ['.gitignore', 'txt'],
    ['.env', 'txt'],
    ['makefile', 'txt']
  ];

  /** Monaco Editor language id per file type. */
  private static readonly MONACO_LANGUAGES: Record<FileType, string> = {
    html: 'html',
    css: 'css',
    js: 'javascript',
    ts: 'typescript',
    jsx: 'javascript',
    tsx: 'typescript',
    vue: 'html', // Vue files are treated as HTML in Monaco
    json: 'json',
    md: 'markdown',
    txt: 'plaintext',
    py: 'python',
    php: 'php',
    xml: 'xml',
    svg: 'xml',
    yaml: 'yaml',
    yml: 'yaml',
    png: 'plaintext',
    jpg: 'plaintext',
    jpeg: 'plaintext',
    gif: 'plaintext',
    ico: 'plaintext',
    webp: 'plaintext',
    pdf: 'plaintext'
  };

  /**
   * Detect the file type from the file name and, failing that, the content.
   *
   * @param content  Text content of the file; only inspected when the
   *                 extension does not resolve to a known type.
   * @param fileName Optional file name or path.
   * @returns The detected type, or `'txt'` when nothing more specific matches.
   */
  static detectFileType(content: string, fileName?: string): FileType {
    // First: the extension, mapped to its canonical type so that aliases such
    // as "htm" or "mjs" yield 'html' / 'js' rather than the raw extension.
    if (fileName) {
      const extension = this.getFileExtension(fileName);
      const typeFromExtension = extension ? this.getTypeFromExtension(extension) : null;
      if (typeFromExtension) {
        return typeFromExtension;
      }
    }

    // Second: content signatures.
    const typeFromContent = this.analyzeContent(content);
    if (typeFromContent !== 'txt') {
      return typeFromContent;
    }

    // Third: well-known file names.
    if (fileName) {
      const typeFromName = this.detectByFileName(fileName);
      if (typeFromName) {
        return typeFromName;
      }
    }

    return 'txt'; // Default fallback
  }

  /**
   * Get the lower-cased extension of a file name.
   *
   * Only the last path segment is inspected, so a dot inside a directory name
   * is not mistaken for an extension separator.
   *
   * @returns The text after the last dot (an empty string for a trailing
   *          dot), or `null` when the name contains no dot.
   */
  static getFileExtension(fileName: string): string | null {
    const lastSeparator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
    const baseName = fileName.slice(lastSeparator + 1);
    const dotIndex = baseName.lastIndexOf('.');
    return dotIndex === -1 ? null : baseName.slice(dotIndex + 1).toLowerCase();
  }

  /**
   * Check whether a string names a known extension or file type
   * (case-insensitive).
   */
  static isValidFileType(type: string): boolean {
    const key = type.toLowerCase();
    return (
      Object.prototype.hasOwnProperty.call(this.EXTENSION_MAP, key) ||
      Object.prototype.hasOwnProperty.call(this.FILE_SIGNATURES, key)
    );
  }

  /**
   * Get the display icon for a file type, falling back to a generic document
   * icon when the type has no entry.
   */
  static getFileIcon(type: FileType): string {
    return this.FILE_ICONS[type] ?? this.DEFAULT_ICON;
  }

  /**
   * Score the content against every type's signatures and return the best
   * match.
   *
   * Each matching pattern counts one point. On equal scores the type declared
   * later in `FILE_SIGNATURES` wins; because `txt` matches any content (score
   * 1) and is declared near the end, a type declared before it needs at least
   * two matching patterns to be chosen.
   *
   * @returns `'txt'` for blank content, otherwise the highest-scoring type.
   */
  private static analyzeContent(content: string): FileType {
    // Skip empty content
    if (!content.trim()) {
      return 'txt';
    }

    let bestType: FileType = 'txt';
    let bestScore = 0;
    for (const [type, patterns] of Object.entries(this.FILE_SIGNATURES)) {
      // Types without signatures (binary formats) score 0 and are ignored.
      const score = patterns.filter((pattern) => pattern.test(content)).length;
      // `>=` keeps the "later type wins ties" rule.
      if (score > 0 && score >= bestScore) {
        bestType = type as FileType;
        bestScore = score;
      }
    }
    return bestType;
  }

  /**
   * Detect the file type from well-known file names.
   *
   * Uses a case-insensitive substring match, so "Dockerfile.dev" and
   * ".env.local" are recognised as well.
   *
   * @returns The implied type, or `null` when no known name matches.
   */
  private static detectByFileName(fileName: string): FileType | null {
    const lowerName = fileName.toLowerCase();
    for (const [knownName, type] of this.FILENAME_HINTS) {
      if (lowerName.includes(knownName)) {
        return type;
      }
    }
    return null;
  }

  /**
   * Map an extension (without the dot, any case) to its canonical file type.
   *
   * @returns The mapped type, or `null` for unknown extensions. Names that
   *          exist only on `Object.prototype` (e.g. "constructor") are
   *          treated as unknown.
   */
  static getTypeFromExtension(extension: string): FileType | null {
    const key = extension.toLowerCase();
    if (!Object.prototype.hasOwnProperty.call(this.EXTENSION_MAP, key)) {
      return null;
    }
    return this.EXTENSION_MAP[key] ?? null;
  }

  /**
   * Get all supported file types, in declaration order.
   */
  static getSupportedTypes(): FileType[] {
    return Object.keys(this.FILE_SIGNATURES) as FileType[];
  }

  /**
   * Get the supported file types grouped by display category. A fresh object
   * is returned on every call.
   */
  static getTypesByCategory(): Record<string, FileType[]> {
    return {
      'Web Development': ['html', 'css', 'js', 'ts', 'jsx', 'tsx', 'vue'],
      'Data & Config': ['json', 'yaml', 'yml', 'xml'],
      'Documentation': ['md', 'txt'],
      'Programming': ['py', 'php'],
      'Images': ['png', 'jpg', 'jpeg', 'gif', 'ico', 'webp', 'svg'],
      'Documents': ['pdf']
    };
  }

  /**
   * Check whether a file type supports syntax highlighting (every text-based
   * type except plain text).
   */
  static supportsSyntaxHighlighting(type: FileType): boolean {
    const syntaxTypes: FileType[] = [
      'html', 'css', 'js', 'ts', 'jsx', 'tsx', 'vue',
      'json', 'md', 'py', 'php', 'xml', 'svg', 'yaml', 'yml'
    ];
    return syntaxTypes.includes(type);
  }

  /**
   * Get the Monaco Editor language id for a file type, falling back to
   * `'plaintext'` when the type has no entry.
   */
  static getMonacoLanguage(type: FileType): string {
    return this.MONACO_LANGUAGES[type] ?? 'plaintext';
  }
}