|
<!DOCTYPE html> |
|
<html lang="en"> |
|
<head> |
|
<meta charset="UTF-8"> |
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
<title>LLMScraper | AI Data Collection Platform</title> |
|
<script src="https://cdn.tailwindcss.com"></script> |
|
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"> |
|
<script>
  // Tailwind Play CDN configuration. Extends the default theme with the brand
  // palette so utilities used in this page (`text-primary`, `bg-secondary`,
  // `bg-dark`, `bg-light`, `focus:ring-primary`, ...) resolve to these colors.
  tailwind.config = {
    theme: {
      extend: {
        colors: {
          primary: '#4F46E5',   // indigo: buttons, links, icons
          secondary: '#10B981', // emerald: check marks, secondary button
          dark: '#1F2937',      // footer background
          light: '#F9FAFB'      // features section background
        }
      }
    }
  };
</script>
|
<!-- Inter is loaded with <link> (plus preconnect) instead of a CSS @import so
     the font request is discovered by the preload scanner and does not wait
     for the inline stylesheet to be parsed. -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&amp;display=swap">
<style>
  /* Page-level defaults: typeface and the pale blue background gradient. */
  body {
    font-family: 'Inter', sans-serif;
    background: linear-gradient(135deg, #f0f9ff 0%, #e6f7ff 100%);
    min-height: 100vh;
  }

  /* Feature cards: soft shadow at rest, lift and deeper shadow on hover.
     Only the two animated properties are transitioned. */
  .card-hover {
    transition: transform 0.3s ease, box-shadow 0.3s ease;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
  }

  .card-hover:hover {
    transform: translateY(-5px);
    box-shadow: 0 10px 15px rgba(0, 0, 0, 0.1);
  }

  /* Indigo-to-violet gradient used by the header, hero and call-to-action. */
  .gradient-bg {
    background: linear-gradient(135deg, #4F46E5 0%, #7C3AED 100%);
  }

  /* Progress bar track; the fill is clipped to the rounded track. */
  .progress-bar {
    height: 8px;
    border-radius: 4px;
    overflow: hidden;
  }

  /* Progress bar fill. 75% is the default width; rows override it with an
     inline `width` (statically or from the demo script). */
  .progress-fill {
    height: 100%;
    border-radius: 4px;
    background: linear-gradient(90deg, #4F46E5 0%, #10B981 100%);
    width: 75%;
  }

  /* Job status table: separated borders, zebra striping, row hover tint. */
  .data-table {
    border-collapse: separate;
    border-spacing: 0;
  }

  .data-table th {
    background-color: #f3f4f6;
  }

  .data-table tr:nth-child(even) {
    background-color: #f9fafb;
  }

  .data-table tr:hover {
    background-color: #f0f9ff;
  }

  /* Static indigo halo (floating action button). */
  .glow {
    box-shadow: 0 0 15px rgba(79, 70, 229, 0.3);
  }

  /* Expanding ring animation (floating action button). */
  .pulse {
    animation: pulse 2s infinite;
  }

  @keyframes pulse {
    0% { box-shadow: 0 0 0 0 rgba(79, 70, 229, 0.4); }
    70% { box-shadow: 0 0 0 10px rgba(79, 70, 229, 0); }
    100% { box-shadow: 0 0 0 0 rgba(79, 70, 229, 0); }
  }

  /* Status pills shown in the first table column. */
  .tag {
    display: inline-block;
    padding: 2px 8px;
    border-radius: 12px;
    font-size: 0.75rem;
    font-weight: 500;
  }

  .tag-processing {
    background-color: #fef3c7;
    color: #92400e;
  }

  .tag-cleaned {
    background-color: #d1fae5;
    color: #065f46;
  }

  .tag-labeled {
    background-color: #dbeafe;
    color: #1e40af;
  }

  /* Respect the user's reduced-motion preference: no endless pulse and no
     hover lift. */
  @media (prefers-reduced-motion: reduce) {
    .pulse {
      animation: none;
    }

    .card-hover {
      transition: none;
    }

    .card-hover:hover {
      transform: none;
    }
  }
</style>
|
</head> |
|
<body class="text-gray-800"> |
|
|
|
<!-- Site header: brand, primary navigation (md and up), account actions. -->
<header class="gradient-bg text-white">
  <div class="container mx-auto px-4 py-6">
    <div class="flex justify-between items-center">
      <div class="flex items-center space-x-2">
        <i class="fas fa-robot text-3xl" aria-hidden="true"></i>
        <!-- The brand is not a heading; the page's single <h1> is the hero title. -->
        <span class="text-2xl font-bold">LLMScraper</span>
      </div>
      <nav class="hidden md:block" aria-label="Primary">
        <!-- TODO: replace the placeholder "#" targets with real routes. -->
        <ul class="flex space-x-8">
          <li><a href="#" class="hover:text-indigo-200 transition">Dashboard</a></li>
          <li><a href="#" class="hover:text-indigo-200 transition">Scrape Jobs</a></li>
          <li><a href="#" class="hover:text-indigo-200 transition">Data Library</a></li>
          <li><a href="#" class="hover:text-indigo-200 transition">Labeling</a></li>
          <li><a href="#" class="hover:text-indigo-200 transition">API</a></li>
        </ul>
      </nav>
      <div class="flex items-center space-x-4">
        <button type="button" class="bg-white text-primary px-4 py-2 rounded-lg font-medium hover:bg-indigo-50 transition">Sign In</button>
        <button type="button" class="hidden md:block bg-secondary text-white px-4 py-2 rounded-lg font-medium hover:bg-emerald-500 transition">Get Started</button>
        <!-- Icon-only button: the accessible name comes from aria-label. -->
        <button type="button" class="md:hidden text-2xl" aria-label="Menu"><i class="fas fa-bars" aria-hidden="true"></i></button>
      </div>
    </div>
  </div>
</header>
|
|
|
|
|
<!-- Hero: page title, tagline and the two primary calls to action. -->
<section class="gradient-bg text-white py-16">
  <div class="container mx-auto px-4 text-center">
    <h1 class="text-4xl md:text-5xl font-bold mb-6">AI-Powered Web Scraping &amp; Data Labeling</h1>
    <p class="text-xl max-w-3xl mx-auto mb-10">Collect, clean, and label web data at scale using advanced language models to fine-tune your AI systems</p>
    <div class="flex flex-col sm:flex-row justify-center gap-4">
      <button type="button" class="bg-white text-primary px-8 py-3 rounded-lg font-bold text-lg hover:bg-indigo-50 transition">Start Free Trial</button>
      <button type="button" class="bg-transparent border-2 border-white px-8 py-3 rounded-lg font-bold text-lg hover:bg-white hover:text-primary transition">View Demo</button>
    </div>
  </div>
</section>
|
|
|
|
|
<!-- Features: three cards (scraping, cleaning, labeling), each with an icon,
     a summary and a three-item checklist. All icons are decorative and are
     hidden from assistive technology. -->
<section class="py-16 bg-light">
  <div class="container mx-auto px-4">
    <h2 class="text-3xl font-bold text-center mb-4">Powerful Data Collection Workflow</h2>
    <p class="text-gray-600 text-center max-w-2xl mx-auto mb-12">Our AI-powered platform handles the entire data pipeline from collection to labeling</p>

    <div class="grid grid-cols-1 md:grid-cols-3 gap-8">

      <!-- Card 1: scraping -->
      <div class="bg-white rounded-xl p-6 card-hover">
        <div class="w-16 h-16 rounded-full bg-indigo-100 flex items-center justify-center mb-6">
          <i class="fas fa-spider text-2xl text-primary" aria-hidden="true"></i>
        </div>
        <h3 class="text-xl font-bold mb-3">Intelligent Web Scraping</h3>
        <p class="text-gray-600 mb-4">Extract structured data from any website using natural language instructions. No complex selectors needed.</p>
        <ul class="space-y-2">
          <li class="flex items-start">
            <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i>
            <span>Handles JavaScript-rendered content</span>
          </li>
          <li class="flex items-start">
            <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i>
            <span>Automatic pagination &amp; navigation</span>
          </li>
          <li class="flex items-start">
            <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i>
            <span>Anti-bot detection bypass</span>
          </li>
        </ul>
      </div>

      <!-- Card 2: cleaning -->
      <div class="bg-white rounded-xl p-6 card-hover">
        <div class="w-16 h-16 rounded-full bg-emerald-100 flex items-center justify-center mb-6">
          <i class="fas fa-broom text-2xl text-secondary" aria-hidden="true"></i>
        </div>
        <h3 class="text-xl font-bold mb-3">AI Data Cleaning</h3>
        <p class="text-gray-600 mb-4">Automatically clean and normalize scraped data using language understanding to fix inconsistencies.</p>
        <ul class="space-y-2">
          <li class="flex items-start">
            <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i>
            <span>Entity recognition &amp; normalization</span>
          </li>
          <li class="flex items-start">
            <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i>
            <span>Duplicate detection &amp; removal</span>
          </li>
          <li class="flex items-start">
            <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i>
            <span>Context-aware error correction</span>
          </li>
        </ul>
      </div>

      <!-- Card 3: labeling -->
      <div class="bg-white rounded-xl p-6 card-hover">
        <div class="w-16 h-16 rounded-full bg-blue-100 flex items-center justify-center mb-6">
          <i class="fas fa-tags text-2xl text-blue-500" aria-hidden="true"></i>
        </div>
        <h3 class="text-xl font-bold mb-3">Automated Labeling</h3>
        <p class="text-gray-600 mb-4">Generate high-quality labels for your datasets using large language models with human-in-the-loop validation.</p>
        <ul class="space-y-2">
          <li class="flex items-start">
            <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i>
            <span>Zero-shot &amp; few-shot classification</span>
          </li>
          <li class="flex items-start">
            <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i>
            <span>Semantic similarity clustering</span>
          </li>
          <li class="flex items-start">
            <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i>
            <span>Active learning for model improvement</span>
          </li>
        </ul>
      </div>

    </div>
  </div>
</section>
|
|
|
|
|
<!-- Demo: a URL + schema form, a job status table, and a before/after sample
     of scraped data. The page script looks up the "Scrape" button by its text
     and prepends rows to `.data-table tbody`; keep both intact. -->
<section class="py-16 bg-white">
  <div class="container mx-auto px-4">
    <h2 class="text-3xl font-bold text-center mb-4">See It In Action</h2>
    <p class="text-gray-600 text-center max-w-2xl mx-auto mb-12">Our platform makes it simple to collect and prepare training data</p>

    <div class="bg-gray-50 rounded-xl p-6 mb-8">
      <div class="flex flex-wrap gap-4 mb-6">
        <div class="flex-1 min-w-[300px]">
          <!-- for/id ties the label to the field for click-to-focus and screen readers. -->
          <label for="scrape-url" class="block text-sm font-medium mb-2">Enter Website URL</label>
          <div class="flex">
            <input id="scrape-url" type="url" autocomplete="url" class="flex-1 border border-gray-300 rounded-l-lg px-4 py-2 focus:outline-none focus:ring-2 focus:ring-primary" placeholder="https://example.com/products">
            <button type="button" class="bg-primary text-white px-6 py-2 rounded-r-lg font-medium">Scrape</button>
          </div>
        </div>
        <div class="flex-1 min-w-[300px]">
          <label for="data-schema" class="block text-sm font-medium mb-2">Data Schema</label>
          <select id="data-schema" class="w-full border border-gray-300 rounded-lg px-4 py-2 focus:outline-none focus:ring-2 focus:ring-primary">
            <option>E-commerce Product Data</option>
            <option>News Articles</option>
            <option>Job Listings</option>
            <option>Real Estate Listings</option>
            <option>Custom Schema</option>
          </select>
        </div>
      </div>

      <div class="bg-white rounded-lg border border-gray-200 overflow-hidden">
        <div class="overflow-x-auto">
          <table class="w-full data-table">
            <!-- Visually hidden caption gives the table an accessible name. -->
            <caption class="sr-only">Scrape job status</caption>
            <thead>
              <tr>
                <th scope="col" class="text-left py-3 px-4 font-semibold text-sm border-b">Status</th>
                <th scope="col" class="text-left py-3 px-4 font-semibold text-sm border-b">Page URL</th>
                <th scope="col" class="text-left py-3 px-4 font-semibold text-sm border-b">Items Found</th>
                <th scope="col" class="text-left py-3 px-4 font-semibold text-sm border-b">Progress</th>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td class="py-3 px-4 border-b">
                  <span class="tag tag-processing">Processing</span>
                </td>
                <td class="py-3 px-4 border-b">https://example.com/products</td>
                <td class="py-3 px-4 border-b">24</td>
                <td class="py-3 px-4 border-b">
                  <div class="progress-bar bg-gray-200 w-full">
                    <!-- No inline width: falls back to the 75% default from .progress-fill. -->
                    <div class="progress-fill"></div>
                  </div>
                  <div class="text-xs text-gray-500 mt-1">75% complete</div>
                </td>
              </tr>
              <tr>
                <td class="py-3 px-4 border-b">
                  <span class="tag tag-cleaned">Cleaned</span>
                </td>
                <td class="py-3 px-4 border-b">https://example.com/specials</td>
                <td class="py-3 px-4 border-b">18</td>
                <td class="py-3 px-4 border-b">
                  <div class="progress-bar bg-gray-200 w-full">
                    <div class="progress-fill" style="width: 100%"></div>
                  </div>
                  <div class="text-xs text-gray-500 mt-1">100% complete</div>
                </td>
              </tr>
              <tr>
                <td class="py-3 px-4">
                  <span class="tag tag-labeled">Labeled</span>
                </td>
                <td class="py-3 px-4">https://example.com/new-arrivals</td>
                <td class="py-3 px-4">32</td>
                <td class="py-3 px-4">
                  <div class="progress-bar bg-gray-200 w-full">
                    <div class="progress-fill" style="width: 100%"></div>
                  </div>
                  <div class="text-xs text-gray-500 mt-1">100% complete</div>
                </td>
              </tr>
            </tbody>
          </table>
        </div>
      </div>
    </div>

    <!-- Before/after sample. <pre> preserves whitespace, so its content is
         kept flush-left instead of following the markup indentation. -->
    <div class="grid grid-cols-1 md:grid-cols-2 gap-8">
      <div class="bg-indigo-50 rounded-xl p-6">
        <h3 class="text-xl font-bold mb-4">Raw Scraped Data</h3>
        <div class="bg-white rounded-lg p-4 font-mono text-sm overflow-x-auto max-h-60">
<pre>{
  "products": [
    {
      "title": "Premium Headphones - Wireless",
      "price": "$199.99",
      "description": "Experience crystal-clear audio with our premium wireless headphones...",
      "rating": "4.5 out of 5 stars",
      "availability": "In Stock"
    },
    ...
  ]
}</pre>
        </div>
      </div>

      <div class="bg-emerald-50 rounded-xl p-6">
        <h3 class="text-xl font-bold mb-4">Cleaned &amp; Labeled Data</h3>
        <div class="bg-white rounded-lg p-4 font-mono text-sm overflow-x-auto max-h-60">
<pre>{
  "products": [
    {
      "title": "Premium Headphones Wireless",
      "price": 199.99,
      "currency": "USD",
      "description": "Experience crystal-clear audio with premium wireless headphones...",
      "rating": 4.5,
      "max_rating": 5,
      "availability": true,
      "category": "Electronics &gt; Audio &gt; Headphones",
      "features": ["wireless", "noise-cancelling", "bluetooth"]
    },
    ...
  ]
}</pre>
        </div>
      </div>
    </div>
  </div>
</section>
|
|
|
|
|
<!-- Closing call to action. -->
<section class="py-16 gradient-bg text-white">
  <div class="container mx-auto px-4 text-center">
    <h2 class="text-3xl font-bold mb-6">Ready to Enhance Your AI Models?</h2>
    <p class="text-xl max-w-2xl mx-auto mb-10">Start collecting high-quality training data today with our AI-powered platform</p>
    <div class="flex flex-col sm:flex-row justify-center gap-4">
      <button type="button" class="bg-white text-primary px-8 py-3 rounded-lg font-bold text-lg hover:bg-indigo-50 transition">Get Started Free</button>
      <button type="button" class="bg-transparent border-2 border-white px-8 py-3 rounded-lg font-bold text-lg hover:bg-white hover:text-primary transition">Schedule a Demo</button>
    </div>
  </div>
</section>
|
|
|
|
|
<!-- Site footer: brand blurb with social links, three link columns, and the
     copyright line. Column titles are <h3> so the outline does not jump from
     the preceding <h2> to <h4>; sizes come from the utility classes. -->
<footer class="bg-dark text-gray-300 py-12">
  <div class="container mx-auto px-4">
    <div class="grid grid-cols-1 md:grid-cols-4 gap-8">
      <div>
        <div class="flex items-center space-x-2 mb-4">
          <i class="fas fa-robot text-2xl text-primary" aria-hidden="true"></i>
          <p class="text-xl font-bold text-white">LLMScraper</p>
        </div>
        <p class="mb-4">AI-powered web scraping and data labeling for machine learning teams.</p>
        <!-- Icon-only links: visually hidden text supplies the accessible name.
             TODO: replace the placeholder "#" targets with the real profile URLs. -->
        <div class="flex space-x-4">
          <a href="#" class="text-gray-400 hover:text-white"><i class="fab fa-twitter" aria-hidden="true"></i><span class="sr-only">Twitter</span></a>
          <a href="#" class="text-gray-400 hover:text-white"><i class="fab fa-linkedin" aria-hidden="true"></i><span class="sr-only">LinkedIn</span></a>
          <a href="#" class="text-gray-400 hover:text-white"><i class="fab fa-github" aria-hidden="true"></i><span class="sr-only">GitHub</span></a>
        </div>
      </div>

      <div>
        <h3 class="text-lg font-semibold text-white mb-4">Product</h3>
        <ul class="space-y-2">
          <li><a href="#" class="hover:text-white transition">Features</a></li>
          <li><a href="#" class="hover:text-white transition">Pricing</a></li>
          <li><a href="#" class="hover:text-white transition">Integrations</a></li>
          <li><a href="#" class="hover:text-white transition">Roadmap</a></li>
        </ul>
      </div>

      <div>
        <h3 class="text-lg font-semibold text-white mb-4">Resources</h3>
        <ul class="space-y-2">
          <li><a href="#" class="hover:text-white transition">Documentation</a></li>
          <li><a href="#" class="hover:text-white transition">API Reference</a></li>
          <li><a href="#" class="hover:text-white transition">Tutorials</a></li>
          <li><a href="#" class="hover:text-white transition">Blog</a></li>
        </ul>
      </div>

      <div>
        <h3 class="text-lg font-semibold text-white mb-4">Legal</h3>
        <ul class="space-y-2">
          <li><a href="#" class="hover:text-white transition">Privacy Policy</a></li>
          <li><a href="#" class="hover:text-white transition">Terms of Service</a></li>
          <li><a href="#" class="hover:text-white transition">Compliance</a></li>
          <li><a href="#" class="hover:text-white transition">Scraping Ethics</a></li>
        </ul>
      </div>
    </div>

    <div class="border-t border-gray-700 mt-12 pt-8 text-center">
      <p>© 2023 LLMScraper. All rights reserved.</p>
    </div>
  </div>
</footer>
|
|
|
|
|
<!-- Floating action button pinned to the bottom-right corner. It is icon-only,
     so the accessible name comes from aria-label.
     NOTE(review): no handler is bound to this button in this file; "Add"
     mirrors the plus icon. Replace it with the real action name once wired. -->
<div class="fixed bottom-6 right-6">
  <button type="button" class="w-14 h-14 rounded-full bg-primary text-white flex items-center justify-center shadow-lg glow pulse" aria-label="Add">
    <i class="fas fa-plus text-xl" aria-hidden="true"></i>
  </button>
</div>
|
|
|
<script>
  // Demo-only behaviour for the "See It In Action" panel. Clicking "Scrape"
  // shows a spinner for a moment, then prepends a "Processing" row to the job
  // table and animates its progress bar up to 100%. Nothing is sent over the
  // network; the whole flow is simulated with timers.
  document.addEventListener('DOMContentLoaded', function () {
    const SCRAPE_DELAY_MS = 2000;  // spinner duration before the row appears
    const PROGRESS_TICK_MS = 500;  // interval between progress updates
    const PROGRESS_STEP = 5;       // percentage points added per tick
    const INITIAL_PROGRESS = 5;    // percentage shown when the row is created

    // `:contains()` is a jQuery extension, not a CSS selector, so
    // `querySelector('button:contains("Scrape")')` throws a SyntaxError.
    // Match on the button's text instead.
    const scrapeBtn = Array.from(document.querySelectorAll('button')).find(
      function (btn) {
        return btn.textContent.trim() === 'Scrape';
      }
    );

    // The URL field is the input sharing the button's wrapper element.
    const urlInput = scrapeBtn && scrapeBtn.parentElement
      ? scrapeBtn.parentElement.querySelector('input')
      : null;
    const tableBody = document.querySelector('.data-table tbody');

    // Without all three elements there is nothing to wire up.
    if (!scrapeBtn || !urlInput || !tableBody) {
      return;
    }

    scrapeBtn.addEventListener('click', function () {
      // Capture the value now so later edits to the field during the delay
      // cannot change what gets reported and inserted.
      const url = urlInput.value.trim();
      if (url === '') {
        alert('Please enter a valid URL');
        return;
      }

      // Swap the label for a spinner and block repeat clicks while "working".
      const originalLabel = scrapeBtn.innerHTML;
      scrapeBtn.innerHTML = '<i class="fas fa-spinner fa-spin mr-2"></i> Scraping...';
      scrapeBtn.disabled = true;

      setTimeout(function () {
        scrapeBtn.innerHTML = originalLabel;
        scrapeBtn.disabled = false;

        alert(`Successfully started scraping: ${url}`);

        const newRow = buildJobRow(url);
        tableBody.insertBefore(newRow, tableBody.firstChild);
        simulateProgress(newRow);
      }, SCRAPE_DELAY_MS);
    });

    /**
     * Builds a "Processing" table row for the given URL.
     *
     * The static cells come from a template, but the URL is user input and is
     * written with `textContent` so it can never be parsed as markup.
     *
     * @param {string} url - URL typed by the user.
     * @returns {HTMLTableRowElement} Detached row ready to be inserted.
     */
    function buildJobRow(url) {
      const row = document.createElement('tr');
      row.innerHTML = `
        <td class="py-3 px-4 border-b">
          <span class="tag tag-processing">Processing</span>
        </td>
        <td class="py-3 px-4 border-b" data-cell="url"></td>
        <td class="py-3 px-4 border-b">0</td>
        <td class="py-3 px-4 border-b">
          <div class="progress-bar bg-gray-200 w-full">
            <div class="progress-fill" style="width: ${INITIAL_PROGRESS}%"></div>
          </div>
          <div class="text-xs text-gray-500 mt-1">${INITIAL_PROGRESS}% complete</div>
        </td>
      `;
      row.querySelector('[data-cell="url"]').textContent = url;
      return row;
    }

    /**
     * Advances the row's progress bar on every tick until it reaches 100%,
     * then switches the status tag to "Cleaned".
     *
     * @param {HTMLTableRowElement} row - Row containing `.progress-fill`,
     *   `.text-xs` and `.tag` elements.
     */
    function simulateProgress(row) {
      const progressBar = row.querySelector('.progress-fill');
      const progressText = row.querySelector('.text-xs');

      // Without these nodes the stop condition could never be reached, so do
      // not start a timer that would run forever.
      if (!progressBar || !progressText) {
        return;
      }

      let progress = INITIAL_PROGRESS;
      const interval = setInterval(function () {
        progress = Math.min(progress + PROGRESS_STEP, 100);

        progressBar.style.width = `${progress}%`;
        progressText.textContent = `${progress}% complete`;

        if (progress === 100) {
          clearInterval(interval);
          const statusTag = row.querySelector('.tag');
          if (statusTag) {
            statusTag.textContent = 'Cleaned';
            statusTag.className = 'tag tag-cleaned';
          }
        }
      }, PROGRESS_TICK_MS);
    }
  });
</script>
|
<!-- "Made with DeepSite" attribution badge, fixed to the bottom-left corner.
     Links open in a new tab, so rel="noopener noreferrer" keeps the opened
     page from reaching back through window.opener. -->
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" width="16" height="16" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=MagicMeWizard/ai-generate-website-ui" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">Remix</a></p>
</body>
|
</html> |