Spaces:
Sleeping
Sleeping
| """ | |
| VibeAtlas Code Search Playground | |
| ================================ | |
| Interactive demo for semantic code search using UniXcoder embeddings. | |
| Deploy to HuggingFace Spaces: https://huggingface.co/spaces/vibeatlas/code-search-playground | |
| Features: | |
| - Natural language → Code search | |
| - Code → Similar code search | |
| - Cross-language pattern matching | |
| - Real-time embedding visualization | |
| """ | |
import hashlib
import json
from typing import List, Tuple

import gradio as gr
import numpy as np

# For local testing without GPU
try:
    from transformers import AutoModel, AutoTokenizer
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    print("Warning: PyTorch not available, using mock embeddings")
# Sample code corpus for demonstration.
# Each entry is a dict with four string keys:
#   "id"          - unique identifier of the snippet
#   "language"    - language name, also used as the Markdown code-fence tag
#   "code"        - the snippet source; this is the text that gets embedded
#   "description" - short human-readable summary shown as the result title
# The snippets keep their own indentation so they render (and, for the Python
# ones, parse) as real code.
SAMPLE_CORPUS = [
    {
        "id": "auth-js-1",
        "language": "javascript",
        "code": """function authenticate(username, password) {
  const user = findUser(username);
  if (!user) return { success: false, error: 'User not found' };
  const isValid = verifyPassword(password, user.hashedPassword);
  if (!isValid) return { success: false, error: 'Invalid password' };
  return { success: true, token: generateToken(user) };
}""",
        "description": "User authentication with password verification"
    },
    {
        "id": "auth-py-1",
        "language": "python",
        "code": """def authenticate(username: str, password: str) -> dict:
    user = find_user(username)
    if not user:
        return {"success": False, "error": "User not found"}
    is_valid = verify_password(password, user.hashed_password)
    if not is_valid:
        return {"success": False, "error": "Invalid password"}
    return {"success": True, "token": generate_token(user)}""",
        "description": "Python authentication function"
    },
    {
        "id": "date-js-1",
        "language": "javascript",
        "code": """function formatDate(date, format = 'YYYY-MM-DD') {
  const year = date.getFullYear();
  const month = String(date.getMonth() + 1).padStart(2, '0');
  const day = String(date.getDate()).padStart(2, '0');
  return format
    .replace('YYYY', year)
    .replace('MM', month)
    .replace('DD', day);
}""",
        "description": "Date formatting utility"
    },
    {
        "id": "validate-email-1",
        "language": "typescript",
        "code": """function validateEmail(email: string): boolean {
  const emailRegex = /^[^\\s@]+@[^\\s@]+\\.[^\\s@]+$/;
  return emailRegex.test(email);
}""",
        "description": "Email validation with regex"
    },
    {
        "id": "fetch-api-1",
        "language": "javascript",
        "code": """async function fetchData(url, options = {}) {
  try {
    const response = await fetch(url, {
      headers: { 'Content-Type': 'application/json' },
      ...options
    });
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }
    return await response.json();
  } catch (error) {
    console.error('Fetch error:', error);
    throw error;
  }
}""",
        "description": "Async fetch wrapper with error handling"
    },
    {
        "id": "sort-array-1",
        "language": "python",
        "code": """def sort_by_key(items: list, key: str, reverse: bool = False) -> list:
    return sorted(items, key=lambda x: x.get(key, ''), reverse=reverse)""",
        "description": "Sort list of dicts by key"
    },
    {
        "id": "cache-decorator-1",
        "language": "python",
        "code": """from functools import lru_cache
@lru_cache(maxsize=128)
def expensive_computation(n: int) -> int:
    if n < 2:
        return n
    return expensive_computation(n - 1) + expensive_computation(n - 2)""",
        "description": "Memoized fibonacci with LRU cache"
    },
    {
        "id": "middleware-1",
        "language": "javascript",
        "code": """function authMiddleware(req, res, next) {
  const token = req.headers.authorization?.split(' ')[1];
  if (!token) {
    return res.status(401).json({ error: 'No token provided' });
  }
  try {
    const decoded = jwt.verify(token, process.env.JWT_SECRET);
    req.user = decoded;
    next();
  } catch (error) {
    res.status(403).json({ error: 'Invalid token' });
  }
}""",
        "description": "JWT authentication middleware for Express"
    },
    {
        "id": "class-user-1",
        "language": "typescript",
        "code": """class UserService {
  private users: Map<string, User> = new Map();
  async createUser(data: CreateUserDTO): Promise<User> {
    const user = new User(data);
    this.users.set(user.id, user);
    return user;
  }
  async findById(id: string): Promise<User | undefined> {
    return this.users.get(id);
  }
  async updateUser(id: string, data: Partial<User>): Promise<User> {
    const user = await this.findById(id);
    if (!user) throw new Error('User not found');
    Object.assign(user, data);
    return user;
  }
}""",
        "description": "User service with CRUD operations"
    },
    {
        "id": "react-hook-1",
        "language": "typescript",
        "code": """function useDebounce<T>(value: T, delay: number): T {
  const [debouncedValue, setDebouncedValue] = useState(value);
  useEffect(() => {
    const handler = setTimeout(() => {
      setDebouncedValue(value);
    }, delay);
    return () => clearTimeout(handler);
  }, [value, delay]);
  return debouncedValue;
}""",
        "description": "React debounce hook for input handling"
    }
]
class CodeSearchEngine:
    """Small in-memory code search engine based on embedding similarity.

    The ``"code"`` string of every corpus entry is embedded once when the
    engine is constructed. Queries are embedded on demand and ranked against
    the corpus by cosine similarity. When no transformer model is available
    the engine falls back to deterministic mock embeddings so the demo still
    runs; mock results carry no semantic meaning, but identical text always
    maps to the same vector.

    Args:
        corpus: List of snippet dicts, each with at least a ``"code"`` key.
            Defaults to the module-level ``SAMPLE_CORPUS``.
        model_name: Hugging Face model identifier passed to
            ``from_pretrained``.
    """

    # Width of the mock embedding vectors.
    EMBEDDING_DIM = 768
    DEFAULT_MODEL_NAME = "microsoft/unixcoder-base"

    def __init__(self, corpus=None, model_name: str = DEFAULT_MODEL_NAME):
        self.corpus = SAMPLE_CORPUS if corpus is None else corpus
        self.model_name = model_name
        self.embeddings = None
        self.model = None
        self.tokenizer = None
        self._initialize_model()

    def _initialize_model(self) -> None:
        """Load tokenizer and model when possible, then embed the corpus."""
        if TORCH_AVAILABLE:
            try:
                self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
                self.model = AutoModel.from_pretrained(self.model_name)
                self.model.eval()
                print(f"Loaded model: {self.model_name}")
            except Exception as e:
                # Deliberate best-effort: any load failure degrades to mock
                # mode instead of preventing the demo from starting.
                print(f"Could not load UniXcoder, using mock: {e}")
                self.model = None
                self.tokenizer = None
        # Pre-compute corpus embeddings
        self._compute_corpus_embeddings()

    def _has_model(self) -> bool:
        """Return True when both the model and the tokenizer are loaded."""
        # Explicit None checks: truthiness of these objects is not a reliable
        # "is loaded" signal.
        return self.model is not None and self.tokenizer is not None

    @staticmethod
    def _normalize(vector: np.ndarray) -> np.ndarray:
        """Scale *vector* to unit length; a zero vector is returned as is."""
        norm = np.linalg.norm(vector)
        return vector / norm if norm > 0 else vector

    def _mock_embedding(self, text: str) -> np.ndarray:
        """Return a unit vector derived deterministically from *text*."""
        # Seed the generator from a content hash so the same text yields the
        # same vector on every call and in every process.
        digest = hashlib.sha256(text.encode("utf-8", errors="replace")).digest()
        rng = np.random.default_rng(int.from_bytes(digest[:8], "big"))
        return self._normalize(rng.standard_normal(self.EMBEDDING_DIM))

    def _compute_corpus_embeddings(self) -> None:
        """Embed every corpus entry and store the result in ``self.embeddings``."""
        if not self.corpus:
            # Keep a well-formed 2-D array so ``search`` needs no special case.
            self.embeddings = np.empty((0, self.EMBEDDING_DIM))
            return
        # ``_embed_text`` already returns unit vectors, so no second
        # normalization pass is needed.
        self.embeddings = np.stack(
            [self._embed_text(item["code"]) for item in self.corpus]
        )

    def _embed_text(self, text: str) -> np.ndarray:
        """Return a unit-length 1-D embedding for *text*."""
        if not self._has_model():
            return self._mock_embedding(text)
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True
        )
        with torch.no_grad():
            outputs = self.model(**inputs)
        # Mean pooling over the token axis; squeeze(0) drops the batch axis
        # of the single input.
        embedding = outputs.last_hidden_state.mean(dim=1).squeeze(0).numpy()
        return self._normalize(embedding)

    def search(self, query: str, top_k: int = 5) -> List[Tuple[dict, float]]:
        """Return the ``top_k`` corpus entries most similar to *query*.

        Args:
            query: Text (natural language or code) to search for.
            top_k: Maximum number of results. Coerced to ``int`` and clamped
                to ``[0, len(corpus)]``.

        Returns:
            ``(corpus_entry, similarity)`` pairs sorted by descending
            similarity.
        """
        limit = max(0, min(int(top_k), len(self.corpus)))
        if limit == 0:
            return []
        query_embedding = self._embed_text(query)
        # Both sides are unit vectors, so the dot product is the cosine similarity.
        similarities = np.dot(self.embeddings, query_embedding)
        top_indices = np.argsort(similarities)[::-1][:limit]
        return [(self.corpus[idx], float(similarities[idx])) for idx in top_indices]
# Initialize search engine
# Shared module-level instance used by the UI callbacks. Constructing it runs
# model initialization and corpus embedding, so that work happens at import time.
search_engine = CodeSearchEngine()
def search_code(query: str, search_type: str, top_k: int = 5) -> str:
    """Run a search and render the results as Markdown.

    Args:
        query: Search text; ``None``, empty or whitespace-only input yields
            a prompt message instead of a search.
        search_type: Label of the selected search mode. It is only echoed in
            the output and does not change how the search is performed.
        top_k: Maximum number of results; coerced to ``int`` because UI
            widgets may deliver a float.

    Returns:
        A Markdown document with one section per result (description,
        language, similarity and the snippet in a fenced code block).
    """
    if not query or not query.strip():
        return "Please enter a search query."

    results = search_engine.search(query, top_k=int(top_k))

    # Collapse whitespace so a multi-line query (e.g. a pasted snippet)
    # stays on the heading line instead of breaking the Markdown structure.
    heading_query = " ".join(query.split())

    # Format results as markdown
    parts = [
        f"## Search Results for: \"{heading_query}\"\n\n",
        f"*Search type: {search_type}*\n\n",
        "---\n\n",
    ]
    for i, (item, score) in enumerate(results, 1):
        parts.append(f"### {i}. {item['description']}\n")
        parts.append(
            f"**Language:** {item['language']} | **Similarity:** {score:.2%}\n\n"
        )
        parts.append(f"```{item['language']}\n{item['code']}\n```\n\n")
        parts.append("---\n\n")
    return "".join(parts)
def compare_models(code_snippet: str) -> str:
    """Compare MiniLM vs UniXcoder embeddings (mock for demo).

    The comparison table is static demo content; the input is only echoed
    back (truncated to 500 characters) and is not analyzed.

    Args:
        code_snippet: Code to display; ``None``, empty or whitespace-only
            input yields a prompt message.

    Returns:
        A Markdown document with the (possibly truncated) input and the
        fixed model comparison table.
    """
    if not code_snippet or not code_snippet.strip():
        return "Please enter a code snippet to analyze."

    max_chars = 500
    preview = code_snippet[:max_chars]
    # Only mark the preview as truncated when something was actually cut off.
    if len(code_snippet) > max_chars:
        preview += "..."

    # Mock comparison
    return "".join([
        "## Embedding Comparison\n\n",
        "### Input Code\n",
        f"```\n{preview}\n```\n\n",
        "### Model Comparison\n\n",
        "| Model | Dimensions | Quality Score | Speed |\n",
        "|-------|------------|---------------|-------|\n",
        "| MiniLM-L6-v2 | 384 | 72% | 15ms |\n",
        "| **UniXcoder** | **768** | **89%** | 40ms |\n",
        "\n*UniXcoder provides better semantic understanding for code-specific queries.*\n",
    ])
# Create Gradio interface
# Layout: an HTML header followed by three tabs (search, model comparison,
# about). Button clicks are wired to search_code / compare_models.
# NOTE(review): the arrows and emoji in the UI strings below were reconstructed
# from mis-encoded text; the arrows are near-certain, the emoji are best
# guesses. Confirm them against the deployed Space.
with gr.Blocks(
    title="VibeAtlas Code Search Playground",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container { max-width: 1200px !important; }
    .header { text-align: center; margin-bottom: 2rem; }
    .cta-button { background: #4F46E5 !important; }
    """
) as demo:
    gr.HTML("""
    <div class="header">
    <h1>🔍 VibeAtlas Code Search Playground</h1>
    <p>Experience semantic code search powered by UniXcoder embeddings</p>
    <p>
    <a href="https://vibeatlas.dev" target="_blank">Website</a> |
    <a href="https://marketplace.visualstudio.com/items?itemName=vibeatlas.vibeatlas" target="_blank">VS Code Extension</a> |
    <a href="https://github.com/vibeatlas" target="_blank">GitHub</a>
    </p>
    </div>
    """)
    with gr.Tabs():
        with gr.TabItem("🔍 Code Search"):
            gr.Markdown("""
            ### Natural Language → Code Search
            Search for code using natural language queries. The model understands
            *what* code does, not just keyword matching.
            """)
            with gr.Row():
                with gr.Column(scale=1):
                    query_input = gr.Textbox(
                        label="Search Query",
                        placeholder="e.g., 'user authentication with password'",
                        lines=2
                    )
                    search_type = gr.Radio(
                        choices=["Natural Language", "Code Snippet"],
                        value="Natural Language",
                        label="Search Type"
                    )
                    top_k = gr.Slider(
                        minimum=1, maximum=10, value=5, step=1,
                        label="Number of Results"
                    )
                    search_btn = gr.Button("🔍 Search", variant="primary")
                with gr.Column(scale=2):
                    results_output = gr.Markdown(label="Results")
            search_btn.click(
                search_code,
                inputs=[query_input, search_type, top_k],
                outputs=results_output
            )
            gr.Examples(
                examples=[
                    ["user authentication with password verification", "Natural Language", 5],
                    ["validate email format", "Natural Language", 3],
                    ["async API fetch with error handling", "Natural Language", 5],
                    ["caching decorator for expensive functions", "Natural Language", 3],
                    ["JWT middleware for Express", "Natural Language", 5],
                ],
                inputs=[query_input, search_type, top_k]
            )
        with gr.TabItem("📊 Model Comparison"):
            gr.Markdown("""
            ### MiniLM vs UniXcoder
            See how code-specific embeddings outperform general-purpose models.
            """)
            code_input = gr.Textbox(
                label="Code Snippet to Analyze",
                placeholder="Paste a code snippet here...",
                lines=10
            )
            compare_btn = gr.Button("📊 Compare Models", variant="primary")
            comparison_output = gr.Markdown()
            compare_btn.click(
                compare_models,
                inputs=code_input,
                outputs=comparison_output
            )
        with gr.TabItem("ℹ️ About"):
            gr.Markdown("""
            ## About VibeAtlas
            **VibeAtlas** is the reliability infrastructure for AI coding. We help developers:
            - 🎯 **Reduce AI token costs** by 40-60% through intelligent context optimization
            - 🔍 **Improve code search accuracy** with semantic understanding
            - 🛡️ **Add governance guardrails** to AI-assisted workflows
            ### This Demo
            This demo showcases our semantic code search powered by
            [UniXcoder](https://huggingface.co/microsoft/unixcoder-base), a code-specific
            embedding model from Microsoft Research.
            **Key Features:**
            - Natural language → code search
            - Cross-language pattern matching (Python, JavaScript, TypeScript)
            - Semantic similarity (understands code intent, not just keywords)
            ### Try It In Your IDE
            Get the full experience with our VS Code extension:
            ```bash
            code --install-extension vibeatlas.vibeatlas
            ```
            Then use `Ctrl+Shift+P` → "VibeAtlas: Semantic Code Search"
            ### Links
            - 🌐 [Website](https://vibeatlas.dev)
            - 📦 [VS Code Extension](https://marketplace.visualstudio.com/items?itemName=vibeatlas.vibeatlas)
            - 🛠️ [npm Packages](https://www.npmjs.com/org/vibeatlas)
            - 📚 [Documentation](https://docs.vibeatlas.dev)
            - 💬 [Discord Community](https://discord.gg/vibeatlas)
            ### Model Credits
            - [microsoft/unixcoder-base](https://huggingface.co/microsoft/unixcoder-base) - Microsoft Research
            - [vibeatlas/unixcoder-base-onnx](https://huggingface.co/vibeatlas/unixcoder-base-onnx) - ONNX conversion by VibeAtlas
            """)

if __name__ == "__main__":
    demo.launch()