import time import random import re import html from io import StringIO import tokenize import gradio as gr from transformers import AutoModelForCausalLM, AutoTokenizer # ----------------------------- # Config # ----------------------------- MODEL_NAME = "Salesforce/codegen-350M-multi" FALLBACK_SNIPPETS = [ """def greet(name: str) -> str: return f"Hello, {name}!" print(greet("World"))""", """nums = [1, 2, 3, 4, 5] squares = [n * n for n in nums if n % 2 == 1] print(squares)""", """def factorial(n: int) -> int: if n <= 1: return 1 return n * factorial(n - 1) print(factorial(5))""", ] PROMPT_TEMPLATES = { "fundamentals": "Write a short Python code snippet (5-12 lines) demonstrating {topic}. " "Keep it self-contained, clean, and runnable. Avoid external I/O unless necessary. " "Output only code with no comments or explanations.", "intermediate": "Write a short Python code snippet (6-14 lines) demonstrating {topic}. " "Prefer clarity and idiomatic Python. Keep it self-contained. " "Output only code with no comments or explanations.", "advanced": "Write a concise Python example (6-14 lines) demonstrating {topic}. " "Use clear variable names and include a minimal demo. " "Output only code with no comments or explanations." } TOPICS = [ "list comprehensions", "dictionary usage", "function definition with type hints", "class with __init__ and method", "file reading and writing", "decorators", "generators and yield", "context managers", "error handling with try/except", "sorting with key functions", "lambda functions and map/filter", ] # ----------------------------- # Model loading # ----------------------------- tokenizer = None model = None model_load_error = None try: tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) model = AutoModelForCausalLM.from_pretrained(MODEL_NAME) except Exception as e: model_load_error = str(e) # ----------------------------- # Snippet generation # ----------------------------- def _strip_comment_only_lines(code: str) -> str: lines = code.splitlines() # Trim leading comment/empty lines while lines and (not lines[0].strip() or lines[0].lstrip().startswith("#")): lines.pop(0) # Trim trailing comment/empty lines while lines and (not lines[-1].strip() or lines[-1].lstrip().startswith("#")): lines.pop() return "\n".join(lines) def generate_snippet(difficulty: str, topic: str, seed: int | None = None) -> str: random.seed(seed) if model is None: return random.choice(FALLBACK_SNIPPETS) key = ("fundamentals" if difficulty == "Easy" else "intermediate" if difficulty == "Medium" else "advanced") prompt = PROMPT_TEMPLATES[key].format(topic=topic) inputs = tokenizer(prompt, return_tensors="pt") output_ids = model.generate( **inputs, max_new_tokens=160, do_sample=True, temperature=0.7, top_p=0.9, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id )[0] text = tokenizer.decode(output_ids, skip_special_tokens=True) # Extract fenced code or fallback blocks = re.findall(r"```(?:python)?\n(.*?)```", text, flags=re.DOTALL) if blocks: code = blocks[0].strip() else: parts = text.split("\n", 1) code = parts[1] if len(parts) > 1 else text lines = code.splitlines() filtered = [] for ln in lines: if len(filtered) == 0 and ln.strip().startswith("#"): continue filtered.append(ln) code = "\n".join(filtered).strip() code = _strip_comment_only_lines(code) if not code or code.count("\n") < 3 or code.count("\n") > 30: return random.choice(FALLBACK_SNIPPETS) return code # ----------------------------- # Masking logic # ----------------------------- PY_KEYWORDS = { "False", "None", "True", "and", "as", "assert", "async", "await", "break", "class", "continue", "def", "del", "elif", "else", "except", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "nonlocal", "not", "or", "pass", "raise", "return", "try", "while", "with", "yield" } def select_mask_positions(code: str, difficulty: str, rng: random.Random): reader = StringIO(code).readline maskables = [] try: for tok in tokenize.generate_tokens(reader): ttype, tstr, (sr, sc), (er, ec), _ = tok if ttype == tokenize.NAME and tstr not in PY_KEYWORDS: maskables.append((sr, sc, er, ec, tstr, "name")) elif ttype == tokenize.NAME: maskables.append((sr, sc, er, ec, tstr, "keyword")) elif ttype == tokenize.NUMBER: maskables.append((sr, sc, er, ec, tstr, "number")) elif ttype == tokenize.STRING and len(tstr) <= 18: maskables.append((sr, sc, er, ec, tstr, "string")) except tokenize.TokenError: words = re.finditer(r"\b[A-Za-z_][A-Za-z_0-9]*\b", code) return [{"start": m.start(), "end": m.end(), "text": m.group()} for m in words][:6] weights = [] for *_, text, kind in maskables: if kind == "name": w = 1.0 elif kind == "keyword": w = (0.3 if difficulty == "Easy" else 0.6 if difficulty == "Medium" else 0.9) elif kind == "number": w = 0.6 else: w = 0.5 weights.append(max(w, 0.05)) if not maskables: return [] base = 4 if difficulty == "Easy" else (6 if difficulty == "Medium" else 8) count = min(base, len(maskables)) chosen = [] avail = list(range(len(maskables))) for _ in range(count): total = sum(weights[i] for i in avail) pick = rng.random() * total cum = 0 for i in avail: cum += weights[i] if cum >= pick: chosen.append(i) avail.remove(i) break # Convert row/col to absolute offsets lines = code.splitlines(keepends=True) offsets = [] cur = 0 for line in lines: offsets.append(cur) cur += len(line) def to_abs(sr, sc, er, ec): return offsets[sr - 1] + sc, offsets[er - 1] + ec spans = [] for idx in chosen: sr, sc, er, ec, txt, _ = maskables[idx] s, e = to_abs(sr, sc, er, ec) spans.append({"start": s, "end": e, "text": txt}) spans.sort(key=lambda x: x["start"]) return spans def apply_masks(code: str, spans: list[dict]): masked, last, answers = [], 0, [] for i, sp in enumerate(spans, 1): s, e, txt = sp["start"], sp["end"], sp["text"] normal = code[last:s] masked.append(html.escape(normal)) placeholder = f"__[{i}]__" masked.append( f"{html.escape(placeholder)}" ) answers.append(txt) last = e masked.append(html.escape(code[last:])) return "".join(masked), answers # ----------------------------- # Theme CSS (Single Vibrant Yellow Theme) # ----------------------------- vibrant_css = """ """ def get_instruction_html(): return ( "