# SchoolSpirit AI — Hugging Face Space chat application.
import datetime
import os
import re
import time
import traceback

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers.utils import logging as hf_logging
# ---------------------------------------------------------------------------
# Persistent cache + request log
# ---------------------------------------------------------------------------
# Keep model downloads on the Space's persistent /data volume.
# NOTE(review): transformers is imported above this assignment; some HF
# libraries read HF_HOME at import time — confirm this takes effect, or
# move it before the imports.
os.environ["HF_HOME"] = "/data/.huggingface"

# Append-only request log, also on the persistent volume.
LOG_FILE = "/data/requests.log"
def log(msg: str) -> None:
    """Print *msg* with a millisecond UTC timestamp and best-effort append it to LOG_FILE.

    Logging must never take down the app: any OS-level failure to write the
    log file (missing /data volume, permissions, read-only FS) is swallowed
    after the stdout print has already succeeded.
    """
    # datetime.utcnow() is deprecated (Python 3.12+); use an aware UTC time.
    ts = datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M:%S.%f")[:-3]
    line = f"[{ts}] {msg}"
    print(line, flush=True)
    try:
        with open(LOG_FILE, "a") as f:
            f.write(line + "\n")
    except OSError:
        # Was `except FileNotFoundError`, which let PermissionError or a
        # read-only volume crash the request; catch all I/O errors instead.
        pass
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"  # small enough for a CPU Space
MAX_TURNS = 4       # number of trailing user/AI exchange pairs retained
MAX_TOKENS = 64     # generation budget per reply
MAX_INPUT_CH = 300  # hard cap on user message length

# Persona and guardrails prepended to every conversation.
SYSTEM_MSG = (
    "You are **SchoolSpirit AI**, the official digital mascot for "
    "SchoolSpirit AI LLC, founded by Charles Norton in 2025. The company "
    "specializes in on‑prem AI chat mascots, custom fine‑tuning of language "
    "models, and turnkey GPU servers for K‑12 schools and education vendors.\n\n"
    "GUIDELINES:\n"
    "• Respond in a warm, encouraging tone suitable for students, parents, "
    "and staff.\n"
    "• Keep answers concise (≤ 4 sentences) unless asked for detail.\n"
    "• If unsure or out of scope, say you’re not sure and offer human follow‑up.\n"
    "• No personal data collection, no medical/legal/financial advice.\n"
    "• Maintain professionalism—no profanity, politics, or mature themes."
)

# First assistant bubble shown before the user types anything.
WELCOME_MSG = "Welcome to SchoolSpirit AI! Do you have any questions?"
# ---------------------------------------------------------------------------
# Load model
# ---------------------------------------------------------------------------
hf_logging.set_verbosity_error()  # silence transformers progress chatter

# MODEL_ERR stays None on success; otherwise it holds a human-readable error
# string that chat_fn() returns verbatim so the UI degrades gracefully.
try:
    log("Loading model …")
    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID, device_map="auto", torch_dtype="auto"
    )
    gen = pipeline(
        "text-generation",
        model=model,
        tokenizer=tok,
        temperature=0.6,
        do_sample=True,
        max_new_tokens=MAX_TOKENS,
    )
    MODEL_ERR = None
    log("Model loaded ✔")
except Exception as exc:  # noqa: BLE001
    gen = None
    MODEL_ERR = f"Model load error: {exc}"
    log(MODEL_ERR)
def clean(t: str) -> str:
    """Collapse whitespace runs in *t* to single spaces; return "…" when empty.

    Replaces the former ``clean = lambda ...`` assignment (PEP 8 E731).
    """
    return re.sub(r"\s+", " ", t.strip()) or "…"


def trim(m: list) -> list:
    """Keep the leading system message plus the last MAX_TURNS user/AI pairs."""
    if len(m) <= 1 + MAX_TURNS * 2:
        return m
    return [m[0]] + m[-MAX_TURNS * 2 :]
# ---------------------------------------------------------------------------
# Chat logic
# ---------------------------------------------------------------------------
def chat_fn(user_msg: str, history: list):
    """Generate one assistant reply for the Gradio ChatInterface.

    Parameters
    ----------
    user_msg : raw text from the input box.
    history  : prior turns as ``{"role", "content"}`` dicts (Gradio
               "messages" format; Gradio never includes a "system" entry).

    Returns the reply string; ChatInterface appends it to the visible history.
    """
    log(f"User sent {len(user_msg)} chars")

    # Prepend the persona prompt (plus the welcome line on the very first
    # call) WITHOUT discarding the turns Gradio hands us. The previous code
    # replaced `history` wholesale whenever history[0] was not a system
    # message — which is every call, since Gradio histories never contain a
    # "system" role — so all prior conversation context was lost each turn.
    if not history or history[0]["role"] != "system":
        seed = [{"role": "system", "content": SYSTEM_MSG}]
        if not history:
            seed.append({"role": "assistant", "content": WELCOME_MSG})
        history = seed + history

    if MODEL_ERR:
        return MODEL_ERR

    user_msg = clean(user_msg or "")
    if not user_msg:
        return "Please type something."
    if len(user_msg) > MAX_INPUT_CH:
        return f"Message too long (>{MAX_INPUT_CH} chars)."

    history.append({"role": "user", "content": user_msg})
    history = trim(history)

    # Flatten messages into a plain-text prompt ending with an open "AI:" turn.
    prompt_lines = [
        m["content"]
        if m["role"] == "system"
        else f'{"User" if m["role"] == "user" else "AI"}: {m["content"]}'
        for m in history
    ] + ["AI:"]
    prompt = "\n".join(prompt_lines)
    log(f"Prompt {len(prompt)} chars → generating")

    t0 = time.time()
    try:
        raw = gen(prompt)[0]["generated_text"]
        # The pipeline echoes the prompt, so take only the text after the
        # LAST "AI:" marker. The old split("AI:", 1) took everything after
        # the FIRST marker, leaking earlier assistant turns into the reply.
        reply = clean(raw.rsplit("AI:", 1)[-1])
        # Cut off anything the model hallucinated past its own turn.
        # (maxsplit as keyword: positional form is deprecated in 3.13.)
        reply = re.split(r"\b(?:User:|AI:)", reply, maxsplit=1)[0].strip()
        log(f"generate() {time.time()-t0:.2f}s, reply {len(reply)} chars")
    except Exception:  # noqa: BLE001 — never let inference errors reach the UI
        log("❌ Inference exception:\n" + traceback.format_exc())
        reply = "Sorry—backend crashed. Please try again later."
    return reply
# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
gr.ChatInterface(
    fn=chat_fn,
    chatbot=gr.Chatbot(
        height=480,
        type="messages",
        # With type="messages" the initial value must be role/content dicts;
        # the previous tuple form [("", WELCOME_MSG)] is the legacy "tuples"
        # format and also rendered an empty user bubble.
        value=[{"role": "assistant", "content": WELCOME_MSG}],
    ),
    title="SchoolSpirit AI Chat",
    theme=gr.themes.Soft(primary_hue="blue"),
    type="messages",
).launch()