import os
import re
import time
import atexit
import subprocess

import numpy as np
import requests
import gradio as gr
from huggingface_hub import hf_hub_download

REPO = "Qwen/Qwen3-Embedding-0.6B-GGUF"
FNAME = "Qwen3-Embedding-0.6B-Q8_0.gguf"
LOCAL_MODEL_PATH = os.path.join(os.getcwd(), FNAME)

LLAMA_SERVER_BIN = os.path.join("llama.cpp", "build", "bin", "llama-server")
LLAMA_PORT = 8080
LLAMA_HOST = "127.0.0.1"
LLAMA_URL = f"http://{LLAMA_HOST}:{LLAMA_PORT}"
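
# Assumption: the endpoints used below (/v1/models, /v1/embeddings) come from
# llama-server's OpenAI-compatible HTTP API. Older builds exposed a native
# /embedding endpoint instead; adjust get_embeddings_from_server if yours does.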


def download_model():
    # The Q8_0 file for the 0.6B model is roughly 600 MB, so ~500 MB is a
    # workable "already downloaded" sanity floor (a 1 GB threshold would
    # never pass and the model would be re-downloaded on every start).
    if os.path.exists(LOCAL_MODEL_PATH) and os.path.getsize(LOCAL_MODEL_PATH) > 500_000_000:
        print("Model already exists:", LOCAL_MODEL_PATH)
        return LOCAL_MODEL_PATH
    print("Downloading model (this can take a while)...")
    # resume_download is deprecated in recent huggingface_hub; interrupted
    # downloads resume by default.
    path = hf_hub_download(repo_id=REPO, filename=FNAME, local_dir=".")
    print("Downloaded to:", path)
    return path


def build_llama_if_needed():
    if os.path.exists(LLAMA_SERVER_BIN) and os.access(LLAMA_SERVER_BIN, os.X_OK):
        print("llama-server already built:", LLAMA_SERVER_BIN)
        return
    print("Building llama.cpp (may take many minutes)...")
    res = subprocess.run(["bash", "build_llama.sh"], check=False)
    if res.returncode != 0:
        print("Build failed with code", res.returncode)
        raise SystemExit("Failed to build llama.cpp")
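
# build_llama.sh is not part of this file. A minimal sketch of what it is
# assumed to do (output path must match LLAMA_SERVER_BIN above):
#   git clone https://github.com/ggerganov/llama.cpp
#   cmake -B llama.cpp/build llama.cpp
#   cmake --build llama.cpp/build --target llama-server -j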


LLAMA_LOG_PATH = os.path.join(os.getcwd(), "llama-server.log")


def start_llama_server(model_path):
    cmd = [
        LLAMA_SERVER_BIN,
        "-m", model_path,
        "--embedding",
        "--pooling", "last",
        "--host", LLAMA_HOST,
        "--port", str(LLAMA_PORT),
        "--verbose",
    ]
    print("Starting llama-server:", " ".join(cmd))
    # Send output to a log file rather than subprocess.PIPE: with --verbose
    # the server logs enough to fill an undrained pipe buffer and block.
    log = open(LLAMA_LOG_PATH, "w")
    proc = subprocess.Popen(cmd, stdout=log, stderr=subprocess.STDOUT, text=True)
    return proc


def wait_server_ready(timeout=180):
    start = time.time()
    while time.time() - start < timeout:
        try:
            r = requests.get(LLAMA_URL + "/v1/models", timeout=3)
            if r.status_code == 200:
                print("Server ready")
                return True
        except Exception:
            pass
        time.sleep(1)
    return False
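
# Recent llama-server builds also expose GET /health, which returns 503 while
# the model is still loading; polling that instead of /v1/models would work
# too if your build has it.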


def get_embeddings_from_server(texts):
    url = LLAMA_URL + "/v1/embeddings"
    payload = {"input": texts}
    headers = {"Content-Type": "application/json"}
    r = requests.post(url, json=payload, headers=headers, timeout=120)
    if r.status_code != 200:
        raise RuntimeError(f"Embeddings request failed: {r.status_code} {r.text}")
    data = r.json()
    # OpenAI-compatible shape: {"data": [{"embedding": [...]}, ...]}
    if "data" in data and len(data["data"]) >= 1 and "embedding" in data["data"][0]:
        return [np.array(item["embedding"], dtype=np.float32) for item in data["data"]]
    # Some builds return a bare {"embedding": [...]} for a single input.
    if "embedding" in data:
        return [np.array(data["embedding"], dtype=np.float32)]
    raise RuntimeError("Unexpected embeddings response: " + str(data))
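
# Example usage once the server is ready (1024 is the dimension
# Qwen3-Embedding-0.6B should produce):
#   vecs = get_embeddings_from_server(["hello", "привет"])
#   vecs[0].shape  # -> (1024,)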


def cosine(a, b):
    """Cosine similarity with a zero-vector guard."""
    na = np.linalg.norm(a)
    nb = np.linalg.norm(b)
    if na == 0 or nb == 0:
        return 0.0
    return float(np.dot(a, b) / (na * nb))
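
# Worked example: cosine(np.array([1.0, 0.0]), np.array([1.0, 1.0]))
# = 1 / sqrt(2) ≈ 0.7071.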


model_path = download_model()
build_llama_if_needed()
proc = start_llama_server(model_path)
atexit.register(proc.terminate)  # don't leave the server running on exit

if not wait_server_ready(timeout=240):
    head = ""
    try:
        # Server output goes to the log file, so read the head from there
        # (reading proc.stderr would block while the process is alive).
        with open(LLAMA_LOG_PATH) as f:
            head = f.read(2000)
    except Exception:
        pass
    raise SystemExit("llama-server did not become ready in time. Log head:\n" + head)
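
# Optional smoke test (commented out to keep startup fast):
# print(get_embeddings_from_server(["ping"])[0][:5])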


def _norm_words(text):
    """Lowercase, keep only Latin/Cyrillic letters and digits, split into words."""
    text = text.lower()
    text = re.sub(r"[^0-9a-zа-яё\s]", " ", text)
    words = [w for w in text.split() if len(w) > 1]
    return words


def match_boolean_pattern(pattern: str, message: str) -> bool:
    """Match a flat boolean pattern against the normalized words of a message.

    The grammar is deliberately simple: a top-level AND over single words and
    parenthesized OR-groups, with no nesting.
    """
    msg_words = set(_norm_words(message))
    pat = pattern.strip()

    def check_or_group(group_text):
        parts = [p.strip() for p in re.split(r"\bOR\b", group_text, flags=re.I)]
        return any(p.lower() in msg_words for p in parts)

    # Pull out parenthesized OR-groups first, then split the rest on AND.
    remaining = pat
    and_conditions = []
    for m in re.finditer(r"\((.*?)\)", pat):
        and_conditions.append(("or_group", m.group(1)))
        remaining = remaining.replace(m.group(0), " ")
    for t in re.split(r"\bAND\b", remaining, flags=re.I):
        t = t.strip()
        if not t:
            continue
        if re.search(r"\bOR\b", t, flags=re.I):
            and_conditions.append(("or_group", t))
        else:
            # A bare segment may contain several words (an implicit AND);
            # require all of them rather than just the first.
            for w in t.split():
                and_conditions.append(("word", w))

    for typ, val in and_conditions:
        if typ == "or_group":
            if not check_or_group(val):
                return False
        elif val.lower() not in msg_words:
            return False
    return True
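
# Note the matching is on literal normalized word forms, with no stemming or
# transliteration: "Trump AND (Putin OR Biden)" fails against the Russian
# sample message because "трампа" != "trump". Patterns should use the same
# language and word forms as the messages they filter.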


def similarity_ui(pattern, message, use_boolean=False, show_raw=False):
    if use_boolean and not match_boolean_pattern(pattern, message):
        return "Boolean check: FAILED (no keyword match)"
    emb_list = get_embeddings_from_server([pattern, message])
    s = cosine(emb_list[0], emb_list[1])
    if show_raw:
        return (
            f"cosine={s:.4f}\n\n"
            f"pattern_emb(first10)={emb_list[0][:10].tolist()}\n"
            f"message_emb(first10)={emb_list[1][:10].tolist()}"
        )
    return f"{s:.4f}"


def search_ui(query, docs_text, topk):
    docs = [d.strip() for d in docs_text.splitlines() if d.strip()]
    if not docs:
        return "Empty corpus"
    # Embed corpus and query in one request; the query is the last row.
    embs = get_embeddings_from_server(docs + [query])
    D = np.stack(embs[:-1])
    q = embs[-1]
    # Cosine scores as a normalized matrix-vector product.
    scores = (D @ q) / (np.linalg.norm(D, axis=1) * np.linalg.norm(q))
    order = np.argsort(scores)[::-1][: int(topk)]
    out_lines = []
    for rank, idx in enumerate(order, start=1):
        out_lines.append(f"{rank}. score={scores[idx]:.4f}\n{docs[idx]}")
    return "\n\n".join(out_lines)


demo = gr.Blocks()
with demo:
    gr.Markdown("# Qwen3-Embedding-0.6B GGUF: pattern ↔ message test")
    with gr.Tab("Similarity (cosine)"):
        # Russian sample values are deliberate: the demo exercises
        # cross-lingual matching between English patterns and Russian text.
        p = gr.Textbox(label="Pattern", value="Meeting between Trump and Putin")
        m = gr.Textbox(label="Message", value="Встреча Трампа и Путина прошла в Женеве.")
        use_bool = gr.Checkbox(label="Boolean pattern match (fast pre-filter)", value=False)
        show_raw = gr.Checkbox(label="Show first embedding values (debug)", value=False)
        btn = gr.Button("Compare")
        out = gr.Textbox(label="Result (cosine or debug)", interactive=False, lines=6)
        btn.click(similarity_ui, inputs=[p, m, use_bool, show_raw], outputs=out)
    with gr.Tab("Semantic search"):
        q = gr.Textbox(label="Query", value="саммит Трамп Путин")
        corpus = gr.Textbox(label="Corpus (one document per line)", lines=12, value=(
            "Встреча президентов России и США прошла в Женеве.\n"
            "Лукашенко провёл переговоры с Евросоюзом.\n"
            "Джо Байден выступал в Давосе.\n"
            "Футбольный чемпионат прошёл на стадионе."
        ))
        k = gr.Number(label="Top-K", value=3, precision=0)
        btn2 = gr.Button("Search")
        out2 = gr.Textbox(label="Results", lines=12)
        btn2.click(search_ui, inputs=[q, corpus, k], outputs=out2)

demo.launch(server_name="0.0.0.0", server_port=7860)