Spaces:

CodCodingCode
/

medical-test

Paused

App Files Files Community

CodCodingCode committed 18 days ago

Commit

fe61d2d

1 Parent(s): dff880e

fixed tokenizer bug

Browse files

Files changed (1) hide show

app.py +14 -15

app.py CHANGED Viewed

@@ -8,23 +8,29 @@ import gradio as gr
 REPO_ID = "CodCodingCode/llama-3.1-8b-clinical"
 SUBFOLDER = "checkpoint-45000"
 HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
-# ——— DOWNLOAD ONLY THE CHECKPOINT FILES ———
 local_cache = snapshot_download(
     repo_id=REPO_ID,
     token=HF_TOKEN,
-    allow_patterns=[f"{SUBFOLDER}/*.json", f"{SUBFOLDER}/*.safetensors"],
 )
-# ——— POINT AT THE REAL FILES ———
 MODEL_DIR = os.path.join(local_cache, SUBFOLDER)
-# ——— LOAD MODEL & TOKENIZER ———
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_DIR,
     use_fast=False,
     trust_remote_code=True,
 )
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_DIR,
     device_map="auto",
@@ -45,9 +51,7 @@ class RoleAgent:
             f"Input: {input_text}\n"
             f"Output:"
         )
-        # 1) Tokenize
         encoding = tokenizer(prompt, return_tensors="pt")
-        # 2) Move each tensor to the model's device
         inputs = {k: v.to(model.device) for k, v in encoding.items()}
         outputs = model.generate(
@@ -59,19 +63,14 @@ class RoleAgent:
         )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # extract THINKING / ANSWER if present
-        thinking, answer = "", response
-        if "THINKING:" in response and "ANSWER:" in response and "END" in response:
             block = response.split("THINKING:")[1].split("END")[0]
             thinking = block.split("ANSWER:")[0].strip()
             answer = block.split("ANSWER:")[1].strip()
-        return {
-            "instruction": f"You are {self.role_instruction}.",
-            "input": input_text,
-            "thinking": thinking,
-            "output": answer,
-        }
 # === Agents ===

 REPO_ID = "CodCodingCode/llama-3.1-8b-clinical"
 SUBFOLDER = "checkpoint-45000"
 HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
+if not HF_TOKEN:
+    raise RuntimeError("Missing HUGGINGFACE_HUB_TOKEN in env")
+# ——— 1) Download only the files in checkpoint-45000/ ———
 local_cache = snapshot_download(
     repo_id=REPO_ID,
     token=HF_TOKEN,
+    allow_patterns=[
+        f"{SUBFOLDER}/*.json",
+        f"{SUBFOLDER}/*.safetensors",
+    ],
 )
+# ——— 2) Point MODEL_DIR at that subfolder ———
 MODEL_DIR = os.path.join(local_cache, SUBFOLDER)
+# ——— 3) Load tokenizer & model from disk ———
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_DIR,
     use_fast=False,
     trust_remote_code=True,
 )
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_DIR,
     device_map="auto",
             f"Input: {input_text}\n"
             f"Output:"
         )
         encoding = tokenizer(prompt, return_tensors="pt")
         inputs = {k: v.to(model.device) for k, v in encoding.items()}
         outputs = model.generate(
         )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        thinking = ""
+        answer = response
+        if all(tag in response for tag in ("THINKING:", "ANSWER:", "END")):
             block = response.split("THINKING:")[1].split("END")[0]
             thinking = block.split("ANSWER:")[0].strip()
             answer = block.split("ANSWER:")[1].strip()
+        return {"thinking": thinking, "output": answer}
 # === Agents ===