CodCodingCode committed
Commit fe61d2d · 1 Parent(s): dff880e

fixed tokenizer bug

Files changed (1):
1. app.py (+14 -15)
app.py CHANGED
@@ -8,23 +8,29 @@ import gradio as gr
 REPO_ID = "CodCodingCode/llama-3.1-8b-clinical"
 SUBFOLDER = "checkpoint-45000"
 HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
+if not HF_TOKEN:
+    raise RuntimeError("Missing HUGGINGFACE_HUB_TOKEN in env")

-# ——— DOWNLOAD ONLY THE CHECKPOINT FILES ———
+# ——— 1) Download only the files in checkpoint-45000/ ———
 local_cache = snapshot_download(
     repo_id=REPO_ID,
     token=HF_TOKEN,
-    allow_patterns=[f"{SUBFOLDER}/*.json", f"{SUBFOLDER}/*.safetensors"],
+    allow_patterns=[
+        f"{SUBFOLDER}/*.json",
+        f"{SUBFOLDER}/*.safetensors",
+    ],
 )

-# ——— POINT AT THE REAL FILES ———
+# ——— 2) Point MODEL_DIR at that subfolder ———
 MODEL_DIR = os.path.join(local_cache, SUBFOLDER)

-# ——— LOAD MODEL & TOKENIZER ———
+# ——— 3) Load tokenizer & model from disk ———
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_DIR,
     use_fast=False,
     trust_remote_code=True,
 )
+
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_DIR,
     device_map="auto",
@@ -45,9 +51,7 @@ class RoleAgent:
             f"Input: {input_text}\n"
             f"Output:"
         )
-        # 1) Tokenize
         encoding = tokenizer(prompt, return_tensors="pt")
-        # 2) Move each tensor to the model's device
         inputs = {k: v.to(model.device) for k, v in encoding.items()}

         outputs = model.generate(
@@ -59,19 +63,14 @@
         )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)

-        # extract THINKING / ANSWER if present
-        thinking, answer = "", response
-        if "THINKING:" in response and "ANSWER:" in response and "END" in response:
+        thinking = ""
+        answer = response
+        if all(tag in response for tag in ("THINKING:", "ANSWER:", "END")):
             block = response.split("THINKING:")[1].split("END")[0]
             thinking = block.split("ANSWER:")[0].strip()
             answer = block.split("ANSWER:")[1].strip()

-        return {
-            "instruction": f"You are {self.role_instruction}.",
-            "input": input_text,
-            "thinking": thinking,
-            "output": answer,
-        }
+        return {"thinking": thinking, "output": answer}


 # === Agents ===
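
For reference, the download step in the first hunk relies on `snapshot_download` fetching only the files that match `allow_patterns`, so `MODEL_DIR` ends up holding just the checkpoint's JSON configs and safetensors shards. A minimal standalone sketch of that pattern, using the same repo and subfolder as the diff (the directory listing at the end is added here purely for illustration):

import os
from huggingface_hub import snapshot_download

# Download only checkpoint-45000/*.json and checkpoint-45000/*.safetensors.
local_cache = snapshot_download(
    repo_id="CodCodingCode/llama-3.1-8b-clinical",
    token=os.getenv("HUGGINGFACE_HUB_TOKEN"),
    allow_patterns=[
        "checkpoint-45000/*.json",
        "checkpoint-45000/*.safetensors",
    ],
)

# Illustration only: show which files actually landed on disk.
model_dir = os.path.join(local_cache, "checkpoint-45000")
for name in sorted(os.listdir(model_dir)):
    print(name)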
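
The reworked THINKING/ANSWER extraction in the last hunk can also be exercised on its own. A minimal sketch of that branch, where the sample response string is invented for illustration:

# Standalone sketch of the new tag-parsing branch; the sample
# response below is made up for demonstration purposes.
response = (
    "THINKING: The symptoms suggest a viral infection. "
    "ANSWER: Likely viral pharyngitis; recommend rest and fluids. END"
)

thinking = ""
answer = response
if all(tag in response for tag in ("THINKING:", "ANSWER:", "END")):
    block = response.split("THINKING:")[1].split("END")[0]
    thinking = block.split("ANSWER:")[0].strip()
    answer = block.split("ANSWER:")[1].strip()

print({"thinking": thinking, "output": answer})
# {'thinking': 'The symptoms suggest a viral infection.',
#  'output': 'Likely viral pharyngitis; recommend rest and fluids.'}

If any of the three tags is missing, the guard falls through and the raw response is returned unchanged as the output, which matches the fallback behavior in the diff.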