File size: 11,736 Bytes
6b29344
 
 
 
 
 
65aa5a5
 
 
 
fe61d2d
 
65aa5a5
d0726f5
65aa5a5
6b29344
 
 
6555fdc
 
dfc02f9
 
 
 
 
6b29344
d0726f5
 
 
6555fdc
 
d0726f5
6b29344
fe61d2d
6b29344
 
d0726f5
6b29344
d0726f5
 
 
5d7bd20
d0726f5
 
 
 
 
 
fe61d2d
6b29344
 
d0726f5
6b29344
 
 
 
dfc02f9
 
 
 
 
 
6b29344
 
 
 
2ebe745
678ce59
 
6a2015d
6b29344
 
 
0576ce3
 
 
6b29344
d1e0ac0
 
dff880e
d1e0ac0
6b29344
ceaba60
6b29344
 
d1e0ac0
6b29344
d1e0ac0
6b29344
ceaba60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d25cc99
ceaba60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330b156
ceaba60
330b156
6b29344
ceaba60
fe61d2d
6b29344
 
 
 
d1e0ac0
 
 
6b29344
 
d1e0ac0
 
 
01e6a62
 
d1e0ac0
 
 
6b29344
 
d1e0ac0
 
 
6b29344
 
d25cc99
 
 
 
 
 
 
 
 
 
 
 
 
6b29344
 
 
 
 
 
 
 
474ac56
 
 
 
 
 
 
6b29344
474ac56
 
 
 
6b29344
474ac56
 
 
6b29344
474ac56
 
 
6b29344
474ac56
 
 
 
 
6b29344
2401693
 
 
 
 
474ac56
2401693
6b29344
 
 
 
474ac56
6b29344
 
 
474ac56
6b29344
 
 
474ac56
 
6b29344
 
474ac56
6b29344
 
474ac56
 
6b29344
474ac56
 
6b29344
 
19f6384
c20d411
 
474ac56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b29344
474ac56
 
 
 
19f6384
474ac56
 
 
 
 
 
 
 
 
 
 
 
c20d411
 
474ac56
 
 
 
03c8c47
c20d411
 
474ac56
19f6384
c20d411
474ac56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b29344
 
 
474ac56
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
import os
import pathlib
import re

import gradio as gr
import torch
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer, AutoModelForCausalLM

# ——— CONFIG ———
# Hub repository that holds the fine-tuned model, and the checkpoint folder
# inside it whose weights are loaded below.
REPO_ID = "CodCodingCode/llama-3.1-8b-clinical"
SUBFOLDER = "checkpoint-45000"
# The access token comes from the environment; fail fast when it is missing
# instead of letting the download fail later.
HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("Missing HUGGINGFACE_HUB_TOKEN in env")

# ——— 1) Download the full repo ———
local_cache = snapshot_download(
    repo_id=REPO_ID,
    token=HF_TOKEN,
)
print("[DEBUG] snapshot_download → local_cache:", local_cache)
# Lists the entries found under the checkpoint subfolder of the snapshot.
print(
    "[DEBUG] MODEL root contents:",
    list(pathlib.Path(local_cache).glob(f"{SUBFOLDER}/*")),
)

# ——— 2) Repo root contains tokenizer.json; model shards live in the checkpoint subfolder ———
MODEL_DIR = local_cache
MODEL_SUBFOLDER = SUBFOLDER
print("[DEBUG] MODEL_DIR:", MODEL_DIR)
print("[DEBUG] MODEL_DIR files:", os.listdir(MODEL_DIR))
print("[DEBUG] Checkpoint files:", os.listdir(os.path.join(MODEL_DIR, MODEL_SUBFOLDER)))

# ——— 3) Load tokenizer & model from disk ———
# The tokenizer is read from the snapshot root, the weights from the
# checkpoint subfolder (see step 2).
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_DIR,
    use_fast=True,
)
print("[DEBUG] Loaded fast tokenizer object:", tokenizer, "type:", type(tokenizer))
# Confirm tokenizer files are present
print("[DEBUG] Files in MODEL_DIR for tokenizer:", os.listdir(MODEL_DIR))
# Inspect tokenizer's initialization arguments
try:
    print("[DEBUG] Tokenizer init_kwargs:", tokenizer.init_kwargs)
except AttributeError:
    print("[DEBUG] No init_kwargs attribute on tokenizer.")

# Half-precision weights with automatic device placement.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    subfolder=MODEL_SUBFOLDER,
    device_map="auto",
    torch_dtype=torch.float16,
)
# Inference only: switch the module to evaluation mode.
model.eval()
print(
    "[DEBUG] Loaded model object:",
    model.__class__.__name__,
    "device:",
    next(model.parameters()).device,
)


# === Role Agent with instruction/input/output format ===
class RoleAgent:
    """Run a causal language model under a fixed role instruction.

    Every call to :meth:`act` builds an ``instruction / input / output``
    prompt, samples a completion and reduces it to one cleaned answer string.

    Args:
        role_instruction: Text placed after ``instruction:`` in every prompt.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        model: Model exposing ``device`` and ``generate``.
        max_new_tokens: Upper bound on generated tokens per call.
        temperature: Sampling temperature passed to ``generate``.
    """

    # Section labels of the prompt template; the model sometimes repeats them.
    _PROMPT_MARKERS = ("instruction:", "input:", "output:")
    # Case-insensitive "output:" marker used by the fallback extraction.
    _OUTPUT_MARKER_RE = re.compile(r"output:", re.IGNORECASE)
    # Words that mark a sentence as clinically relevant when trimming long answers.
    _MEDICAL_KEYWORDS = (
        "patient",
        "pain",
        "symptom",
        "diagnosis",
        "treatment",
        "knee",
        "reports",
        "experiencing",
    )

    def __init__(
        self,
        role_instruction,
        tokenizer,
        model,
        max_new_tokens=128,
        temperature=0.7,
    ):
        self.tokenizer = tokenizer
        self.model = model
        self.role_instruction = role_instruction
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature

    def act(self, input_text):
        """Generate and clean one response for ``input_text``.

        Args:
            input_text: Text placed after ``input:`` in the prompt.

        Returns:
            A dict with keys ``"thinking"`` (currently always ``""``) and
            ``"output"`` (the cleaned answer).
        """
        prompt = (
            f"instruction: {self.role_instruction}\n"
            f"input: {input_text}\n"
            f"output:"
        )
        encoding = self.tokenizer(prompt, return_tensors="pt")
        inputs = {k: v.to(self.model.device) for k, v in encoding.items()}

        outputs = self.model.generate(
            **inputs,
            max_new_tokens=self.max_new_tokens,
            do_sample=True,
            temperature=self.temperature,
            pad_token_id=self.tokenizer.eos_token_id,
        )

        # The returned sequence starts with the prompt tokens. Drop them by
        # token count: slicing the decoded text by len(prompt) is only correct
        # when decoding reproduces the prompt character-for-character.
        prompt_token_count = inputs["input_ids"].shape[-1]
        generated_ids = outputs[0][prompt_token_count:]
        generated_text = self.tokenizer.decode(
            generated_ids, skip_special_tokens=True
        ).strip()

        answer = self._clean_response(generated_text)

        # The reported length is that of the raw completion (prompt excluded).
        print(
            f"[CLEANED RESPONSE] Original length: {len(generated_text)}, Cleaned: '{answer}'"
        )

        thinking = ""  # For now, we'll focus on getting clean answers
        return {"thinking": thinking, "output": answer}

    @classmethod
    def _clean_response(cls, generated_text):
        """Reduce a raw completion to a single clean answer string.

        Steps: drop blank lines and lines that begin with a prompt label, pick
        the first line longer than 20 characters (else the first remaining
        line), strip leftover labels, and for long answers mentioning
        "patient" keep at most the first two keyword-bearing sentences.

        Args:
            generated_text: Completion text with the prompt already removed.

        Returns:
            The cleaned answer; ``""`` when nothing usable was generated.
        """
        stripped_lines = (line.strip() for line in generated_text.split("\n"))
        clean_lines = [
            line
            for line in stripped_lines
            if line and not line.startswith(cls._PROMPT_MARKERS)
        ]

        if clean_lines:
            # Prefer the first substantial line; fall back to the first line.
            answer = next(
                (line for line in clean_lines if len(line) > 20), clean_lines[0]
            )
        else:
            # Every line was blank or label-prefixed: keep whatever follows the
            # last "output:" marker, preserving the original letter case.
            answer = cls._OUTPUT_MARKER_RE.split(generated_text)[-1].strip()

        # Additional cleanup - remove any remaining instruction artifacts
        for marker in cls._PROMPT_MARKERS:
            answer = answer.replace(marker, "")
        answer = answer.strip()

        # If answer is still messy, try to extract the actual medical content
        if "patient" in answer.lower() and len(answer) > 100:
            sentences = (part.strip() for part in answer.split("."))
            medical_sentences = [
                sentence
                for sentence in sentences
                if len(sentence) > 10
                and any(word in sentence.lower() for word in cls._MEDICAL_KEYWORDS)
            ]
            if medical_sentences:
                # Take first 2 medical sentences
                answer = ". ".join(medical_sentences[:2])
                if not answer.endswith("."):
                    answer += "."

        return answer


# === Agents ===
# One agent per pipeline stage; all four share the same tokenizer and model
# and differ only in the role instruction placed in the prompt.
summarizer = RoleAgent(
    "You are a clinical summarizer trained to extract structured vignettes from doctor–patient dialogues.",
    tokenizer,
    model,
)
diagnoser = RoleAgent(
    "You are a board-certified diagnostician that diagnoses patients.",
    tokenizer,
    model,
)
questioner = RoleAgent(
    "You are a physician asking questions to diagnose a patient.",
    tokenizer,
    model,
)
treatment_agent = RoleAgent(
    "You are a board-certified clinician. Based on the diagnosis and patient vignette provided below, suggest a concise treatment plan that could realistically be initiated by a primary care physician or psychiatrist.",
    tokenizer,
    model,
)

# NOTE(review): the string literal below is a bare expression statement with no
# runtime effect. It looks like a pasted debug capture in which the prompt text
# appears twice before the answer — presumably a model echo; confirm before
# relying on it. Its labels are capitalized ("Instruction:", "Input:",
# "Output:"), unlike the lowercase labels RoleAgent.act writes.
"""[DEBUG] prompt: Instruction: You are a clinical summarizer trained to extract structured vignettes from doctor–patient dialogues.
Input: Doctor: What brings you in today?
Patient: I am a male. I am 15. My knee hurts. What may be the issue with my knee?

Previous Vignette: 
Output:
Instruction: You are a clinical summarizer trained to extract structured vignettes from doctor–patient dialogues.
Input: Doctor: What brings you in today?
Patient: I am a male. I am 15. My knee hurts. What may be the issue with my knee?

Previous Vignette: 
Output: The patient is a 15-year-old male presenting with knee pain."""


# === Inference State ===
# Module-level placeholders. In the code shown here none of the functions read
# or update them: simulate_interaction receives its history as a parameter and
# binds `summary` / `diagnosis` as locals (there is no `global` statement).
conversation_history = []
summary = ""
diagnosis = ""


# === Gradio Inference ===
def simulate_interaction(user_input, conversation_history=None):
    """Run one consultation turn over the accumulated conversation.

    The patient's message is appended to the history, then the agents run in
    sequence: summarize the conversation, diagnose from the summary, ask a
    follow-up question, and propose a treatment plan.

    Args:
        user_input: The patient's latest message.
        conversation_history: Earlier ``"Doctor: ..."`` / ``"Patient: ..."``
            lines. ``None`` or an empty list starts a new conversation, which
            is seeded with the doctor's opening question. The list passed in
            is not modified.

    Returns:
        A dict with the agent results under ``"summary"``, ``"diagnosis"``,
        ``"question"`` and ``"treatment"`` (each the dict returned by
        ``RoleAgent.act``) and the updated history list under
        ``"conversation"``.
    """
    if not conversation_history:
        # New conversation: callers that normalise a missing history to []
        # must get the same opening line as callers that pass None.
        history = ["Doctor: What brings you in today?"]
    else:
        history = list(conversation_history)
    history.append(f"Patient: {user_input}")

    # Summarize the full conversation history
    sum_in = "\n".join(history)
    sum_out = summarizer.act(sum_in)
    summary = sum_out["output"]

    # Diagnose based on summary
    diag_out = diagnoser.act(summary)
    diagnosis = diag_out["output"]

    # Generate next question based on current understanding
    q_in = f"Vignette: {summary}\nCurrent Estimated Diagnosis: {diagnosis}"
    q_out = questioner.act(q_in)

    # Add doctor's response to history
    history.append(f"Doctor: {q_out['output']}")

    # Generate treatment plan (but don't end conversation)
    treatment_out = treatment_agent.act(f"Diagnosis: {diagnosis}\nVignette: {summary}")

    return {
        "summary": sum_out,
        "diagnosis": diag_out,
        "question": q_out,
        "treatment": treatment_out,
        "conversation": history,  # Return full history list
    }


# === Gradio UI ===
def ui_fn(user_input):
    """Non-stateful version for testing.

    Runs a single turn with no prior history and renders every agent result
    plus the conversation as one text report.

    Args:
        user_input: The patient's message.

    Returns:
        The formatted multi-section report string.
    """
    res = simulate_interaction(user_input)
    transcript = "\n".join(res["conversation"])
    # The section icons were stored as mis-encoded byte sequences and rendered
    # as garbage; they are restored here. The SUMMARY and DIAGNOSIS icons had
    # lost a byte, so those two are best-guess restorations.
    return f"""📋 Vignette Summary:
💭 THINKING: {res['summary']['thinking']}
📝 SUMMARY: {res['summary']['output']}

🩺 Diagnosis:
💭 THINKING: {res['diagnosis']['thinking']}
🔍 DIAGNOSIS: {res['diagnosis']['output']}

❓ Follow-up Question:
💭 THINKING: {res['question']['thinking']}
👨‍⚕️ DOCTOR: {res['question']['output']}

💊 Treatment Plan:
💭 THINKING: {res['treatment']['thinking']}
📋 TREATMENT: {res['treatment']['output']}

💬 Full Conversation:
{transcript}
"""


# === Stateful Gradio UI ===
def stateful_ui_fn(user_input, history):
    """Proper stateful conversation handler.

    Args:
        user_input: The patient's latest message.
        history: Conversation lines carried over from the previous turn, or
            ``None`` on the first interaction.

    Returns:
        A ``(display_text, updated_history)`` tuple: the rendered transcript
        with the current assessment, and the history to feed into the next
        turn.
    """
    # Initialize history if first interaction
    if history is None:
        history = []

    # Run one turn of interaction with accumulated history
    res = simulate_interaction(user_input, history)

    # Get the updated conversation history
    updated_history = res["conversation"]
    transcript = "\n".join(updated_history)

    # Format the display output. The icons were stored as mis-encoded byte
    # sequences and are restored here; the Diagnosis icon had lost a byte and
    # is a best-guess restoration.
    display_output = f"""💬 Conversation:
{transcript}

📋 Current Assessment:
🔍 Diagnosis: {res['diagnosis']['output']}
💊 Treatment Plan: {res['treatment']['output']}
"""

    # Return display text and updated history for next turn
    return display_output, updated_history


def chat_interface(user_input, history):
    """Chat-style handler: reply with the doctor's next question only.

    Args:
        user_input: The patient's latest message.
        history: Conversation lines from earlier turns, or ``None`` on the
            first call.

    Returns:
        A ``(doctor_response, updated_history)`` tuple.
    """
    prior_turns = [] if history is None else history
    turn = simulate_interaction(user_input, prior_turns)
    return turn["question"]["output"], turn["conversation"]


# Create two different interfaces
# Both pair a textbox with a gr.State component on the input and output side,
# so the history list returned by the handler is passed back in on the next
# call.

# Full consultation view: transcript plus current diagnosis and treatment plan
# (rendered by stateful_ui_fn).
demo_stateful = gr.Interface(
    fn=stateful_ui_fn,
    inputs=[
        gr.Textbox(
            label="Patient Response",
            placeholder="Describe your symptoms or answer the doctor's question...",
        ),
        gr.State(),  # holds the conversation history
    ],
    outputs=[
        gr.Textbox(label="Medical Consultation", lines=15),
        gr.State(),  # returns the updated history
    ],
    title="🧠 AI Doctor - Full Medical Consultation",
    description="Have a conversation with an AI doctor. Each response builds on the previous conversation.",
)

# Minimal chat view: shows only the doctor's latest question (chat_interface).
demo_chat = gr.Interface(
    fn=chat_interface,
    inputs=[
        gr.Textbox(label="Your Response", placeholder="Tell me about your symptoms..."),
        gr.State(),  # conversation history carried between calls
    ],
    outputs=[
        gr.Textbox(label="Doctor", lines=5),
        gr.State(),  # updated history for the next call
    ],
    title="🩺 AI Doctor Chat",
    description="Simple chat interface with the AI doctor.",
)

# Only launch a server when run as a script; importing this module still
# builds both interfaces above.
if __name__ == "__main__":
    # Launch the stateful version by default
    demo_stateful.launch(share=True)
    # Uncomment the line below to use the chat version instead:
    # demo_chat.launch(share=True)