KingNish committed on
Commit
ce2ebf6
·
verified ·
1 Parent(s): 4b48140

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -38
app.py CHANGED
@@ -6,44 +6,37 @@ from threading import Thread
6
  import spaces
7
 
8
  # --- 1. Model and Processor Setup ---
9
- # This part is loaded only once when the script starts.
10
-
11
- try:
12
- model_id = "bharatgenai/patram-7b-instruct"
13
- device = "cuda" if torch.cuda.is_available() else "cpu"
14
- print(f"Using device: {device}")
15
-
16
- # Load processor and model
17
- processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
18
- model = AutoModelForCausalLM.from_pretrained(
19
- model_id,
20
- torch_dtype=torch.float16, # Use float16 for less memory usage on GPU
21
- device_map="auto", # Automatically uses available GPUs
22
- trust_remote_code=True
23
- )
24
- print("Model and processor loaded successfully.")
25
-
26
- # --- Define and apply the chat template ---
27
- # This is crucial for multi-turn conversation
28
- chat_template = """{% for message in messages -%}
29
- {%- if (loop.index % 2 == 1 and message['role'] != 'user') or
30
- (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}
31
- {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
32
- {%- endif -%}
33
- {{ message['role'].capitalize() + ': ' + message['content'] }}
34
- {%- if not loop.last -%}
35
- {{ ' ' }}
36
- {%- endif %}
37
- {%- endfor -%}
38
- {%- if add_generation_prompt -%}
39
- {{ ' Assistant:' }}
40
- {%- endif %}"""
41
- processor.tokenizer.chat_template = chat_template
42
-
43
- except Exception as e:
44
- print(f"Error during model loading: {e}")
45
- # Exit if model can't be loaded, as the app is unusable.
46
- exit()
47
 
48
  # --- 2. Gradio Chatbot Logic with Streaming ---
49
  @spaces.GPU
 
6
  import spaces
7
 
8
  # --- 1. Model and Processor Setup ---
9
+
10
+ model_id = "bharatgenai/patram-7b-instruct"
11
+ device = "cuda" if torch.cuda.is_available() else "cpu"
12
+ print(f"Using device: {device}")
13
+
14
+ # Load processor and model
15
+ processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
16
+ model = AutoModelForCausalLM.from_pretrained(
17
+ model_id,
18
+ torch_dtype=torch.float16, # Use float16 for less memory usage on GPU
19
+ device_map="auto", # Automatically uses available GPUs
20
+ trust_remote_code=True
21
+ )
22
+ print("Model and processor loaded successfully.")
23
+
24
+ # --- Define and apply the chat template ---
25
+ # This is crucial for multi-turn conversation
26
+ chat_template = """{% for message in messages -%}
27
+ {%- if (loop.index % 2 == 1 and message['role'] != 'user') or
28
+ (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}
29
+ {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
30
+ {%- endif -%}
31
+ {{ message['role'].capitalize() + ': ' + message['content'] }}
32
+ {%- if not loop.last -%}
33
+ {{ ' ' }}
34
+ {%- endif %}
35
+ {%- endfor -%}
36
+ {%- if add_generation_prompt -%}
37
+ {{ ' Assistant:' }}
38
+ {%- endif %}"""
39
+ processor.tokenizer.chat_template = chat_template
 
 
 
 
 
 
 
40
 
41
  # --- 2. Gradio Chatbot Logic with Streaming ---
42
  @spaces.GPU