KingNish committed on
Commit
ce2ebf6
·
verified ·
1 Parent(s): 4b48140

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -38
app.py CHANGED
@@ -6,44 +6,37 @@ from threading import Thread
6
  import spaces
7
 
8
  # --- 1. Model and Processor Setup ---
9
- # This part is loaded only once when the script starts.
10
-
11
- try:
12
- model_id = "bharatgenai/patram-7b-instruct"
13
- device = "cuda" if torch.cuda.is_available() else "cpu"
14
- print(f"Using device: {device}")
15
-
16
- # Load processor and model
17
- processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
18
- model = AutoModelForCausalLM.from_pretrained(
19
- model_id,
20
- torch_dtype=torch.float16, # Use float16 for less memory usage on GPU
21
- device_map="auto", # Automatically uses available GPUs
22
- trust_remote_code=True
23
- )
24
- print("Model and processor loaded successfully.")
25
-
26
- # --- Define and apply the chat template ---
27
- # This is crucial for multi-turn conversation
28
- chat_template = """{% for message in messages -%}
29
- {%- if (loop.index % 2 == 1 and message['role'] != 'user') or
30
- (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}
31
- {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
32
- {%- endif -%}
33
- {{ message['role'].capitalize() + ': ' + message['content'] }}
34
- {%- if not loop.last -%}
35
- {{ ' ' }}
36
- {%- endif %}
37
- {%- endfor -%}
38
- {%- if add_generation_prompt -%}
39
- {{ ' Assistant:' }}
40
- {%- endif %}"""
41
- processor.tokenizer.chat_template = chat_template
42
-
43
- except Exception as e:
44
- print(f"Error during model loading: {e}")
45
- # Exit if model can't be loaded, as the app is unusable.
46
- exit()
47
 
48
  # --- 2. Gradio Chatbot Logic with Streaming ---
49
  @spaces.GPU
 
6
  import spaces
7
 
8
  # --- 1. Model and Processor Setup ---
9
+
10
+ model_id = "bharatgenai/patram-7b-instruct"
11
+ device = "cuda" if torch.cuda.is_available() else "cpu"
12
+ print(f"Using device: {device}")
13
+
14
+ # Load processor and model
15
+ processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
16
+ model = AutoModelForCausalLM.from_pretrained(
17
+ model_id,
18
+ torch_dtype=torch.float16, # Use float16 for less memory usage on GPU
19
+ device_map="auto", # Automatically uses available GPUs
20
+ trust_remote_code=True
21
+ )
22
+ print("Model and processor loaded successfully.")
23
+
24
+ # --- Define and apply the chat template ---
25
+ # This is crucial for multi-turn conversation
26
+ chat_template = """{% for message in messages -%}
27
+ {%- if (loop.index % 2 == 1 and message['role'] != 'user') or
28
+ (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}
29
+ {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
30
+ {%- endif -%}
31
+ {{ message['role'].capitalize() + ': ' + message['content'] }}
32
+ {%- if not loop.last -%}
33
+ {{ ' ' }}
34
+ {%- endif %}
35
+ {%- endfor -%}
36
+ {%- if add_generation_prompt -%}
37
+ {{ ' Assistant:' }}
38
+ {%- endif %}"""
39
+ processor.tokenizer.chat_template = chat_template
 
 
 
 
 
 
 
40
 
41
  # --- 2. Gradio Chatbot Logic with Streaming ---
42
  @spaces.GPU