Rausda6 committed on
Commit
e7f1392
·
verified ·
1 Parent(s): 9c80064

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -21
app.py CHANGED
@@ -60,43 +60,34 @@ def test_llm_generation():
60
 
61
 
62
  def initialize_model():
63
- """Separate model initialization with better error handling"""
64
  global model, tokenizer, generation_config
65
-
66
  try:
67
  add_log("🔄 Initializing model...")
68
-
69
  tokenizer = AutoTokenizer.from_pretrained(
70
  MODEL_ID,
71
  trust_remote_code=True,
72
- use_fast=False # Sometimes fast tokenizers cause issues
73
  )
74
-
75
- # Ensure proper padding token
76
  if tokenizer.pad_token is None:
77
  tokenizer.pad_token = tokenizer.eos_token
78
  add_log("✅ Set pad_token to eos_token")
79
-
80
- # Load model with proper device management
81
- device = "cuda" if torch.cuda.is_available() else "cpu"
82
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
83
-
84
  model = AutoModelForCausalLM.from_pretrained(
85
  MODEL_ID,
86
- torch_dtype=torch_dtype,
87
- device_map="auto" if torch.cuda.is_available() else None,
88
  trust_remote_code=True,
 
89
  low_cpu_mem_usage=True
90
  )
91
-
92
- if not torch.cuda.is_available():
93
- model = model.to(device)
94
-
95
  model.eval()
96
-
97
- # Configure generation parameters
98
  generation_config = GenerationConfig(
99
- max_new_tokens=4095, # Reduced for stability
100
  temperature=0.7,
101
  top_p=0.9,
102
  do_sample=True,
@@ -105,9 +96,16 @@ def initialize_model():
105
  repetition_penalty=1.1,
106
  length_penalty=1.0
107
  )
108
-
109
  add_log(f"✅ Model loaded successfully on device: {model.device}")
110
  return True
 
 
 
 
 
 
 
111
 
112
  except Exception as e:
113
  error_msg = f"❌ Model initialization failed: {str(e)}"
 
60
 
61
 
62
  def initialize_model():
 
63
  global model, tokenizer, generation_config
64
+
65
  try:
66
  add_log("🔄 Initializing model...")
67
+
68
  tokenizer = AutoTokenizer.from_pretrained(
69
  MODEL_ID,
70
  trust_remote_code=True,
71
+ use_fast=False
72
  )
73
+
 
74
  if tokenizer.pad_token is None:
75
  tokenizer.pad_token = tokenizer.eos_token
76
  add_log("✅ Set pad_token to eos_token")
77
+
78
+ # Force GPU settings
 
 
 
79
  model = AutoModelForCausalLM.from_pretrained(
80
  MODEL_ID,
81
+ torch_dtype=torch.float16,
 
82
  trust_remote_code=True,
83
+ device_map={"": 0}, # <- force GPU:0
84
  low_cpu_mem_usage=True
85
  )
86
+
 
 
 
87
  model.eval()
88
+
 
89
  generation_config = GenerationConfig(
90
+ max_new_tokens=4096,
91
  temperature=0.7,
92
  top_p=0.9,
93
  do_sample=True,
 
96
  repetition_penalty=1.1,
97
  length_penalty=1.0
98
  )
99
+
100
  add_log(f"✅ Model loaded successfully on device: {model.device}")
101
  return True
102
+
103
+ except Exception as e:
104
+ error_msg = f"❌ Model initialization failed: {str(e)}"
105
+ add_log(error_msg)
106
+ add_log(f"Traceback: {traceback.format_exc()}")
107
+ return False
108
+
109
 
110
  except Exception as e:
111
  error_msg = f"❌ Model initialization failed: {str(e)}"