Coool2 committed on
Commit
05072aa
·
verified ·
1 Parent(s): 3ff7703

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +4 -7
agent.py CHANGED
@@ -78,20 +78,17 @@ proj_llm = HuggingFaceLLM(
78
  model_name=model_id,
79
  tokenizer_name=model_id,
80
  device_map="auto",
81
- model_kwargs={
82
- "torch_dtype": "auto",
83
- "max_memory": get_max_memory_config("10GB")
84
- },
85
  generate_kwargs={"temperature": 0.1, "top_p": 0.3} # More focused
86
  )
87
 
88
  code_llm = HuggingFaceLLM(
89
  model_name="Qwen/Qwen2.5-Coder-3B",
90
  tokenizer_name="Qwen/Qwen2.5-Coder-3B",
91
- device_map="auto",
92
  model_kwargs={
93
- "torch_dtype": "auto",
94
- "max_memory": get_max_memory_config("3GB")
95
  },
96
  # Set generation parameters for precise, non-creative code output
97
  generate_kwargs={"temperature": 0.0, "do_sample": False}
 
78
  model_name=model_id,
79
  tokenizer_name=model_id,
80
  device_map="auto",
81
+ model_kwargs={"torch_dtype": torch.float16},
 
 
 
82
  generate_kwargs={"temperature": 0.1, "top_p": 0.3} # More focused
83
  )
84
 
85
  code_llm = HuggingFaceLLM(
86
  model_name="Qwen/Qwen2.5-Coder-3B",
87
  tokenizer_name="Qwen/Qwen2.5-Coder-3B",
88
+ device ="cpu",
89
  model_kwargs={
90
+ "torch_dtype": torch.float32, # Use float32 for CPU
91
+ "low_cpu_mem_usage": True, # Still get memory optimization
92
  },
93
  # Set generation parameters for precise, non-creative code output
94
  generate_kwargs={"temperature": 0.0, "do_sample": False}