Create app.py
app.py CHANGED
@@ -86,7 +86,7 @@ def initialize_llm():
 
     try:
         tokenizer = AutoTokenizer.from_pretrained(HF_MODEL, token=HF_API_TOKEN)
-        model = AutoModelForCausalLM.from_pretrained(HF_MODEL, token=HF_API_TOKEN, device_map="
+        model = AutoModelForCausalLM.from_pretrained(HF_MODEL, token=HF_API_TOKEN, device_map="auto")
         logger.info(f"Initialized local Hugging Face model: {HF_MODEL}")
         return (model, tokenizer), "hf_local"
     except Exception as e:
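This hunk completes a `from_pretrained` call that was committed truncated mid-argument, leaving an unterminated string. A minimal sketch of the corrected loading pattern follows; the model id and token below are placeholders, not the values app.py actually uses, and `device_map="auto"` additionally requires the `accelerate` package to be installed:

from transformers import AutoModelForCausalLM, AutoTokenizer

HF_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"  # example id only; app.py defines its own
HF_API_TOKEN = "hf_..."  # placeholder; in practice read from the environment

tokenizer = AutoTokenizer.from_pretrained(HF_MODEL, token=HF_API_TOKEN)
# device_map="auto" lets accelerate place the weights on the available GPU(s) or CPU.
model = AutoModelForCausalLM.from_pretrained(HF_MODEL, token=HF_API_TOKEN, device_map="auto")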
@@ -155,7 +155,7 @@ async def parse_question(state: JARVISState) -> JARVISState:
         inputs = tokenizer.apply_chat_template(
             [{"role": "system", "content": prompt[0].content}, {"role": "user", "content": prompt[1].content}],
             return_tensors="pt"
-        ).to(
+        ).to(model.device)
         outputs = model.generate(inputs, max_new_tokens=512, temperature=0.7)
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         tools_needed = json.loads(response.strip())
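The logic behind this fix: `apply_chat_template(..., return_tensors="pt")` produces input ids on the CPU, while `device_map="auto"` may have placed the model on a GPU, so the tensor must be moved with `.to(model.device)` before calling `generate`. A hedged sketch of the pattern, reusing the `model` and `tokenizer` from the loading sketch above and with example message content:

messages = [
    {"role": "system", "content": "You are a helpful assistant."},  # example content
    {"role": "user", "content": "Which tools does this question need?"},
]
# apply_chat_template tokenizes the conversation; the resulting tensor starts on CPU,
# so move it to wherever device_map="auto" placed the model.
inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
# Note: generate() ignores temperature unless do_sample=True is also passed.
outputs = model.generate(inputs, max_new_tokens=512, temperature=0.7)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)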
@@ -322,7 +322,7 @@ Document results: {document_results}""")
     try:
         if llm_type == "hf_local":
             model, tokenizer = llm_client
-            inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(
+            inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
             outputs = model.generate(inputs, max_new_tokens=512, temperature=0.7)
             answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
         else:
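This third hunk applies the identical `.to(model.device)` completion to the final-answer path, so both generation call sites now place their inputs on whatever device `device_map="auto"` chose for the model.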