# ai-chatbot/utils/hf_api.py
"""Local inference helper for the bitext/Mistral-7B-Customer-Support model."""
import logging

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Use the GPU when available; fall back to CPU so the module still imports
# on machines without CUDA instead of crashing at load time.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the tokenizer and model once at import time so repeated queries reuse
# the same weights. float16 halves the memory footprint of the 7B model on GPU.
tokenizer = AutoTokenizer.from_pretrained("bitext/Mistral-7B-Customer-Support")
model = AutoModelForCausalLM.from_pretrained(
    "bitext/Mistral-7B-Customer-Support",
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
).to(DEVICE)
def query_huggingface(prompt):
    """Generate a customer-support reply for `prompt` with the local model."""
    try:
        messages = [
            {"role": "user", "content": prompt},
        ]
        # Wrap the prompt in the model's chat template and move the resulting
        # tensors to the same device as the model.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(DEVICE)
        outputs = model.generate(**inputs, max_new_tokens=100)
        # Slice off the prompt tokens so only the newly generated reply is decoded.
        response = tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
        )
        return response.strip()
    except Exception as e:
        logging.error(f"Local Mistral-7B-Customer-Support inference failed: {e}")
        return "Sorry, I'm having trouble responding right now."