# aidapal-space/app.py
import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_id = "AverageBusinessUser/aidapal"
filename = "aidapal-8k.Q4_K_M.gguf"

print("Downloading model")
# Load the GGUF-quantized checkpoint through the text-generation pipeline.
# (The original call passed model_id positionally after a keyword argument,
# which is a SyntaxError; it must be passed as model=model_id.)
pipe = pipeline(task="text-generation", model=model_id, gguf_file=filename, device_map="auto")
# Alternative: load the tokenizer and model manually instead of via pipeline().
# torch_dtype = torch.float32  # could be torch.float16 or torch.bfloat16 too
# tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
# model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename, torch_dtype=torch_dtype)
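# If the manual path above is enabled instead, the loaded objects can be handed
# straight to pipeline() (a sketch, assuming the commented lines are uncommented):
#
#   pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)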
@spaces.GPU
def generate(prompt):
    # The pipeline returns a list of dicts; surface only the generated text.
    return pipe(prompt)[0]["generated_text"]
# Simple text-in/text-out UI around the generation function.
demo = gr.Interface(fn=generate, inputs="text", outputs="text")
demo.launch()
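# Once the Space is running, it can also be queried programmatically (a sketch,
# assuming the gradio_client package is installed; the Space name below is a
# guess based on this repo, not confirmed by the source):
#
#   from gradio_client import Client
#   client = Client("ejschwartz/aidapal-space")
#   print(client.predict("int main() { return 0; }", api_name="/predict"))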