# Gradio demo for the GIGAParviz/Firooze_test Persian language model.
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import gc
import torch

def clear_memory():
    # Release Python-level garbage; only touch the CUDA cache if a GPU is present.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

model_name = "GIGAParviz/Firooze_test"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to("cpu")  # run inference on CPU

# max_new_tokens bounds only the generated continuation; the original
# max_length=128 would also count the prompt tokens, which can leave the
# model little or no room to respond to longer prompts.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_new_tokens=128)

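# Optional sanity check before wiring up the UI (the prompt below is purely
# illustrative):
#
#   print(pipe("### Instruction:\nHello\n\n### Response:\n")[0]['generated_text'])
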
def generate_response(prompt):
    clear_memory()
    # Wrap the user prompt in the Alpaca-style instruction template.
    instruction = f"### Instruction:\n{prompt}\n\n### Response:\n"
    result = pipe(instruction)

    # The pipeline echoes the prompt followed by the continuation, so slice the
    # prompt off and return only the model's answer.
    return result[0]['generated_text'][len(instruction):]

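# Note: instead of slicing the prompt off manually, the text-generation
# pipeline also accepts return_full_text=False, which returns only the newly
# generated text. A minimal sketch using the same pipe object:
#
#   result = pipe(instruction, return_full_text=False)
#   response = result[0]['generated_text']
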
with gr.Blocks() as demo:
    gr.Markdown("<h1 style='text-align: center;'>🔮 Persian LLM made by A.M.Parviz</h1>")

    prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Type your prompt here...", lines=2)
    generate_button = gr.Button("Generate Response")
    response_output = gr.Textbox(label="Generated Response", lines=5)

    generate_button.click(fn=generate_response, inputs=prompt_input, outputs=response_output)
    clear_button = gr.ClearButton([prompt_input, response_output])

demo.launch()
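
# When running on a remote host or in a notebook (e.g. Colab), launch() can
# also expose a temporary public link; a minimal variant:
#
#   demo.launch(share=True)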