# Spaces: Build error
import gradio as gr
import numpy as np
import onnxruntime as rt
import onnxruntime_genai as og
# ONNX Runtime session options: two intra-op threads, one inter-op thread,
# and no busy-wait spinning between operators.
# NOTE(review): `so` is not passed to `og.Model` below, so as far as this
# section shows these settings have no effect on the loaded model.
# onnxruntime-genai presumably takes its session options from the model
# folder's genai_config.json -- TODO confirm and move the thread limits there.
so = rt.SessionOptions()
so.inter_op_num_threads = 1
so.intra_op_num_threads = 2
so.add_session_config_entry("session.intra_op.allow_spinning", "0")

# Folder holding the exported ONNX model that `og.Model` loads.
MODEL_DIR = "/phi4_model/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4"

# Loaded once at import time and shared by every `respond` call.
model = og.Model(MODEL_DIR)
tokenizer = og.Tokenizer(model)
def _format_history(history):
    """Render earlier chat turns with the <|user|>/<|assistant|> template.

    Accepts both history layouts Gradio's ChatInterface can pass: a list of
    ``(user, assistant)`` pairs, or a list of ``{"role": ..., "content": ...}``
    message dicts. Unpacking a message dict as a pair would yield its keys
    rather than its text, so the two layouts are handled separately.
    """
    parts = []
    for turn in history or ():
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content", "")
            # Only user/assistant turns have a slot in the prompt built here.
            if role in ("user", "assistant"):
                parts.append(f"<|{role}|>{content}<|end|>")
        else:
            user_text, assistant_text = turn
            parts.append(
                f"<|user|>{user_text}<|end|><|assistant|>{assistant_text}<|end|>"
            )
    return "".join(parts)


def respond(message, history):
    """Generate the assistant's reply to ``message`` given the chat ``history``.

    Args:
        message: The new user message.
        history: Earlier turns as supplied by ``gr.ChatInterface``.

    Returns:
        The decoded text of the newly generated tokens only (the prompt
        tokens are sliced off before decoding).
    """
    prompt = _format_history(history)
    prompt += f"<|user|>{message}<|end|><|assistant|>"

    input_ids = tokenizer.encode(prompt)
    prompt_len = len(input_ids)

    params = og.GeneratorParams(model)
    # Greedy decoding, capped at 256 tokens beyond the prompt.
    params.set_search_options(max_length=prompt_len + 256, do_sample=False)

    # NOTE(review): this is the Generator-based flow from the current
    # onnxruntime-genai Python docs; releases that predate `append_tokens`
    # used a different call sequence -- confirm against the pinned version.
    generator = og.Generator(model, params)
    generator.append_tokens(input_ids)
    while not generator.is_done():
        generator.generate_next_token()

    # The sequence holds prompt + completion; return only the completion.
    generated = generator.get_sequence(0)[prompt_len:]
    return tokenizer.decode(generated)
# Build the chat UI around `respond`, then serve it on all interfaces at
# port 7860.
demo = gr.ChatInterface(respond, title="🧠 Phi‑4 ONNX Chat (2‑Core)")
demo.launch(server_name="0.0.0.0", server_port=7860)