# NOTE(review): the lines "Spaces: / Sleeping / Sleeping" below appear to be
# Hugging Face Space status text accidentally pasted into this file; kept as a
# comment so the module parses. Safe to delete once confirmed.
import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModel
import gradio as gr

# Base checkpoint and the LoRA adapter that was fine-tuned on top of it.
BASE_MODEL = "Qwen/Qwen3-Embedding-0.6B"
LORA_MODEL = "gauravprasadgp/qwen3-embedding_0.6B_lora"

# Load the tokenizer and the fp16 base model, then attach the LoRA adapter
# weights on top of it (PeftModel wraps the base model in place).
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
base_model = AutoModel.from_pretrained(BASE_MODEL, trust_remote_code=True, torch_dtype=torch.float16)
model = PeftModel.from_pretrained(base_model, LORA_MODEL, torch_dtype=torch.float16)
model.eval()  # inference only: disables dropout and other train-mode behavior
def get_embedding(text):
    """Return the embedding vector for *text* as a plain Python list of floats.

    The input is tokenized (padded/truncated), run through the LoRA-adapted
    model without gradient tracking, and the hidden state of the first token
    is used as the sentence embedding.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    # Keep inputs on the same device as the model (a no-op on CPU-only setups,
    # but required if the model is later moved to a GPU).
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
        # NOTE(review): this takes the FIRST token's hidden state (CLS-style
        # pooling). The Qwen3-Embedding model card documents last-token
        # pooling — confirm which pooling the LoRA fine-tune was trained with.
        embedding = outputs.last_hidden_state[:, 0, :].squeeze().cpu().numpy()
    return embedding.tolist()
# Gradio UI: a single textbox in, the embedding vector out, rendered as JSON.
iface = gr.Interface(
    fn=get_embedding,
    inputs=gr.Textbox(lines=2, placeholder="Enter text to embed..."),
    outputs="json",
    title="Qwen3 Embedding (LoRA Fine-Tuned)",
    description="Enter a sentence to get the embedding vector using the fine-tuned Qwen3-0.6B model.",
    allow_flagging="never",
)

# Launch the web app only when run as a script (not when imported).
if __name__ == "__main__":
    iface.launch()