"""TinyGPT2 model wrapper for easy integration (CPU-friendly)."""

import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

class TinyGPT2Model:
    """Wrapper for the sshleifer/tiny-gpt2 model with singleton caching.

    The model and tokenizer are loaded once and shared across instances,
    which keeps it suitable for CPU-only Hugging Face Spaces.
    """

    _instance = None
    _model = None
    _tokenizer = None

    def __new__(cls):
        # Singleton: every call returns the same shared instance.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every instantiation; only load the weights once.
        if TinyGPT2Model._model is None:
            self._initialize_model()

    def _initialize_model(self):
        """Load the tiny-gpt2 tokenizer and model (float32, CPU)."""
        print("Loading TinyGPT2 model...")

        model_id = "sshleifer/tiny-gpt2"

        TinyGPT2Model._tokenizer = AutoTokenizer.from_pretrained(
            model_id, token=HUGGINGFACE_TOKEN
        )
        TinyGPT2Model._model = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=HUGGINGFACE_TOKEN,
            torch_dtype=torch.float32,
        )

        print("TinyGPT2 model loaded successfully!")

    def generate(
        self,
        prompt: str,
        max_new_tokens: int = 64,
        temperature: float = 0.7,
        top_p: float = 0.95,
    ) -> str:
        """Generate a response from TinyGPT2."""
        inputs = TinyGPT2Model._tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=256,
        )
        # Keep tensors on CPU; the model is tiny enough that no GPU is needed.
        inputs = {k: v.cpu() for k, v in inputs.items()}

        with torch.no_grad():
            outputs = TinyGPT2Model._model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                # GPT-2 has no pad token, so reuse the EOS token for padding.
                pad_token_id=TinyGPT2Model._tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens, not the echoed prompt.
        response = TinyGPT2Model._tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[1]:],
            skip_special_tokens=True,
        )
        return response.strip()

    def generate_embedding(self, text: str) -> torch.Tensor:
        """Embed text by mean-pooling the last hidden state over tokens."""
        inputs = TinyGPT2Model._tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=256,
        )
        inputs = {k: v.cpu() for k, v in inputs.items()}

        with torch.no_grad():
            outputs = TinyGPT2Model._model(**inputs, output_hidden_states=True)
            # Average over the sequence axis: (batch, seq, hidden) -> (batch, hidden).
            embeddings = outputs.hidden_states[-1].mean(dim=1)

        return embeddings
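

# Minimal usage sketch (the prompt and token count below are illustrative
# only, not part of the wrapper's API). Because of the singleton pattern,
# constructing TinyGPT2Model() a second time reuses the already-loaded weights.
if __name__ == "__main__":
    model = TinyGPT2Model()
    print(model.generate("Hello, world!", max_new_tokens=32))
    emb = model.generate_embedding("Hello, world!")
    print(emb.shape)  # (1, hidden_size) for this checkpoint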