# TatTwamAI/models/tinygpt2_model.py
"""
TinyGPT2 Model Wrapper for easy integration (CPU-friendly)
"""
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
class TinyGPT2Model:
    """
    Singleton wrapper for the sshleifer/tiny-gpt2 model: the model and
    tokenizer are loaded once and cached as class attributes.
    Suitable for CPU-only Hugging Face Spaces.
    """
    _instance = None
    _model = None
    _tokenizer = None

    def __new__(cls):
        # Singleton: always return the same instance so the model is loaded only once
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        if TinyGPT2Model._model is None:
            self._initialize_model()
    def _initialize_model(self):
        """Load the Tiny-GPT2 tokenizer and model."""
        print("Loading TinyGPT2 model...")
        model_id = "sshleifer/tiny-gpt2"
        # Load tokenizer (the model is public, so the token is optional and may be None)
        TinyGPT2Model._tokenizer = AutoTokenizer.from_pretrained(
            model_id, token=HUGGINGFACE_TOKEN
        )
        # Load model (no quantization, pure CPU)
        TinyGPT2Model._model = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=HUGGINGFACE_TOKEN,
            torch_dtype=torch.float32  # safe for CPU-only inference
        )
        print("TinyGPT2 model loaded successfully!")
    def generate(
        self,
        prompt: str,
        max_length: int = 64,
        temperature: float = 0.7,
        top_p: float = 0.95
    ) -> str:
        """Generate a response from TinyGPT2.

        `max_length` is the maximum number of *new* tokens to generate
        (it is passed to `max_new_tokens`), not the total sequence length.
        """
        # TinyGPT2 needs no special prompt formatting
        formatted_prompt = prompt
        # Tokenize
        inputs = TinyGPT2Model._tokenizer(
            formatted_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=256
        )
        # Keep tensors on CPU (explicit, since no GPU is assumed)
        inputs = {k: v.cpu() for k, v in inputs.items()}
        # Generate on CPU
        with torch.no_grad():
            outputs = TinyGPT2Model._model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=TinyGPT2Model._tokenizer.eos_token_id
            )
        # Decode only the newly generated tokens (after the prompt)
        response = TinyGPT2Model._tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[1]:],
            skip_special_tokens=True
        )
        return response.strip()
    def generate_embedding(self, text: str) -> torch.Tensor:
        """Generate embeddings for text using the last hidden state."""
        inputs = TinyGPT2Model._tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=256
        )
        inputs = {k: v.cpu() for k, v in inputs.items()}
        with torch.no_grad():
            outputs = TinyGPT2Model._model(**inputs, output_hidden_states=True)
        # Mean over the token dimension of the final hidden layer -> (1, hidden_size)
        embeddings = outputs.hidden_states[-1].mean(dim=1)
        return embeddings
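

# -----------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the original wrapper): it
# only exercises the public methods defined above. Because the class is a
# singleton, constructing it twice reuses the same loaded model.
if __name__ == "__main__":
    llm = TinyGPT2Model()
    same_llm = TinyGPT2Model()  # same instance, model is not reloaded
    assert llm is same_llm

    reply = llm.generate("Hello, who are you?", max_length=32)
    print("Generated:", reply)

    embedding = llm.generate_embedding("Hello, who are you?")
    print("Embedding shape:", tuple(embedding.shape))  # (1, hidden_size)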