# TatTwamAI/models/tinygpt2_model.py
"""
TinyGPT2 Model Wrapper for easy integration (CPU-friendly)
"""
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
class TinyGPT2Model:
    """
    Singleton wrapper for the sshleifer/tiny-gpt2 model: the model and
    tokenizer are loaded once and cached as class attributes.
    Suitable for CPU-only Hugging Face Spaces.
    """
    _instance = None
    _model = None
    _tokenizer = None

    def __new__(cls):
        # Singleton: always return the same instance so the model is loaded only once
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        if TinyGPT2Model._model is None:
            self._initialize_model()
    def _initialize_model(self):
        """Load the Tiny-GPT2 tokenizer and model."""
        print("Loading TinyGPT2 model...")
        model_id = "sshleifer/tiny-gpt2"
        # Load tokenizer (the model is public, so the token is optional and may be None)
        TinyGPT2Model._tokenizer = AutoTokenizer.from_pretrained(
            model_id, token=HUGGINGFACE_TOKEN
        )
        # Load model (no quantization, pure CPU)
        TinyGPT2Model._model = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=HUGGINGFACE_TOKEN,
            torch_dtype=torch.float32  # safe for CPU-only inference
        )
        print("TinyGPT2 model loaded successfully!")
    def generate(
        self,
        prompt: str,
        max_length: int = 64,
        temperature: float = 0.7,
        top_p: float = 0.95
    ) -> str:
        """Generate a response from TinyGPT2.

        `max_length` is the maximum number of *new* tokens to generate
        (it is passed to `max_new_tokens`), not the total sequence length.
        """
        # TinyGPT2 needs no special prompt formatting
        formatted_prompt = prompt
        # Tokenize
        inputs = TinyGPT2Model._tokenizer(
            formatted_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=256
        )
        # Keep tensors on CPU (explicit, since no GPU is assumed)
        inputs = {k: v.cpu() for k, v in inputs.items()}
        # Generate on CPU
        with torch.no_grad():
            outputs = TinyGPT2Model._model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=TinyGPT2Model._tokenizer.eos_token_id
            )
        # Decode only the newly generated tokens (after the prompt)
        response = TinyGPT2Model._tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[1]:],
            skip_special_tokens=True
        )
        return response.strip()
    def generate_embedding(self, text: str) -> torch.Tensor:
        """Generate embeddings for text using the last hidden state."""
        inputs = TinyGPT2Model._tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=256
        )
        inputs = {k: v.cpu() for k, v in inputs.items()}
        with torch.no_grad():
            outputs = TinyGPT2Model._model(**inputs, output_hidden_states=True)
        # Mean over the token dimension of the final hidden layer -> (1, hidden_size)
        embeddings = outputs.hidden_states[-1].mean(dim=1)
        return embeddings
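

# -----------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the original wrapper): it
# only exercises the public methods defined above. Because the class is a
# singleton, constructing it twice reuses the same loaded model.
if __name__ == "__main__":
    llm = TinyGPT2Model()
    same_llm = TinyGPT2Model()  # same instance, model is not reloaded
    assert llm is same_llm

    reply = llm.generate("Hello, who are you?", max_length=32)
    print("Generated:", reply)

    embedding = llm.generate_embedding("Hello, who are you?")
    print("Embedding shape:", tuple(embedding.shape))  # (1, hidden_size)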