# NOTE(review): snippet recovered from a web page; the original "Spaces: /
# Runtime error" status-banner text was page residue, not part of the code.
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load a small, CPU-friendly causal LM and its tokenizer once at import time
# so repeated generate_answer() calls reuse the same weights.
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")
def generate_answer(context, question, max_new_tokens=100):
    """Generate a conversational answer to *question* grounded in *context*.

    Args:
        context: Source text the answer should draw on; only the first
            1000 characters are used to keep the prompt small on CPU.
        question: The user's question.
        max_new_tokens: Upper bound on newly generated tokens (default 100).

    Returns:
        The generated answer text — everything after the last "Answer:"
        marker in the decoded output — stripped of surrounding whitespace.
    """
    # Concise prompt; context is truncated so tokenization stays well
    # within the 512-token limit below.
    prompt = f"""Based on the context, answer the question conversationally.
Context:
{context[:1000]}
Question: {question}
Answer:"""
    # Tokenize with truncation as a hard safety cap on prompt length.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    # CPU-optimized generation. The attention mask is passed explicitly:
    # without it, generate() has to guess the mask and warns (and may
    # misbehave) because pad_token_id is aliased to eos_token_id here.
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=max_new_tokens,
        num_beams=1,      # greedy/sampling path — faster than beam search
        do_sample=True,   # sampled decoding for more natural responses
        temperature=0.7,  # balance creativity vs. focus
        top_k=40,         # restrict to the 40 most likely tokens
        top_p=0.9,        # nucleus sampling
        pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token
        # early_stopping removed: it only applies to beam search and is
        # ignored (with a warning) when num_beams == 1.
    )
    # Decode the full sequence and keep only the text after the final
    # "Answer:" marker (the prompt itself contains exactly one).
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return full_text.split("Answer:")[-1].strip()