Spaces:
Runtime error
Runtime error
File size: 1,320 Bytes
768d1ad b985953 2e65f14 768d1ad 9e97937 2e65f14 768d1ad 2e65f14 768d1ad 2e65f14 768d1ad 2e65f14 768d1ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
from transformers import AutoTokenizer, AutoModelForCausalLM
# Small distilled GPT-2 checkpoint, chosen to keep CPU inference cheap.
_MODEL_NAME = "distilgpt2"

# Load once at import time so every generate_answer() call reuses them.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(_MODEL_NAME)
def generate_answer(context, question, max_new_tokens=100):
    """Generate a conversational answer to *question* grounded in *context*.

    Args:
        context: Background text for the model. Only the first 1000
            characters are placed in the prompt; ``None`` is treated as an
            empty context.
        question: The question to answer.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The newly generated text only (the prompt is never echoed back),
        with surrounding whitespace stripped.
    """
    # Keep the prompt short: cap the context before building the prompt.
    context_snippet = (context or "")[:1000]
    prompt = (
        "Based on the context, answer the question conversationally.\n"
        "Context:\n"
        f"{context_snippet}\n"
        f"Question: {question}\n"
        "Answer:"
    )

    # NOTE(review): the tokenizer truncates from the right by default, so a
    # prompt longer than 512 tokens would lose its trailing question —
    # confirm inputs stay within this budget.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    input_ids = inputs["input_ids"]

    # Sampling-based decoding with a single beam. The attention mask is
    # passed explicitly because pad and EOS share the same token id, so it
    # cannot be inferred reliably from the input ids. `early_stopping` is
    # omitted: it only applies to beam search.
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        num_beams=1,       # no beam search
        do_sample=True,    # sample instead of greedy decoding
        temperature=0.7,
        top_k=40,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens; slice them off
    # rather than splitting the decoded text on "Answer:", which breaks when
    # the model itself emits that marker or when the prompt was truncated.
    prompt_length = input_ids.shape[1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()