# Scaper_search/llm.py
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load a small, CPU-friendly model
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")
model.eval()  # inference mode: disables dropout
def generate_answer(context, question, max_new_tokens=100):
    """Generate an answer from the given context with CPU-friendly settings."""
    # Build a concise prompt; cap the context so it fits the model window
    prompt = f"""Based on the context, answer the question conversationally.
Context:
{context[:1000]}
Question: {question}
Answer:"""

    # Tokenize with truncation (512 tokens leaves room for generation
    # within distilgpt2's 1024-token context window)
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Generate without tracking gradients (saves memory and time on CPU)
    with torch.inference_mode():
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,  # avoids the missing-mask warning
            max_new_tokens=max_new_tokens,
            num_beams=1,       # no beam search: faster on CPU
            do_sample=True,    # sampling gives more natural responses
            temperature=0.7,   # balance creativity and focus
            top_k=40,          # restrict to the 40 most likely tokens
            top_p=0.9,         # nucleus sampling
            pad_token_id=tokenizer.eos_token_id,
        )
    # Note: early_stopping was dropped; it only applies to beam search
    # and merely triggers a warning when num_beams=1.

    # Decode the full sequence, then keep only the text after "Answer:"
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return full_text.split("Answer:")[-1].strip()
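

# A minimal usage sketch (the sample context and question below are
# hypothetical, not part of the original module). Guarded so it only
# runs when the file is executed directly:
if __name__ == "__main__":
    sample_context = (
        "The Eiffel Tower is a wrought-iron lattice tower in Paris, France. "
        "It was completed in 1889 and stands roughly 330 metres tall."
    )
    print(generate_answer(sample_context, "How tall is the Eiffel Tower?"))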