import sys
import os

import torch

# Make the repository root importable so `all_models` resolves when this
# file is run directly rather than as a package module.
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from all_models import models


def query_(query, doc):
    """Generate a single-paragraph answer to *query* grounded in *doc*.

    Builds an instruction prompt around the question and context, runs the
    FLAN model's `generate` (sampling combined with beam search), and returns
    the decoded answer text.

    Args:
        query: The user question to answer.
        doc: Context text the answer should be extracted from.

    Returns:
        The decoded answer string (special tokens stripped).
    """
    input_text = f"""
You are an AI assistant designed to extract relevant information from a document and generate a clear, concise answer.
Question: {query}
Provide a *single-paragraph response of 250 words* that summarizes key details, explains the answer logically, and avoids repetition. Ignore irrelevant details like page numbers, author names, and metadata.
Context: "{doc}"
Answer:
"""

    # Move inputs to the same device as the model.
    device = next(models.flan_model.parameters()).device
    inputs = models.flan_tokenizer(input_text, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = models.flan_model.generate(
            **inputs,
            do_sample=True,
            # NOTE(review): the old code set `max_length = input_length + 180`.
            # For an encoder-decoder model (FLAN-T5 — presumed from the name),
            # `max_length` counts only generated decoder tokens, so adding the
            # prompt length inflated the output budget. `max_new_tokens` caps
            # generation directly and is equivalent for decoder-only models.
            max_new_tokens=180,
            min_length=100,
            early_stopping=True,
            temperature=0.7,
            top_k=50,
            top_p=0.9,
            repetition_penalty=1.2,
            num_beams=3,
        )

    return models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True)