import logging
import random
from typing import Optional

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed


class TextGenerator:
    """Few-shot, emotion-conditioned sentence continuation with a causal LM.

    The generator builds a prompt out of two randomly chosen few-shot
    examples plus the requested emotion/text pair, samples a continuation
    from the model and returns the first line of what the model produced.
    """

    # Marker that precedes every continuation in the prompt.
    _CONTINUATION_MARKER = "Continuation:"

    # Default (emotion, text, continuation) triples used as few-shot examples.
    _DEFAULT_FEWSHOT_EXAMPLES = (
        ("happy", "We finally made it!", "We finally made it! I’ve never felt so alive and proud of what we accomplished."),
        ("sad", "He didn't come back.", "He didn't come back. I waited all night, hoping to see him again."),
        ("anger", "Why would you do that?", "Why would you do that? You had no right to interfere!"),
        ("fear", "Did you hear that?", "Did you hear that? Something’s moving outside the window..."),
        ("surprise", "Oh wow, really?", "Oh wow, really? I didn’t see that coming at all!"),
        ("disgust", "That smell is awful.", "That smell is awful. I feel like I’m going to be sick."),
        ("neutral", "Let's meet at noon.", "Let's meet at noon. We’ll have plenty of time to talk then."),
    )

    def __init__(
        self,
        model_name: str = "gpt2",
        device: str = "cuda",
        max_new_tokens: int = 50,
        temperature: float = 1.0,
        top_p: float = 0.95,
        seed: Optional[int] = None,
    ):
        """Load the tokenizer and model and optionally fix the random seed.

        Args:
            model_name: Name or path handed to ``from_pretrained``.
            device: Device the model and the tokenized inputs are moved to.
            max_new_tokens: Upper bound on generated tokens per call.
            temperature: Sampling temperature passed to ``generate``.
            top_p: Nucleus-sampling threshold passed to ``generate``.
            seed: When not ``None``, passed to ``transformers.set_seed``.
        """
        self.model_name = model_name
        self.device = device
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature
        self.top_p = top_p
        self.seed = seed

        logging.info(
            "[TextGenerator] Загрузка модели %s на %s ...", model_name, device
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

        if seed is not None:
            set_seed(seed)
            logging.info(
                "[TextGenerator] Сид генерации установлен через transformers.set_seed(%s)",
                seed,
            )
        else:
            logging.info("[TextGenerator] Сид генерации не установлен (seed=None)")

        # --- Few-shot examples ---
        # Kept as a per-instance list so callers may extend or replace it.
        self.fewshot_examples = list(self._DEFAULT_FEWSHOT_EXAMPLES)

    def build_prompt(self, emotion: str, partial_text: str) -> str:
        """Return the few-shot prompt for ``emotion`` and ``partial_text``.

        Two examples are drawn with ``random.sample`` from
        ``self.fewshot_examples``; the prompt ends with ``"Continuation:"``
        so that the model's output is the continuation itself.
        """
        chosen = random.sample(self.fewshot_examples, 2)
        examples_str = "".join(
            f"Example:\n"
            f"Emotion: {emo}\n"
            f"Text: {text}\n"
            f"Continuation: {cont}\n\n"
            for emo, text, cont in chosen
        )
        return (
            "You are a helpful assistant that generates emotionally-aligned sentence continuations.\n"
            "You must include the original sentence in the output, and then continue it in a fluent and emotionally appropriate way.\n\n"
            f"{examples_str}"
            f"Now try:\n"
            f"Emotion: {emotion}\n"
            f"Text: {partial_text}\n"
            f"Continuation:"
        )

    @staticmethod
    def _first_line(generated_text: str) -> str:
        """Return the first non-blank line of ``generated_text``, stripped."""
        return generated_text.strip().partition("\n")[0].strip()

    def generate_text(self, emotion: str, partial_text: str = "") -> str:
        """Sample a continuation of ``partial_text`` in the given ``emotion``.

        Args:
            emotion: Emotion label inserted into the prompt.
            partial_text: Sentence to continue (may be empty).

        Returns:
            The first line of the text the model generated after the prompt,
            with surrounding whitespace removed (possibly an empty string).
        """
        prompt = self.build_prompt(emotion, partial_text)
        logging.debug("[TextGenerator] prompt:\n%s", prompt)

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        output_ids = self.model.generate(
            **inputs,
            max_new_tokens=self.max_new_tokens,
            do_sample=True,
            top_p=self.top_p,
            temperature=self.temperature,
            pad_token_id=self.tokenizer.eos_token_id,
        )

        sequence = output_ids[0]
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            # Decode the whole sequence only when it will actually be logged.
            logging.debug(
                "[TextGenerator] decoded:\n%s",
                self.tokenizer.decode(sequence, skip_special_tokens=True),
            )

        # A causal LM's generate() output starts with the prompt tokens, so
        # everything past the prompt length is the model's own text. Slicing
        # here (instead of splitting the decoded text on the last
        # "Continuation:") keeps the answer correct even when the model goes
        # on to invent another "Example: ... Continuation: ..." block or when
        # partial_text itself contains the marker.
        prompt_length = inputs["input_ids"].shape[1]
        generated_text = self.tokenizer.decode(
            sequence[prompt_length:], skip_special_tokens=True
        )
        return self._first_line(generated_text)