Spaces:

UcsTurkey
/

mistral7b

Paused

App Files Files Community

mistral7b / inference_test_turkcell_with_intents.py

ciyidogan

Update inference_test_turkcell_with_intents.py

0b8c8b5 verified 3 months ago

raw

history blame contribute delete

12.3 kB

	import os, torch, threading, uvicorn, time, traceback, zipfile, random, json, shutil, asyncio, re
	from fastapi import FastAPI
	from fastapi.responses import HTMLResponse, JSONResponse
	from pydantic import BaseModel
	from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, Trainer, TrainingArguments, default_data_collator, AutoConfig
	from peft import PeftModel
	from datasets import Dataset
	from datetime import datetime

	# === Environment
	# Hugging Face token taken from the environment (None when the variable is unset).
	# NOTE(review): not referenced anywhere else in the visible code.
	HF_TOKEN = os.getenv("HF_TOKEN")
	# Presumably silences advisory warnings emitted by transformers -- confirm against its docs.
	os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
	# Redirect the torch cache to a directory under /app and make sure it exists.
	os.environ["TORCH_HOME"] = "/app/.torch_cache"
	os.makedirs("/app/.torch_cache", exist_ok=True)

	# === Settings
	# Checkpoint loaded by setup_model() for both the tokenizer and the causal LM.
	MODEL_BASE = "TURKCELL/Turkcell-LLM-7b-v1"
	# NOTE(review): the three fine-tune settings below are not read by the visible code.
	USE_FINE_TUNE = False
	FINE_TUNE_REPO = "UcsTurkey/trained-zips"
	FINE_TUNE_ZIP = "trained_model_000_009.zip"
	# Passed to generate() as do_sample; confidence is only computed when this is False.
	USE_SAMPLING = False
	# chat() executes a detected intent only when its confidence is strictly above this.
	INTENT_CONFIDENCE_THRESHOLD = 0.5
	# chat() answers with a random fallback when the LLM confidence is below this.
	LLM_CONFIDENCE_THRESHOLD = 0.2
	# background_training() logs a warning for intents whose accuracy is below this.
	TRAIN_CONFIDENCE_THRESHOLD = 0.7
	# Canned replies used when the LLM confidence is too low.
	FALLBACK_ANSWERS = [
	"Bu konuda maalesef bilgim yok.",
	"Ne demek istediğinizi tam anlayamadım.",
	"Bu soruya şu an yanıt veremiyorum."
	]

	# Directory where the trained intent classifier is saved and later loaded from.
	INTENT_MODEL_PATH = "intent_model"
	# Base checkpoint the intent classifier is fine-tuned from.
	INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
	# Populated by load_intent_model(); None/empty until that endpoint succeeds.
	INTENT_MODEL = None
	INTENT_TOKENIZER = None
	LABEL2ID = {}
	# Intent name -> intent definition, replaced on every POST /train_intents.
	INTENT_DEFINITIONS = {}

	# === FastAPI
	app = FastAPI()
	# NOTE(review): never read or written in the visible code.
	chat_history = []
	# Set by setup_model() once the LLM has been loaded.
	model = None
	tokenizer = None
	eos_token_id = None

	class Message(BaseModel):
	    """Request body accepted by ``POST /chat``."""

	    # Raw text typed by the user; the handler strips surrounding whitespace.
	    user_input: str

	class TrainInput(BaseModel):
	    """Request body accepted by ``POST /train_intents``."""

	    # Intent definitions. The handlers in this file read the "name" and
	    # "examples" keys of each item, and optionally "variables".
	    intents: list

	@app.get("/")
	def health():
	    """Liveness probe: always answers with a static OK payload."""
	    payload = {"status": "ok"}
	    return payload

	@app.get("/start", response_class=HTMLResponse)
	def root():
	    """Serve the minimal single-page chat UI.

	    The page posts ``{"user_input": ...}`` as JSON to ``/chat`` and shows the
	    ``answer``, ``response`` or ``error`` field of the reply, whichever is
	    present first, falling back to a generic error text.

	    The JavaScript uses plain ``||`` operators; the previously escaped
	    ``\\|\\|`` form was not valid JavaScript and stopped the script from running.
	    """
	    return """
	<html><body>
	<h2>Turkcell LLM Chat</h2>
	<textarea id='input' rows='4' cols='60'></textarea><br>
	<button onclick='send()'>Gönder</button><br><br>
	<label>Model Cevabı:</label><br>
	<textarea id='output' rows='10' cols='80' readonly style='white-space: pre-wrap;'></textarea>
	<script>
	async function send() {
	const input = document.getElementById("input").value;
	const res = await fetch('/chat', {
	method: 'POST',
	headers: { 'Content-Type': 'application/json' },
	body: JSON.stringify({ user_input: input })
	});
	const data = await res.json();
	document.getElementById('output').value = data.answer || data.response || data.error || 'Hata oluştu.';
	}
	</script>
	</body></html>
	"""

	@app.post("/train_intents", status_code=202)
	def train_intents(train_input: TrainInput):
	    """Register the posted intents and start training in a background thread.

	    The module-level ``INTENT_DEFINITIONS`` mapping (intent name -> definition)
	    is replaced immediately; the classifier itself is trained asynchronously by
	    ``background_training`` on a daemon thread, so the endpoint answers with
	    202 before training has finished.
	    """
	    global INTENT_DEFINITIONS
	    log("📥 POST /train_intents çağrıldı.")
	    intents = train_input.intents

	    # Build the complete mapping first so a malformed item leaves the
	    # previous definitions untouched.
	    definitions = {}
	    for item in intents:
	        definitions[item["name"]] = item
	    INTENT_DEFINITIONS = definitions

	    worker = threading.Thread(
	        target=lambda: background_training(intents),
	        daemon=True,
	    )
	    worker.start()
	    return {
	        "status": "accepted",
	        "message": "Intent eğitimi arka planda başlatıldı.",
	    }

	def background_training(intents):
	    """Fine-tune the intent classifier on ``intents`` and save it to disk.

	    Intended to run on a background thread (see ``train_intents``).

	    Steps: build (text, label) pairs, fine-tune ``INTENT_MODEL_ID`` for
	    sequence classification, log a per-intent accuracy report measured on the
	    training examples themselves, then write the model, tokenizer and
	    ``label2id.json`` into ``INTENT_MODEL_PATH`` (replacing any previous copy).

	    Args:
	        intents: iterable of dicts, each with a ``"name"`` (the label) and an
	            ``"examples"`` list of utterances.

	    Returns:
	        None. Every exception is logged and swallowed so that a failed
	        training run cannot take down the worker thread's host process.
	    """
	    try:
	        log("🔧 Intent eğitimi başlatıldı...")
	        texts, labels, label2id = [], [], {}
	        for intent in intents:
	            # setdefault keeps label ids dense (0..n-1) even when an intent
	            # name appears twice; for unique names this equals enumerate().
	            idx = label2id.setdefault(intent["name"], len(label2id))
	            for ex in intent["examples"]:
	                texts.append(ex)
	                labels.append(idx)

	        if not texts:
	            # Nothing to learn from; fail loudly instead of training and
	            # saving a classifier with zero examples.
	            raise ValueError("no training examples were provided")

	        id2label = {idx: name for name, idx in label2id.items()}

	        # Local names deliberately differ from the module-level LLM globals
	        # ``model`` / ``tokenizer`` to avoid confusion.
	        intent_tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
	        config = AutoConfig.from_pretrained(INTENT_MODEL_ID)
	        config.problem_type = "single_label_classification"
	        config.num_labels = len(label2id)
	        intent_model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID, config=config)

	        # Every example is padded/truncated to a fixed length of 128 tokens.
	        encoded = intent_tokenizer(texts, truncation=True, padding="max_length", max_length=128)
	        tokenized = Dataset.from_dict({
	            "input_ids": encoded["input_ids"],
	            "attention_mask": encoded["attention_mask"],
	            "label": labels,
	        })
	        tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

	        output_dir = "/app/intent_train_output"
	        os.makedirs(output_dir, exist_ok=True)
	        trainer = Trainer(
	            model=intent_model,
	            args=TrainingArguments(output_dir, per_device_train_batch_size=4, num_train_epochs=3, logging_steps=10, save_strategy="no", report_to=[]),
	            train_dataset=tokenized,
	            data_collator=default_data_collator
	        )
	        trainer.train()

	        # Produce the success report.
	        log("🔧 Başarı raporu üretiliyor...")
	        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	        intent_model.to(device)
	        # Training leaves the model in train mode; switch to eval so dropout
	        # does not randomise the reported accuracy.
	        intent_model.eval()
	        input_ids_tensor = tokenized["input_ids"].to(device)
	        attention_mask_tensor = tokenized["attention_mask"].to(device)

	        with torch.no_grad():
	            outputs = intent_model(input_ids=input_ids_tensor, attention_mask=attention_mask_tensor)
	        predictions = outputs.logits.argmax(dim=-1).tolist()

	        # ``labels`` is in the same order as the rows of ``tokenized`` and
	        # holds plain ints, which keeps the comparisons below cheap.
	        counts = {}
	        correct = {}
	        for pred, actual in zip(predictions, labels):
	            intent = id2label[actual]
	            counts[intent] = counts.get(intent, 0) + 1
	            if pred == actual:
	                correct[intent] = correct.get(intent, 0) + 1
	        for intent, total in counts.items():
	            accuracy = correct.get(intent, 0) / total
	            log(f"📊 Intent '{intent}' doğruluk: {accuracy:.2f} — {total} örnek")
	            if accuracy < TRAIN_CONFIDENCE_THRESHOLD or total < 5:
	                log(f"⚠️ Yetersiz performanslı intent: '{intent}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")

	        log("📦 Intent modeli eğitimi kaydediliyor...")
	        if os.path.exists(INTENT_MODEL_PATH):
	            shutil.rmtree(INTENT_MODEL_PATH)
	        intent_model.save_pretrained(INTENT_MODEL_PATH)
	        intent_tokenizer.save_pretrained(INTENT_MODEL_PATH)
	        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w", encoding="utf-8") as f:
	            json.dump(label2id, f)

	        log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")

	    except Exception as e:
	        log(f"❌ Intent eğitimi hatası: {e}")
	        traceback.print_exc()

	@app.post("/load_intent_model")
	def load_intent_model():
	    """Load the trained intent classifier from ``INTENT_MODEL_PATH``.

	    Tokenizer, model and ``label2id.json`` are first loaded into locals and
	    only then published to the module globals, so a failure part-way through
	    never leaves a new tokenizer paired with an old model or label map.

	    Returns:
	        A status dict on success, or a 500 ``JSONResponse`` carrying the
	        error text when anything fails to load.
	    """
	    global INTENT_MODEL, INTENT_TOKENIZER, LABEL2ID
	    try:
	        loaded_tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_PATH)
	        loaded_model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_PATH)
	        loaded_model.eval()  # inference only
	        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), encoding="utf-8") as f:
	            loaded_labels = json.load(f)
	    except Exception as e:
	        return JSONResponse(content={"error": str(e)}, status_code=500)

	    # Publish the model last: chat() uses INTENT_MODEL as the "ready" flag.
	    INTENT_TOKENIZER = loaded_tokenizer
	    LABEL2ID = loaded_labels
	    INTENT_MODEL = loaded_model
	    return {"status": "ok", "message": "Intent modeli yüklendi."}

	async def detect_intent(text):
	    """Classify ``text`` with the loaded intent model.

	    Requires ``load_intent_model`` to have populated ``INTENT_TOKENIZER``,
	    ``INTENT_MODEL`` and ``LABEL2ID``.

	    Args:
	        text: the user utterance.

	    Returns:
	        ``(intent_name, confidence)`` where confidence is the softmax
	        probability of the winning class.

	    Note:
	        The body has no await points, so it blocks the event loop while the
	        model runs.
	    """
	    # Truncate to the same 128-token limit used at training time; without
	    # truncation an over-long input would exceed the encoder's maximum length.
	    inputs = INTENT_TOKENIZER(text, return_tensors="pt", truncation=True, max_length=128)
	    inputs = inputs.to(INTENT_MODEL.device)
	    # Inference only: skip building the autograd graph.
	    with torch.no_grad():
	        outputs = INTENT_MODEL(**inputs)
	    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
	    confidence, pred_id = torch.max(probs, dim=-1)
	    id2label = {v: k for k, v in LABEL2ID.items()}
	    return id2label[pred_id.item()], confidence.item()

	async def generate_response(text):
	    """Generate a reply to ``text`` with the main chat model.

	    Args:
	        text: the user message; it is wrapped as a single ``user`` turn and
	            rendered through the tokenizer's chat template.

	    Returns:
	        ``(reply, confidence)``. ``confidence`` is ``None`` when sampling is
	        enabled; otherwise it is the largest softmax probability found over
	        all generation steps.

	    Note:
	        The body has no await points, so it blocks the event loop for the
	        whole generation.
	    """
	    messages = [{"role": "user", "content": text}]
	    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
	    # Generation stops at the ChatML end-of-turn marker.
	    eos_token = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
	    input_ids = encodeds.to(model.device)
	    # A single un-padded prompt: every position is a real token. Deriving the
	    # mask from pad_token_id would hide genuine tokens whenever the pad token
	    # doubles as a token that appears in the prompt.
	    attention_mask = torch.ones_like(input_ids)

	    with torch.no_grad():
	        output = model.generate(
	            input_ids=input_ids,
	            attention_mask=attention_mask,
	            max_new_tokens=128,
	            do_sample=USE_SAMPLING,
	            eos_token_id=eos_token,
	            pad_token_id=tokenizer.pad_token_id,
	            return_dict_in_generate=True,
	            output_scores=True
	        )

	    if not USE_SAMPLING:
	        scores = torch.stack(output.scores, dim=1)
	        probs = torch.nn.functional.softmax(scores[0], dim=-1)
	        # NOTE(review): this is the maximum over every step and token, so it
	        # tends to sit near 1.0; a per-step average may be a better signal.
	        top_conf = probs.max().item()
	    else:
	        top_conf = None

	    # Decode only the newly generated tokens. Searching the decoded prompt for
	    # an "assistant" marker picks the wrong offset when the user's own text
	    # contains that word.
	    prompt_length = input_ids.shape[-1]
	    new_tokens = output.sequences[0][prompt_length:]
	    decoded = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
	    return decoded, top_conf

	def extract_parameters(variables_list, user_input):
	    """Extract named parameters from ``user_input`` using intent patterns.

	    Each pattern may contain placeholders written as ``name:{example}``; every
	    placeholder becomes a named, non-greedy capture group and the rest of the
	    pattern is used as a regular expression matched from the start of
	    ``user_input``. The first pattern that matches wins.

	    Args:
	        variables_list: iterable of pattern strings.
	        user_input: the text to extract values from.

	    Returns:
	        A list of ``{"key": name, "value": text}`` dicts for the first
	        matching pattern, or ``[]`` when nothing matches.
	    """
	    placeholder = r"(\w+):\{(.+?)\}"
	    for pattern in variables_list:
	        regex = re.sub(placeholder, r"(?P<\1>.+?)", pattern)
	        # A non-greedy group at the very end would otherwise capture a single
	        # character, because re.match does not require the whole input to be
	        # consumed. Anchor the end so the last placeholder takes the rest.
	        if re.search(placeholder + r"\Z", pattern):
	            regex += r"\Z"
	        try:
	            match = re.match(regex, user_input)
	        except re.error:
	            # A malformed pattern must not abort the request; try the next one.
	            continue
	        if match:
	            return [{"key": k, "value": v} for k, v in match.groupdict().items()]
	    return []

	def execute_intent(intent_name, user_input):
	    """Build the result payload for a detected intent.

	    For a registered intent the ``"variables"`` patterns of its definition are
	    applied to ``user_input`` and the extracted parameters are logged and
	    returned; an unknown intent yields an empty parameter list.

	    Returns:
	        ``{"intent": intent_name, "parameters": [...]}``.
	    """
	    # Guard clause: nothing to extract for intents that were never registered.
	    if intent_name not in INTENT_DEFINITIONS:
	        return {"intent": intent_name, "parameters": []}

	    spec = INTENT_DEFINITIONS[intent_name]
	    patterns = spec.get("variables", [])
	    variables = extract_parameters(patterns, user_input)
	    log(f"🚀 execute_intent('{intent_name}', {variables})")
	    return {"intent": intent_name, "parameters": variables}

	@app.post("/chat")
	async def chat(msg: Message):
	    """Answer a chat message, preferring a recognised intent over the LLM.

	    Flow:
	      1. If the LLM is not loaded yet, return an error payload.
	      2. If an intent model is loaded, classify the input; a confident match
	         on a registered intent is executed and returned immediately.
	      3. Otherwise generate an LLM reply, substituting a random fallback
	         answer when the reported confidence is below the threshold.

	    The intent check runs before generation on purpose. Neither helper has an
	    await point, so scheduling both as tasks never overlapped them; it only
	    forced a full generation even when the reply was thrown away.

	    Returns:
	        ``{"intent", "parameters"}``, ``{"response"}`` or ``{"error"}``; any
	        unexpected exception becomes a 500 ``JSONResponse``.
	    """
	    user_input = msg.user_input.strip()
	    try:
	        if model is None or tokenizer is None:
	            return {"error": "Model yüklenmedi."}

	        if INTENT_MODEL is not None:
	            intent, intent_conf = await detect_intent(user_input)
	            log(f"🎯 Intent: {intent} (conf={intent_conf:.2f})")
	            if intent_conf > INTENT_CONFIDENCE_THRESHOLD and intent in INTENT_DEFINITIONS:
	                return execute_intent(intent, user_input)

	        # No intent model, or no confident/registered intent: ask the LLM.
	        response, response_conf = await generate_response(user_input)
	        if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
	            return {"response": random.choice(FALLBACK_ANSWERS)}
	        return {"response": response}
	    except Exception as e:
	        traceback.print_exc()
	        return JSONResponse(content={"error": str(e)}, status_code=500)

	def log(message):
	    """Print ``message`` to stdout, prefixed with the local time as ``[HH:MM:SS]``.

	    Output is flushed immediately so lines appear promptly in container logs.
	    """
	    now = datetime.now()
	    stamp = f"{now:%H:%M:%S}"
	    line = f"[{stamp}] {message}"
	    print(line, flush=True)

	def setup_model():
	    """Load the chat LLM and its tokenizer, then publish them as module globals.

	    Everything is prepared on local variables first (pad token, ``eval()``,
	    end-of-turn token id). The globals ``eos_token_id``, ``tokenizer`` and
	    ``model`` are assigned only once the model is fully configured, so a
	    concurrent ``/chat`` request never sees a half-initialised model.
	    Afterwards the base intent checkpoint is downloaded once to warm the
	    local cache.

	    Returns:
	        None. Every exception is logged and swallowed; on failure the globals
	        keep whatever value they had before.
	    """
	    global model, tokenizer, eos_token_id
	    try:
	        log("🧠 setup_model() başladı")
	        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	        log(f"📡 Kullanılan cihaz: {device}")
	        loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
	        log("📦 Tokenizer yüklendi. Ana model indiriliyor...")
	        loaded_model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=torch.float32).to(device)
	        log("📦 Ana model indirildi ve yüklendi. eval() çağırılıyor...")
	        # Fall back to the EOS token when the tokenizer defines no pad token.
	        loaded_tokenizer.pad_token = loaded_tokenizer.pad_token or loaded_tokenizer.eos_token
	        loaded_model.config.pad_token_id = loaded_tokenizer.pad_token_id
	        # Id of the ChatML end-of-turn marker.
	        loaded_eos_token_id = loaded_tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
	        loaded_model.eval()
	        log("✅ Ana model eval() çağrıldı")

	        # Publish the model last: chat() treats a non-None model and tokenizer
	        # as "ready".
	        eos_token_id = loaded_eos_token_id
	        tokenizer = loaded_tokenizer
	        model = loaded_model

	        log(f"📦 Intent modeli indiriliyor: {INTENT_MODEL_ID}")
	        # Loaded only for the download side effect; the objects are discarded.
	        _ = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
	        _ = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID)
	        log("✅ Intent modeli önbelleğe alındı.")
	        log("✔️ Model başarıyla yüklendi ve sohbet için hazır.")
	    except Exception as e:
	        log(f"❌ setup_model() hatası: {e}")
	        traceback.print_exc()

	# Load the LLM on a daemon thread so the HTTP server can come up right away.
	threading.Thread(target=setup_model, daemon=True).start()
	# Serve the API on another daemon thread, listening on all interfaces, port 7860.
	threading.Thread(
	    target=uvicorn.run,
	    args=(app,),
	    kwargs={"host": "0.0.0.0", "port": 7860},
	    daemon=True,
	).start()
	# Both workers are daemon threads; keep the main thread alive indefinitely.
	while True:
	    time.sleep(60)