# SecurePython / app.py
# alikayh's picture
# Create app.py
# f47ceb8 verified
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# SECUREPYTHO with Unsloth — 100% secure + hidden token
# Just hit "Run All" — it asks you for the token and never displays it anywhere
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# 1. Install Unsloth and the training libraries
import subprocess
import sys


def _pip_install(*packages, extra_args=()):
    """Quietly install *packages* with the pip of the running interpreter.

    Args:
        packages: Requirement specifiers passed to ``pip install``.
        extra_args: Extra pip options placed before the requirements.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
            pip's stderr is echoed first so the cause is visible.
    """
    # ``sys.executable -m pip`` targets the interpreter that runs this script;
    # a bare ``pip`` found on PATH may belong to a different environment.
    command = [sys.executable, "-m", "pip", "install", "-q", *extra_args, *packages]
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        # Output stays hidden on success, but a failed install must not be silent.
        sys.stderr.write(result.stderr)
        raise subprocess.CalledProcessError(
            result.returncode, command, output=result.stdout, stderr=result.stderr
        )


print("نصب Unsloth و کتابخونه‌ها...")
_pip_install("unsloth[colab-new]@git+https://github.com/unslothai/unsloth.git")
# --no-deps: install exactly these packages without pulling in their dependencies.
_pip_install(
    "xformers<0.0.27", "trl", "peft", "accelerate", "bitsandbytes",
    extra_args=("--no-deps",),
)
# 2. Ask the user for the Hugging Face token (input is not echoed)
from getpass import getpass
from huggingface_hub import login

# Strip stray whitespace picked up when pasting; it would make a valid token fail.
token = getpass("🤫 توکن Hugging Face (Write) رو وارد کن و Enter بزن (هیچ جا نشون داده نمی‌شه): ").strip()
if not token:
    # Stop here with a clear message instead of sending an empty token to the Hub.
    raise ValueError(
        "No Hugging Face token was entered; a token with Write access is required."
    )
# NOTE(review): per the huggingface_hub docs, login() stores the token in the
# local Hugging Face cache so later Hub calls can use it — confirm for the
# installed version.
login(token=token)
print("✅ لاگین شدی داداش!")
# 3. Upload the dataset
from google.colab import files

print("دیتاست JSONL یا CSV رو آپلود کن...")
# The result is later read by file name (its keys are the uploaded file names).
uploaded = files.upload()
if not uploaded:
    # Fail fast: without a dataset there is no point in loading the model below.
    raise ValueError("No dataset file was uploaded; upload a JSONL or CSV file.")
# 4. Fine-tune with Unsloth
from unsloth import FastLanguageModel
import torch
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments

# Hub repository the trained model is pushed to; change it if you like.
repo_name = "alikayh/SecurePytho"

# Original author's note: an 8B model, but only about 6 GB of RAM.
_base_model_options = dict(
    model_name="unsloth/llama-3-8b-bnb-4bit",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**_base_model_options)

# Projection layers that receive LoRA adapters.
_lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the base model with LoRA adapters (rank 64, alpha 128).
model = FastLanguageModel.get_peft_model(
    model,
    r=64,
    target_modules=_lora_target_modules,
    lora_alpha=128,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)
# Dataset: load the first uploaded file as the training split
if not uploaded:
    # An empty upload would otherwise surface as a bare IndexError.
    raise ValueError("No dataset file was uploaded; upload a JSONL or CSV file.")

# Iterating the upload result yields file names; take the first one.
filename = next(iter(uploaded))

# Case-insensitive match so "DATA.CSV" is not parsed as JSON.
if filename.lower().endswith(".csv"):
    dataset = load_dataset("csv", data_files=filename)["train"]
else:
    dataset = load_dataset("json", data_files=filename)["train"]

# The trainer below reads the "text" field, so check for it before training starts.
if "text" not in dataset.column_names:
    raise ValueError(
        f"Dataset {filename!r} has no 'text' column; found columns: {dataset.column_names}"
    )
# Choose the mixed-precision mode from the hardware instead of forcing fp16.
# NOTE(review): the model is loaded with dtype=None, which presumably lets the
# loader pick bfloat16 on GPUs that support it; fp16=True would then conflict.
_bf16_supported = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=8,  # effective batch of 4 * 8 = 32 per device
        warmup_steps=10,
        learning_rate=2e-4,
        fp16=not _bf16_supported,
        bf16=_bf16_supported,
        logging_steps=10,
        output_dir=repo_name,
        optim="adamw_8bit",
        seed=42,
        num_train_epochs=3,
        push_to_hub=True,
        hub_model_id=repo_name,
        report_to="none",
    ),
)

print("شروع فاین‌تیون با Unsloth — فقط ۸–۱۰ دقیقه صبر کن داداش!")

from huggingface_hub import logout

try:
    trainer.train()
    # push_to_hub=True only uploads when a checkpoint save is triggered; push
    # the final model explicitly so the "uploaded" message below is true.
    trainer.push_to_hub()
finally:
    # The login step stores the token locally (per the huggingface_hub docs).
    # Remove it again so the final "token was not stored" message holds, even
    # when training fails.
    logout()

print("تموم شد! SecurePytho آماده‌ست و روی HF آپلود شد:")
print(f"https://huggingface.co/{repo_name}")
print("توکنت هیچ جا ذخیره نشد — 100% امن!")