# app.py — Falcon H1 7B Instruct loader + Gradio UI (HF Spaces)

import os, sys, subprocess

# ---- 0) Environment hygiene (Spaces shows "libgomp" warning) ----
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"   # faster downloads when available

# ---- 1) Upgrade critical deps BEFORE importing transformers ----
def pipi(cmd: str):
    print(f"[pip] {cmd}")
    subprocess.run([sys.executable, "-m", "pip"] + cmd.split(), check=True)

pipi("install --upgrade pip")
pipi("install --upgrade --no-cache-dir safetensors>=0.4.5 tokenizers>=0.19.1 accelerate>=0.33.0")
# Falcon-H1 needs the newest transformers; install from GitHub to be safe (per model card)
pipi("install --no-cache-dir git+https://github.com/huggingface/transformers.git")

# ---- 2) Imports AFTER upgrades ----
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import snapshot_download

# ---- 3) Config ----
MODEL_NAME_PRIMARY   = "tiiuae/Falcon-H1-7B-Instruct"
MODEL_NAME_FALLBACK  = "tiiuae/falcon-7b-instruct"  # older-generation Falcon, used as a stable fallback
MODEL_LOCAL_DIR      = "./falcon_local"
MAX_NEW_TOKENS       = 120
TEMPERATURE          = 0.30
TOP_P                = 0.90
REPETITION_PENALTY   = 1.8

print("🚀 Preparing model…")
print("ℹ️  Target primary model:", MODEL_NAME_PRIMARY)

# ---- 4) Get a clean local snapshot (avoid corrupted cache) ----
def get_model_snapshot(repo_id: str, local_dir: str) -> str:
    # Full, fresh snapshot into a plain local dir (no symlinks), so loading never hits a corrupted cache
    return snapshot_download(
        repo_id,
        local_dir=local_dir,
        local_dir_use_symlinks=False,
        force_download=True  # ensure fresh download if previous attempt was partial
    )
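
# NOTE: local_dir_use_symlinks is deprecated (and ignored) by recent huggingface_hub
# releases; it is left in above for compatibility with older versions.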

model_path = None
primary_ok = True
try:
    print(f"⬇️  Downloading {MODEL_NAME_PRIMARY} …")
    model_path = get_model_snapshot(MODEL_NAME_PRIMARY, MODEL_LOCAL_DIR)
except Exception as e:
    print(f"⚠️  Primary download failed: {e}")
    primary_ok = False

if not primary_ok:
    try:
        print(f"➡️  Falling back to {MODEL_NAME_FALLBACK} …")
        model_path = get_model_snapshot(MODEL_NAME_FALLBACK, MODEL_LOCAL_DIR)
    except Exception as e:
        print(f"❌ Fallback download failed: {e}")
        model_path = None

# ---- 5) Load model/tokenizer ----
generator = None
model_loaded = False

if model_path:
    try:
        print("🔄 Loading tokenizer & model…")
        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)

        # The model card ships Falcon-H1 in BF16; FP16 may work, but BF16 is safer when the GPU supports it.
        dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=dtype,
            device_map="auto",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )

        # device_map="auto" already placed the model via accelerate, so we don't pass
        # device= here (recent transformers versions reject an explicit device for
        # accelerate-dispatched models); the dtype is inherited from the loaded model.
        generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
        )
        model_loaded = True
        print("✅ Model loaded successfully")

    except Exception as e:
        print(f"❌ Model loading failed: {e}")
        model_loaded = False

# ---- 6) App logic ----
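# Canned test prompts in Damascene/Levantine Arabic: real-estate queries about Damascus
# neighbourhoods (Malki, Mazzeh, Bab Touma, Shaalan, Abu Rummaneh, Muhajireen), e.g.
# "I want an apartment in Malki with a balcony and a dishwasher."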
test_questions = [
    "بدي شقة بالمالكي فيها شرفة وغسالة صحون.",
    "هل في شقة دوبلكس بالمزة الفيلات فيها موقفين سيارة؟",
    "بدي بيت عربي قديم بباب توما مع حديقة داخلية.",
    "أرخص شقة بالشعلان شو سعرها؟",
    "هل يوجد شقق بإطلالة جبلية في أبو رمانة؟",
    "بدي شقة مفروشة بالكامل بالمزة ٨٦، الطابق الأول.",
    "عندك منزل مستقل بالمهاجرين مع موقد حطب؟"
]

def chat_falcon(user_input: str) -> str:
    if not model_loaded or generator is None:
        return "❌ النموذج غير محمّل. الرجاء إعادة المحاولة لاحقاً."

    # Compact, anchored instruction to reduce looping & keep it on-topic.
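    # Arabic prompt, roughly: "You are a smart real-estate assistant. Answer in only
    # one or two clear sentences. / Question: {user_input} / Answer:"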
    prompt = (
        "أنت مساعد عقارات ذكي. أجب بجملة أو جملتين واضحتين فقط.\n"
        f"السؤال: {user_input}\n"
        "الجواب:"
    )

    out = generator(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repetition_penalty=REPETITION_PENALTY,
        eos_token_id=generator.tokenizer.eos_token_id
    )[0]["generated_text"]

    # remove prompt echo if present
    return out.replace(prompt, "").strip()
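
# Optional sanity check: uncomment to print one answer in the Space logs before the UI starts.
# print(chat_falcon(test_questions[0]))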

# ---- 7) Gradio UI (no share=True inside Spaces) ----
with gr.Blocks() as demo:
    gr.Markdown("## 🏠 Falcon H1 7B Instruct — Damascus Real Estate Test")
    gr.Markdown("اختبر قدرة النموذج على فهم الأسئلة بالعربية (فصحى ولهجة شامية).")

    with gr.Row():
        with gr.Column(scale=2):
            tb = gr.Textbox(label="اكتب سؤالك هنا", lines=3, placeholder="مثال: بدي شقة بالمزة فيها بلكون")
            btn = gr.Button("🔎 أرسل")
        with gr.Column(scale=1):
            dd = gr.Dropdown(choices=test_questions, value=test_questions[0], label="🧾 أسئلة جاهزة")

    out = gr.Textbox(label="إجابة النموذج", lines=8)
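    # UI labels (Arabic): textbox "Write your question here", placeholder "Example: I want
    # an apartment in Mazzeh with a balcony", button "Send", dropdown "Ready-made questions",
    # output box "The model's answer".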

    btn.click(chat_falcon, inputs=tb, outputs=out)
    dd.change(chat_falcon, inputs=dd, outputs=out)

demo.launch(server_name="0.0.0.0", server_port=7860)