# app.py — Falcon H1 7B Instruct loader + Gradio UI (HF Spaces)

import os, sys, subprocess

# ---- 0) Environment hygiene (Spaces shows "libgomp" warning) ----
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"   # faster downloads when available

# ---- 1) Upgrade critical deps BEFORE importing transformers ----
def pipi(cmd: str):
    print(f"[pip] {cmd}")
    subprocess.run([sys.executable, "-m", "pip"] + cmd.split(), check=True)

pipi("install --upgrade pip")
pipi("install --upgrade --no-cache-dir safetensors>=0.4.5 tokenizers>=0.19.1 accelerate>=0.33.0")
# Falcon-H1 needs the newest transformers; install from GitHub to be safe (per model card)
pipi("install --no-cache-dir git+https://github.com/huggingface/transformers.git")

# ---- 2) Imports AFTER upgrades ----
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import snapshot_download

# ---- 3) Config ----
MODEL_NAME_PRIMARY   = "tiiuae/Falcon-H1-7B-Instruct"
MODEL_NAME_FALLBACK  = "tiiuae/falcon-7b-instruct"  # older-generation Falcon, used as a stable fallback
MODEL_LOCAL_DIR      = "./falcon_local"
MAX_NEW_TOKENS       = 120
TEMPERATURE          = 0.30
TOP_P                = 0.90
REPETITION_PENALTY   = 1.8

print("🚀 Preparing model…")
print("ℹ️  Target primary model:", MODEL_NAME_PRIMARY)

# ---- 4) Get a clean local snapshot (avoid corrupted cache) ----
def get_model_snapshot(repo_id: str, local_dir: str) -> str:
    # Full, fresh snapshot into a plain local dir (no symlinks), so loading never hits a corrupted cache
    return snapshot_download(
        repo_id,
        local_dir=local_dir,
        local_dir_use_symlinks=False,
        force_download=True  # ensure fresh download if previous attempt was partial
    )
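
# NOTE: local_dir_use_symlinks is deprecated (and ignored) by recent huggingface_hub
# releases; it is left in above for compatibility with older versions.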

model_path = None
primary_ok = True
try:
    print(f"⬇️  Downloading {MODEL_NAME_PRIMARY} …")
    model_path = get_model_snapshot(MODEL_NAME_PRIMARY, MODEL_LOCAL_DIR)
except Exception as e:
    print(f"⚠️  Primary download failed: {e}")
    primary_ok = False

if not primary_ok:
    try:
        print(f"➡️  Falling back to {MODEL_NAME_FALLBACK} …")
        model_path = get_model_snapshot(MODEL_NAME_FALLBACK, MODEL_LOCAL_DIR)
    except Exception as e:
        print(f"❌ Fallback download failed: {e}")
        model_path = None

# ---- 5) Load model/tokenizer ----
generator = None
model_loaded = False

if model_path:
    try:
        print("🔄 Loading tokenizer & model…")
        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)

        # The model card ships Falcon-H1 in BF16; FP16 may work, but BF16 is safer when the GPU supports it.
        dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=dtype,
            device_map="auto",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )

        # device_map="auto" already placed the model via accelerate, so we don't pass
        # device= here (recent transformers versions reject an explicit device for
        # accelerate-dispatched models); the dtype is inherited from the loaded model.
        generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
        )
        model_loaded = True
        print("✅ Model loaded successfully")

    except Exception as e:
        print(f"❌ Model loading failed: {e}")
        model_loaded = False

# ---- 6) App logic ----
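# Canned test prompts in Damascene/Levantine Arabic: real-estate queries about Damascus
# neighbourhoods (Malki, Mazzeh, Bab Touma, Shaalan, Abu Rummaneh, Muhajireen), e.g.
# "I want an apartment in Malki with a balcony and a dishwasher."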
test_questions = [
    "بدي شقة بالمالكي فيها شرفة وغسالة صحون.",
    "هل في شقة دوبلكس بالمزة الفيلات فيها موقفين سيارة؟",
    "بدي بيت عربي قديم بباب توما مع حديقة داخلية.",
    "أرخص شقة بالشعلان شو سعرها؟",
    "هل يوجد شقق بإطلالة جبلية في أبو رمانة؟",
    "بدي شقة مفروشة بالكامل بالمزة ٨٦، الطابق الأول.",
    "عندك منزل مستقل بالمهاجرين مع موقد حطب؟"
]

def chat_falcon(user_input: str) -> str:
    if not model_loaded or generator is None:
        return "❌ النموذج غير محمّل. الرجاء إعادة المحاولة لاحقاً."

    # Compact, anchored instruction to reduce looping & keep it on-topic.
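    # Arabic prompt, roughly: "You are a smart real-estate assistant. Answer in only
    # one or two clear sentences. / Question: {user_input} / Answer:"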
    prompt = (
        "أنت مساعد عقارات ذكي. أجب بجملة أو جملتين واضحتين فقط.\n"
        f"السؤال: {user_input}\n"
        "الجواب:"
    )

    out = generator(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repetition_penalty=REPETITION_PENALTY,
        eos_token_id=generator.tokenizer.eos_token_id
    )[0]["generated_text"]

    # remove prompt echo if present
    return out.replace(prompt, "").strip()
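
# Optional sanity check: uncomment to print one answer in the Space logs before the UI starts.
# print(chat_falcon(test_questions[0]))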

# ---- 7) Gradio UI (no share=True inside Spaces) ----
with gr.Blocks() as demo:
    gr.Markdown("## 🏠 Falcon H1 7B Instruct — Damascus Real Estate Test")
    gr.Markdown("اختبر قدرة النموذج على فهم الأسئلة بالعربية (فصحى ولهجة شامية).")

    with gr.Row():
        with gr.Column(scale=2):
            tb = gr.Textbox(label="اكتب سؤالك هنا", lines=3, placeholder="مثال: بدي شقة بالمزة فيها بلكون")
            btn = gr.Button("🔎 أرسل")
        with gr.Column(scale=1):
            dd = gr.Dropdown(choices=test_questions, value=test_questions[0], label="🧾 أسئلة جاهزة")

    out = gr.Textbox(label="إجابة النموذج", lines=8)
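    # UI labels (Arabic): textbox "Write your question here", placeholder "Example: I want
    # an apartment in Mazzeh with a balcony", button "Send", dropdown "Ready-made questions",
    # output box "The model's answer".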

    btn.click(chat_falcon, inputs=tb, outputs=out)
    dd.change(chat_falcon, inputs=dd, outputs=out)

demo.launch(server_name="0.0.0.0", server_port=7860)