# app.py — Falcon H1 7B Instruct loader + Gradio UI (HF Spaces)
import os, sys, subprocess

# ---- 0) Environment hygiene (Spaces shows "libgomp" warning) ----
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"  # faster downloads (requires the hf_transfer package, installed below)

# ---- 1) Upgrade critical deps BEFORE importing transformers ----
def pipi(cmd: str):
    print(f"[pip] {cmd}")
    subprocess.run([sys.executable, "-m", "pip"] + cmd.split(), check=True)

pipi("install --upgrade pip")
pipi("install --upgrade --no-cache-dir safetensors>=0.4.5 tokenizers>=0.19.1 accelerate>=0.33.0 hf_transfer")
# Falcon-H1 needs the newest transformers; install from GitHub to be safe (per model card)
pipi("install --no-cache-dir git+https://github.com/huggingface/transformers.git")

# ---- 2) Imports AFTER upgrades ----
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import snapshot_download

# ---- 3) Config ----
MODEL_NAME_PRIMARY = "tiiuae/Falcon-H1-7B-Instruct"
MODEL_NAME_FALLBACK = "tiiuae/falcon-7b-instruct"  # stable fallback
MODEL_LOCAL_DIR = "./falcon_local"
MAX_NEW_TOKENS = 120
TEMPERATURE = 0.30
TOP_P = 0.90
REPETITION_PENALTY = 1.8  # deliberately high to curb looping; 1.1–1.3 is more typical

print("🚀 Preparing model…")
print("ℹ️ Target primary model:", MODEL_NAME_PRIMARY)

# ---- 4) Get a clean local snapshot (avoid corrupted cache) ----
def get_model_snapshot(repo_id: str, local_dir: str) -> str:
    # local_dir stores real files by default (local_dir_use_symlinks is deprecated in recent huggingface_hub)
    return snapshot_download(
        repo_id,
        local_dir=local_dir,
        force_download=True,  # ensure a fresh download if a previous attempt was partial
    )
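# Optional: allow_patterns keeps the snapshot lean by fetching only config, weights,
# and tokenizer files. A minimal sketch of that variant — the pattern list below is an
# assumption about the repo layout (remove it if anything's missing), not from the model card:
#
# def get_model_snapshot_lean(repo_id: str, local_dir: str) -> str:
#     return snapshot_download(
#         repo_id,
#         local_dir=local_dir,
#         force_download=True,
#         allow_patterns=["*.json", "*.safetensors", "tokenizer*"],  # hypothetical selection
#     )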
model_path = None
primary_ok = True
try:
    print(f"⬇️ Downloading {MODEL_NAME_PRIMARY} …")
    model_path = get_model_snapshot(MODEL_NAME_PRIMARY, MODEL_LOCAL_DIR)
except Exception as e:
    print(f"⚠️ Primary download failed: {e}")
    primary_ok = False

if not primary_ok:
    try:
        print(f"➡️ Falling back to {MODEL_NAME_FALLBACK} …")
        model_path = get_model_snapshot(MODEL_NAME_FALLBACK, MODEL_LOCAL_DIR)
    except Exception as e:
        print(f"❌ Fallback download failed: {e}")
        model_path = None

# ---- 5) Load model/tokenizer ----
generator = None
model_loaded = False

if model_path:
    try:
        print("🔄 Loading tokenizer & model…")
        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)

        # H1 is BF16 on the model card; FP16 may work, but BF16 is safer if the hardware supports it.
        dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=dtype,
            device_map="auto",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )

        # Note: do NOT pass `device=` here — the model was already placed by accelerate
        # via device_map="auto", and pipeline() errors out if both are given.
        generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
        )
        model_loaded = True
        print("✅ Model loaded successfully")
    except Exception as e:
        print(f"❌ Model loading failed: {e}")
        model_loaded = False

# ---- 6) App logic ----
# Arabic (MSA + Levantine dialect) test questions about Damascus real estate,
# e.g. "I want an apartment in Al-Malki with a balcony and a dishwasher."
test_questions = [
    "بدي شقة بالمالكي فيها شرفة وغسالة صحون.",
    "هل في شقة دوبلكس بالمزة الفيلات فيها موقفين سيارة؟",
    "بدي بيت عربي قديم بباب توما مع حديقة داخلية.",
    "أرخص شقة بالشعلان شو سعرها؟",
    "هل يوجد شقق بإطلالة جبلية في أبو رمانة؟",
    "بدي شقة مفروشة بالكامل بالمزة ٨٦، الطابق الأول.",
    "عندك منزل مستقل بالمهاجرين مع موقد حطب؟",
]

def chat_falcon(user_input: str) -> str:
    if not model_loaded or generator is None:
        # "The model is not loaded. Please try again later."
        return "❌ النموذج غير محمّل. الرجاء إعادة المحاولة لاحقاً."

    # Compact, anchored instruction to reduce looping & keep it on-topic.
    # Arabic: "You are a smart real-estate assistant. Answer in one or two clear sentences only."
    prompt = (
        "أنت مساعد عقارات ذكي. أجب بجملة أو جملتين واضحتين فقط.\n"
        f"السؤال: {user_input}\n"
        "الجواب:"
    )

    out = generator(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repetition_penalty=REPETITION_PENALTY,
        eos_token_id=generator.tokenizer.eos_token_id,
    )[0]["generated_text"]

    # remove prompt echo if present (text-generation returns prompt + completion by default)
    return out.replace(prompt, "").strip()

# ---- 7) Gradio UI (no share=True inside Spaces) ----
with gr.Blocks() as demo:
    gr.Markdown("## 🏠 Falcon H1 7B Instruct — Damascus Real Estate Test")
    # Arabic: "Test the model's ability to understand questions in Arabic (MSA and Levantine dialect)."
    gr.Markdown("اختبر قدرة النموذج على فهم الأسئلة بالعربية (فصحى ولهجة شامية).")
    with gr.Row():
        with gr.Column(scale=2):
            tb = gr.Textbox(
                label="اكتب سؤالك هنا",  # "Type your question here"
                lines=3,
                placeholder="مثال: بدي شقة بالمزة فيها بلكون",  # "Example: I want an apartment in Mazzeh with a balcony"
            )
            btn = gr.Button("🔎 أرسل")  # "Send"
        with gr.Column(scale=1):
            dd = gr.Dropdown(
                choices=test_questions,
                value=test_questions[0],
                label="🧾 أسئلة جاهزة",  # "Ready-made questions"
            )
    out = gr.Textbox(label="إجابة النموذج", lines=8)  # "Model answer"
    btn.click(chat_falcon, inputs=tb, outputs=out)
    dd.change(chat_falcon, inputs=dd, outputs=out)

demo.launch(server_name="0.0.0.0", server_port=7860)
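# Note: instead of stripping the prompt echo manually in chat_falcon above, the pipeline
# can be asked to return only the completion via return_full_text=False. A sketch of the
# equivalent call (same sampling parameters assumed):
#
# out = generator(
#     prompt,
#     max_new_tokens=MAX_NEW_TOKENS,
#     do_sample=True,
#     temperature=TEMPERATURE,
#     top_p=TOP_P,
#     repetition_penalty=REPETITION_PENALTY,
#     return_full_text=False,
# )[0]["generated_text"]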