# app.py — Falcon H1 7B Instruct loader + Gradio UI (HF Spaces)
import os, sys, subprocess
# ---- 0) Environment hygiene (Spaces logs show a "libgomp" warning otherwise) ----
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"  # faster downloads; requires the hf_transfer package (installed below)
# ---- 1) Upgrade critical deps BEFORE importing transformers ----
def pipi(cmd: str):
    print(f"[pip] {cmd}")
    subprocess.run([sys.executable, "-m", "pip"] + cmd.split(), check=True)
pipi("install --upgrade pip")
pipi("install --upgrade --no-cache-dir safetensors>=0.4.5 tokenizers>=0.19.1 accelerate>=0.33.0 hf_transfer")
# Falcon-H1 needs a very recent transformers; install from GitHub to be safe (per the model card)
pipi("install --no-cache-dir git+https://github.com/huggingface/transformers.git")
# ---- 2) Imports AFTER upgrades ----
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import snapshot_download
# ---- 3) Config ----
MODEL_NAME_PRIMARY = "tiiuae/Falcon-H1-7B-Instruct"
MODEL_NAME_FALLBACK = "tiiuae/falcon-7b-instruct"  # stable fallback
MODEL_LOCAL_DIR = "./falcon_local"
MAX_NEW_TOKENS = 120
TEMPERATURE = 0.30
TOP_P = 0.90
REPETITION_PENALTY = 1.8  # deliberately high, to curb repetition loops in short answers
print("🚀 Preparing model…") | |
print("ℹ️ Target primary model:", MODEL_NAME_PRIMARY) | |
# ---- 4) Get a clean local snapshot (avoid a corrupted cache) ----
def get_model_snapshot(repo_id: str, local_dir: str) -> str:
    # Downloads the full repo; force_download=True re-fetches everything in case a
    # previous attempt left a partial snapshot behind.
    return snapshot_download(
        repo_id,
        local_dir=local_dir,
        local_dir_use_symlinks=False,
        force_download=True,
    )
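# Optional sketch, not wired into the flow below: force_download=True re-fetches the
# full snapshot (roughly 15 GB for a 7B model) on every Space restart. A cheaper
# strategy would skip the forced download when a snapshot already looks complete;
# the completeness check here is an assumption, not an official API.
def snapshot_looks_complete(local_dir: str) -> bool:
    import glob
    has_weights = bool(glob.glob(os.path.join(local_dir, "*.safetensors")))
    has_config = os.path.exists(os.path.join(local_dir, "config.json"))
    return has_weights and has_config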
model_path = None
primary_ok = True
try:
    print(f"⬇️ Downloading {MODEL_NAME_PRIMARY} …")
    model_path = get_model_snapshot(MODEL_NAME_PRIMARY, MODEL_LOCAL_DIR)
except Exception as e:
    print(f"⚠️ Primary download failed: {e}")
    primary_ok = False
if not primary_ok:
    try:
        print(f"➡️ Falling back to {MODEL_NAME_FALLBACK} …")
        model_path = get_model_snapshot(MODEL_NAME_FALLBACK, MODEL_LOCAL_DIR)
    except Exception as e:
        print(f"❌ Fallback download failed: {e}")
        model_path = None
# ---- 5) Load model/tokenizer ----
generator = None
model_loaded = False
if model_path:
    try:
        print("🔄 Loading tokenizer & model…")
        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
        # The model card ships BF16 weights; FP16 may work, but BF16 is safer when the GPU supports it.
        dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=dtype,
            device_map="auto",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )
        # device_map="auto" already places the model via accelerate, so the pipeline
        # must not be given an explicit device (newer transformers warns or errors on
        # the combination); the dtype is likewise already baked into the loaded model.
        generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
        )
        model_loaded = True
        print("✅ Model loaded successfully")
    except Exception as e:
        print(f"❌ Model loading failed: {e}")
        model_loaded = False
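# Note: 7B parameters in BF16 are roughly 14-15 GB of weights, so this load expects a
# GPU Space; on CPU-only hardware the FP32 fallback needs ~28 GB of RAM and generation
# will be very slow.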
# ---- 6) App logic ----
# Test prompts in Levantine Arabic (Damascus real-estate queries); English gloss in the trailing comments.
test_questions = [
    "بدي شقة بالمالكي فيها شرفة وغسالة صحون.",  # "I want an apartment in Malki with a balcony and a dishwasher."
    "هل في شقة دوبلكس بالمزة الفيلات فيها موقفين سيارة؟",  # "Is there a duplex in Mezzeh Villas with two parking spots?"
    "بدي بيت عربي قديم بباب توما مع حديقة داخلية.",  # "I want an old Arab-style house in Bab Touma with an inner courtyard garden."
    "أرخص شقة بالشعلان شو سعرها؟",  # "What does the cheapest apartment in Shaalan cost?"
    "هل يوجد شقق بإطلالة جبلية في أبو رمانة؟",  # "Are there apartments with a mountain view in Abu Rummaneh?"
    "بدي شقة مفروشة بالكامل بالمزة ٨٦، الطابق الأول.",  # "I want a fully furnished apartment in Mezzeh 86, first floor."
    "عندك منزل مستقل بالمهاجرين مع موقد حطب؟"  # "Do you have a detached house in Muhajireen with a wood-burning stove?"
]
def chat_falcon(user_input: str) -> str:
    if not model_loaded or generator is None:
        return "❌ النموذج غير محمّل. الرجاء إعادة المحاولة لاحقاً."  # "The model is not loaded. Please try again later."
    # Compact, anchored instruction to reduce looping and keep the answer on-topic:
    # "You are a smart real-estate assistant. Answer in one or two clear sentences only.",
    # followed by "Question: …" / "Answer:" anchors. A chat-template alternative is
    # sketched after this function.
    prompt = (
        "أنت مساعد عقارات ذكي. أجب بجملة أو جملتين واضحتين فقط.\n"
        f"السؤال: {user_input}\n"
        "الجواب:"
    )
    out = generator(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repetition_penalty=REPETITION_PENALTY,
        eos_token_id=generator.tokenizer.eos_token_id,
    )[0]["generated_text"]
    # The pipeline echoes the prompt at the start of generated_text; strip it off by
    # position rather than replace(), so a repeated phrase later in the answer is kept.
    if out.startswith(prompt):
        out = out[len(prompt):]
    return out.strip()
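# Optional sketch, not wired into chat_falcon above: instruct checkpoints such as
# Falcon-H1-7B-Instruct normally ship a chat template, so the tokenizer can build the
# prompt instead of the hand-rolled string. Treat the exact system wording as an
# assumption; the model card is the reference for supported roles.
def build_chat_prompt(user_input: str) -> str:
    messages = [
        {"role": "system", "content": "أنت مساعد عقارات ذكي. أجب بجملة أو جملتين واضحتين فقط."},
        {"role": "user", "content": user_input},
    ]
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)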
# ---- 7) Gradio UI (no share=True inside Spaces) ----
with gr.Blocks() as demo:
    gr.Markdown("## 🏠 Falcon H1 7B Instruct — Damascus Real Estate Test")
    gr.Markdown("اختبر قدرة النموذج على فهم الأسئلة بالعربية (فصحى ولهجة شامية).")  # "Test the model's ability to understand questions in Arabic (MSA and Levantine dialect)."
    with gr.Row():
        with gr.Column(scale=2):
            tb = gr.Textbox(label="اكتب سؤالك هنا", lines=3, placeholder="مثال: بدي شقة بالمزة فيها بلكون")  # label: "Write your question here"; placeholder: "Example: I want an apartment in Mezzeh with a balcony"
            btn = gr.Button("🔎 أرسل")  # "Send"
        with gr.Column(scale=1):
            dd = gr.Dropdown(choices=test_questions, value=test_questions[0], label="🧾 أسئلة جاهزة")  # "Ready-made questions"
    out = gr.Textbox(label="إجابة النموذج", lines=8)  # "Model answer"
    btn.click(chat_falcon, inputs=tb, outputs=out)
    dd.change(chat_falcon, inputs=dd, outputs=out)
demo.launch(server_name="0.0.0.0", server_port=7860)
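# Design note (optional): when a single generation takes several seconds, Spaces apps
# commonly call demo.queue() before launch() so concurrent requests are queued instead
# of timing out; the original single launch call is kept unchanged above.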