# app.py — Falcon H1 7B Instruct loader + Gradio UI (HF Spaces)
import os, sys, subprocess
# ---- 0) Environment hygiene (Spaces shows "libgomp" warning) ----
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" # faster downloads when available
# ---- 1) Upgrade critical deps BEFORE importing transformers ----
def pipi(cmd: str):
print(f"[pip] {cmd}")
subprocess.run([sys.executable, "-m", "pip"] + cmd.split(), check=True)
pipi("install --upgrade pip")
pipi("install --upgrade --no-cache-dir safetensors>=0.4.5 tokenizers>=0.19.1 accelerate>=0.33.0")
# Falcon-H1 needs the newest transformers; install from GitHub to be safe (per model card)
pipi("install --no-cache-dir git+https://github.com/huggingface/transformers.git")
# ---- 2) Imports AFTER upgrades ----
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import snapshot_download
# ---- 3) Config ----
MODEL_NAME_PRIMARY = "tiiuae/Falcon-H1-7B-Instruct"
MODEL_NAME_FALLBACK = "tiiuae/falcon-7b-instruct" # stable fallback
MODEL_LOCAL_DIR = "./falcon_local"
MAX_NEW_TOKENS = 120
TEMPERATURE = 0.30
TOP_P = 0.90
REPETITION_PENALTY = 1.8
print("🚀 Preparing model…")
print("ℹ️ Target primary model:", MODEL_NAME_PRIMARY)
# ---- 4) Get a clean local snapshot (avoid corrupted cache) ----
def get_model_snapshot(repo_id: str, local_dir: str) -> str:
    # allow_patterns could keep the download lean; omitted here so no file is skipped
    return snapshot_download(
        repo_id,
        local_dir=local_dir,
        local_dir_use_symlinks=False,  # deprecated in recent huggingface_hub releases
        force_download=True,  # ensure a fresh download if a previous attempt was partial
    )
model_path = None
primary_ok = True
try:
print(f"⬇️ Downloading {MODEL_NAME_PRIMARY} …")
model_path = get_model_snapshot(MODEL_NAME_PRIMARY, MODEL_LOCAL_DIR)
except Exception as e:
print(f"⚠️ Primary download failed: {e}")
primary_ok = False
if not primary_ok:
try:
print(f"➡️ Falling back to {MODEL_NAME_FALLBACK} …")
model_path = get_model_snapshot(MODEL_NAME_FALLBACK, MODEL_LOCAL_DIR)
except Exception as e:
print(f"❌ Fallback download failed: {e}")
model_path = None
# ---- 5) Load model/tokenizer ----
generator = None
model_loaded = False
if model_path:
    try:
        print("🔄 Loading tokenizer & model…")
        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
        # H1 is BF16 on the model card; FP16 may work, but BF16 is safer if the hardware supports it.
        dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=dtype,
            device_map="auto",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )
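        # Some Falcon checkpoints ship without a pad token; if so, reuse EOS so the
        # pipeline can pad inputs without warnings.
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token = tokenizer.eos_token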
        generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            # dtype and placement are already handled by from_pretrained above;
            # passing device= here would conflict with a device_map="auto" (accelerate) model
        )
        model_loaded = True
        print("✅ Model loaded successfully")
    except Exception as e:
        print(f"❌ Model loading failed: {e}")
        model_loaded = False
# ---- 6) App logic ----
test_questions = [
    "بدي شقة بالمالكي فيها شرفة وغسالة صحون.",  # "I want an apartment in Malki with a balcony and a dishwasher."
    "هل في شقة دوبلكس بالمزة الفيلات فيها موقفين سيارة؟",  # "Is there a duplex in Mazzeh Villat with two parking spots?"
    "بدي بيت عربي قديم بباب توما مع حديقة داخلية.",  # "I want an old Arab-style house in Bab Touma with an inner courtyard."
    "أرخص شقة بالشعلان شو سعرها؟",  # "What is the price of the cheapest apartment in Shaalan?"
    "هل يوجد شقق بإطلالة جبلية في أبو رمانة؟",  # "Are there apartments with a mountain view in Abu Rummaneh?"
    "بدي شقة مفروشة بالكامل بالمزة ٨٦، الطابق الأول.",  # "I want a fully furnished apartment in Mazzeh 86, first floor."
    "عندك منزل مستقل بالمهاجرين مع موقد حطب؟"  # "Do you have a detached house in Muhajireen with a wood-burning stove?"
]
def chat_falcon(user_input: str) -> str:
    if not model_loaded or generator is None:
        return "❌ النموذج غير محمّل. الرجاء إعادة المحاولة لاحقاً."  # "The model is not loaded. Please try again later."
    # Compact, anchored instruction to reduce looping & keep it on-topic.
    prompt = (
        "أنت مساعد عقارات ذكي. أجب بجملة أو جملتين واضحتين فقط.\n"  # "You are a smart real-estate assistant. Answer in one or two clear sentences only."
        f"السؤال: {user_input}\n"  # "Question: <user input>"
        "الجواب:"  # "Answer:"
    )
    out = generator(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repetition_penalty=REPETITION_PENALTY,
        eos_token_id=generator.tokenizer.eos_token_id,
    )[0]["generated_text"]
    # remove prompt echo if present
    return out.replace(prompt, "").strip()
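# Optional smoke test: set RUN_SMOKE_TEST=1 (a custom variable, not a Spaces default)
# to run one canned question through chat_falcon before the UI starts.
if os.environ.get("RUN_SMOKE_TEST") == "1" and model_loaded:
    print("🧪 Smoke test:", chat_falcon(test_questions[0]))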
# ---- 7) Gradio UI (no share=True inside Spaces) ----
with gr.Blocks() as demo:
gr.Markdown("## 🏠 Falcon H1 7B Instruct — Damascus Real Estate Test")
gr.Markdown("اختبر قدرة النموذج على فهم الأسئلة بالعربية (فصحى ولهجة شامية).")
with gr.Row():
with gr.Column(scale=2):
tb = gr.Textbox(label="اكتب سؤالك هنا", lines=3, placeholder="مثال: بدي شقة بالمزة فيها بلكون")
btn = gr.Button("🔎 أرسل")
with gr.Column(scale=1):
dd = gr.Dropdown(choices=test_questions, value=test_questions[0], label="🧾 أسئلة جاهزة")
out = gr.Textbox(label="إجابة النموذج", lines=8)
btn.click(chat_falcon, inputs=tb, outputs=out)
dd.change(chat_falcon, inputs=dd, outputs=out)
demo.launch(server_name="0.0.0.0", server_port=7860)