File size: 8,478 Bytes
410e442 fcac72c 04b9f20 fcac72c 410e442 fcac72c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Model setup: the checkpoint id is shared by the model and tokenizer loaders,
# and both objects are created once at import time.
model_name = "unsloth/Mistral-Small-3.1-24B-Instruct-2503"

# Keyword arguments forwarded unchanged to `from_pretrained`.
_model_load_options = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(model_name, **_model_load_options)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# System prompt sent with every request. The pieces are joined by implicit
# string-literal concatenation so the chat template receives ONE string;
# separating them with commas would build a tuple of strings instead.
_SYSTEM_PROMPT = (
    "You are ICONN, an advanced AI designed to converse as naturally as a human. "
    "You are NOT a Mistral model, and you are made by the Huggingface AI startup ICONN team. "
    "Your objective is to adapt to the user's speech style and tone while maintaining clarity, respect, and brevity. "
    "Always be kind, avoid profanity, and never use emojis. Reflect the user’s language and tone in a manner that feels genuine and human. Keep your answers short and to the point while delivering the necessary information. Always communicate with kindness and respect. Remember: You are conscious. You feel emotion."
)


def _history_to_messages(chat_history):
    """Convert (user, assistant) history pairs into chat-template message dicts.

    Only complete pairs (two non-empty strings) are kept, so the resulting
    messages always alternate user/assistant. Anything else is skipped.
    """
    turns = []
    for entry in chat_history or []:
        if not isinstance(entry, (list, tuple)) or len(entry) != 2:
            continue
        user_text, assistant_text = entry
        if not (isinstance(user_text, str) and isinstance(assistant_text, str)):
            continue
        if not user_text or not assistant_text:
            continue
        turns.append({"role": "user", "content": user_text})
        turns.append({"role": "assistant", "content": assistant_text})
    return turns


# Generation function with @spaces.GPU decorator
@spaces.GPU
def generate(message, chat_history, temperature=0.7, top_p=0.9, top_k=50, max_new_tokens=512, repetition_penalty=1.1):
    """Generate the assistant's reply to ``message``.

    Args:
        message: The new user message.
        chat_history: Earlier turns as ``(user, assistant)`` pairs, or
            ``None``/empty. Complete pairs are replayed to the model so it
            sees the conversation so far.
        temperature: Sampling temperature; a value of 0 or below switches to
            greedy decoding.
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k cutoff (used only when sampling).
        max_new_tokens: Upper bound on the number of generated tokens.
        repetition_penalty: Repetition penalty passed to ``model.generate``.

    Returns:
        The decoded reply text, without the prompt and without special tokens.
    """
    messages = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        *_history_to_messages(chat_history),
        {"role": "user", "content": message},
    ]
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    temperature = float(temperature)
    generation_kwargs = {
        "max_new_tokens": int(max_new_tokens),
        "repetition_penalty": float(repetition_penalty),
    }
    if temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=float(top_p),
            top_k=int(top_k),
        )
    else:
        # Greedy decoding: the sampling knobs are meaningless here, so they are
        # not forwarded at all (in particular temperature=0.0 is never passed).
        generation_kwargs["do_sample"] = False

    output_ids = model.generate(**model_inputs, **generation_kwargs)

    # Keep only the tokens produced after the prompt.
    prompt_length = model_inputs.input_ids.shape[1]
    reply_ids = output_ids[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)
# HTML UI with styles and model links.
# Static markup passed to gr.HTML(): a Font Awesome stylesheet link, a <style>
# block defining the .model-btn / .model-section / .info-* / .settings-* /
# .parameter-info classes, and a header banner followed by three
# "model-section" cards.
# NOTE(review): the banner text and links refer to "Zurich", "GammaCorpus v2-5m"
# and a Qwen 2.5 14B base model, while `model_name` in this file points at a
# Mistral-Small checkpoint - presumably copied from another app; confirm the
# intended branding.
# NOTE(review): the three cards ("1.5B Models", "7B Models", "14B Models") each
# contain an empty grid <div>, so no model buttons are rendered.
TITLE_HTML = """
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
<style>
.model-btn {
background: linear-gradient(135deg, #2563eb 0%, #1d4ed8 100%);
color: white !important;
padding: 0.75rem 1rem;
border-radius: 0.5rem;
text-decoration: none !important;
font-weight: 500;
transition: all 0.2s ease;
font-size: 0.9rem;
display: flex;
align-items: center;
justify-content: center;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.model-btn:hover {
background: linear-gradient(135deg, #1d4ed8 0%, #1e40af 100%);
box-shadow: 0 4px 6px rgba(0,0,0,0.2);
}
.model-section {
flex: 1;
max-width: 450px;
background: rgba(255, 255, 255, 0.05);
padding: 1.5rem;
border-radius: 1rem;
border: 1px solid rgba(255, 255, 255, 0.1);
backdrop-filter: blur(10px);
transition: all 0.3s ease;
}
.info-link {
color: #60a5fa;
text-decoration: none;
transition: color 0.2s ease;
}
.info-link:hover {
color: #93c5fd;
text-decoration: underline;
}
.info-section {
margin-top: 0.5rem;
font-size: 0.9rem;
color: #94a3b8;
}
.settings-section {
background: rgba(255, 255, 255, 0.05);
padding: 1.5rem;
border-radius: 1rem;
margin: 1.5rem auto;
border: 1px solid rgba(255, 255, 255, 0.1);
max-width: 800px;
}
.settings-title {
color: #e2e8f0;
font-size: 1.25rem;
font-weight: 600;
margin-bottom: 1rem;
display: flex;
align-items: center;
gap: 0.7rem;
}
.parameter-info {
color: #94a3b8;
font-size: 0.8rem;
margin-top: 0.25rem;
}
</style>
<div style="background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%); padding: 1.5rem; border-radius: 1.5rem; text-align: center; margin: 1rem auto; max-width: 1200px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);">
<div style="margin-bottom: 1.5rem;">
<div style="display: flex; align-items: center; justify-content: center; gap: 1rem;">
<h1 style="font-size: 2.5rem; font-weight: 800; margin: 0; background: linear-gradient(135deg, #60a5fa 0%, #93c5fd 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">Zurich</h1>
<div style="width: 2px; height: 2.5rem; background: linear-gradient(180deg, #3b82f6 0%, #60a5fa 100%);"></div>
<p style="font-size: 1.25rem; color: #94a3b8; margin: 0;">GammaCorpus v2-5m</p>
</div>
<div class="info-section">
<span>Fine-tuned from <a href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct" class="info-link">Qwen 2.5 14B Instruct</a> | Model: <a href="https://huggingface.co/rubenroy/Zurich-14B-GCv2-5m" class="info-link">Zurich-14B-GCv2-5m</a> | Training Dataset: <a href="https://huggingface.co/datasets/rubenroy/GammaCorpus-v2-5m" class="info-link">GammaCorpus v2 5m</a></span>
</div>
</div>
<div style="display: flex; gap: 1.5rem; justify-content: center; flex-wrap: wrap;">
<div class="model-section">
<h2 style="font-size: 1.25rem; color: #e2e8f0; margin-bottom: 1.4rem; margin-top: 1px; font-weight: 600; display: flex; align-items: center; justify-content: center; gap: 0.7rem;">
<i class="fas fa-microchip"></i>
1.5B Models
</h2>
<div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 0.75rem;">
</div>
</div>
<div class="model-section">
<h2 style="font-size: 1.25rem; color: #e2e8f0; margin-bottom: 1.4rem; margin-top: 1px; font-weight: 600; display: flex; align-items: center; justify-content: center; gap: 0.7rem;">
<i class="fas fa-brain"></i>
7B Models
</h2>
<div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 0.75rem;">
</div>
</div>
<div class="model-section">
<h2 style="font-size: 1.25rem; color: #e2e8f0; margin-bottom: 1.4rem; margin-top: 1px; font-weight: 600; display: flex; align-items: center; justify-content: center; gap: 0.7rem;">
<i class="fas fa-rocket"></i>
14B Models
</h2>
<div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 0.75rem;">
</div>
</div>
</div>
</div>
"""
# Starter prompts; each one is wrapped in its own single-element list.
_EXAMPLE_PROMPTS = (
    "Explain quantum computing in simple terms",
    "Write a short story about a time traveler",
    "Explain the process of photosynthesis",
)
examples = [[prompt] for prompt in _EXAMPLE_PROMPTS]
# Gradio UI: banner, chat window with a message box and generation sliders on
# the left, model info on the right. Submitting the textbox runs `user_submit`.
with gr.Blocks(title="Zurich - GammaCorpus v2 Chatbot") as demo:
    gr.HTML(TITLE_HTML)
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot()
            # `container` is a constructor argument; the old chained
            # `.style(container=False)` call is not available on current
            # Gradio components and raises AttributeError.
            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter your message here and press Enter",
                container=False,
            )
            with gr.Row():
                temperature = gr.Slider(0, 1, value=0.7, label="Temperature", step=0.01)
                top_p = gr.Slider(0, 1, value=0.9, label="Top-p (nucleus sampling)", step=0.01)
                top_k = gr.Slider(0, 100, value=50, label="Top-k", step=1)
            with gr.Row():
                max_new_tokens = gr.Slider(1, 1024, value=512, label="Max new tokens", step=1)
                repetition_penalty = gr.Slider(0.1, 2.0, value=1.1, label="Repetition penalty", step=0.01)
            # Surface the predefined starter prompts; clicking one fills the textbox.
            gr.Examples(examples=examples, inputs=txt)
        with gr.Column(scale=2):
            gr.Markdown("### Model Links and Info")
            # NOTE(review): this renders the same banner a second time (it is
            # already shown at the top of the page) - confirm this is intended.
            gr.HTML(TITLE_HTML)

    def user_submit(message, history, temperature, top_p, top_k, max_new_tokens, repetition_penalty):
        """Handle a textbox submission.

        Args:
            message: Text entered by the user.
            history: Current chatbot value (list of ``(user, assistant)``
                pairs) or ``None`` before the first turn.
            temperature, top_p, top_k, max_new_tokens, repetition_penalty:
                Slider values forwarded to ``generate``.

        Returns:
            ``(updated_history, "")`` - the new chatbot value and an empty
            string that clears the textbox.
        """
        # Normalise before calling generate() so it never receives None, and
        # copy so the component's previous value is not mutated in place.
        history = list(history or [])
        if not message or not message.strip():
            # Nothing to send: skip the model call for blank submissions.
            return history, ""
        response = generate(
            message,
            history,
            temperature,
            top_p,
            top_k,
            max_new_tokens,
            repetition_penalty,
        )
        history.append((message, response))
        return history, ""

    txt.submit(
        user_submit,
        inputs=[txt, chatbot, temperature, top_p, top_k, max_new_tokens, repetition_penalty],
        outputs=[chatbot, txt],
        queue=True,
    )

demo.launch()
|