badaoui HF Staff committed on
Commit
11e4904
·
verified ·
1 Parent(s): 3f83d16

update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -47
app.py CHANGED
@@ -1,64 +1,129 @@
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
 
 
 
8
 
 
9
 
10
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for ``message``, yielding the reply as it grows.

    Args:
        message: The latest user message.
        history: Earlier (user, assistant) turns; empty entries are skipped.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Yields:
        The accumulated response text after each streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is intentionally not named `message`, so the
    # caller-supplied argument is not rebound while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Skip chunks that carry no choice at all instead of raising IndexError.
        if not chunk.choices:
            continue
        # A streamed delta may have no text content (None); treat it as an
        # empty string so the concatenation below cannot raise TypeError.
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
 
 
 
 
 
 
 
 
 
 
 
43
  """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
 
 
 
 
 
45
  """
46
# Chat UI driven by `respond`. The additional inputs are listed in the same
# order as the extra parameters of `respond`:
# system_message, max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly Chatbot.",
            label="System message",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max new tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        ),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
 
63
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
64
  demo.launch()
 
1
+ import csv
2
+ import os
3
+ from datetime import datetime
4
+ from typing import Optional
5
+
6
  import gradio as gr
7
+ from huggingface_hub import HfApi, Repository
8
 
9
+ from optimum_neuron_export import convert
10
+ from gradio_huggingfacehub_search import HuggingfaceHubSearch
11
+ from apscheduler.schedulers.background import BackgroundScheduler
12
+
13
+ DATASET_REPO_URL = "https://huggingface.co/datasets/optimum/neuron-exports"
14
+ DATA_FILENAME = "exports.csv"
15
+ DATA_FILE = os.path.join("data", DATA_FILENAME)
16
+
17
+ HF_TOKEN = os.environ.get("HF_WRITE_TOKEN")
18
 
19
+ DATADIR = "neuron_exports_data"
20
 
21
+ repo: Optional[Repository] = None
22
+ # Uncomment if you want to push to dataset repo with token
23
+ # if HF_TOKEN:
24
+ # repo = Repository(local_dir=DATADIR, clone_from=DATASET_REPO_URL, token=HF_TOKEN)
 
 
 
 
 
25
 
 
 
 
 
 
26
 
27
def _record_export(model_id: str, pr_url: str) -> None:
    """Append one export row to the dataset CSV and push it to the Hub."""
    repo.git_pull(rebase=True)
    # newline="" is what the csv module expects of files it writes to; the
    # explicit encoding keeps the output independent of the platform locale.
    with open(
        os.path.join(DATADIR, DATA_FILE), "a", newline="", encoding="utf-8"
    ) as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["model_id", "pr_url", "time"])
        writer.writerow(
            {
                "model_id": model_id,
                "pr_url": pr_url,
                "time": str(datetime.now()),
            }
        )
    commit_url = repo.push_to_hub()
    print("[dataset]", commit_url)


def neuron_export(model_id: str, task: str) -> str:
    """Export a Hub model to Neuron format and report the outcome as Markdown.

    Args:
        model_id: Hub model ID to export. Empty, ``None`` or whitespace-only
            values are rejected before any conversion is attempted;
            surrounding whitespace is stripped otherwise.
        task: Task name forwarded unchanged to ``convert``.

    Returns:
        A Markdown string: an "Invalid input" message, the error string
        returned by ``convert``, a success message linking to the opened PR,
        or ``#### Error: ...`` if an exception was raised along the way.
    """
    if not model_id or not model_id.strip():
        return f"### Invalid input 🐞 Please specify a model name, got {model_id}"
    model_id = model_id.strip()

    try:
        # HF_TOKEN may be None, in which case the client is anonymous.
        api = HfApi(token=HF_TOKEN)

        # As used here, `convert` returns (error, commit_info) and signals
        # success with the string "0".
        error, commit_info = convert(
            api=api, model_id=model_id, task=task, token=HF_TOKEN
        )
        if error != "0":
            return error

        print("[commit_info]", commit_info)

        # Save in a private dataset if repo initialized. This bookkeeping is
        # best-effort: the PR already exists at this point, so a logging
        # failure is reported on stdout rather than shown as a failed export.
        if repo is not None:
            try:
                _record_export(model_id, commit_info.pr_url)
            except Exception as log_error:
                print("[dataset] failed to record export:", log_error)

        # The revision contains slashes, which must be escaped inside the
        # /tree/<revision> URL segment.
        pr_revision = commit_info.pr_revision.replace("/", "%2F")
        return (
            "#### Success 🔥 This model was successfully exported and a PR was opened: "
            f"[{commit_info.pr_url}]({commit_info.pr_url}). "
            "To use the model before the PR is approved, go to "
            f"https://huggingface.co/{model_id}/tree/{pr_revision}"
        )

    except Exception as e:
        # UI boundary: surface any failure as Markdown instead of crashing.
        return f"#### Error: {e}"
63
+
64
+
65
+ TITLE_IMAGE = """
66
+ <div style="display: block; margin-left: auto; margin-right: auto; width: 50%;">
67
+ <img src="https://huggingface.co/spaces/optimum/neuron-export/resolve/main/huggingfaceXneuron.png"/>
68
+ </div>
69
  """
70
+
71
+ TITLE = """
72
+ <div style="display: inline-flex; align-items: center; text-align: center; max-width: 1400px; gap: 0.8rem; font-size: 2.2rem;">
73
+ <h1 style="font-weight: 900; margin-bottom: 10px; margin-top: 10px;">
74
+ 🤗 Optimum Neuron Model Exporter
75
+ </h1>
76
+ </div>
77
  """
78
+
79
+ DESCRIPTION = """
80
+ Export 🤗 Transformers models hosted on the Hugging Face Hub to AWS Neuron-optimized format for Inferentia/Trainium acceleration.
81
+
82
+ *Features:*
83
+ - Automatically opens PR with Neuron-optimized model
84
+ - Preserves original model weights
85
+ - Adds proper tags to model card
86
+
87
+ *Note:*
88
+ - PR creation requires the Space owner to have a valid write token set via HF_WRITE_TOKEN
89
+ """
90
+
91
# Page layout: banner and title on top, then a two-column row with the
# description on the left and the export form on the right.
# NOTE(review): nesting was reconstructed from a flattened diff view; confirm
# the column placement of the button/output against the real file.
with gr.Blocks() as demo:
    gr.HTML(TITLE_IMAGE)
    gr.HTML(TITLE)

    with gr.Row():
        with gr.Column(scale=50):
            gr.Markdown(DESCRIPTION)

        with gr.Column(scale=50):
            # Search box for picking the model to export.
            input_model = HuggingfaceHubSearch(
                search_type="model",
                label="Hub model ID",
                placeholder="Search for model ID on the hub",
            )
            # Single-line task field, pre-filled with "auto".
            input_task = gr.Textbox(
                label='Task (can be left to "auto", will be automatically inferred)',
                value="auto",
                max_lines=1,
            )
            btn = gr.Button("Export to Neuron")
            output = gr.Markdown(label="Output")

            # Run the export on click and render its Markdown result.
            btn.click(fn=neuron_export, inputs=[input_model, input_task], outputs=output)
118
 
119
 
120
  if __name__ == "__main__":
121
+ def restart_space():
122
+ if HF_TOKEN:
123
+ HfApi().restart_space(repo_id="optimum/neuron-export", token=HF_TOKEN, factory_reboot=True)
124
+
125
+ scheduler = BackgroundScheduler()
126
+ scheduler.add_job(restart_space, "interval", seconds=21600)
127
+ scheduler.start()
128
+
129
  demo.launch()