"""Gradio chat app that streams English→Hebrew translations from dicta-il/dictalm2.0.

For more information on `huggingface_hub` Inference API support, see:
https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
import os

import gradio as gr
from huggingface_hub import InferenceClient

# HF API token read from the environment; None means unauthenticated calls.
api_token = os.environ.get("API_TOKEN")
headers = {'Authorization': f'Bearer {api_token}'}

# client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", headers=headers, token=api_token)
client = InferenceClient(model="dicta-il/dictalm2.0", headers=headers, token=api_token)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion, yielding the progressively growing response text.

    Parameters mirror the ChatInterface wiring below: `message` is the new user
    turn, `history` the prior (user, assistant) pairs, and the rest are the
    system prompt and sampling controls from the additional inputs.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay prior turns so the model sees the full conversation.
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""
    # NOTE: loop variable renamed from `message` — the original shadowed the
    # function parameter of the same name.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # First/last stream chunks may carry delta.content=None; appending
        # None to a str raises TypeError, so only append real text.
        if token:
            response += token
        yield response


"""
For information on how to customize the ChatInterface, peruse the gradio docs:
https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            # Plain triple-quoted string: the original used an f-string with no
            # placeholders. Text (including the embedded newline) is unchanged.
            value="""Translate the user input from English to Hebrew. Do NOT respond with any additional information other than the translation. Respond with the translated text ONLY. If the english text contains multiple consecutive special characters, replace them with a corresponding single special character for translation. Once the text is translated, restore the original group of special characters in their corresponding place. For example, if the english text is 'Aahh!!', replace it with 'Aahh!' before translation. After translation, replace the !! 
in the correct place in the hebrew text, אוי!!""",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()