"""Gradio chat UI that streams a Mixtral reply and reads it aloud with Piper TTS."""

from huggingface_hub import InferenceClient
import gradio as gr
import random
import prompts
from pypipertts import PyPiper
import codecs

# Shared Piper wrapper: voices are loaded into it by `load_mod` and used by `tts`.
pp = PyPiper()

# Hosted text-generation endpoint used by `generate`.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")


def format_prompt(message, history):
    """Build the ``[INST] ... [/INST]`` prompt string sent to the model.

    Args:
        message: The new user message (already prefixed with any system
            prompt by the caller).
        history: Iterable of ``(user_prompt, bot_response)`` pairs from
            earlier turns, or a falsy value when there are none.

    Returns:
        The previous turns, each rendered as
        ``"[INST] {user} [/INST] {bot} "``, followed by
        ``"[INST] {message} [/INST]"``.
    """
    # NOTE(review): no begin/end-of-sequence markers are emitted around the
    # turns; confirm against the model's documented instruction template
    # whether the endpoint adds them or they should be written here.
    if history:
        print(history)
    past_turns = "".join(
        f"[INST] {user_prompt} [/INST] {bot_response} "
        for user_prompt, bot_response in (history or [])
    )
    return f"{past_turns}[INST] {message} [/INST]"


def generate(prompt, history, role="ASSISTANT"):
    """Stream a model reply for ``prompt``, yielding the chat state per token.

    Args:
        prompt: The user's message from the prompt textbox.
        history: Current chatbot value as ``(user, bot)`` pairs, or a falsy
            value when the chat is empty.
        role: Selects the system prompt: ``"ASSISTANT"`` uses
            ``prompts.ASSISTANT``, ``"RESEARCHER"`` uses
            ``prompts.RESEARCHER``, anything else uses an empty one.

    Yields:
        ``[(prompt, output)]`` after every streamed token, where ``output``
        is the reply text accumulated so far.
    """
    if not history:
        history = []

    # A fresh random seed per request so repeated prompts give varied replies.
    seed = random.randint(1, 9999999999999)
    print(seed)

    if role == "ASSISTANT":
        system_prompt = prompts.ASSISTANT
    elif role == "RESEARCHER":
        system_prompt = prompts.RESEARCHER
    else:
        system_prompt = ""

    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )

    formatted_prompt = format_prompt(f"{system_prompt}\nUSER:{prompt}", history)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )

    output = ""
    for response in stream:
        output += response.token.text
        # NOTE(review): this yields only the current turn, so the chatbot
        # value (and therefore the `history` passed to the next call) is
        # replaced rather than extended. Kept as-is; confirm this is intended.
        yield [(prompt, output)]


def load_mod(model):
    """Load a Piper voice and report progress as status messages.

    Args:
        model: Voice name passed to ``pp.load_mod``.

    Yields:
        A "Loading" message before the voice is loaded and a "Voice Loaded"
        message afterwards.
    """
    yield f"Loading: {model}"
    pp.load_mod(model)
    yield f"Voice Loaded: {model}"


def tts(inp, names, length, noise, width, sen_pause):
    """Stream synthesized speech for the most recent bot reply.

    Args:
        inp: Chatbot value as ``(user, bot)`` pairs; only the bot text of the
            last pair is spoken.
        names: Selected voice name.
        length, noise, width, sen_pause: Values of the "Length", "Noise",
            "Noise Width" and "Sentence Pause" sliders, passed through
            unchanged to ``pp.stream_tts``.

    Yields:
        Whatever ``pp.stream_tts`` yields. Nothing is yielded when there is
        no reply to speak.
    """
    # Guard: "Update Voice" can be clicked before any chat turn exists, in
    # which case indexing the last pair would raise.
    if not inp or not inp[-1][1]:
        return

    # The text is used as-is; the original `.strip("")` call removed nothing.
    # NOTE(review): if a trailing end-of-sequence marker from the model is
    # meant to be removed before synthesis, do it here; confirm intent.
    txt = inp[-1][1]
    print(txt)
    yield from pp.stream_tts([txt], names, length, noise, width, sen_pause)


with gr.Blocks() as iface:
    gr.HTML("""
Mixtral 8x7b Chatbot + Piper TTS
""")
    chatbot = gr.Chatbot(
        show_label=False,
        show_share_button=False,
        show_copy_button=True,
        layout="panel",
    )
    prompt = gr.Textbox(label="Prompt")
    with gr.Row():
        submit_b = gr.Button()
        stop_b = gr.Button("Stop")
        clear = gr.ClearButton([chatbot, prompt])
    aud = gr.Audio(streaming=True, autoplay=True)
    with gr.Accordion("Voice Controls", open=False):
        # Status area filled by `load_mod`.
        msg = gr.HTML("""""")
        names = gr.Dropdown(label="Voice", choices=pp.key_list, value="en_US-ryan-high")
        length = gr.Slider(label="Length", minimum=0.01, maximum=10.0, value=1)
        noise = gr.Slider(label="Noise", minimum=0.01, maximum=3.0, value=0.5)
        width = gr.Slider(label="Noise Width", minimum=0.01, maximum=3.0, value=0.5)
        sen_pause = gr.Slider(label="Sentence Pause", minimum=0.1, maximum=10.0, value=1)
        upd_btn = gr.Button("Update Voice")
    with gr.Row(visible=False):
        stt = gr.Textbox()

    # Load the default voice as soon as the page opens.
    iface.load(load_mod, names, msg)
    # Re-synthesize the last reply with the current voice settings.
    upd = upd_btn.click(tts, [chatbot, names, length, noise, width, sen_pause], aud)
    # Generate the reply, then speak it once generation has finished.
    sub_b = submit_b.click(generate, [prompt, chatbot], chatbot).then(
        tts, [chatbot, names, length, noise, width, sen_pause], aud
    )
    names_change = names.change(load_mod, names, msg)
    # "Stop" runs no function of its own; it only cancels the listed events.
    stop_b.click(None, None, None, cancels=[sub_b, names_change, upd])

iface.queue(default_concurrency_limit=20).launch(max_threads=40)