from huggingface_hub import InferenceClient
import gradio as gr
import random
import prompts
from pypipertts import PyPiper
import codecs
# Shared Piper TTS engine instance, used below by load_mod() and tts().
pp=PyPiper()
#client = InferenceClient("Qwen/QwQ-32B-Preview")
# Remote text-generation backend; Mixtral expects the [INST] ... [/INST] chat format
# that format_prompt() builds.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
def format_prompt(message, history):
    """Build a Mixtral-style prompt string from prior turns plus the new message.

    Each (user, bot) pair in *history* becomes ``[INST] user [/INST] bot ``,
    and *message* is appended as the final ``[INST] ... [/INST]`` turn.

    Args:
        message: The new user message to append.
        history: Iterable of (user_prompt, bot_response) pairs, or falsy for none.

    Returns:
        The concatenated prompt string.
    """
    # Fix: the original initialized `prompt` twice and left a debug print in.
    parts = []
    if history:
        for user_prompt, bot_response in history:
            parts.append(f"[INST] {user_prompt} [/INST]")
            parts.append(f" {bot_response} ")
    parts.append(f"[INST] {message} [/INST]")
    return "".join(parts)
def generate(prompt, history, role="ASSISTANT"):
    """Stream a model response, yielding growing (prompt, partial_output) pairs.

    Args:
        prompt: The user's message.
        history: Prior (user, bot) chat pairs; falsy is treated as empty.
        role: Selects the system prompt — "ASSISTANT" or "RESEARCHER" map to
            the corresponding attribute of the local ``prompts`` module; any
            other value uses no system prompt.

    Yields:
        A single-element list ``[(prompt, output_so_far)]`` after each token,
        in the shape Gradio's Chatbot component expects.
    """
    if not history:
        history = []
    # Fresh random seed per call so repeated identical prompts vary.
    seed = random.randint(1, 9999999999999)
    print(seed)
    if role == "ASSISTANT":
        system_prompt = prompts.ASSISTANT
    elif role == "RESEARCHER":
        system_prompt = prompts.RESEARCHER
    else:
        system_prompt = ""
    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )
    formatted_prompt = format_prompt(f"{system_prompt}\nUSER:{prompt}", history)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    # Fix: removed the dead `buf` accumulator and the triple-quoted
    # "commented-out" buffering block, which was actually a live (no-op)
    # expression statement evaluated on every iteration.
    output = ""
    for response in stream:
        output += response.token.text
        yield [(prompt, output)]
def load_mod(model):
    """Swap the active Piper voice, yielding status text before and after.

    Args:
        model: Name of the voice model to hand to the shared PyPiper engine.

    Yields:
        A "Loading" status string, then a "Voice Loaded" confirmation.
    """
    status = f"Loading: {model}"
    yield status
    pp.load_mod(model)
    done = f"Voice Loaded: {model}"
    yield done
def tts(inp, names, length, noise, width, sen_pause):
    """Stream TTS audio for the most recent bot reply in the chat history.

    Args:
        inp: Chatbot value — a list of (user, bot) message pairs; only the
            last bot message is spoken.
        names, length, noise, width, sen_pause: Voice/synthesis settings
            passed straight through to ``pp.stream_tts``.

    Yields:
        Whatever ``pp.stream_tts`` yields (audio chunks for the UI).
    """
    print(inp[-1][1])
    # Fix: `.strip("")` was a no-op (it strips characters from the empty
    # set); `.strip()` trims surrounding whitespace before synthesis.
    txt = inp[-1][1].strip()
    yield from pp.stream_tts([txt], names, length, noise, width, sen_pause)
with gr.Blocks() as iface:
gr.HTML("""