from huggingface_hub import InferenceClient
import gradio as gr
import random
import prompts
from pypipertts import PyPiper
import codecs
# Shared Piper TTS engine instance, used below by load_mod() and tts().
pp=PyPiper()
#client = InferenceClient("Qwen/QwQ-32B-Preview")
# Remote text-generation backend; Mixtral expects the [INST] ... [/INST] chat format
# that format_prompt() builds.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
def format_prompt(message, history):
    """Build a Mixtral-style prompt string from prior turns plus the new message.

    Each (user, bot) pair in *history* becomes ``[INST] user [/INST] bot ``,
    and *message* is appended as the final ``[INST] ... [/INST]`` turn.

    Args:
        message: The new user message to append.
        history: Iterable of (user_prompt, bot_response) pairs, or falsy for none.

    Returns:
        The concatenated prompt string.
    """
    # Fix: the original initialized `prompt` twice and left a debug print in.
    parts = []
    if history:
        for user_prompt, bot_response in history:
            parts.append(f"[INST] {user_prompt} [/INST]")
            parts.append(f" {bot_response} ")
    parts.append(f"[INST] {message} [/INST]")
    return "".join(parts)
def generate(prompt, history, role="ASSISTANT"):
    """Stream a model response, yielding growing (prompt, partial_output) pairs.

    Args:
        prompt: The user's message.
        history: Prior (user, bot) chat pairs; falsy is treated as empty.
        role: Selects the system prompt — "ASSISTANT" or "RESEARCHER" map to
            the corresponding attribute of the local ``prompts`` module; any
            other value uses no system prompt.

    Yields:
        A single-element list ``[(prompt, output_so_far)]`` after each token,
        in the shape Gradio's Chatbot component expects.
    """
    if not history:
        history = []
    # Fresh random seed per call so repeated identical prompts vary.
    seed = random.randint(1, 9999999999999)
    print(seed)
    if role == "ASSISTANT":
        system_prompt = prompts.ASSISTANT
    elif role == "RESEARCHER":
        system_prompt = prompts.RESEARCHER
    else:
        system_prompt = ""
    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )
    formatted_prompt = format_prompt(f"{system_prompt}\nUSER:{prompt}", history)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    # Fix: removed the dead `buf` accumulator and the triple-quoted
    # "commented-out" buffering block, which was actually a live (no-op)
    # expression statement evaluated on every iteration.
    output = ""
    for response in stream:
        output += response.token.text
        yield [(prompt, output)]
def load_mod(model):
    """Swap the active Piper voice, yielding status text before and after.

    Args:
        model: Name of the voice model to hand to the shared PyPiper engine.

    Yields:
        A "Loading" status string, then a "Voice Loaded" confirmation.
    """
    status = f"Loading: {model}"
    yield status
    pp.load_mod(model)
    done = f"Voice Loaded: {model}"
    yield done
def tts(inp, names, length, noise, width, sen_pause):
    """Stream TTS audio for the most recent bot reply in the chat history.

    Args:
        inp: Chatbot value — a list of (user, bot) message pairs; only the
            last bot message is spoken.
        names, length, noise, width, sen_pause: Voice/synthesis settings
            passed straight through to ``pp.stream_tts``.

    Yields:
        Whatever ``pp.stream_tts`` yields (audio chunks for the UI).
    """
    print(inp[-1][1])
    # Fix: `.strip("")` was a no-op (it strips characters from the empty
    # set); `.strip()` trims surrounding whitespace before synthesis.
    txt = inp[-1][1].strip()
    yield from pp.stream_tts([txt], names, length, noise, width, sen_pause)
with gr.Blocks() as iface:
gr.HTML("""