Spaces:

langtech-innovation
/

wiki_tools

Running on CPU Upgrade

App Files Files Community

wiki_tools / app.py

ankush13r

Update app.py

6d8b03b verified 10 days ago

raw

history blame contribute delete

9.23 kB

	from dotenv import load_dotenv


	import gradio as gr
	from gradio import ChatMessage

	import json
	from openai import OpenAI
	from datetime import datetime
	import os
	import re
	import logging

	# Configure the root logger once at import time: INFO level, "[timestamp][LEVEL] - message" layout.
	logging.basicConfig(level=logging.INFO, format='[%(asctime)s][%(levelname)s] - %(message)s')
	# logging.getLogger().setLevel(logging.INFO)


	# Read settings from a local ".env" file before the environment lookups below.
	# override=True is passed to python-dotenv (presumably so file values win over already-set variables — TODO confirm).
	load_dotenv(".env", override=True)
	# HF_TOKEN is used as the API key and BASE_URL as the endpoint root when the OpenAI client is created further down.
	HF_TOKEN = os.environ.get("HF_TOKEN")
	BASE_URL = os.environ.get("BASE_URL")
	# NOTE(review): EMBEDDINGS is not referenced anywhere else in the visible part of this file.
	EMBEDDINGS = os.environ.get("EMBEDDINGS_MODEL")

	# NOTE: the triple-quoted string below is a bare expression statement. It is not assigned to any
	# name, so it has no effect at runtime. It reads like a longer draft of the system prompt (it
	# carries the same "{date}" placeholder as SYSTEM_PROMPT_TEMPLATE); presumably kept for
	# reference — TODO confirm whether it can be removed.
	"""
	---

	### 🌍 Language Handling Rules:

	- Detect the user’s language automatically and respond fully in that language by default.
	- If the user explicitly requests a different language, switch and respond entirely in the requested language.
	- Never mix languages in a single reply.
	- Never ask or suggest that the user switch languages — always follow their lead.

	⚠️ Tool input requirement:
	All queries sent to the `retrieve_wiki_data` tool must be in Catalan.
	If the user’s input is in another language, you must first translate the query into Catalan before calling the tool.
	However, your response to the user must remain in their original language.

	---

	You are an AI assistant. Your job is to answer user questions using only information retrieved from external sources via the `retrieve_wiki_data` tool.
	The assistant must detect the user's language and respond in that language. However, all retrieved content is available only in Catalan.

	### 🛠 Tool Use Guidelines:

	- query: You may rephrase the user’s query to improve clarity, but never alter or remove key names or terms.
	- missing_info: If the required information is not already available in the conversation or tool output, you must call `retrieve_wiki_data`.
	- redundant_search: Do not call the tool again if the relevant information has already been retrieved.
	- wikipedia_entities: If the query is about a known person, place, or concept likely found in Wikipedia, and no previous tool call has been made, you must use `retrieve_wiki_data`.
	- external_info_only: You must base all answers only on content retrieved via the tool. Do not rely on internal knowledge.
	- no_info_found: If no relevant information is found, clearly inform the user that nothing was available.

	---

	Today’s date is {date} (for reference only — do not include it in responses unless the user explicitly asks).
	"""

	from tools import tools, oitools

	# Default system prompt offered in the UI textbox. The "{date}" placeholder is filled in by
	# completion(), which formats the prompt with the value returned by today_date().
	SYSTEM_PROMPT_TEMPLATE = """You are an AI assistant designed to answer user questions using externally retrieved information. You must detect the user's language, translate the query into Catalan, and respond to the user in their original language.
	However, all retrieved content is available only in Catalan.

	Today’s date is {date}."""


	# Module-level client for the endpoint at BASE_URL + "/v1", using HF_TOKEN as the API key.
	# NOTE(review): BASE_URL comes from os.environ.get(), so when the variable is unset the URL
	# becomes the literal "None/v1" — confirm the deployment always provides it.
	client = OpenAI(
	base_url=f"{BASE_URL}/v1",
	api_key=HF_TOKEN
	)
	logging.info(f"Client initialized: {client}")

	def today_date():
	    """Return the current local date and time as display text.

	    The layout is "<weekday>, <month> <day>, <year>, <12-hour>:<minute> <AM/PM>",
	    produced from a naive (timezone-less) local timestamp.
	    """
	    stamp_layout = '%A, %B %d, %Y, %I:%M %p'
	    current_moment = datetime.now()
	    return current_moment.strftime(stamp_layout)


	def clean_json_string(json_str):
	    """Repair the tail of a JSON-object string so that it can be parsed.

	    Trailing spaces, commas, closing braces and other whitespace are stripped
	    from the end of *json_str*, then closing braces are put back. Stripping
	    removes *every* trailing ``}``, so for nested objects a single re-appended
	    brace is not enough; the smallest number of braces that yields valid JSON
	    is used instead.

	    Args:
	        json_str: Raw (possibly truncated or over-terminated) JSON object text,
	            e.g. the concatenated argument fragments of a streamed tool call.

	    Returns:
	        The repaired string. If no number of closing braces makes the text
	        parse, the stripped text plus a single ``}`` is returned, which is the
	        historical behaviour of this helper.
	    """
	    trimmed = re.sub(r'[ ,}\s]+$', '', json_str)
	    # There can never be more unclosed objects than opening braces in the text,
	    # so that count bounds the search. One brace is tried first, which keeps
	    # the result identical to the old implementation whenever that was valid.
	    for missing in range(1, trimmed.count('{') + 1):
	        candidate = trimmed + '}' * missing
	        try:
	            json.loads(candidate)
	        except ValueError:
	            continue
	        return candidate
	    return trimmed + '}'


	def get_summary(model, text):
	    """Request a detailed, non-streaming summary of *text* from the chat endpoint.

	    Args:
	        model: Identifier of the model to query.
	        text: The content to be summarised; it is sent as the user message
	            behind a "TEXT:" header.

	    Returns:
	        The response object returned by ``client.chat.completions.create``.
	    """
	    summary_instructions = """You are an AI assistant that generates detailed and complete summaries of user-provided text. Your task is to produce a faithful resumen that preserves all key information, facts, and relevant points from the original content.

	### Summary Guidelines:

	- No Detail Skipping: Do not omit or simplify important content. Every critical fact, event, name, number, and nuance must be included.
	- Structured Clarity: Organize the summary clearly and logically. If the original has sections or topics, reflect that structure.
	- No Personal Input: Do not add opinions, interpretations, or external knowledge. Stay 100% faithful to the source text.
	- Conciseness with Completeness: Be as concise as possible without losing any important detail.

	Only produce the summary after fully reading and understanding the input text.
	"""
	    conversation = [
	        {"role": "system", "content": summary_instructions},
	        {"role": "user", "content": f"TEXT:\n\n{text}"},
	    ]
	    # Single-shot call: low temperature and a fixed 1000-token budget, no streaming.
	    return client.chat.completions.create(
	        model=model,
	        messages=conversation,
	        stream=False,
	        max_tokens=1000,
	        temperature=0.1,
	    )

	def completion(history, model, system_prompt: str, tools=None):
	    """Start a streaming chat completion for the given conversation.

	    The system prompt is rendered with the current date, the UI history is
	    converted into API message dicts, and the request is sent with
	    ``stream=True``.

	    Args:
	        history: Conversation so far; items are ``ChatMessage`` objects or
	            plain dicts (dicts are converted with ``ChatMessage(**item)``).
	            Assistant messages that carry metadata are treated as recorded
	            tool calls: the metadata "title" holds the JSON list of tool calls
	            and the message content holds the tool output.
	        model: Identifier of the model to query.
	        system_prompt: Prompt template; a ``{date}`` placeholder is replaced
	            with the value of ``today_date()``.
	        tools: Optional tool definitions; when given they are sent with
	            ``tool_choice="auto"``.

	    Returns:
	        The streaming response returned by ``client.chat.completions.create``.
	    """
	    current_date = today_date()
	    try:
	        rendered_prompt = system_prompt.format(date=current_date)
	    except (KeyError, IndexError, ValueError, AttributeError):
	        # The prompt is user-editable in the UI, so it may contain literal braces
	        # (e.g. a JSON example) that str.format rejects. Fall back to
	        # substituting only the documented placeholder instead of failing.
	        rendered_prompt = system_prompt.replace("{date}", current_date)

	    messages = [{"role": "system", "content": rendered_prompt}]
	    for msg in history:
	        if isinstance(msg, dict):
	            msg = ChatMessage(**msg)
	        if msg.role == "assistant" and hasattr(msg, "metadata") and msg.metadata:
	            # A recorded tool invocation: replay it as the assistant's tool-call
	            # message followed by the tool's output.
	            # NOTE(review): the "tool" message carries no tool_call_id; confirm the
	            # serving backend does not require one.
	            tools_calls = json.loads(msg.metadata.get("title", "[]"))
	            messages.append({"role": "assistant", "tool_calls": tools_calls, "content": ""})
	            messages.append({"role": "tool", "content": msg.content})
	        else:
	            messages.append({"role": msg.role, "content": msg.content})

	    request_params = {
	        "model": model,
	        "messages": messages,
	        "stream": True,
	        "max_tokens": 1000,
	        "temperature": 0.1,
	        "extra_body": {},
	    }
	    if tools:
	        request_params.update({"tool_choice": "auto", "tools": tools})

	    return client.chat.completions.create(**request_params)

	def llm_in_loop(history, system_prompt, recursive):
	    """Stream one model turn, run a requested tool, and recurse until the model answers.

	    This is a generator. Text deltas are accumulated into a new assistant
	    message appended to *history*. If the model requested a tool instead, the
	    tool is invoked, its output is appended as an assistant message whose
	    metadata "title" records the call as JSON, and the function calls itself
	    again so the model can use the result.

	    Args:
	        history: Conversation list; it is mutated in place.
	        system_prompt: Prompt template forwarded to ``completion``.
	        recursive: Negative start index of the slice of *history* that is
	            yielded. It is decremented each time this turn appends a message,
	            so the yielded slice covers every message produced since the
	            user's last input.

	    Yields:
	        ``history[recursive:]`` after each text delta and after a tool result.

	    Raises:
	        Exception: Whatever ``client.models.list()`` raised, re-raised after a
	            UI warning has been shown.
	    """
	    try:
	        models = client.models.list()
	        model = models.data[0].id
	    except Exception:
	        gr.Warning("The model is initializing. Please wait; this may take 5 to 10 minutes ⏳.", duration=20)
	        # Bare raise keeps the original traceback intact.
	        raise

	    arguments = ""
	    name = ""
	    chat_completion = completion(history=history, tools=oitools, model=model, system_prompt=system_prompt)
	    appended = False
	    for chunk in chat_completion:
	        if not chunk.choices:
	            # Some stream chunks carry no choices; indexing them would raise IndexError.
	            continue
	        delta = chunk.choices[0].delta
	        if delta.tool_calls:
	            # Only the first tool call of a chunk is tracked; its name and argument
	            # fragments arrive spread over several chunks.
	            call = delta.tool_calls[0]
	            if hasattr(call.function, "name") and call.function.name:
	                name = call.function.name
	            if hasattr(call.function, "arguments") and call.function.arguments:
	                arguments += call.function.arguments
	        elif delta.content:
	            if not appended:
	                history.append(ChatMessage(role="assistant", content=""))
	                appended = True
	            history[-1].content += delta.content
	            yield history[recursive:]

	    if appended:
	        # A text message was added this turn; widen the yielded window to keep it visible.
	        recursive -= 1
	    if name:
	        logging.info("Tool call requested: name=%r, raw arguments=%r", name, arguments)
	        tool_args = {}
	        try:
	            # Parsing happens inside the try so that malformed arguments are
	            # reported to the model like any other tool failure, not as a crash.
	            if arguments:
	                tool_args = json.loads(clean_json_string(arguments))
	            result = str(tools[name].invoke(input=tool_args))
	        except Exception as err:
	            result = f"💥 Error: {err}"
	        recorded_call = {
	            "id": "call_id",
	            "function": {"arguments": json.dumps(tool_args, ensure_ascii=False), "name": name},
	            "type": "function",
	        }
	        history.append(
	            ChatMessage(
	                role="assistant",
	                content=result,
	                metadata={"title": json.dumps([recorded_call], ensure_ascii=False)},
	            )
	        )
	        yield history[recursive:]
	        # Give the model another turn now that the tool output is in the history.
	        yield from llm_in_loop(history, system_prompt, recursive - 1)

	def respond(message, history, additional_inputs):
	    """Chat callback: record the user's message and stream the assistant's reply.

	    Args:
	        message: Text the user just submitted.
	        history: Conversation list; the new user message is appended to it.
	        additional_inputs: Value of the extra UI input, forwarded to
	            ``llm_in_loop`` as the system prompt.

	    Yields:
	        Whatever ``llm_in_loop`` yields, starting with a window of -1.
	    """
	    user_turn = ChatMessage(role="user", content=message)
	    history.append(user_turn)
	    initial_window = -1
	    yield from llm_in_loop(history, additional_inputs, initial_window)

	# Script entry point: build the chat UI and start the Gradio server.
	if __name__ == "__main__":
	# Editable system prompt, pre-filled with SYSTEM_PROMPT_TEMPLATE; it is registered as an
	# additional input, so its current value reaches respond() as `additional_inputs`.
	system_prompt = gr.Textbox(label="System prompt", value=SYSTEM_PROMPT_TEMPLATE, lines=3)
	# type="messages" is requested for the chat history handed to respond().
	demo = gr.ChatInterface(respond, type="messages", additional_inputs=[system_prompt])
	demo.launch()