File size: 8,295 Bytes
d519be4
 
 
cb3dcae
 
 
 
 
 
 
 
d519be4
 
 
 
 
cb3dcae
a1a68e8
cb3dcae
d519be4
a1a68e8
d519be4
 
 
 
cb3dcae
2ab1bf1
 
bff69bc
 
 
 
 
 
 
 
2ab1bf1
cb3dcae
 
 
 
 
 
d519be4
cb3dcae
 
 
 
 
 
 
 
d519be4
fd9121f
 
 
 
 
 
 
 
 
 
 
 
9175102
fd9121f
 
 
 
 
3ff6af1
fd9121f
 
cb24839
 
 
fd9121f
 
 
 
cb3dcae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ff6af1
cb3dcae
 
cb24839
 
cb3dcae
 
 
 
 
 
 
 
 
da5a214
cb3dcae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd9121f
 
c6111b7
fd9121f
da5a214
cb3dcae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
from dotenv import load_dotenv


import gradio as gr
from gradio import ChatMessage

import json
from openai import OpenAI
from datetime import datetime
import os
import re
import logging

# Root logger: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(format='[%(asctime)s][%(levelname)s] - %(message)s', level=logging.INFO)

# Load the local ".env" file before reading the settings below;
# override=True is passed so values from the file replace variables that are
# already set in the process environment.
load_dotenv(".env", override=True)

# Each setting is None when the variable is not defined.
HF_TOKEN = os.getenv("HF_TOKEN")
BASE_URL = os.getenv("BASE_URL")
EMBEDDINGS = os.getenv("EMBEDDINGS_MODEL")



from tools import tools, oitools

# Default system prompt for the chat assistant. It is used as the initial value
# of the "System prompt" textbox in the UI, and `completion` fills the `{date}`
# placeholder through `str.format(date=...)` on every request, so any other
# literal braces added to this text must be doubled.
SYSTEM_PROMPT_TEMPLATE = """You are an AI assistant. Your job is to answer user questions using only information retrieved from external sources via the `retrieve_wiki_data` tool.  
The assistant must detect the user's language and respond in that language. However, all retrieved content is available **only in Catalan**.

### Tool Use Guidelines:
- **query**: When using `retrieve_wiki_data`, you may rephrase the user's query to improve clarity or specificity. However, **do not remove or change essential names or terms**.
- **missing_info**: If the information needed is **not already present** in the conversation or past tool responses, you **must call** `retrieve_wiki_data`.
- **redundant_search**: Do **not** use `retrieve_wiki_data` if the answer has already been retrieved. Avoid repeating searches unnecessarily.
- **wikipedia_entities**: If the user asks about a **person, place, topic, or concept likely to exist on Wikipedia**, and it hasn’t been discussed yet, you **must** use `retrieve_wiki_data` to find the information.
- **external_info_only**: You are not allowed to use your internal memory or built-in knowledge. Only respond based on the content retrieved using `retrieve_wiki_data`.
- **no_info_found**: If the tool returns no relevant content, clearly inform the user that you couldn’t find the answer.
Today’s date is **{date}**"""


# Shared OpenAI-compatible client used by every request in this module.
# The endpoint is the configured BASE_URL with "/v1" appended, and HF_TOKEN is
# sent as the API key.
client = OpenAI(api_key=HF_TOKEN, base_url=f"{BASE_URL}/v1")
logging.info(f"Client initialized: {client}")

def today_date():
    """Return the current local date and time as a human-readable string.

    The layout is "<weekday>, <month> <day>, <year>, <12-hour>:<minute> <AM/PM>".
    """
    layout = '%A, %B %d, %Y, %I:%M %p'
    now = datetime.today()
    return now.strftime(layout)


def clean_json_string(json_str):
    """Repair the tail of a JSON object string assembled from streamed fragments.

    If ``json_str`` (ignoring surrounding whitespace) already parses as JSON it
    is returned as-is, so valid arguments containing nested objects are never
    damaged. Otherwise trailing spaces, commas and closing braces are removed
    and the text is re-closed with one ``}`` per object that is still open
    (always at least one).

    Args:
        json_str: Raw JSON text, possibly with missing or surplus trailing
            ``}`` characters or a dangling comma.

    Returns:
        The repaired string. It is not guaranteed to be valid JSON when the
        damage is somewhere other than the tail.
    """
    candidate = json_str.strip()
    try:
        json.loads(candidate)
    except ValueError:
        pass  # Not valid as-is: fall through to the tail repair below.
    else:
        return candidate

    trimmed = re.sub(r'[ ,}\s]+$', '', json_str)

    # Count the objects left open after trimming, ignoring braces that appear
    # inside string literals (including after escaped quotes).
    depth = 0
    in_string = False
    escaped = False
    for char in trimmed:
        if in_string:
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char == '{':
            depth += 1
        elif char == '}':
            depth -= 1

    return trimmed + '}' * max(depth, 1)


def get_summary(model, text):
    """Request a detailed summary of ``text`` from the chat model.

    Args:
        model: Model identifier forwarded to the chat completions endpoint.
        text: The content to summarize; it is sent as the user message.

    Returns:
        The non-streaming response object produced by
        ``client.chat.completions.create``.
    """
    summarizer_instructions = """You are an AI assistant that generates **detailed and complete summaries** of user-provided text. Your task is to produce a **faithful resumen** that preserves **all key information**, facts, and relevant points from the original content.

### Summary Guidelines:

- **No Detail Skipping**: Do **not** omit or simplify important content. Every critical fact, event, name, number, and nuance must be included.
- **Structured Clarity**: Organize the summary clearly and logically. If the original has sections or topics, reflect that structure.
- **No Personal Input**: Do **not** add opinions, interpretations, or external knowledge. Stay 100% faithful to the source text.
- **Conciseness with Completeness**: Be as concise as possible **without losing any important detail**.

Only produce the summary after fully reading and understanding the input text.
"""
    conversation = [
        {"role": "system", "content": summarizer_instructions},
        {"role": "user", "content": f"**TEXT**:\n\n{text}"},
    ]

    return client.chat.completions.create(
        model=model,
        messages=conversation,
        stream=False,
        max_tokens=1000,
        temperature=0.2,
        presence_penalty=0.3,
        frequency_penalty=0.3,
        extra_body={"repetition_penalty": 0.5},
    )

def completion(history, model, system_prompt: str, tools=None):
    """Open a streaming chat completion for the conversation so far.

    The system prompt has its ``{date}`` placeholder filled with the current
    date, then every history entry is converted to an API message. An
    assistant entry carrying metadata is treated as a recorded tool round: its
    metadata "title" is parsed as the JSON list of tool calls and its content
    is replayed as the tool's output.

    Args:
        history: Chat entries, either ``ChatMessage`` objects or dicts with the
            same fields.
        model: Model identifier forwarded to the endpoint.
        system_prompt: Prompt template formatted with ``date``.
        tools: Optional tool specifications; when truthy they are sent with
            ``tool_choice="auto"``.

    Returns:
        The streaming response returned by ``client.chat.completions.create``.
    """
    system_message = {"role": "system", "content": system_prompt.format(date=today_date())}
    messages = [system_message]

    for entry in history:
        turn = ChatMessage(**entry) if isinstance(entry, dict) else entry
        is_tool_round = turn.role == "assistant" and getattr(turn, "metadata", None)

        if not is_tool_round:
            messages.append({"role": turn.role, "content": turn.content})
            continue

        recorded_calls = json.loads(turn.metadata.get("title", "[]"))
        messages.extend([
            {"role": "assistant", "tool_calls": recorded_calls, "content": ""},
            {"role": "tool", "content": turn.content},
        ])

    request_kwargs = dict(
        model=model,
        messages=messages,
        stream=True,
        max_tokens=1000,
        temperature=0.2,
        frequency_penalty=0.1,
        extra_body={"repetition_penalty": 0.9},
    )
    if tools:
        request_kwargs["tool_choice"] = "auto"
        request_kwargs["tools"] = tools

    return client.chat.completions.create(**request_kwargs)

def llm_in_loop(history, system_prompt, recursive):
    """Stream one model turn and, if it requests a tool, run it and recurse.

    Text deltas are accumulated into a new assistant message appended to
    ``history``. If the model instead (or additionally) requests a tool, the
    tool is invoked, its output is summarized with ``get_summary``, the result
    is appended to ``history`` as an assistant message whose metadata "title"
    records the call as JSON, and the function calls itself so the model can
    continue with the new information.

    Args:
        history: Mutable list of chat messages; new messages are appended in
            place.
        system_prompt: Prompt template forwarded to ``completion``.
        recursive: Negative offset such that ``history[recursive:]`` is the
            slice of messages produced since the user's last message. It is
            decremented each time this function appends a message.

    Yields:
        ``history[recursive:]`` after every streamed text delta and after each
        tool result.

    Raises:
        Exception: Whatever the model listing raised, re-raised after showing
            a UI warning.

    NOTE(review): there is no cap on the number of tool rounds; a model that
    keeps requesting tools will keep recursing.
    """
    try:
        model = client.models.list().data[0].id
    except Exception:
        gr.Warning("The model is initializing. Please wait; this may take 5 to 10 minutes ⏳.", duration=20)
        raise

    arguments = ""
    name = ""
    appended = False
    active_index = None  # index of the tool call whose fragments are collected
    chat_completion = completion(history=history, tools=oitools, model=model, system_prompt=system_prompt)

    for chunk in chat_completion:
        # Some streamed chunks carry no choices at all (e.g. usage-only
        # chunks); indexing them would raise IndexError.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta

        if delta.tool_calls:
            call = delta.tool_calls[0]
            # Only one tool call per turn is executed. Fragments belonging to
            # a different call (distinguished by `index` in the OpenAI
            # streaming format) are dropped so they are not concatenated onto
            # the first call's arguments. If the server omits `index`, every
            # fragment is accepted.
            call_index = getattr(call, "index", None)
            if active_index is None:
                active_index = call_index
            elif call_index != active_index:
                continue

            fragment_name = getattr(call.function, "name", None)
            if fragment_name:
                name = fragment_name
            fragment_arguments = getattr(call.function, "arguments", None)
            if fragment_arguments:
                arguments += fragment_arguments
        elif delta.content:
            if not appended:
                history.append(ChatMessage(role="assistant", content=""))
                appended = True
            history[-1].content += delta.content
            yield history[recursive:]

    raw_arguments = clean_json_string(arguments) if arguments else "{}"
    logging.info("Tool call requested: name=%r arguments=%s", name, raw_arguments)

    parse_error = None
    try:
        arguments = json.loads(raw_arguments)
    except json.JSONDecodeError as err:
        # Report malformed arguments through the same path as tool failures
        # instead of aborting the whole response.
        logging.error("Could not parse tool arguments %r: %s", raw_arguments, err)
        arguments = {}
        parse_error = err

    # The streamed text message (if any) now also belongs to the yielded slice.
    if appended:
        recursive -= 1

    if not name:
        return

    if parse_error is not None:
        result = f"💥 Error: {parse_error}"
    else:
        try:
            result = str(tools[name].invoke(input=arguments))
            result = get_summary(model=model, text=result).choices[0].message.content
        except Exception as err:
            # Best-effort boundary: any tool or summary failure is shown to the
            # user and fed back to the model rather than crashing the chat.
            logging.exception("Tool %r failed", name)
            result = f"💥 Error: {err}"

    # The metadata title stores the tool call as JSON so `completion` can
    # rebuild the assistant/tool message pair on later requests.
    call_record = [{
        "id": "call_id",
        "function": {"arguments": json.dumps(arguments, ensure_ascii=False), "name": name},
        "type": "function",
    }]
    history.append(ChatMessage(
        role="assistant",
        content=result,
        metadata={"title": json.dumps(call_record, ensure_ascii=False)},
    ))
    yield history[recursive:]
    yield from llm_in_loop(history, system_prompt, recursive - 1)

def respond(message, history, additional_inputs):
    """Chat callback: record the user's message and stream the model's reply.

    Args:
        message: Text the user just submitted.
        history: Conversation so far; the new user message is appended in place.
        additional_inputs: Value forwarded to ``llm_in_loop`` as the system
            prompt.

    Yields:
        The message slices produced by ``llm_in_loop``.
    """
    user_turn = ChatMessage(role="user", content=message)
    history.append(user_turn)
    # Start at -1 so the first yielded slice holds only the newest message.
    yield from llm_in_loop(history=history, system_prompt=additional_inputs, recursive=-1)

# Build and launch the chat UI only when this file is run as a script.
if __name__ == "__main__":
    # Editable system prompt, pre-filled with the default template; it is
    # registered below as the interface's single additional input.
    system_prompt = gr.Textbox(label="System prompt", value=SYSTEM_PROMPT_TEMPLATE, lines=3)  
    # `respond` is the chat callback; its third parameter (`additional_inputs`)
    # presumably receives the textbox value — verify against Gradio's
    # additional_inputs semantics.
    demo = gr.ChatInterface(respond, type="messages", additional_inputs=[system_prompt])
    demo.launch()