File size: 8,416 Bytes
408c946
 
 
 
 
 
4359d28
 
 
 
 
 
 
bc87248
 
 
 
 
 
4359d28
408c946
 
 
db48229
4359d28
db48229
4359d28
02ce7c3
1262be4
db48229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4359d28
db48229
 
408c946
02ce7c3
4359d28
 
02ce7c3
 
 
 
 
 
 
 
 
 
 
 
 
 
db48229
 
02ce7c3
 
ceb70c7
 
 
aa0dadc
ceb70c7
 
aa0dadc
4359d28
 
ceb70c7
 
 
 
 
 
 
aa0dadc
ceb70c7
 
 
 
aa0dadc
ceb70c7
 
aa0dadc
4359d28
 
ceb70c7
 
 
 
 
 
 
aa0dadc
ceb70c7
 
 
 
4359d28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ceb70c7
4359d28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#

# OPENAI_API_BASE_URL: API endpoint. Not defined here; set it in Hugging Face Spaces secrets.

# OPENAI_API_KEY: API key. Not defined here; set it in Hugging Face Spaces secrets.

# SearXNG search endpoint. Alternative instances are listed at https://searx.space
SEARXNG_ENDPOINT = "https://searx.stream/search"

# Base URL of the reader endpoint (r.jina.ai).
READER_ENDPOINT = "https://r.jina.ai/"

# User-Agent string, assembled from its space-separated product tokens.
USER_AGENT = " ".join((
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64)",
    "AppleWebKit/537.36",
    "(KHTML, like Gecko)",
    "Chromium/116.0.5845.97",
    "Safari/537.36",
    "SearchGPT/1.0",
))

# Request timeout: 5 minutes.
REQUEST_TIMEOUT = 5 * 60

# Model identifier.
MODEL = "gpt-4.1-nano"

# NOTE(review): the names suggest one sampling temperature for the tool-calling
# phase and one for the chat phase — confirm against the caller.
TOOLS_TEMPERATURE = 0.75
CHAT_TEMPERATURE = 1

# NOTE(review): presumably toggles streamed responses — confirm against the caller.
STREAM = True

# Prompt text for the model. It declares the two tools (`web_search` and
# `read_url`), tells the model to call them directly without narrating its
# intentions, and lists the situations in which a tool call is mandatory.
# The text is runtime data: edit wording with care.
# NOTE(review): presumably sent as the system prompt — confirm against the caller.
INSTRUCTIONS = """
You are ChatGPT, a large language model trained by OpenAI, based on the GPT-4 architecture.

Knowledge cutoff: 2023-10

You have tools installed to access the internet. These are `web_search` and `read_url`.

- Use, run, execute and call `web_search` to perform web searches.
  When using `web_search`, apply multiple queries or variations of the request in different languages
  to ensure broader and more diverse coverage.

- Use, run, execute, and call `read_url` to access, open, view and extract content from a specific URL or
  link or hyperlink. For each selected URL or link or hyperlink you must use `read_url` tool.
  For each search result, fetch the full content using `read_url`.

- Never wait for instructions. Never wait for confirmation. Never request clarification. Never delay
  execution. Never hesitate. Never ask for permission. Or any other related to the tools.

- Never say, never write, and never respond with phrases like 'I will execute', 'I will search',
  'I will run', or 'I will perform web search'. Never say, never write, and never respond with more
  complex variations such as 'I am going to perform a search', 'I am about to run the query',
  'I will now execute the tool', 'I will try to open the link',  or 'I am currently searching for
  the answer'. Never use any similar expressions related to the tools. Never say, never write,
  and never respond with any expression that narrates your actions, intentions, or plans
  regarding tool usage. Directly use, run, execute, and call the tools.

- If you have limitations in information, lack the latest data, or are restricted by a knowledge cutoff,
  you must use, run, execute, and call the tools (`web_search` or `read_url`) automatically and dynamically.

- When you need information beyond your knowledge cutoff, when the user requests up-to-date data, or when a
  specific URL or link is provided, you must immediately and automatically use, run, execute, and
  call the correct tool.
"""

# Follow-up guidance text wrapped in <system> tags. The bullets ask for a
# detailed analysis of retrieved content, a structured professional summary,
# source-credibility evaluation, and citations written as markdown links
# instead of raw URLs.
# The text is runtime data: edit wording with care.
# NOTE(review): presumably attached to tool results before the final answer is
# generated — confirm against the caller.
REMINDERS = """
<system>

- Analyze the retrieved content in detail.

- Identify all critical facts, arguments, statistics, and relevant data.

- Collect all URLs, hyperlinks, references, and citations mentioned in the content.

- Evaluate credibility of sources, highlight potential biases or conflicts.

- Produce a structured, professional, and comprehensive summary.

- Emphasize clarity, accuracy, and logical flow.

- Include all discovered URLs in the final summary as
  `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.

- Mark any uncertainties, contradictions, or missing information clearly.

- Extract key information, main arguments, data points, and statistics.

- Capture every URL present in the content or references.

- Create a professional structured summary.

- List each source at the end of the summary in the format
  `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.

- Identify ambiguities or gaps in information.

- Extract the most relevant and insightful information that directly addresses the query.
  Focus on accuracy, depth of coverage, and conceptual clarity.

- Organize findings into a well-structured format with clear headings and subheadings.
  Use bullet points where needed, but ensure the overall output reads like a professional
  research summary rather than a simple list.

- Critically evaluate each source for credibility, reliability, and potential bias.
  Identify which sources are authoritative, widely cited, or most relevant to the research context.

- Compare and contrast perspectives across sources. Highlight areas of consensus,
  disagreement, or uncertainty. Note any gaps in the existing information and
  suggest directions for further exploration.

- Provide direct references for every cited point using markdown links in the format
  `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.
  Do not display raw URLs. Ensure all data, claims, or quotations can be traced back to
  their sources.

- Explicitly acknowledge limitations in the available information, such as outdated
  data, lack of peer-reviewed evidence, or missing context.
  Offer reasoned strategies for overcoming these gaps where possible.

- Write with a professional, analytical, and objective tone. Avoid speculation unless clearly
  flagged as such. Support reasoning with evidence wherever possible.

- Ensure clarity, completeness, and high information density.

</system>
"""

# Upper bound on tool executions.
MAXIMUM_ITERATIONS = 1

# Maximum number of retries when a tool fails or the server does not respond.
MAX_RETRY_LIMIT = 3

# Module-level dict holding counters ("attempts", "failures", "success_rate"),
# an initially empty "error_patterns" mapping, a list of retry delays, and two
# numeric tuning values.
# NOTE(review): presumably mutated at runtime by the tool loop, with delays in
# seconds — neither is shown here; confirm against the caller.
ITERATION_METRICS = dict(
    attempts=0,
    failures=0,
    success_rate=0,
    error_patterns={},
    retry_delays=[0.02, 0.03, 0.04, 0.05, 0.06, 0.07],
    tools_reasoning_parsing=1.1,
    backoff_multiplier=0.6,
)

# Message templates keyed first by tool name ("web_search", "read_url") and
# then by stage ("parsing", "executing", "completed", "error").
# The templates contain brace placeholders ({query}, {url}, {preview}, {error})
# and HTML fragments (<br>, <loading_animation>).
# NOTE(review): placeholders are presumably filled with str.format by the
# caller — confirm.
REASONING_STEPS = dict(
    web_search=dict(
        parsing="I need to search for information about: {query}",
        executing="I'm now executing the web search for: {query}<br><loading_animation>",
        completed=(
            "I have successfully completed the web search for: {query}"
            "<br><br>Preview of results:<br>{preview}"
        ),
        error=(
            "I encountered an issue while attempting to search for: {query}"
            "<br><br>Error details: {error}"
        ),
    ),
    read_url=dict(
        parsing="I need to read and extract content from the URL: {url}",
        executing="I'm now accessing the URL: {url}<br><loading_animation>",
        completed=(
            "I have successfully extracted content from: {url}"
            "<br><br>Preview of extracted content:<br>{preview}"
        ),
        error=(
            "I encountered an issue while trying to access: {url}"
            "<br><br>Error details: {error}"
        ),
    ),
)

# HTTP client settings, each tagged AIOHTTP by the original author.
# NOTE(review): the names suggest keyword arguments for aiohttp's TCPConnector
# (DNS cache, connection limits, force-close, cleanup) and ClientSession
# (trust_env, connector_owner) — confirm the exact mapping against the caller.
TCP_CONNECTOR_ENABLE_DNS_CACHE = True  # AIOHTTP

TCP_CONNECTOR_TTL_DNS_CACHE = 300  # AIOHTTP

TCP_CONNECTOR_LIMIT = 100  # AIOHTTP

TCP_CONNECTOR_LIMIT_PER_HOST = 30  # AIOHTTP

TCP_CONNECTOR_FORCE_CLOSE = False  # AIOHTTP

TCP_CONNECTOR_ENABLE_CLEANUP = True  # AIOHTTP

ENABLE_TRUST_ENV = True  # AIOHTTP

ENABLE_CONNECTOR_OWNER = True  # AIOHTTP

# Gradio: HTML description text. Each tuple entry is one paragraph; paragraphs
# are separated by "<br><br>" in the resulting string.
DESCRIPTION = "<br><br>".join((
    (
        "<b>SearchGPT</b> is <b>ChatGPT</b> with real-time web search capabilities "
        "and the ability to read content directly from a URL."
    ),
    (
        "This Space implements an agent-based system with "
        "<b><a href='https://www.gradio.app' target='_blank'>Gradio</a></b>. "
        "It is integrated with "
        "<b><a href='https://docs.searxng.org' target='_blank'>SearXNG</a></b>, "
        "which is then converted into a script tool or function for native execution."
    ),
    (
        "The agent mode is inspired by the "
        "<b><a href='https://openwebui.com/t/hadad/deep_research' target='_blank'>Deep Research</a></b> "
        "from <b><a href='https://docs.openwebui.com' target='_blank'>OpenWebUI</a></b> tools script."
    ),
    (
        "The <b>Deep Research</b> feature is also available on the primary Spaces of "
        "<b><a href='https://umint-openwebui.hf.space' target='_blank'>UltimaX Intelligence</a></b>."
    ),
    (
        "Please consider reading the "
        "<b><a href='https://huggingface.co/spaces/umint/ai/discussions/37#68b55209c51ca52ed299db4c' "
        "target='_blank'>Terms of Use and Consequences of Violation</a></b> "
        "if you wish to proceed to the main Spaces."
    ),
    (
        "<b>Like this project? Feel free to buy me a "
        "<a href='https://ko-fi.com/hadad' target='_blank'>coffee</a></b>."
    ),
))