Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -81,7 +81,7 @@ BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
|
|
81 |
|
82 |
@dataclass
|
83 |
class ConversationConfig:
|
84 |
-
max_words: int =
|
85 |
prefix_url: str = "https://r.jina.ai/"
|
86 |
api_model_name: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
|
87 |
legacy_local_model_name: str = "NousResearch/Hermes-2-Pro-Llama-3-8B"
|
@@ -89,10 +89,10 @@ class ConversationConfig:
|
|
89 |
local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
|
90 |
local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
|
91 |
# ํ ํฐ ์ ์ฆ๊ฐ
|
92 |
-
max_tokens: int =
|
93 |
-
max_new_tokens: int =
|
94 |
-
min_conversation_turns: int =
|
95 |
-
max_conversation_turns: int =
|
96 |
|
97 |
|
98 |
def brave_search(query: str, count: int = 8, freshness_days: int | None = None):
|
@@ -168,46 +168,240 @@ def extract_keywords_for_search(text: str, language: str = "English") -> List[st
|
|
168 |
return []
|
169 |
|
170 |
def search_and_compile_content(keyword: str, language: str = "English") -> str:
|
171 |
-
"""ํค์๋๋ก ๊ฒ์ํ์ฌ ์ฝํ
์ธ ์ปดํ์ผ"""
|
172 |
if not BRAVE_KEY:
|
173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
|
175 |
-
# ์ธ์ด์ ๋ฐ๋ฅธ ๊ฒ์ ์ฟผ๋ฆฌ
|
176 |
if language == "Korean":
|
177 |
queries = [
|
178 |
-
f"{keyword} ์ต์ ๋ด์ค",
|
179 |
-
f"{keyword} ์ ๋ณด",
|
180 |
-
f"{keyword} ํธ๋ ๋
|
|
|
|
|
|
|
181 |
]
|
182 |
else:
|
183 |
queries = [
|
184 |
-
f"{keyword} latest news",
|
185 |
-
f"{keyword} explained",
|
186 |
-
f"{keyword} trends
|
|
|
|
|
|
|
187 |
]
|
188 |
|
189 |
all_content = []
|
|
|
190 |
|
191 |
for query in queries:
|
192 |
-
results = brave_search(query, count=
|
193 |
-
for r in results[:
|
194 |
-
content = f"**{r['title']}**\n{r['snippet']}\n"
|
195 |
all_content.append(content)
|
|
|
196 |
|
197 |
-
|
198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
|
200 |
# ์ปดํ์ผ๋ ์ฝํ
์ธ ๋ฐํ
|
201 |
compiled = "\n\n".join(all_content)
|
202 |
|
203 |
-
# ํค์๋ ๊ธฐ๋ฐ ์๊ฐ
|
204 |
if language == "Korean":
|
205 |
-
intro = f"'{keyword}'์ ๋ํ
|
206 |
else:
|
207 |
-
intro = f"
|
208 |
|
209 |
return intro + compiled
|
210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
class UnifiedAudioConverter:
|
212 |
def __init__(self, config: ConversationConfig):
|
213 |
self.config = config
|
|
|
81 |
|
82 |
@dataclass
|
83 |
class ConversationConfig:
|
84 |
+
max_words: int = 8000 # 4000์์ 6000์ผ๋ก ์ฆ๊ฐ (1.5๋ฐฐ)
|
85 |
prefix_url: str = "https://r.jina.ai/"
|
86 |
api_model_name: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
|
87 |
legacy_local_model_name: str = "NousResearch/Hermes-2-Pro-Llama-3-8B"
|
|
|
89 |
local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
|
90 |
local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
|
91 |
# ํ ํฐ ์ ์ฆ๊ฐ
|
92 |
+
max_tokens: int = 6000 # 3000์์ 4500์ผ๋ก ์ฆ๊ฐ (1.5๋ฐฐ)
|
93 |
+
max_new_tokens: int = 12000 # 6000์์ 9000์ผ๋ก ์ฆ๊ฐ (1.5๋ฐฐ)
|
94 |
+
min_conversation_turns: int = 18 # ์ต์ ๋ํ ํด ์
|
95 |
+
max_conversation_turns: int = 20 # ์ต๋ ๋ํ ํด ์
|
96 |
|
97 |
|
98 |
def brave_search(query: str, count: int = 8, freshness_days: int | None = None):
|
|
|
168 |
return []
|
169 |
|
170 |
def search_and_compile_content(keyword: str, language: str = "English") -> str:
|
171 |
+
"""ํค์๋๋ก ๊ฒ์ํ์ฌ ์ถฉ๋ถํ ์ฝํ
์ธ ์ปดํ์ผ"""
|
172 |
if not BRAVE_KEY:
|
173 |
+
# API ์์ ๋๋ ๊ธฐ๋ณธ ์ฝํ
์ธ ์์ฑ
|
174 |
+
if language == "Korean":
|
175 |
+
return f"""
|
176 |
+
'{keyword}'์ ๋ํ ์ข
ํฉ์ ์ธ ์ ๋ณด:
|
177 |
+
|
178 |
+
{keyword}๋ ํ๋ ์ฌํ์์ ๋งค์ฐ ์ค์ํ ์ฃผ์ ์
๋๋ค.
|
179 |
+
์ด ์ฃผ์ ๋ ๋ค์ํ ์ธก๋ฉด์์ ์ฐ๋ฆฌ์ ์ถ์ ์ํฅ์ ๋ฏธ์น๊ณ ์์ผ๋ฉฐ,
|
180 |
+
์ต๊ทผ ๋ค์ด ๋์ฑ ์ฃผ๋ชฉ๋ฐ๊ณ ์์ต๋๋ค.
|
181 |
+
|
182 |
+
์ฃผ์ ํน์ง:
|
183 |
+
1. ๊ธฐ์ ์ ๋ฐ์ ๊ณผ ํ์
|
184 |
+
2. ์ฌํ์ ์ํฅ๊ณผ ๋ณํ
|
185 |
+
3. ๋ฏธ๋ ์ ๋ง๊ณผ ๊ฐ๋ฅ์ฑ
|
186 |
+
4. ์ค์ฉ์ ํ์ฉ ๋ฐฉ์
|
187 |
+
5. ๊ธ๋ก๋ฒ ํธ๋ ๋์ ๋ํฅ
|
188 |
+
|
189 |
+
์ ๋ฌธ๊ฐ๋ค์ {keyword}๊ฐ ์์ผ๋ก ๋์ฑ ์ค์ํด์ง ๊ฒ์ผ๋ก ์์ํ๊ณ ์์ผ๋ฉฐ,
|
190 |
+
์ด์ ๋ํ ๊น์ด ์๋ ์ดํด๊ฐ ํ์ํ ์์ ์
๋๋ค.
|
191 |
+
"""
|
192 |
+
else:
|
193 |
+
return f"""
|
194 |
+
Comprehensive information about '{keyword}':
|
195 |
+
|
196 |
+
{keyword} is a significant topic in modern society.
|
197 |
+
This subject impacts our lives in various ways and has been
|
198 |
+
gaining increasing attention recently.
|
199 |
+
|
200 |
+
Key aspects:
|
201 |
+
1. Technological advancement and innovation
|
202 |
+
2. Social impact and changes
|
203 |
+
3. Future prospects and possibilities
|
204 |
+
4. Practical applications
|
205 |
+
5. Global trends and developments
|
206 |
+
|
207 |
+
Experts predict that {keyword} will become even more important,
|
208 |
+
and it's crucial to develop a deep understanding of this topic.
|
209 |
+
"""
|
210 |
|
211 |
+
# ์ธ์ด์ ๋ฐ๋ฅธ ๋ค์ํ ๊ฒ์ ์ฟผ๋ฆฌ
|
212 |
if language == "Korean":
|
213 |
queries = [
|
214 |
+
f"{keyword} ์ต์ ๋ด์ค 2024",
|
215 |
+
f"{keyword} ์ ๋ณด ์ค๋ช
",
|
216 |
+
f"{keyword} ํธ๋ ๋ ์ ๋ง",
|
217 |
+
f"{keyword} ์ฅ์ ๋จ์ ",
|
218 |
+
f"{keyword} ํ์ฉ ๋ฐฉ๋ฒ",
|
219 |
+
f"{keyword} ์ ๋ฌธ๊ฐ ์๊ฒฌ"
|
220 |
]
|
221 |
else:
|
222 |
queries = [
|
223 |
+
f"{keyword} latest news 2024",
|
224 |
+
f"{keyword} explained comprehensive",
|
225 |
+
f"{keyword} trends forecast",
|
226 |
+
f"{keyword} advantages disadvantages",
|
227 |
+
f"{keyword} how to use",
|
228 |
+
f"{keyword} expert opinions"
|
229 |
]
|
230 |
|
231 |
all_content = []
|
232 |
+
total_content_length = 0
|
233 |
|
234 |
for query in queries:
|
235 |
+
results = brave_search(query, count=5) # ๋ ๋ง์ ๊ฒฐ๊ณผ ๊ฐ์ ธ์ค๊ธฐ
|
236 |
+
for r in results[:3]: # ๊ฐ ์ฟผ๋ฆฌ๋น ์์ 3๊ฐ
|
237 |
+
content = f"**{r['title']}**\n{r['snippet']}\nSource: {r['host']}\n"
|
238 |
all_content.append(content)
|
239 |
+
total_content_length += len(r['snippet'])
|
240 |
|
241 |
+
# ์ฝํ
์ธ ๊ฐ ๋ถ์กฑํ๋ฉด ์ถ๊ฐ ์์ฑ
|
242 |
+
if total_content_length < 1000: # ์ต์ 1000์ ํ๋ณด
|
243 |
+
if language == "Korean":
|
244 |
+
additional_content = f"""
|
245 |
+
์ถ๊ฐ ์ ๋ณด:
|
246 |
+
{keyword}์ ๊ด๋ จ๋ ์ต๊ทผ ๋ํฅ์ ์ดํด๋ณด๋ฉด, ์ด ๋ถ์ผ๋ ๋น ๋ฅด๊ฒ ๋ฐ์ ํ๊ณ ์์ต๋๋ค.
|
247 |
+
๋ง์ ์ ๋ฌธ๊ฐ๋ค์ด ์ด ์ฃผ์ ์ ๋ํด ํ๋ฐํ ์ฐ๊ตฌํ๊ณ ์์ผ๋ฉฐ,
|
248 |
+
์ค์ํ์์์ ์์ฉ ๊ฐ๋ฅ์ฑ๋ ๊ณ์ ํ๋๋๊ณ ์์ต๋๋ค.
|
249 |
+
|
250 |
+
๏ฟฝ๏ฟฝ๏ฟฝํ ์ฃผ๋ชฉํ ์ ์:
|
251 |
+
- ๊ธฐ์ ํ์ ์ ๊ฐ์ํ
|
252 |
+
- ์ฌ์ฉ์ ๊ฒฝํ์ ๊ฐ์
|
253 |
+
- ์ ๊ทผ์ฑ์ ํฅ์
|
254 |
+
- ๋น์ฉ ํจ์จ์ฑ ์ฆ๋
|
255 |
+
- ๊ธ๋ก๋ฒ ์์ฅ์ ์ฑ์ฅ
|
256 |
+
|
257 |
+
์ด๋ฌํ ์์๋ค์ด {keyword}์ ๋ฏธ๋๋ฅผ ๋์ฑ ๋ฐ๊ฒ ๋ง๋ค๊ณ ์์ต๋๋ค.
|
258 |
+
"""
|
259 |
+
else:
|
260 |
+
additional_content = f"""
|
261 |
+
Additional insights:
|
262 |
+
Recent developments in {keyword} show rapid advancement in this field.
|
263 |
+
Many experts are actively researching this topic, and its practical
|
264 |
+
applications continue to expand.
|
265 |
+
|
266 |
+
Key points to note:
|
267 |
+
- Accelerating technological innovation
|
268 |
+
- Improving user experience
|
269 |
+
- Enhanced accessibility
|
270 |
+
- Increased cost efficiency
|
271 |
+
- Growing global market
|
272 |
+
|
273 |
+
These factors are making the future of {keyword} increasingly promising.
|
274 |
+
"""
|
275 |
+
all_content.append(additional_content)
|
276 |
|
277 |
# ์ปดํ์ผ๋ ์ฝํ
์ธ ๋ฐํ
|
278 |
compiled = "\n\n".join(all_content)
|
279 |
|
280 |
+
# ํค์๋ ๊ธฐ๋ฐ ์๊ฐ
|
281 |
if language == "Korean":
|
282 |
+
intro = f"### '{keyword}'์ ๋ํ ์ข
ํฉ์ ์ธ ์ ๋ณด์ ์ต์ ๋ํฅ:\n\n"
|
283 |
else:
|
284 |
+
intro = f"### Comprehensive information and latest trends about '{keyword}':\n\n"
|
285 |
|
286 |
return intro + compiled
|
287 |
|
288 |
+
|
289 |
+
def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
|
290 |
+
"""Build prompt for conversation generation with enhanced radio talk show style"""
|
291 |
+
# ํ
์คํธ ๊ธธ์ด ์ ํ
|
292 |
+
max_text_length = 4500 if search_context else 6000
|
293 |
+
if len(text) > max_text_length:
|
294 |
+
text = text[:max_text_length] + "..."
|
295 |
+
|
296 |
+
if language == "Korean":
|
297 |
+
# ๋ํ ํ
ํ๋ฆฟ์ ๋ ๋ง์ ํด์ผ๋ก ํ์ฅ (15-20ํ)
|
298 |
+
template = """
|
299 |
+
{
|
300 |
+
"conversation": [
|
301 |
+
{"speaker": "์ค์", "text": ""},
|
302 |
+
{"speaker": "๋ฏผํธ", "text": ""},
|
303 |
+
{"speaker": "์ค์", "text": ""},
|
304 |
+
{"speaker": "๋ฏผํธ", "text": ""},
|
305 |
+
{"speaker": "์ค์", "text": ""},
|
306 |
+
{"speaker": "๋ฏผํธ", "text": ""},
|
307 |
+
{"speaker": "์ค์", "text": ""},
|
308 |
+
{"speaker": "๋ฏผํธ", "text": ""},
|
309 |
+
{"speaker": "์ค์", "text": ""},
|
310 |
+
{"speaker": "๋ฏผํธ", "text": ""},
|
311 |
+
{"speaker": "์ค์", "text": ""},
|
312 |
+
{"speaker": "๋ฏผํธ", "text": ""},
|
313 |
+
{"speaker": "์ค์", "text": ""},
|
314 |
+
{"speaker": "๋ฏผํธ", "text": ""},
|
315 |
+
{"speaker": "์ค์", "text": ""},
|
316 |
+
{"speaker": "๋ฏผํธ", "text": ""},
|
317 |
+
{"speaker": "์ค์", "text": ""},
|
318 |
+
{"speaker": "๋ฏผํธ", "text": ""}
|
319 |
+
]
|
320 |
+
}
|
321 |
+
"""
|
322 |
+
|
323 |
+
context_part = ""
|
324 |
+
if search_context:
|
325 |
+
context_part = f"# ์ต์ ๊ด๋ จ ์ ๋ณด:\n{search_context}\n"
|
326 |
+
|
327 |
+
base_prompt = (
|
328 |
+
f"# ์๋ณธ ์ฝํ
์ธ :\n{text}\n\n"
|
329 |
+
f"{context_part}"
|
330 |
+
f"์ ๋ด์ฉ์ผ๋ก 30์ค ์ด์์ ๋ผ๋์ค ๋๋ด ํ๋ก๊ทธ๋จ ๋๋ณธ์ ์์ฑํด์ฃผ์ธ์.\n\n"
|
331 |
+
f"## ํ์ ์๊ตฌ์ฌํญ:\n"
|
332 |
+
f"1. **์ต์ 18ํ ์ด์์ ๋ํ ๊ตํ** (์ค์ 9ํ, ๋ฏผํธ 9ํ ์ด์)\n"
|
333 |
+
f"2. **๋ํ ์คํ์ผ**: ์ค์ ๋ผ๋์ค ๋๋ด์ฒ๋ผ ์์ฃผ ์์ฐ์ค๋ฝ๊ณ ํธ์ํ ๊ตฌ์ด์ฒด\n"
|
334 |
+
f"3. **ํ์ ์ญํ **:\n"
|
335 |
+
f" - ์ค์: ์งํ์ (์งง์ ์ง๋ฌธ, ๋ฆฌ์ก์
, ํ์ ์ ํ)\n"
|
336 |
+
f" - ๋ฏผํธ: ์ ๋ฌธ๊ฐ (๊ฐ๊ฒฐํ ์ค๋ช
, ์์ ์ ๊ณต)\n"
|
337 |
+
f"4. **๋ํ ํจํด**:\n"
|
338 |
+
f" - ์ค์: \"๊ทธ๋ ๊ตฐ์\", \"ํฅ๋ฏธ๋กญ๋ค์\", \"๋ ์์ธํ ์ค๋ช
ํด์ฃผ์๊ฒ ์ด์?\"\n"
|
339 |
+
f" - ๋ฏผํธ: 1-2๋ฌธ์ฅ์ผ๋ก ํต์ฌ ์ค๋ช
\n"
|
340 |
+
f" - ์์ฐ์ค๋ฌ์ด ์ถ์์ ์ฌ์ฉ\n"
|
341 |
+
f"5. **๋ด์ฉ ๊ตฌ์ฑ**:\n"
|
342 |
+
f" - ๋์
๋ถ (2-3ํ): ์ฃผ์ ์๊ฐ\n"
|
343 |
+
f" - ์ ๊ฐ๋ถ (10-12ํ): ํต์ฌ ๋ด์ฉ ์ค๋ช
\n"
|
344 |
+
f" - ๋ง๋ฌด๋ฆฌ (3-4ํ): ์์ฝ ๋ฐ ์ ๋ฆฌ\n"
|
345 |
+
f"6. **ํ์**: ์๋ก ์กด๋๋ง ์ฌ์ฉ\n\n"
|
346 |
+
f"๋ฐ๋์ ์ JSON ํ์์ผ๋ก 18ํ ์ด์์ ๋ํ๋ฅผ ์์ฑํ์ธ์:\n{template}"
|
347 |
+
)
|
348 |
+
|
349 |
+
return base_prompt
|
350 |
+
|
351 |
+
else:
|
352 |
+
# ์์ด ํ
ํ๋ฆฟ๋ ํ์ฅ
|
353 |
+
template = """
|
354 |
+
{
|
355 |
+
"conversation": [
|
356 |
+
{"speaker": "Alex", "text": ""},
|
357 |
+
{"speaker": "Jordan", "text": ""},
|
358 |
+
{"speaker": "Alex", "text": ""},
|
359 |
+
{"speaker": "Jordan", "text": ""},
|
360 |
+
{"speaker": "Alex", "text": ""},
|
361 |
+
{"speaker": "Jordan", "text": ""},
|
362 |
+
{"speaker": "Alex", "text": ""},
|
363 |
+
{"speaker": "Jordan", "text": ""},
|
364 |
+
{"speaker": "Alex", "text": ""},
|
365 |
+
{"speaker": "Jordan", "text": ""},
|
366 |
+
{"speaker": "Alex", "text": ""},
|
367 |
+
{"speaker": "Jordan", "text": ""},
|
368 |
+
{"speaker": "Alex", "text": ""},
|
369 |
+
{"speaker": "Jordan", "text": ""},
|
370 |
+
{"speaker": "Alex", "text": ""},
|
371 |
+
{"speaker": "Jordan", "text": ""},
|
372 |
+
{"speaker": "Alex", "text": ""},
|
373 |
+
{"speaker": "Jordan", "text": ""}
|
374 |
+
]
|
375 |
+
}
|
376 |
+
"""
|
377 |
+
|
378 |
+
context_part = ""
|
379 |
+
if search_context:
|
380 |
+
context_part = f"# Latest Information:\n{search_context}\n"
|
381 |
+
|
382 |
+
base_prompt = (
|
383 |
+
f"# Content:\n{text}\n\n"
|
384 |
+
f"{context_part}"
|
385 |
+
f"Create a radio talk show conversation with at least 30 lines.\n\n"
|
386 |
+
f"## Requirements:\n"
|
387 |
+
f"1. **Minimum 18 conversation exchanges** (Alex 9+, Jordan 9+)\n"
|
388 |
+
f"2. **Style**: Natural radio talk show conversation\n"
|
389 |
+
f"3. **Roles**:\n"
|
390 |
+
f" - Alex: Host (short questions, reactions, transitions)\n"
|
391 |
+
f" - Jordan: Expert (concise explanations, examples)\n"
|
392 |
+
f"4. **Pattern**:\n"
|
393 |
+
f" - Alex: \"I see\", \"Fascinating\", \"Could you elaborate?\"\n"
|
394 |
+
f" - Jordan: 1-2 sentence explanations\n"
|
395 |
+
f" - Natural speech fillers\n"
|
396 |
+
f"5. **Structure**:\n"
|
397 |
+
f" - Introduction (2-3 exchanges): Topic intro\n"
|
398 |
+
f" - Main content (10-12 exchanges): Core discussion\n"
|
399 |
+
f" - Conclusion (3-4 exchanges): Summary\n\n"
|
400 |
+
f"Create exactly 18+ exchanges in this JSON format:\n{template}"
|
401 |
+
)
|
402 |
+
|
403 |
+
return base_prompt
|
404 |
+
|
405 |
class UnifiedAudioConverter:
|
406 |
def __init__(self, config: ConversationConfig):
|
407 |
self.config = config
|