openfree committed on
Commit
a002723
·
verified ·
1 Parent(s): 1e01dd8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +216 -22
app.py CHANGED
@@ -81,7 +81,7 @@ BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
81
 
82
  @dataclass
83
  class ConversationConfig:
84
- max_words: int = 6000 # 4000์—์„œ 6000์œผ๋กœ ์ฆ๊ฐ€ (1.5๋ฐฐ)
85
  prefix_url: str = "https://r.jina.ai/"
86
  api_model_name: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
87
  legacy_local_model_name: str = "NousResearch/Hermes-2-Pro-Llama-3-8B"
@@ -89,10 +89,10 @@ class ConversationConfig:
89
  local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
90
  local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
91
  # 토큰 수 증가
92
- max_tokens: int = 4500 # 3000์—์„œ 4500์œผ๋กœ ์ฆ๊ฐ€ (1.5๋ฐฐ)
93
- max_new_tokens: int = 9000 # 6000์—์„œ 9000์œผ๋กœ ์ฆ๊ฐ€ (1.5๋ฐฐ)
94
- min_conversation_turns: int = 12 # ์ตœ์†Œ ๋Œ€ํ™” ํ„ด ์ˆ˜
95
- max_conversation_turns: int = 15 # ์ตœ๋Œ€ ๋Œ€ํ™” ํ„ด ์ˆ˜
96
 
97
 
98
  def brave_search(query: str, count: int = 8, freshness_days: int | None = None):
@@ -168,46 +168,240 @@ def extract_keywords_for_search(text: str, language: str = "English") -> List[st
168
  return []
169
 
170
  def search_and_compile_content(keyword: str, language: str = "English") -> str:
171
- """ํ‚ค์›Œ๋“œ๋กœ ๊ฒ€์ƒ‰ํ•˜์—ฌ ์ฝ˜ํ…์ธ  ์ปดํŒŒ์ผ"""
172
  if not BRAVE_KEY:
173
- return f"Search API not available. Using keyword: {keyword}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- # 언어에 따른 검색 쿼리 조정
176
  if language == "Korean":
177
  queries = [
178
- f"{keyword} ์ตœ์‹  ๋‰ด์Šค",
179
- f"{keyword} ์ •๋ณด",
180
- f"{keyword} ํŠธ๋ Œ๋“œ 2024"
 
 
 
181
  ]
182
  else:
183
  queries = [
184
- f"{keyword} latest news",
185
- f"{keyword} explained",
186
- f"{keyword} trends 2024"
 
 
 
187
  ]
188
 
189
  all_content = []
 
190
 
191
  for query in queries:
192
- results = brave_search(query, count=3)
193
- for r in results[:2]: # ๊ฐ ์ฟผ๋ฆฌ๋‹น ์ƒ์œ„ 2๊ฐœ ๊ฒฐ๊ณผ
194
- content = f"**{r['title']}**\n{r['snippet']}\n"
195
  all_content.append(content)
 
196
 
197
- if not all_content:
198
- return f"No search results found for: {keyword}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  # 컴파일된 콘텐츠 반환
201
  compiled = "\n\n".join(all_content)
202
 
203
- # 키워드 기반 소개 추가
204
  if language == "Korean":
205
- intro = f"'{keyword}'์— ๋Œ€ํ•œ ์ตœ์‹  ์ •๋ณด์™€ ํŠธ๋ Œ๋“œ:\n\n"
206
  else:
207
- intro = f"Latest information and trends about '{keyword}':\n\n"
208
 
209
  return intro + compiled
210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  class UnifiedAudioConverter:
212
  def __init__(self, config: ConversationConfig):
213
  self.config = config
 
81
 
82
  @dataclass
83
  class ConversationConfig:
84
+ max_words: int = 8000 # 4000์—์„œ 6000์œผ๋กœ ์ฆ๊ฐ€ (1.5๋ฐฐ)
85
  prefix_url: str = "https://r.jina.ai/"
86
  api_model_name: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
87
  legacy_local_model_name: str = "NousResearch/Hermes-2-Pro-Llama-3-8B"
 
89
  local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
90
  local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
91
  # 토큰 수 증가
92
+ max_tokens: int = 6000 # 3000์—์„œ 4500์œผ๋กœ ์ฆ๊ฐ€ (1.5๋ฐฐ)
93
+ max_new_tokens: int = 12000 # 6000์—์„œ 9000์œผ๋กœ ์ฆ๊ฐ€ (1.5๋ฐฐ)
94
+ min_conversation_turns: int = 18 # ์ตœ์†Œ ๋Œ€ํ™” ํ„ด ์ˆ˜
95
+ max_conversation_turns: int = 20 # ์ตœ๋Œ€ ๋Œ€ํ™” ํ„ด ์ˆ˜
96
 
97
 
98
  def brave_search(query: str, count: int = 8, freshness_days: int | None = None):
 
168
  return []
169
 
170
  def search_and_compile_content(keyword: str, language: str = "English") -> str:
171
+ """ํ‚ค์›Œ๋“œ๋กœ ๊ฒ€์ƒ‰ํ•˜์—ฌ ์ถฉ๋ถ„ํ•œ ์ฝ˜ํ…์ธ  ์ปดํŒŒ์ผ"""
172
  if not BRAVE_KEY:
173
+ # API 없을 때도 기본 콘텐츠 생성
174
+ if language == "Korean":
175
+ return f"""
176
+ '{keyword}'์— ๋Œ€ํ•œ ์ข…ํ•ฉ์ ์ธ ์ •๋ณด:
177
+
178
+ {keyword}๋Š” ํ˜„๋Œ€ ์‚ฌํšŒ์—์„œ ๋งค์šฐ ์ค‘์š”ํ•œ ์ฃผ์ œ์ž…๋‹ˆ๋‹ค.
179
+ ์ด ์ฃผ์ œ๋Š” ๋‹ค์–‘ํ•œ ์ธก๋ฉด์—์„œ ์šฐ๋ฆฌ์˜ ์‚ถ์— ์˜ํ–ฅ์„ ๋ฏธ์น˜๊ณ  ์žˆ์œผ๋ฉฐ,
180
+ ์ตœ๊ทผ ๋“ค์–ด ๋”์šฑ ์ฃผ๋ชฉ๋ฐ›๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
181
+
182
+ ์ฃผ์š” ํŠน์ง•:
183
+ 1. ๊ธฐ์ˆ ์  ๋ฐœ์ „๊ณผ ํ˜์‹ 
184
+ 2. ์‚ฌํšŒ์  ์˜ํ–ฅ๊ณผ ๋ณ€ํ™”
185
+ 3. ๋ฏธ๋ž˜ ์ „๋ง๊ณผ ๊ฐ€๋Šฅ์„ฑ
186
+ 4. ์‹ค์šฉ์  ํ™œ์šฉ ๋ฐฉ์•ˆ
187
+ 5. ๊ธ€๋กœ๋ฒŒ ํŠธ๋ Œ๋“œ์™€ ๋™ํ–ฅ
188
+
189
+ ์ „๋ฌธ๊ฐ€๋“ค์€ {keyword}๊ฐ€ ์•ž์œผ๋กœ ๋”์šฑ ์ค‘์š”ํ•ด์งˆ ๊ฒƒ์œผ๋กœ ์˜ˆ์ƒํ•˜๊ณ  ์žˆ์œผ๋ฉฐ,
190
+ ์ด์— ๋Œ€ํ•œ ๊นŠ์ด ์žˆ๋Š” ์ดํ•ด๊ฐ€ ํ•„์š”ํ•œ ์‹œ์ ์ž…๋‹ˆ๋‹ค.
191
+ """
192
+ else:
193
+ return f"""
194
+ Comprehensive information about '{keyword}':
195
+
196
+ {keyword} is a significant topic in modern society.
197
+ This subject impacts our lives in various ways and has been
198
+ gaining increasing attention recently.
199
+
200
+ Key aspects:
201
+ 1. Technological advancement and innovation
202
+ 2. Social impact and changes
203
+ 3. Future prospects and possibilities
204
+ 4. Practical applications
205
+ 5. Global trends and developments
206
+
207
+ Experts predict that {keyword} will become even more important,
208
+ and it's crucial to develop a deep understanding of this topic.
209
+ """
210
 
211
+ # 언어에 따른 다양한 검색 쿼리
212
  if language == "Korean":
213
  queries = [
214
+ f"{keyword} ์ตœ์‹  ๋‰ด์Šค 2024",
215
+ f"{keyword} ์ •๋ณด ์„ค๋ช…",
216
+ f"{keyword} ํŠธ๋ Œ๋“œ ์ „๋ง",
217
+ f"{keyword} ์žฅ์  ๋‹จ์ ",
218
+ f"{keyword} ํ™œ์šฉ ๋ฐฉ๋ฒ•",
219
+ f"{keyword} ์ „๋ฌธ๊ฐ€ ์˜๊ฒฌ"
220
  ]
221
  else:
222
  queries = [
223
+ f"{keyword} latest news 2024",
224
+ f"{keyword} explained comprehensive",
225
+ f"{keyword} trends forecast",
226
+ f"{keyword} advantages disadvantages",
227
+ f"{keyword} how to use",
228
+ f"{keyword} expert opinions"
229
  ]
230
 
231
  all_content = []
232
+ total_content_length = 0
233
 
234
  for query in queries:
235
+ results = brave_search(query, count=5) # ๋” ๋งŽ์€ ๊ฒฐ๊ณผ ๊ฐ€์ ธ์˜ค๊ธฐ
236
+ for r in results[:3]: # ๊ฐ ์ฟผ๋ฆฌ๋‹น ์ƒ์œ„ 3๊ฐœ
237
+ content = f"**{r['title']}**\n{r['snippet']}\nSource: {r['host']}\n"
238
  all_content.append(content)
239
+ total_content_length += len(r['snippet'])
240
 
241
+ # 콘텐츠가 부족하면 추가 생성
242
+ if total_content_length < 1000: # ์ตœ์†Œ 1000์ž ํ™•๋ณด
243
+ if language == "Korean":
244
+ additional_content = f"""
245
+ ์ถ”๊ฐ€ ์ •๋ณด:
246
+ {keyword}์™€ ๊ด€๋ จ๋œ ์ตœ๊ทผ ๋™ํ–ฅ์„ ์‚ดํŽด๋ณด๋ฉด, ์ด ๋ถ„์•ผ๋Š” ๋น ๋ฅด๊ฒŒ ๋ฐœ์ „ํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
247
+ ๋งŽ์€ ์ „๋ฌธ๊ฐ€๋“ค์ด ์ด ์ฃผ์ œ์— ๋Œ€ํ•ด ํ™œ๋ฐœํžˆ ์—ฐ๊ตฌํ•˜๊ณ  ์žˆ์œผ๋ฉฐ,
248
+ ์‹ค์ƒํ™œ์—์„œ์˜ ์‘์šฉ ๊ฐ€๋Šฅ์„ฑ๋„ ๊ณ„์† ํ™•๋Œ€๋˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
249
+
250
+ ๏ฟฝ๏ฟฝ๏ฟฝํžˆ ์ฃผ๋ชฉํ•  ์ ์€:
251
+ - ๊ธฐ์ˆ  ํ˜์‹ ์˜ ๊ฐ€์†ํ™”
252
+ - ์‚ฌ์šฉ์ž ๊ฒฝํ—˜์˜ ๊ฐœ์„ 
253
+ - ์ ‘๊ทผ์„ฑ์˜ ํ–ฅ์ƒ
254
+ - ๋น„์šฉ ํšจ์œจ์„ฑ ์ฆ๋Œ€
255
+ - ๊ธ€๋กœ๋ฒŒ ์‹œ์žฅ์˜ ์„ฑ์žฅ
256
+
257
+ ์ด๋Ÿฌํ•œ ์š”์†Œ๋“ค์ด {keyword}์˜ ๋ฏธ๋ž˜๋ฅผ ๋”์šฑ ๋ฐ๊ฒŒ ๋งŒ๋“ค๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
258
+ """
259
+ else:
260
+ additional_content = f"""
261
+ Additional insights:
262
+ Recent developments in {keyword} show rapid advancement in this field.
263
+ Many experts are actively researching this topic, and its practical
264
+ applications continue to expand.
265
+
266
+ Key points to note:
267
+ - Accelerating technological innovation
268
+ - Improving user experience
269
+ - Enhanced accessibility
270
+ - Increased cost efficiency
271
+ - Growing global market
272
+
273
+ These factors are making the future of {keyword} increasingly promising.
274
+ """
275
+ all_content.append(additional_content)
276
 
277
  # 컴파일된 콘텐츠 반환
278
  compiled = "\n\n".join(all_content)
279
 
280
+ # 키워드 기반 소개
281
  if language == "Korean":
282
+ intro = f"### '{keyword}'์— ๋Œ€ํ•œ ์ข…ํ•ฉ์ ์ธ ์ •๋ณด์™€ ์ตœ์‹  ๋™ํ–ฅ:\n\n"
283
  else:
284
+ intro = f"### Comprehensive information and latest trends about '{keyword}':\n\n"
285
 
286
  return intro + compiled
287
 
288
+
289
+ def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
290
+ """Build prompt for conversation generation with enhanced radio talk show style"""
291
+ # 텍스트 길이 제한
292
+ max_text_length = 4500 if search_context else 6000
293
+ if len(text) > max_text_length:
294
+ text = text[:max_text_length] + "..."
295
+
296
+ if language == "Korean":
297
+ # 대화 템플릿을 더 많은 턴으로 확장 (15-20회)
298
+ template = """
299
+ {
300
+ "conversation": [
301
+ {"speaker": "์ค€์ˆ˜", "text": ""},
302
+ {"speaker": "๋ฏผํ˜ธ", "text": ""},
303
+ {"speaker": "์ค€์ˆ˜", "text": ""},
304
+ {"speaker": "๋ฏผํ˜ธ", "text": ""},
305
+ {"speaker": "์ค€์ˆ˜", "text": ""},
306
+ {"speaker": "๋ฏผํ˜ธ", "text": ""},
307
+ {"speaker": "์ค€์ˆ˜", "text": ""},
308
+ {"speaker": "๋ฏผํ˜ธ", "text": ""},
309
+ {"speaker": "์ค€์ˆ˜", "text": ""},
310
+ {"speaker": "๋ฏผํ˜ธ", "text": ""},
311
+ {"speaker": "์ค€์ˆ˜", "text": ""},
312
+ {"speaker": "๋ฏผํ˜ธ", "text": ""},
313
+ {"speaker": "์ค€์ˆ˜", "text": ""},
314
+ {"speaker": "๋ฏผํ˜ธ", "text": ""},
315
+ {"speaker": "์ค€์ˆ˜", "text": ""},
316
+ {"speaker": "๋ฏผํ˜ธ", "text": ""},
317
+ {"speaker": "์ค€์ˆ˜", "text": ""},
318
+ {"speaker": "๋ฏผํ˜ธ", "text": ""}
319
+ ]
320
+ }
321
+ """
322
+
323
+ context_part = ""
324
+ if search_context:
325
+ context_part = f"# ์ตœ์‹  ๊ด€๋ จ ์ •๋ณด:\n{search_context}\n"
326
+
327
+ base_prompt = (
328
+ f"# ์›๋ณธ ์ฝ˜ํ…์ธ :\n{text}\n\n"
329
+ f"{context_part}"
330
+ f"์œ„ ๋‚ด์šฉ์œผ๋กœ 30์ค„ ์ด์ƒ์˜ ๋ผ๋””์˜ค ๋Œ€๋‹ด ํ”„๋กœ๊ทธ๋žจ ๋Œ€๋ณธ์„ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”.\n\n"
331
+ f"## ํ•„์ˆ˜ ์š”๊ตฌ์‚ฌํ•ญ:\n"
332
+ f"1. **์ตœ์†Œ 18ํšŒ ์ด์ƒ์˜ ๋Œ€ํ™” ๊ตํ™˜** (์ค€์ˆ˜ 9ํšŒ, ๋ฏผํ˜ธ 9ํšŒ ์ด์ƒ)\n"
333
+ f"2. **๋Œ€ํ™” ์Šคํƒ€์ผ**: ์‹ค์ œ ๋ผ๋””์˜ค ๋Œ€๋‹ด์ฒ˜๋Ÿผ ์•„์ฃผ ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ํŽธ์•ˆํ•œ ๊ตฌ์–ด์ฒด\n"
334
+ f"3. **ํ™”์ž ์—ญํ• **:\n"
335
+ f" - ์ค€์ˆ˜: ์ง„ํ–‰์ž (์งง์€ ์งˆ๋ฌธ, ๋ฆฌ์•ก์…˜, ํ™”์ œ ์ „ํ™˜)\n"
336
+ f" - ๋ฏผํ˜ธ: ์ „๋ฌธ๊ฐ€ (๊ฐ„๊ฒฐํ•œ ์„ค๋ช…, ์˜ˆ์‹œ ์ œ๊ณต)\n"
337
+ f"4. **๋Œ€ํ™” ํŒจํ„ด**:\n"
338
+ f" - ์ค€์ˆ˜: \"๊ทธ๋ ‡๊ตฐ์š”\", \"ํฅ๋ฏธ๋กญ๋„ค์š”\", \"๋” ์ž์„ธํžˆ ์„ค๋ช…ํ•ด์ฃผ์‹œ๊ฒ ์–ด์š”?\"\n"
339
+ f" - ๋ฏผํ˜ธ: 1-2๋ฌธ์žฅ์œผ๋กœ ํ•ต์‹ฌ ์„ค๋ช…\n"
340
+ f" - ์ž์—ฐ์Šค๋Ÿฌ์šด ์ถ”์ž„์ƒˆ ์‚ฌ์šฉ\n"
341
+ f"5. **๋‚ด์šฉ ๊ตฌ์„ฑ**:\n"
342
+ f" - ๋„์ž…๋ถ€ (2-3ํšŒ): ์ฃผ์ œ ์†Œ๊ฐœ\n"
343
+ f" - ์ „๊ฐœ๋ถ€ (10-12ํšŒ): ํ•ต์‹ฌ ๋‚ด์šฉ ์„ค๋ช…\n"
344
+ f" - ๋งˆ๋ฌด๋ฆฌ (3-4ํšŒ): ์š”์•ฝ ๋ฐ ์ •๋ฆฌ\n"
345
+ f"6. **ํ•„์ˆ˜**: ์„œ๋กœ ์กด๋Œ“๋ง ์‚ฌ์šฉ\n\n"
346
+ f"๋ฐ˜๋“œ์‹œ ์œ„ JSON ํ˜•์‹์œผ๋กœ 18ํšŒ ์ด์ƒ์˜ ๋Œ€ํ™”๋ฅผ ์ž‘์„ฑํ•˜์„ธ์š”:\n{template}"
347
+ )
348
+
349
+ return base_prompt
350
+
351
+ else:
352
+ # 영어 템플릿도 확장
353
+ template = """
354
+ {
355
+ "conversation": [
356
+ {"speaker": "Alex", "text": ""},
357
+ {"speaker": "Jordan", "text": ""},
358
+ {"speaker": "Alex", "text": ""},
359
+ {"speaker": "Jordan", "text": ""},
360
+ {"speaker": "Alex", "text": ""},
361
+ {"speaker": "Jordan", "text": ""},
362
+ {"speaker": "Alex", "text": ""},
363
+ {"speaker": "Jordan", "text": ""},
364
+ {"speaker": "Alex", "text": ""},
365
+ {"speaker": "Jordan", "text": ""},
366
+ {"speaker": "Alex", "text": ""},
367
+ {"speaker": "Jordan", "text": ""},
368
+ {"speaker": "Alex", "text": ""},
369
+ {"speaker": "Jordan", "text": ""},
370
+ {"speaker": "Alex", "text": ""},
371
+ {"speaker": "Jordan", "text": ""},
372
+ {"speaker": "Alex", "text": ""},
373
+ {"speaker": "Jordan", "text": ""}
374
+ ]
375
+ }
376
+ """
377
+
378
+ context_part = ""
379
+ if search_context:
380
+ context_part = f"# Latest Information:\n{search_context}\n"
381
+
382
+ base_prompt = (
383
+ f"# Content:\n{text}\n\n"
384
+ f"{context_part}"
385
+ f"Create a radio talk show conversation with at least 30 lines.\n\n"
386
+ f"## Requirements:\n"
387
+ f"1. **Minimum 18 conversation exchanges** (Alex 9+, Jordan 9+)\n"
388
+ f"2. **Style**: Natural radio talk show conversation\n"
389
+ f"3. **Roles**:\n"
390
+ f" - Alex: Host (short questions, reactions, transitions)\n"
391
+ f" - Jordan: Expert (concise explanations, examples)\n"
392
+ f"4. **Pattern**:\n"
393
+ f" - Alex: \"I see\", \"Fascinating\", \"Could you elaborate?\"\n"
394
+ f" - Jordan: 1-2 sentence explanations\n"
395
+ f" - Natural speech fillers\n"
396
+ f"5. **Structure**:\n"
397
+ f" - Introduction (2-3 exchanges): Topic intro\n"
398
+ f" - Main content (10-12 exchanges): Core discussion\n"
399
+ f" - Conclusion (3-4 exchanges): Summary\n\n"
400
+ f"Create exactly 18+ exchanges in this JSON format:\n{template}"
401
+ )
402
+
403
+ return base_prompt
404
+
405
  class UnifiedAudioConverter:
406
  def __init__(self, config: ConversationConfig):
407
  self.config = config