File size: 3,460 Bytes
aab927d
 
 
3e4fdb2
aab927d
3e4fdb2
aab927d
 
73dadd8
 
aab927d
 
 
 
73dadd8
aab927d
 
3e4fdb2
aab927d
 
 
 
3e4fdb2
e617f3b
 
 
aab927d
73dadd8
aab927d
 
 
 
 
3e4fdb2
aab927d
73dadd8
aab927d
ccfdf30
 
acbb25a
ccfdf30
 
 
5f703f9
 
3e4fdb2
5f703f9
3e4fdb2
ccfdf30
 
 
aab927d
ccfdf30
 
 
aab927d
 
73dadd8
 
 
 
 
aab927d
73dadd8
aab927d
 
 
 
 
 
 
 
0372ddf
aab927d
0372ddf
 
 
 
aab927d
0372ddf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import re

# Record every generated bot response in the conversation history
def generate_reply(ctx, makePipeLine, user_msg):
    """Generate one bot reply for ``user_msg`` and store it in the history.

    The reply is produced by ``generate_valid_response`` and appended through
    ``ctx.addHistory`` under the ``"bot"`` role. Nothing is returned.
    """
    # Initial (and only) reply.
    # NOTE: a follow-up step that generated a continuation whenever the reply
    # looked truncated used to run after this; per the original author's note
    # it was disabled because it led to unstable responses.
    reply = generate_valid_response(ctx, makePipeLine, user_msg)
    ctx.addHistory("bot", reply)

# Generate a single bot response
def generate_valid_response(ctx, makePipeline, user_msg, max_attempts=None) -> str:
    """Query the model until it yields a valid reply, then return it cleaned.

    Each attempt rebuilds the prompt from ``ctx.getHistory()``, prints it for
    debugging, sends it to ``makePipeline.character_chat`` and extracts the
    reply. A reply is accepted when ``is_valid_response`` approves it.

    Args:
        ctx: Conversation context; must provide ``getUserName()``,
            ``getBotName()`` and ``getHistory()``.
        makePipeline: Object exposing ``character_chat(prompt)``.
        user_msg: The latest user message, forwarded to ``build_prompt``.
        max_attempts: Optional cap on the number of generation attempts.
            ``None`` (the default) retries without limit, which is the
            historical behaviour.

    Returns:
        The accepted reply after ``clean_response`` has been applied.

    Raises:
        RuntimeError: If ``max_attempts`` is set and no valid reply was
            produced within that many attempts.
    """
    user_name = ctx.getUserName()
    bot_name = ctx.getBotName()

    attempts = 0
    while max_attempts is None or attempts < max_attempts:
        attempts += 1
        # The prompt is rebuilt on every attempt so it always reflects the
        # history currently held by ctx.
        prompt = build_prompt(ctx.getHistory(), user_msg, user_name, bot_name)
        print("\n==========[DEBUG: Prompt]==========")
        print(prompt)
        print("===================================\n")
        full_text = makePipeline.character_chat(prompt)
        response = extract_response(full_text)
        if is_valid_response(response, user_name, bot_name):
            return clean_response(response, bot_name)

    raise RuntimeError(
        f"no valid response produced after {attempts} attempt(s)"
    )

# Assemble the input prompt
def build_prompt(history, user_msg, user_name, bot_name, *,
                 max_turns=16, prompt_path="assets/prompt/init.txt"):
    """Build the instruction-style prompt sent to the model.

    The system prompt is read from ``prompt_path`` (stripped of surrounding
    whitespace), followed by the most recent turns of ``history`` rendered as
    ``"<speaker>: <text>"`` lines, and finally a ``### Response:`` section
    that ends with ``"<bot_name>:"`` for the model to complete.

    Args:
        history: Sequence of dicts with ``"role"`` and ``"text"`` keys. A
            role of ``"user"`` is rendered with ``user_name``; any other role
            is rendered with ``bot_name``.
        user_msg: Unused. The original author's note says appending it
            duplicated the user's input (presumably because the caller has
            already stored it in ``history`` - verify against the caller).
        user_name: Display name used for user turns.
        bot_name: Display name used for bot turns and the response cue.
        max_turns: How many of the most recent turns to include
            (default 16). Values <= 0 include no turns.
        prompt_path: Path of the UTF-8 system prompt file.

    Returns:
        The complete prompt string.
    """
    with open(prompt_path, "r", encoding="utf-8") as f:
        system_prompt = f.read().strip()

    # history[-0:] would select the whole list, so guard non-positive limits.
    recent_turns = history[-max_turns:] if max_turns > 0 else []

    # Re-render the recent conversation as plain "speaker: text" lines.
    lines = []
    for turn in recent_turns:
        speaker = user_name if turn["role"] == "user" else bot_name
        lines.append(f"{speaker}: {turn['text']}\n")
    dialogue = "".join(lines)

    # Layout expected by the model: instruction block, dialogue, response cue.
    return (
        "### Instruction:\n"
        f"{system_prompt}\n"
        "\n"
        f"{dialogue}\n"
        "### Response:\n"
        f"{bot_name}:"
    )

# Extract the response from the model output (following the HyperCLOVAX format)
def extract_response(full_text):
    """Return the model's reply portion of ``full_text``.

    Everything after the last ``### Response:`` marker is returned with
    surrounding whitespace removed. When the marker is absent, the whole text
    is returned stripped.
    """
    # rpartition yields ("", "", full_text) when the marker is missing, so the
    # tail is the entire text in that case.
    _, _, tail = full_text.rpartition("### Response:")
    return tail.strip()

# Validate the response
def is_valid_response(text: str, user_name, bot_name) -> bool:
    """Tell whether ``text`` is free of a ``"<user_name>:"`` speaker tag.

    Returns ``False`` when the text contains ``user_name`` immediately
    followed by a colon, ``True`` otherwise. ``bot_name`` is accepted but not
    consulted.
    """
    user_tag = user_name + ":"
    return user_tag not in text

# Clean up the output
def clean_response(text: str, bot_name):
    """Strip bot-name speaker tags from ``text`` and drop a truncated tail.

    Every occurrence of ``"<bot_name>:"`` together with the whitespace that
    follows it is removed, the result is stripped, and it is then passed
    through ``clean_truncated_response``.

    Args:
        text: Raw reply text extracted from the model output.
        bot_name: Name whose ``"<bot_name>:"`` tags are removed.

    Returns:
        The cleaned reply.
    """
    # Escape the name so regex metacharacters in it (e.g. ".", "(", "+") are
    # matched literally instead of mis-matching or raising re.error.
    tag_pattern = re.escape(f"{bot_name}") + r":\s*"
    text = re.sub(tag_pattern, "", text).strip()
    # Remove any unfinished trailing sentence.
    return clean_truncated_response(text)

# Remove incomplete sentences
def clean_truncated_response(text: str) -> str:
    """Keep the leading run of complete sentences in ``text``.

    The stripped text is split at whitespace that follows ``.``, ``?``, ``!``
    or ``~`` and at every newline. Segments are kept, in order, for as long as
    each one ends with a sentence terminator; the first segment that does not
    ends the scan and it and everything after it is discarded. Kept segments
    are joined with single spaces, so newlines between them become spaces.

    If no segment is kept (for example the very first sentence is unfinished,
    or the text is empty), the stripped original text is returned unchanged.

    Args:
        text: Reply text that may end in an unfinished sentence.

    Returns:
        The text reduced to its complete leading sentences, or the stripped
        input when nothing qualifies.
    """
    stripped = text.strip()

    # Split after sentence punctuation followed by whitespace, or at newlines.
    sentence_end_pattern = r"(?<=[\.?!~])\s|\n"
    # Terminators: ASCII punctuation, the ellipsis, and a few emoticons/emoji.
    # The astral emoji use \U escapes because a Python str stores them as one
    # code point; UTF-16 surrogate pairs (\uD83D\uDE0A) would never match.
    complete_sentence_pattern = (
        r"[.?!~\u2026\u2639\u263A\u2764\U0001F60A\U0001F622]$"
    )

    cleaned = []
    for segment in re.split(sentence_end_pattern, stripped):
        segment = segment.strip()
        if not segment:
            continue
        if not re.search(complete_sentence_pattern, segment):
            # Unfinished sentence: drop it and everything that follows.
            break
        cleaned.append(segment)

    result = " ".join(cleaned)
    return result if result != "" else stripped