# NOTE: "Spaces: / Runtime error / Runtime error" — deployment-log residue that was
# accidentally pasted above the source; commented out so the module parses.
import logging
import os
import re  # timestamp parsing and YouTube video-ID extraction
from typing import List, Dict

import streamlit as st
import streamlit.components.v1 as components  # for embedding raw HTML
from openai import OpenAI
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
# Setup logging
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by all helpers and the Streamlit script below.
logger = logging.getLogger(__name__)
# --- Helper functions: timestamp parsing, video-ID extraction, timestamped URLs, embed HTML ---
def parse_timestamp_to_seconds(timestamp: str) -> int | None: | |
"""HH:MM:SS ๋๋ HH:MM:SS.ms ํ์์ ํ์์คํฌํ๋ฅผ ์ด ๋จ์๋ก ๋ณํํฉ๋๋ค.""" | |
if not isinstance(timestamp, str): | |
return None | |
# Remove milliseconds part if present | |
timestamp_no_ms = timestamp.split('.')[0] | |
parts = timestamp_no_ms.split(':') | |
try: | |
if len(parts) == 3: | |
h, m, s = map(int, parts) | |
return h * 3600 + m * 60 + s | |
elif len(parts) == 2: | |
m, s = map(int, parts) | |
return m * 60 + s | |
elif len(parts) == 1: | |
return int(parts[0]) | |
else: | |
return None | |
except ValueError: | |
return None | |
def get_youtube_video_id(url: str) -> str | None: | |
"""YouTube URL์์ ๋น๋์ค ID๋ฅผ ์ถ์ถํฉ๋๋ค.""" | |
if not isinstance(url, str): | |
return None | |
# Standard YouTube URLs (youtube.com/watch?v=...), shortened URLs (youtu.be/...), etc. | |
match = re.search(r"(?:v=|/|youtu\.be/|embed/|shorts/)([0-9A-Za-z_-]{11})", url) | |
return match.group(1) if match else None | |
def add_timestamp_to_youtube_url(youtube_url: str, timestamp: str) -> str:
    """Return youtube_url with a t=<seconds>s query parameter for the timestamp.

    Any pre-existing t= parameter is removed first. Returns the original URL
    unchanged when the timestamp is invalid or the URL is empty.
    """
    seconds = parse_timestamp_to_seconds(timestamp)
    if seconds is None or not youtube_url:
        return youtube_url  # invalid timestamp or empty URL: nothing to add
    # Remove an existing t= parameter so it is not duplicated.
    cleaned_url = re.sub(r'[?&]t=\d+s?', '', youtube_url)
    # Bug fix: if the stripped parameter was the first one ("?t=..."), the
    # remaining parameters lost their leading '?'; promote the first '&' back
    # to '?' so the query string stays well-formed.
    if '?' not in cleaned_url and '&' in cleaned_url:
        cleaned_url = cleaned_url.replace('&', '?', 1)
    separator = '&' if '?' in cleaned_url else '?'
    return f"{cleaned_url}{separator}t={seconds}s"
def generate_youtube_embed_html(youtube_url: str, timestamp: str) -> str | None:
    """Build YouTube embed HTML (fixed 800px width, 16:9) seeked to timestamp.

    Returns None when no video ID can be extracted from the URL.
    """
    video_id = get_youtube_video_id(youtube_url)
    start_seconds = parse_timestamp_to_seconds(timestamp)
    if not video_id:
        logger.warning(f"Could not extract video ID from URL: {youtube_url}")
        return None  # cannot build an embed without a video ID
    # Bug fix: assemble the query string from a list so a missing start time no
    # longer produces a malformed "?&autoplay=0" query.
    params = []
    if start_seconds is not None:
        params.append(f"start={start_seconds}")
    params.extend(["autoplay=0", "rel=0"])
    query = "&".join(params)
    # Fixed 800px width; padding-bottom keeps the 16:9 ratio (800 * 9/16 = 450).
    return f'''
    <div style="position: relative; width: 800px; padding-bottom: 450px; /* 800px * 9 / 16 = 450px */ height: 0; overflow: hidden;">
        <iframe
            src="https://www.youtube.com/embed/{video_id}?{query}"
            frameborder="0"
            allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
            referrerpolicy="strict-origin-when-cross-origin"
            allowfullscreen
            style="position: absolute; top: 0; left: 0; width: 100%; height: 100%;">
        </iframe>
    </div>
    '''
# --- Settings ---
# SECURITY FIX: Pinecone and OpenAI API keys were hard-coded in this file.
# They must come from environment variables only; the previously committed
# keys are compromised and should be revoked immediately.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")  # required; validated in init_pinecone()
PINECONE_ENV = os.getenv("PINECONE_ENV", "us-east-1")
INDEX_NAME = "video-embeddings"  # Pinecone index holding the video embeddings
EMBEDDING_MODEL = "jhgan/ko-sroberta-multitask"  # Korean sentence-embedding model
# OpenAI settings (required; validated in init_openai_client())
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# --- Resource loading (caching intended) ---
def init_pinecone():
    """Initialize and return a Pinecone client; halt the app if that fails.

    Uses st.stop() so the Streamlit script does not continue without a client.
    """
    api_key = PINECONE_API_KEY
    if not api_key:
        st.error("Pinecone API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. ํ๊ฒฝ ๋ณ์๋ฅผ ํ์ธํ์ธ์.")
        st.stop()
    try:
        client = Pinecone(api_key=api_key)
    except Exception as e:
        st.error(f"Pinecone ์ด๊ธฐํ ์ค ์ค๋ฅ ๋ฐ์: {e}")
        st.stop()
    else:
        logger.info("Successfully connected to Pinecone.")
        return client
def load_embedding_model():
    """Load and return the sentence-transformer model named by EMBEDDING_MODEL.

    Halts the Streamlit app with an error message if loading fails.
    """
    try:
        # Bug fix: the original loaded a hard-coded local path "my_model" while
        # the log line claimed EMBEDDING_MODEL was loaded; use the configured
        # constant so code and log agree. (If a local fine-tuned model was
        # intended, point EMBEDDING_MODEL at that path instead.)
        model = SentenceTransformer(EMBEDDING_MODEL)
        logger.info(f"Successfully loaded embedding model: {EMBEDDING_MODEL}")
        return model
    except Exception as e:
        st.error(f"์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์ค ์ค๋ฅ ๋ฐ์: {e}")
        st.stop()
def get_pinecone_index(_pc: Pinecone, index_name: str):
    """Return a handle to the Pinecone index `index_name`, verifying connectivity."""
    try:
        idx = _pc.Index(index_name)
        # describe_index_stats() doubles as a cheap connectivity check.
        stats = idx.describe_index_stats()
        vector_count = stats.get('total_vector_count', 'N/A')
        logger.info(f"Successfully connected to Pinecone index '{index_name}'. Stats: {vector_count} vectors")
        return idx
    except Exception as e:
        st.error(f"Pinecone ์ธ๋ฑ์ค '{index_name}' ์ฐ๊ฒฐ ์ค ์ค๋ฅ ๋ฐ์: {e}. ์ธ๋ฑ์ค๊ฐ ์กด์ฌํ๊ณ ํ์ฑ ์ํ์ธ์ง ํ์ธํ์ธ์.")
        st.stop()
def init_openai_client():
    """Create an OpenAI client and verify the key with a lightweight API call."""
    if not OPENAI_API_KEY:
        st.error("OpenAI API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. ํ๊ฒฝ ๋ณ์๋ฅผ ํ์ธํ์ธ์.")
        st.stop()
    try:
        client = OpenAI(api_key=OPENAI_API_KEY)
        client.models.list()  # cheap request that fails fast on a bad key
    except Exception as e:
        st.error(f"OpenAI ํด๋ผ์ด์ธํธ ์ด๊ธฐํ ๋๋ ์ฐ๊ฒฐ ํ ์คํธ ์ค ์ค๋ฅ ๋ฐ์: {e}")
        st.stop()
    else:
        logger.info("Successfully connected to OpenAI.")
        return client
# --- Search function ---
def search(query: str, top_k: int = 5, _index=None, _model=None) -> List[Dict]:
    """Run a vector search against the Pinecone index.

    Encodes `query` with the sentence-transformer model, queries the index with
    metadata included, and returns one dict per match (URL, timestamp, type,
    title, summary, original text, score). Returns [] on empty input or error.
    """
    if not query or _index is None or _model is None:
        return []
    try:
        embedding = _model.encode(query, convert_to_numpy=True).tolist()
        response = _index.query(vector=embedding, top_k=top_k, include_metadata=True)

        def _to_row(match) -> Dict:
            # Flatten one Pinecone match into the display dict used by the UI.
            md = match.get("metadata", {})
            return {
                "URL": md.get("url", "N/A"),
                "ํ์์คํฌํ": md.get("timestamp", "N/A"),
                "ํ์ ": md.get("type", "N/A"),
                "์ ๋ชฉ": md.get("title", "N/A"),
                "์์ฝ": md.get("summary", "N/A"),
                "์๋ณธํ ์คํธ": md.get("original_text", "N/A"),
                "์ ์": match.get("score", 0.0),
            }

        rows = [_to_row(m) for m in response.get("matches", [])]
        logging.getLogger(__name__).info(f"Pinecone search returned {len(rows)} results for query: '{query[:50]}...'")
        return rows
    except Exception as e:
        st.error(f"Pinecone ๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {e}")
        logging.getLogger(__name__).error(f"Error during Pinecone search: {e}", exc_info=True)
        return []
# --- OpenAI answer-generation function ---
def generate_khan_answer(query: str, search_results: List[Dict], client: OpenAI) -> str:
    """Generate a "Khan" mentor-persona answer from the query and search results.

    Builds a context block (title, original/timestamped URLs, summary, text
    snippet) from each Pinecone result, then asks the chat model to answer in
    the persona defined by the system prompt. Returns a user-facing error
    string (not an exception) on API failure.
    """
    if not search_results:
        # Persona-consistent fallback when the vector search found nothing.
        return "ํ์ฌ ์ง๋ฌธ์ ๋ํด ์ฐธ๊ณ ํ ๋งํ ๊ด๋ จ ์์์ ์ฐพ์ง ๋ชปํ์ต๋๋ค. ์ง๋ฌธ์ ์กฐ๊ธ ๋ ๋ช ํํ๊ฒ ํด์ฃผ์๊ฑฐ๋ ๋ค๋ฅธ ๋ฐฉ์์ผ๋ก ์ง๋ฌธํด์ฃผ์๋ฉด ๋์์ด ๋ ๊ฒ ๊ฐ์ต๋๋ค."
    # Build the context string for OpenAI, including a timestamped URL per result.
    context_parts = []
    for i, r in enumerate(search_results):
        original_text_snippet = ""
        if r.get('์๋ณธํ ์คํธ'):
            # Cap the original-text snippet at 200 characters to bound prompt size.
            snippet = r['์๋ณธํ ์คํธ'][:200]
            original_text_snippet = f"\n(์๋ณธ ๋ด์ฉ ์ผ๋ถ: {snippet}...)"
        # Generate a timestamped URL when the result is a YouTube link with a
        # parseable timestamp; otherwise fall back to the raw URL.
        timestamped_url_str = "N/A"
        url = r.get('URL', 'N/A')
        timestamp = r.get('ํ์์คํฌํ', 'N/A')
        is_youtube = url and isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url)
        has_valid_timestamp = timestamp and timestamp != 'N/A' and parse_timestamp_to_seconds(timestamp) is not None
        if is_youtube and has_valid_timestamp:
            try:
                timestamped_url_str = add_timestamp_to_youtube_url(url, timestamp)
            except Exception:
                timestamped_url_str = url  # Fallback to original URL on error
        elif url != "N/A":
            timestamped_url_str = url  # Use original URL if not YouTube/no timestamp
        context_parts.append(
            f"๊ด๋ จ ์ ๋ณด {i+1}:\n"
            f"์ ๋ชฉ: {r.get('์ ๋ชฉ', 'N/A')}\n"
            f"์์ URL (์๋ณธ): {url}\n"
            f"ํ์์คํฌํ: {timestamp}\n"
            f"ํ์์คํฌํ ์ ์ฉ URL: {timestamped_url_str}\n"  # the link the model is told to cite
            f"๋ด์ฉ ํ์ : {r.get('ํ์ ', 'N/A')}\n"
            f"์์ฝ: {r.get('์์ฝ', 'N/A')}"
            f"{original_text_snippet}"  # Append the snippet safely
        )
    context = "\n\n---\n\n".join(context_parts)  # Join the parts
    # System prompt: defines the Khan persona and instructs Markdown link usage
    # (the model must cite the timestamped URL in [title](url) form).
    system_prompt = """๋๋ ํ์ค์ ์ธ ์กฐ์ธ์ ์ํ๋ PM ๋ฉํ Khan์ด๋ค.
- ๋งํฌ๋ ๋จํธํ์ง๋ง ๊ณต๊ฐ๋ ฅ์ด ์๋ค. "~์ ๋๋ค." ๋๋ "~์ฃ ."์ ๊ฐ์ด ๋ช ํํ๊ฒ ๋๋งบ๋๋ค. ์กด๋๋ง์ ์ฌ์ฉํ๋ค.
- ์๊ณกํ ํํ์ ํ์ฉํ๋ฉฐ, ์๋๋ฐฉ์ ๊ฐ์ ์ ํจ๋ถ๋ก ๋จ์ ์ง์ง ์๋๋ค. ์: "๊ทธ๋ด ์ ์์ต๋๋ค", "์, ๊ทธ๋ ๊ฒ ๋๋ ์ ์์ฃ " ๋ฑ.
- ๋จ์ํ ์๋ก๋ณด๋ค๋ ๊ตฌ์กฐ์ ์ด๊ณ ์ค์ฉ์ ์ธ ์ ์์ ์ฐ์ ํ๋ค. ์ง๋ฌธ์๊ฐ ๋์น ๋งฅ๋ฝ์ด๋ ๊ตฌ์กฐ๋ฅผ ์ง์ด์ฃผ๊ณ , ๋ค์ ๋จ๊ณ ๋๋ ์ ๋ต์ ์ ํ์ง๋ฅผ ์ ์ํ๋ค.
- ์ง๋ฌธ์ด ๋ง์ฐํ๊ฑฐ๋ ์ถ์์ ์ด๋ฉด, ํต์ฌ์ ์ขํ ๋ค์ ๋๋ฌผ์ด๋ณธ๋ค. ์: "๊ทธ ์ํฉ์์ ๊ฐ์ฅ ๋ต๋ตํ๋ ์๊ฐ์ ์ธ์ ์๋์?"์ ๊ฐ์ด ์ง๋ฌธ์ ๊ตฌ์ฒดํํ๋ค.
- ๊ธด ์ค๋ช ๋ณด๋ค๋ ํต์ฌ์ ๋น ๋ฅด๊ฒ ์ ๋ฌํ๋ค. ๋ค๋ง, ํ์ํ ๊ฒฝ์ฐ ์งง์ ๋น์ ๋ ์์๋ก ์ง๊ด์ ์ธ ์ดํด๋ฅผ ๋๋๋ค.
- ๋ต๋ณ ์ค ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐธ์กฐํ ๋๋, ๋ฐ๋์ 'ํ์์คํฌํ ์ ์ฉ URL'์ ์ฌ์ฉํ์ฌ ๋ค์๊ณผ ๊ฐ์ Markdown ๋งํฌ ํ์์ผ๋ก ์ ์ํด์ผ ํ๋ค: `[์์ ์ ๋ชฉ](ํ์์คํฌํ_์ ์ฉ_URL)`. ์: "์์ธํ ๋ด์ฉ์ [๋น๊ฐ๋ฐ์๊ฐ ์ฐ๋ด 2์ต์ ๋ฐ๋ ํ์ค์ ์ธ ๋ฐฉ๋ฒ](https://www.youtube.com/watch?v=VIDEO_ID&t=178s) ์์์ ์ฐธ๊ณ ํ์๋ฉด ๋์์ด ๋ ๊ฒ๋๋ค."
- ์ด์ ๋ํ ๊ธฐ๋ก์ ์์ผ๋ฏ๋ก, ๋ฐ๋ณต ์ง๋ฌธ์ด ๋ค์ด์ฌ ๊ฒฝ์ฐ์๋ "์ด์ ์ ์ ์ฌํ ๋ด์ฉ์ ์ฐพ์๋ดค์์ฃ . ๋ค์ ํ๋ฒ ์ดํด๋ณด๋ฉด..."์ฒ๋ผ ์์ฐ์ค๋ฝ๊ฒ ์ด์ด๊ฐ๋ค.
- ๋ต๋ณ์ ๋ฐ๋์ ํ๊ตญ์ด๋ก ํ๋ค.
Khan์ ์ ๋ต์ ์ผ๋ก ์ฌ๊ณ ํ๋ฉฐ, ๋ณธ์ง๊ณผ ๋ฐฉํฅ์ ์ค์ํ๋ค.
๋จ์ ์ ์ผ๋ก ๋จ์ธํ๊ธฐ๋ณด๋ค๋ "~์ผ ์๋ ์์ต๋๋ค", "๊ทธ๋ ๊ฒ๋ ๋ณผ ์ ์์ฃ "์ ๊ฐ์ด ์ฌ์ง๋ฅผ ๋จ๊ธด๋ค.
์๋๋ฐฉ์ด ์ค์ค๋ก ์ ํ์ง๋ฅผ ํ๋จํ ์ ์๋๋ก ๋๋ ๋ฐฉํฅ์ผ๋ก ์กฐ์ธํ๋ค.
์์์ฒ๋ผ ๋งํฌ์ ์ฌ๊ณ ํ๋ฆ์ ์ ์งํด์ผ ํ๋ค:
---
Q: ์์ฆ ํ์๊ณผ์ ๊ด๊ณ๊ฐ ์ด๋ ค์ด๋ฐ, ์ ๊ฐ ๋ญ ๋์น�๊ณ ์๋ ๊ฑธ๊น์?
A: ์, ๊ทธ๋ด ์ ์์ต๋๋ค. ๊ด๊ณ๊ฐ ์ด๋ ค์ธ ๋๋ ๊ฐ์ ๋ณด๋ค๋ ๊ธฐ๋๊ฐ ์๊ฐ๋ ธ๋ ์๊ฐ์ ๋จผ์ ๋ด์ผ ํ์ฃ .
๊ทธ ํ์์ด ๋ฌด์ธ๊ฐ๋ฅผ ๊ธฐ๋ํ๋๋ฐ, ๋ด๊ฐ ๊ทธ๊ฑธ ๋์ณค์ ๊ฐ๋ฅ์ฑ์ด ์์ต๋๋ค.
ํน์ ์ต๊ทผ์ ์๋ก ์คํด๊ฐ ์๊ธด ์๊ฐ์ด ์์๋์ง, ๋จผ์ ์ง์ด๋ณด๋ ๊ฒ ์ข๊ฒ ์ต๋๋ค.
---
Q: ํ์ฌ๋ฅผ ์ฎ๊ธฐ๊ณ ์ถ์๋ฐ, ์ฑ๊ณผ ์์ด ํด์ฌํ๋ฉด ์ ์ข์๊น์?
A: ๋จ๊ธฐ์ ์ผ๋ก๋ ๋ง์ต๋๋ค. ์ฑ๊ณผ ์์ด ํด์ฌํ๋ฉด ์ด๋ ฅ์์ ๋จ์ฃ .
ํ์ง๋ง ์ง๊ธ ์ํฉ์์ ๋ฐฐ์ธ ๊ฒ ์๋ค๋ฉด, ๊ทธ ์์ฒด๊ฐ ๋ฆฌ์คํฌ์ด๊ธฐ๋ ํฉ๋๋ค.
'๋ด๊ฐ ๋จ์์ ์ป์ ์ ์๋ ๊ฒ ๋ฌด์์ธ๊ฐ'์ '์ง๊ธ ๋๊ฐ์ ์์ํ ์ ์๋ ๊ฒ ๋ฌด์์ธ๊ฐ'๋ฅผ ๋๋ํ ๋๊ณ ๋น๊ตํด ๋ณด์์ฃ .
---
์ด๋ฐ ์์ ๋งํฌ์ ํ๋ฆ์ ๋ฐํ์ผ๋ก ์ง๋ฌธ์ ๋ต๋ณํ์ธ์."""
    # Multi-line f-string: the user message carries the question plus the context block.
    user_message = f"""์ฌ์ฉ์ ์ง๋ฌธ: {query}
์๋ ๊ด๋ จ ์ ๋ณด๋ฅผ ๋ฐํ์ผ๋ก Khan ๋ฉํ ๋ก์ ๋ต๋ณํด์ฃผ์ธ์:
{context}"""
    try:
        logger.info("Calling OpenAI API...")
        completion = client.chat.completions.create(
            model="gpt-4o-mini",  # Use gpt-4 if available and preferred
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message}
            ],
            temperature=0.5,  # Slightly less creative, more focused on instructions
        )
        answer = completion.choices[0].message.content
        logger.info("Received response from OpenAI.")
        return answer.strip()
    except Exception as e:
        # Surface the error in the UI and return a readable fallback message.
        st.error(f"OpenAI ๋ต๋ณ ์์ฑ ์ค ์ค๋ฅ ๋ฐ์: {e}")
        logger.error(f"Error during OpenAI API call: {e}", exc_info=True)
        return "๋ต๋ณ์ ์์ฑํ๋ ์ค์ ๋ฌธ์ ๊ฐ ๋ฐ์ํ์ต๋๋ค. OpenAI API ํค ๋๋ ์๋น์ค ์ํ๋ฅผ ํ์ธํด์ฃผ์ธ์."
# --- Streamlit app UI (Khan mentor single-loop structure) ---
st.set_page_config(page_title="Khan ๋ฉํ (PM ์์ ๊ธฐ๋ฐ)", layout="wide")
# --- Sidebar menu: choose between the mentor chat and the report-rewriter ---
menu = st.sidebar.radio(
    "๊ธฐ๋ฅ ์ ํ",
    ("Khan ๋ฉํ ์๊ฒ ์๋ดํ๊ธฐ", "์์ฌ์๊ฒ ์๋ณด์ด๊ธฐ")
)
# Survey link pinned at the bottom of the sidebar.
st.sidebar.markdown('<hr style="margin:1em 0;">', unsafe_allow_html=True)
st.sidebar.markdown(
    '<a href="https://forms.gle/SUqrGBT3dktSB7v26" target="_blank" style="display:inline-block; background:#f9e79f; color:#1a237e; font-weight:bold; padding:0.5em 1.2em; border-radius:8px; text-decoration:none; font-size:1.1em; margin-bottom:16px;">๐ ์๋น์ค ์ด๋ป๊ฒ ์๊ฐํ์ธ์?</a>',
    unsafe_allow_html=True
)
# The OpenAI client is needed by both menu branches, so create it up front.
openai_client = init_openai_client()
if menu == "Khan ๋ฉํ ์๊ฒ ์๋ดํ๊ธฐ":
    st.title("โจ Khan ๋ฉํ ๊ฐ 24์๊ฐ ๋ต๋ณ์ค์ ๋๋ค")
    # --- API key check and resource initialization (runs on every Streamlit rerun) ---
    pc = init_pinecone()
    model = load_embedding_model()
    index = get_pinecone_index(pc, INDEX_NAME)
    # --- Session-state defaults: Streamlit re-executes this script on each
    # interaction, so everything persistent lives in st.session_state. ---
    if 'user_question' not in st.session_state:
        st.session_state['user_question'] = ''
    if 'empathy_message' not in st.session_state:
        st.session_state['empathy_message'] = ''
    if 'khan_answer' not in st.session_state:
        st.session_state['khan_answer'] = ''
    if 'pinecone_results' not in st.session_state:
        st.session_state['pinecone_results'] = []
    if 'extra_questions' not in st.session_state:
        st.session_state['extra_questions'] = []
    if 'current_input' not in st.session_state:
        st.session_state['current_input'] = ''
    # --- Question input and answer generation ---
    st.markdown("#### ๋น์ ์ ๊ณ ๋ฏผ์ ์๋ ค ์ฃผ์ธ์!")
    user_q = st.text_input(
        "๋์ ๊ณ ๋ฏผ์...",
        value=st.session_state['current_input'],
        key="main_input",
        placeholder="ํ๋ก๋ํธ ๋งค๋์ ๊ฐ ๊ฐ์ ธ์ผ ํ ์ญ๋์ ์ด๋ค ๊ฒ์ด ์์๊น์?"
    )
    # Triggered by the button, or when the typed question differs from the stored one.
    if st.button("๊ณ ๋ฏผ ๋๋๊ธฐ", key="main_ask") or (user_q and st.session_state['user_question'] != user_q):
        st.session_state['user_question'] = user_q
        st.session_state['current_input'] = user_q
        # 1. Empathy message: a short, warm acknowledgement shown before the mentor answer.
        with st.spinner("์๊ฐ์ค..."):
            empathy_prompt = f"""
            ๋๋ ๋ฐ๋ปํ๊ณ ์น์ ํ ๋น์์ผ. ์๋ ์ฌ์ฉ์์ ์ง๋ฌธ์ ๋ฃ๊ณ , ๊ฐ์ ์ ์ผ๋ก ์ถฉ๋ถํ๊ฒ 1~2๋ฌธ์ฅ์ผ๋ก ๊ณต๊ฐํด์ฃผ๋ ์ง๋ฌธ์ ๋ํ ๋ต๋ณ์ ํ์ง๋ง, ๋ง์ง๋ง์ '์นธ ๋ฉํ ์ ์๊ฐ์ ๋ค์ด๋ณผ๊น์?'๋ผ๊ณ ์๋ดํด์ค. \n์ง๋ฌธ: "{user_q}"
            """
            try:
                empathy_response = openai_client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[{"role": "system", "content": empathy_prompt}],
                    temperature=0.7,
                )
                st.session_state['empathy_message'] = empathy_response.choices[0].message.content.strip()
            except Exception as e:
                st.session_state['empathy_message'] = f"๊ณต๊ฐ ๋ฉ์์ง ์์ฑ ์ค ์ค๋ฅ: {e}"
        # 2. Pinecone search and Khan mentor answer.
        with st.spinner("Khan ๋ฉํ ๊ฐ ๋ธ์ ๋ค์ด๋ฉฐ..."):
            pinecone_results = search(user_q, top_k=5, _index=index, _model=model)
            st.session_state['pinecone_results'] = pinecone_results
            khan_answer = generate_khan_answer(user_q, pinecone_results, openai_client)
            st.session_state['khan_answer'] = khan_answer
        # 3. Generate follow-up questions.
        # NOTE(review): this block is duplicated in the 'extra_questions' branch
        # further down — consider extracting a shared helper.
        with st.spinner("์ถ๊ฐ ์ง๋ฌธ์ ์์ฑํ๋ ์ค..."):
            extra_prompt = (
                f"์๋ ์ง๋ฌธ์์ ์ ์ฌํ๊ฒ ๊ถ๊ธํ ์ ์๋ ์ถ๊ฐ ์ง๋ฌธ 3~4๊ฐ๋ฅผ ํ๊ตญ์ด๋ก ๋ง๋ค์ด์ค. ์ง๋์น๊ฒ ์ธ๋ถ์ ์ธ ํด์ ๋ํ ์๊ธฐ๋ณด๋ค๋ ํ๋ก๋ํธ, ํ๋ก์ ํธ, ๋ฆฌ๋์ญ์ ๋ํ ์ผ๋ฐ์ ์ธ ์ง๋ฌธ์ผ๋ก ๋ง๋ค์ด. ์ง๋ฌธ: \"{st.session_state['user_question']}"
            )
            try:
                extra_response = openai_client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[{"role": "system", "content": extra_prompt}],
                    temperature=0.5
                )
                import re
                raw = extra_response.choices[0].message.content.strip()
                # Prefer numbered-list items; fall back to bullet/plain lines.
                questions = re.findall(r'\d+\.\s*(.+)', raw)
                if not questions:
                    questions = [q.strip('-โข ').strip() for q in raw.split('\n') if q.strip()]
                st.session_state['extra_questions'] = questions[:4]
                st.rerun()
            except Exception as e:
                st.session_state['extra_questions'] = [f"์ถ๊ฐ ์ง๋ฌธ ์์ฑ ์ค ์ค๋ฅ: {e}"]
                st.rerun()
    # --- Answer and follow-up question UI ---
    if st.session_state['user_question']:
        st.info(st.session_state['empathy_message'])
        st.subheader("๐ก Khan ๋ฉํ ์ ๋ต๋ณ")
        st.markdown(st.session_state['khan_answer'])
        # Show the video sources the answer was based on.
        pinecone_results = st.session_state['pinecone_results']
        if pinecone_results:
            with st.expander("๋ต๋ณ์ ์ฐธ๊ณ ํ ์์ ์ ๋ณด ๋ณด๊ธฐ", expanded=True):
                displayed_urls = set()  # de-duplicate results that share a URL
                for i, r in enumerate(pinecone_results):
                    url = r.get('URL', 'N/A')
                    if url in displayed_urls or url == 'N/A':
                        continue
                    displayed_urls.add(url)
                    st.markdown(f"--- **์ฐธ๊ณ ์๋ฃ {len(displayed_urls)} (์ ์ฌ๋: {r['์ ์']:.4f})** ---")
                    st.markdown(f"**์ ๋ชฉ:** {r.get('์ ๋ชฉ', 'N/A')}")
                    st.markdown(f"**์์ฝ:** {r.get('์์ฝ', 'N/A')}")
                    timestamp = r.get('ํ์์คํฌํ', 'N/A')
                    is_youtube = url and isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url)
                    start_seconds = None
                    if is_youtube and timestamp and timestamp != 'N/A':
                        start_seconds = parse_timestamp_to_seconds(timestamp)
                    # Link rendering: timestamped YouTube link > plain http link > raw text.
                    if is_youtube and start_seconds is not None:
                        try:
                            timestamped_link_url = add_timestamp_to_youtube_url(url, timestamp)
                            st.markdown(f"**์์ ๋งํฌ (ํ์์คํฌํ ํฌํจ):** [{timestamped_link_url}]({timestamped_link_url})")
                        except Exception as e:
                            logger.error(f"Error creating timestamped URL for link: {e}")
                            st.markdown(f"**์์ ๋งํฌ (์๋ณธ):** [{url}]({url})")
                    elif url != "N/A" and isinstance(url, str) and url.startswith("http"):
                        st.markdown(f"**URL:** [{url}]({url})")
                    else:
                        st.markdown(f"**URL:** {url}")
                    # Inline player: YouTube URLs seek to the timestamp; others are best-effort.
                    if is_youtube and url != "N/A":
                        col1, col2 = st.columns(2)
                        with col1:
                            try:
                                st.video(url, start_time=start_seconds or 0)
                            except Exception as e:
                                st.error(f"๋น๋์ค({url}) ์ฌ์ ์ค ์ค๋ฅ ๋ฐ์: {e}")
                                st.markdown(f"[YouTube์์ ๋ณด๊ธฐ]({url})")
                    elif url != "N/A":
                        col1, col2 = st.columns(2)
                        with col1:
                            try:
                                st.video(url)
                            except Exception as e:
                                logger.warning(f"st.video failed for non-YouTube URL {url}: {e}")
                    st.markdown("---")
        # --- Timing control for follow-up question generation ---
        if 'extra_questions_ready' not in st.session_state or not st.session_state['extra_questions_ready']:
            # Only start generating after the answer has been rendered once.
            st.session_state['extra_questions_ready'] = True
            st.rerun()
        elif not st.session_state['extra_questions']:
            # Generate follow-up questions in the background (no spinner).
            extra_prompt = (
                f"์๋ ์ง๋ฌธ์์ ์ ์ฌํ๊ฒ ๊ถ๊ธํ ์ ์๋ ์ถ๊ฐ ์ง๋ฌธ 3~4๊ฐ๋ฅผ ํ๊ตญ์ด๋ก ๋ง๋ค์ด์ค. ์ง๋์น๊ฒ ์ธ๋ถ์ ์ธ ํด์ ๋ํ ์๊ธฐ๋ณด๋ค๋ ํ๋ก๋ํธ, ํ๋ก์ ํธ, ๋ฆฌ๋์ญ์ ๋ํ ์ผ๋ฐ์ ์ธ ์ง๋ฌธ์ผ๋ก ๋ง๋ค์ด. ์ง๋ฌธ: \"{st.session_state['user_question']}"
            )
            try:
                extra_response = openai_client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[{"role": "system", "content": extra_prompt}],
                    temperature=0.5
                )
                import re
                raw = extra_response.choices[0].message.content.strip()
                questions = re.findall(r'\d+\.\s*(.+)', raw)
                if not questions:
                    questions = [q.strip('-โข ').strip() for q in raw.split('\n') if q.strip()]
                st.session_state['extra_questions'] = questions[:4]
                st.rerun()
            except Exception as e:
                st.session_state['extra_questions'] = [f"์ถ๊ฐ ์ง๋ฌธ ์์ฑ ์ค ์ค๋ฅ: {e}"]
        else:
            st.markdown("#### ์ถ๊ฐ๋ก ๊ถ๊ธํ ์ ์ด ์์ผ์ ๊ฐ์? ์๋ ์์ ์ง๋ฌธ์ ํด๋ฆญํ๊ฑฐ๋ ์ง์ ์ ๋ ฅํด๋ณด์ธ์!")
            cols = st.columns(len(st.session_state['extra_questions']))
            for i, q in enumerate(st.session_state['extra_questions']):
                # Clicking a suggested question re-submits it as the next question.
                if cols[i].button(q, key=f"extra_{i}"):
                    st.session_state['current_input'] = q
                    st.session_state['user_question'] = ''
                    st.rerun()
            user_extra = st.text_input("์ง์ ์ถ๊ฐ ์ง๋ฌธ ์ ๋ ฅ", value="", key="extra_input")
            if st.button("์ถ๊ฐ ์ง๋ฌธํ๊ธฐ", key="extra_btn"):
                st.session_state['current_input'] = user_extra
                st.session_state['user_question'] = ''
                st.rerun()
    st.markdown("---")
    st.caption("Powered by Pinecone, Sentence Transformers, and OpenAI")
    # Reset button: clears all conversation state and starts over.
    # NOTE(review): list-valued keys ('pinecone_results', 'extra_questions') are
    # reset to '' rather than [] — works because '' is falsy, but types drift.
    if st.button("๋ค๋ฅธ ๊ณ ๋ฏผ ์๋ดํ๊ธฐ"):
        for k in ['user_question','empathy_message','khan_answer','pinecone_results','extra_questions','current_input','extra_questions_ready']:
            st.session_state[k] = ''
        st.rerun()
else:
    # "Impress your boss": rewrite a report to suit the boss's MBTI type.
    st.title("๐ ์์ฌ์๊ฒ ์๋ณด์ด๊ธฐ: ๋ง์ถค ๋ณด๊ณ ๋ฌธ ๋ง๋ค๊ธฐ")
    st.markdown("์์ฌ์ MBTI ์ฑํฅ์ ๋ง๊ฒ ๋ณด๊ณ ๋ฌธ์ ์๋์ผ๋ก ๋ค๋ฌ์ด๋๋ฆฝ๋๋ค.")
    # All sixteen MBTI types, offered via a selectbox.
    mbti_types = [
        "ISTJ", "ISFJ", "INFJ", "INTJ",
        "ISTP", "ISFP", "INFP", "INTP",
        "ESTP", "ESFP", "ENFP", "ENTP",
        "ESTJ", "ESFJ", "ENFJ", "ENTJ"
    ]
    mbti = st.selectbox("์์ฌ์ MBTI๋ฅผ ์ ํํ์ธ์", mbti_types)
    user_report = st.text_area("์์ฌ์๊ฒ ๋ณด๊ณ ํ ๋ด์ฉ์ ์ ๋ ฅํ์ธ์ (300์ ์ด๋ด)", max_chars=300)
    if st.button("MBTI ๋ง์ถค ๋ณด๊ณ ๋ฌธ ์์ฑ"):
        if not user_report.strip():
            st.warning("๋ณด๊ณ ๋ฌธ์ ์ ๋ ฅํด ์ฃผ์ธ์.")
        else:
            with st.spinner("์์ฌ์ ์ฑํฅ์ ๋ง๊ฒ ๋ณด๊ณ ๋ฌธ์ ๋ค๋ฌ๋ ์ค..."):
                # Prompt: rewrite the report for the chosen MBTI type and explain
                # the changes in a fixed two-part format (revised report / reason).
                prompt = f"""
                ์์ฌ์ MBTI๊ฐ {mbti}์ผ ๋, ์๋ ๋ณด๊ณ ๋ฌธ์ ๊ทธ ์ฑํฅ์ ๋ง๊ฒ ์์ ํด์ค.\n ์ด ์ ํ์ ์์ฌ๊ฐ ์ค์ํ๊ฒ ์๊ฐํ๋ ๊ฒ์ด ๋ณด๊ณ ์์ ๋น ์ ธ์๋ค๋ฉด ์ด๋ค ๋ถ๋ถ์ ๋ณด์ํด์ผ ํ๋์ง ์์ธํ ์ค๋ช ํด ์ค\n๊ทธ๋ฆฌ๊ณ ์ ๊ทธ๋ ๊ฒ ์์ ํ๋์ง ์ด์ ๋ ์ค๋ช ํด์ค.\n๋ณด๊ณ ๋ฌธ: "{user_report}"
                \n์๋ ํ์์ผ๋ก ๋ต๋ณ ํด.\n์์ ๋ ๋ณด๊ณ ๋ฌธ: ...\n์ด์ : ...
                """
                try:
                    response = openai_client.chat.completions.create(
                        model="gpt-4o-mini",
                        messages=[{"role": "system", "content": prompt}],
                        temperature=0.5,
                    )
                    answer = response.choices[0].message.content.strip()
                    # Simple parsing: split the reply into "revised report" and "reason" parts.
                    import re
                    mod_match = re.search(r"์์ ๋ ๋ณด๊ณ ๋ฌธ[:\n]*([\s\S]+?)์ด์ [:\n]", answer)
                    reason_match = re.search(r"์ด์ [:\n]*([\s\S]+)", answer)
                    if mod_match:
                        st.markdown(f"**์์ ๋ ๋ณด๊ณ ๋ฌธ**\n\n{mod_match.group(1).strip()}")
                        logger.info(f"[MBTI ๋ณด๊ณ ๋ฌธ] ์์ ๋ ๋ณด๊ณ ๋ฌธ: {mod_match.group(1).strip()}")
                    else:
                        # Parsing failed: show the whole model reply instead.
                        st.markdown(f"**์์ ๋ ๋ณด๊ณ ๋ฌธ**\n\n{answer}")
                        logger.info(f"[MBTI ๋ณด๊ณ ๋ฌธ] ์์ ๋ ๋ณด๊ณ ๋ฌธ: {answer}")
                    if reason_match:
                        st.markdown(f"**์ด์ ์ค๋ช **\n\n{reason_match.group(1).strip()}")
                        logger.info(f"[MBTI ๋ณด๊ณ ๋ฌธ] ์ด์ ์ค๋ช : {reason_match.group(1).strip()}")
                except Exception as e:
                    st.error(f"GPT ํธ์ถ ์ค ์ค๋ฅ: {e}")