Spaces:
Sleeping
Sleeping
# Load the required libraries
| import gradio as gr # Gradio: ์น ์ธํฐํ์ด์ค ๊ตฌ์ฑ์ ์ํ ๋ผ์ด๋ธ๋ฌ๋ฆฌ | |
| import requests # requests: HTTP ์์ฒญ์ ๋ณด๋ด๊ธฐ ์ํ ๋ผ์ด๋ธ๋ฌ๋ฆฌ | |
| from openai import OpenAI # OpenAI: Upstage Solar API์ ํธํ๋๋ ํด๋ผ์ด์ธํธ | |
# ------------------------------
# Document parsing function
# ------------------------------
def parse_document(file, api_key):
    """Convert an uploaded PDF into HTML via the Upstage Document Parse API.

    Args:
        file: The uploaded document. Either a filesystem path or an object
            that exposes the path as ``.name`` (the attribute the original
            code read). ``None`` means nothing was uploaded.
        api_key: Upstage API key, sent as a bearer token.

    Returns:
        The HTML string found under ``content.html`` in the JSON response,
        or ``""`` when no file was supplied or the response has no HTML.
    """
    # Nothing uploaded yet: skip the request instead of failing on `file.name`.
    if file is None:
        return ""

    # Accept both plain paths and wrappers that carry the path in `.name`.
    path = getattr(file, "name", file)

    url = "https://api.upstage.ai/v1/document-ai/document-parse"
    headers = {"Authorization": f"Bearer {api_key}"}
    data = {
        "base64_encoding": "['table']",  # request table data base64-encoded
        "model": "document-parse",
    }

    # The context manager closes the upload handle even if the request
    # raises; the previous version left the file open.
    with open(path, "rb") as document:
        response = requests.post(
            url, headers=headers, files={"document": document}, data=data
        )

    result = response.json()
    # `content` may be missing or null in the payload; fall back to "".
    return (result.get("content") or {}).get("html", "")
# ------------------------------
# Document-based Q&A function
# ------------------------------
def chat_with_document(history, html_text, user_question, api_key):
    """Answer a question about the parsed document using the Solar LLM.

    Args:
        history: Earlier turns as a list of ``(user, assistant)`` pairs, or
            ``None``/empty for a new conversation. The list passed in is
            not modified; an updated copy is returned.
        html_text: HTML of the parsed document used as the answer context.
        user_question: The question to ask.
        api_key: Upstage API key used for the OpenAI-compatible client.

    Returns:
        A 3-tuple ``(turns, turns, text)``. ``turns`` is the conversation
        (returned twice, once for display and once for state). ``text`` is
        a warning when no document has been parsed, otherwise ``""``.
    """
    # Work on a copy so the caller's list is never mutated in place.
    turns = list(history or [])

    # No parsed document yet (None, empty, or whitespace): warn and stop.
    if not (html_text or "").strip():
        return turns, turns, "โ ๏ธ ๋จผ์ ๋ฌธ์๋ฅผ ๋ณํํด์ฃผ์ธ์."

    # A blank question has nothing to answer; skip the API call entirely.
    if not (user_question or "").strip():
        return turns, turns, ""

    # OpenAI-compatible client pointed at the Upstage endpoint.
    client = OpenAI(
        api_key=api_key,
        base_url="https://api.upstage.ai/v1",
    )

    # System prompt: instructs the model to answer from the document HTML.
    system_prompt = (
        "The following is a financial statement document extracted in HTML format.\n"
        "Please answer user questions accurately and concisely in Korean, "
        "based on the text within HTML tags.\n"
        "Document:\n"
        f"{html_text}\n"
    )

    # Message order: system prompt, prior turns, then the current question.
    messages = [{"role": "system", "content": system_prompt}]
    for asked, answered in turns:
        messages.append({"role": "user", "content": asked})
        messages.append({"role": "assistant", "content": answered})
    messages.append({"role": "user", "content": user_question})

    try:
        response = client.chat.completions.create(
            model="solar-pro",
            messages=messages,
            temperature=0,
            max_tokens=1024,
        )
        bot_reply = response.choices[0].message.content
    except Exception as e:
        # UI boundary: surface the failure as the reply instead of raising.
        bot_reply = f"โ ๏ธ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"

    turns.append((user_question, bot_reply))
    return turns, turns, ""
# ------------------------------
# HTML view toggle function
# ------------------------------
def toggle_html_view(current_html, is_visible):
    """Switch between the raw-text view and the rendered-HTML view.

    Args:
        current_html: The HTML text to place in both views.
        is_visible: Whether the rendered-HTML view is showing before the
            click (the UI initialises this state to ``False``).

    Returns:
        A 3-tuple ``(textbox_update, html_update, new_state)``: the update
        for the text box, the update for the rendered view, and the new
        visibility flag for the rendered view.
    """
    # Compute the state after the click first and derive both views from it.
    # The previous version applied the old flag to the rendered view, so the
    # first click (state False) changed nothing on screen.
    show_rendered = not is_visible
    return (
        gr.update(value=current_html, visible=not show_rendered),  # raw text box
        gr.update(value=current_html, visible=show_rendered),  # rendered HTML
        show_rendered,
    )
# ------------------------------
# Gradio UI layout
# ------------------------------
with gr.Blocks() as demo:
    # Title and short description.
    gr.Markdown("# ๐ ์ฌ๋ฌด์ ํ ๋ถ์ ์ฑ๋ด")
    gr.Markdown("1. Document Parse API๋ก PDF ๋ฌธ์๋ฅผ HTML๋ก ๋ณํํฉ๋๋ค.\n"
                "2. Solar LLM์ ํตํด ๋ฌธ์ ๊ธฐ๋ฐ ์ง๋ฌธ์ ๋ต๋ณํฉ๋๋ค.")

    # API key field (entered by the user, masked as a password).
    api_key_input = gr.Textbox(
        label="๐ Upstage API Key",
        type="password",
        placeholder="Paste your API key here",
    )

    # File upload next to the "convert document" button.
    with gr.Row():
        file_input = gr.File(label="๐ ์ฌ๋ฌด์ ํ ์ ๋ก๋")
        parse_btn = gr.Button("๋ฌธ์ HTML ๋ณํ")

    # Document output: a raw-text box and a rendered-HTML view, one shown at a time.
    html_output = gr.Textbox(label="๐ ๋ฌธ์ ๋ด์ฉ", lines=10, visible=True, elem_id="scrollable-html")
    html_display = gr.HTML(visible=False, elem_id="scrollable-html-display")
    toggle_html_btn = gr.Button("๐ HTML ๋ณด๊ธฐ ์ ํ")
    html_visible_state = gr.State(False)  # visibility flag for the rendered view

    # Convert button: parse the uploaded file and show the resulting HTML text.
    parse_btn.click(
        fn=parse_document,
        inputs=[file_input, api_key_input],
        outputs=html_output,
    )

    # Toggle button: switch between the two document views.
    toggle_html_btn.click(
        fn=toggle_html_view,
        inputs=[html_output, html_visible_state],
        outputs=[html_output, html_display, html_visible_state],
    )

    # Chat interface.
    chatbot = gr.Chatbot(label="๐ฌ ๋ฌธ์ ๊ธฐ๋ฐ Q&A", height=400)
    user_question = gr.Textbox(label="โ ์ง๋ฌธ์ ์ ๋ ฅํ์ธ์", lines=2)
    answer_btn = gr.Button("๋ต๋ณ ์์ฑ")
    chat_state = gr.State([])  # conversation history

    # Example questions. Each string is used both as the button label and as
    # the question submitted on click, so the two cannot drift apart.
    # NOTE(review): the second button previously showed a label starting with
    # "3" but submitted a question starting with "1"; the visible label is
    # kept here -- confirm which one was intended.
    example_questions = [
        "์ด๋ค ๊ธฐ์ ์ ์ฌ๋ฌด์ ํ์ธ๊ฐ์?",
        "3๋ถ๊ธฐ ์ด ์๋งค์ถ์ ์ผ๋ง์ธ๊ฐ์?",
    ]
    with gr.Row():
        gr.Markdown("๐ก ์์ ์ง๋ฌธ:")
        ex1, ex2 = (gr.Button(text) for text in example_questions)

    # Shared wiring for every path that asks the model a question.
    chat_inputs = [chat_state, html_output, user_question, api_key_input]
    chat_outputs = [chatbot, chat_state, user_question]

    # Example button: fill the question box, then run the chat function.
    for btn, question in zip((ex1, ex2), example_questions):
        btn.click(
            fn=lambda q=question: q,  # bind the current question as a default
            inputs=[],
            outputs=user_question,
        ).then(
            fn=chat_with_document,
            inputs=chat_inputs,
            outputs=chat_outputs,
            show_progress=True,
        )

    # Answer button: send the typed question to the chat function.
    answer_btn.click(
        fn=chat_with_document,
        inputs=chat_inputs,
        outputs=chat_outputs,
        show_progress=True,
    )
# ------------------------------
# Scrollable HTML box styling
# ------------------------------
# CSS rule for the two document views (matched by their element ids): capped
# height with scrolling, a thin border, and inner padding.
SCROLLABLE_HTML_CSS = """
#scrollable-html, #scrollable-html-display {
max-height: 400px;
overflow: auto;
border: 1px solid #ccc;
padding: 10px;
}
"""
demo.css = SCROLLABLE_HTML_CSS

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()