# Main-keyword analysis app — Gradio entry point (Hugging Face Spaces deployment).
import glob
import logging
import os
import random
import shutil
import sys
import tempfile
import threading
import time
import types
import uuid
from datetime import datetime

import gradio as gr
import pandas as pd
# Logging configuration: emit to both the console and an append-mode log file.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        # encoding pinned to UTF-8 so the Korean log messages in this file do not
        # mojibake on platforms whose default locale encoding is not UTF-8
        # (e.g. Windows cp949).
        logging.FileHandler('main_keyword_app.log', mode='a', encoding='utf-8')
    ]
)
logger = logging.getLogger(__name__)
# Load module source from an environment variable and build the module dynamically.
def load_module_from_env(module_name, env_var_name):
    """Create module *module_name* by exec-ing the source stored in *env_var_name*.

    The new module is pre-seeded with the stdlib names its code expects,
    registered in ``sys.modules`` (so later imports resolve), and returned.
    Raises ``ValueError`` when the environment variable is missing/empty;
    any other failure is logged and re-raised.
    """
    try:
        module_code = os.getenv(env_var_name)
        if not module_code:
            raise ValueError(f"νκ²½λ³μ {env_var_name}κ° μ€μ λμ§ μμμ΅λλ€.")
        # Create a fresh, empty module object.
        module = types.ModuleType(module_name)
        # Inject the baseline imports the module code relies on, so the exec'd
        # source does not need its own import statements for these.
        module.__dict__.update({
            'os': __import__('os'),
            'time': __import__('time'),
            'logging': __import__('logging'),
            'pandas': __import__('pandas'),
            'requests': __import__('requests'),
            'tempfile': __import__('tempfile'),
            'threading': __import__('threading'),
            're': __import__('re'),
            'random': __import__('random'),
            'uuid': __import__('uuid'),
            'shutil': __import__('shutil'),
            'glob': __import__('glob'),
            'datetime': __import__('datetime'),
            'types': __import__('types'),
            'collections': __import__('collections'),
            'Counter': __import__('collections').Counter,
            'defaultdict': __import__('collections').defaultdict,
            'hmac': __import__('hmac'),
            'hashlib': __import__('hashlib'),
            'base64': __import__('base64'),
        })
        # SECURITY NOTE(review): exec of code supplied via environment variables —
        # these must be set only by the deployment owner, never from user input.
        exec(module_code, module.__dict__)
        # Register so subsequent `import module_name` statements resolve to it.
        sys.modules[module_name] = module
        logger.info(f"β λͺ¨λ {module_name} λ‘λ μλ£")
        return module
    except Exception as e:
        logger.error(f"β λͺ¨λ {module_name} λ‘λ μ€ν¨: {e}")
        raise
# Load the required project modules from environment variables.
# Order matters: text_utils is loaded before the modules that depend on it,
# and dependencies are injected manually as module attributes afterwards.
logger.info("π λͺ¨λ λ‘λ μμ...")
try:
    # 1. api_utils module
    api_utils = load_module_from_env('api_utils', 'API_UTILS_CODE')
    # 2. text_utils module (loaded early — other modules depend on it)
    text_utils = load_module_from_env('text_utils', 'TEXT_UTILS_CODE')
    # 3. keyword_search module
    keyword_search = load_module_from_env('keyword_search', 'KEYWORD_SEARCH_CODE')
    # 4. product_search module (depends on text_utils, keyword_search)
    product_search_module = load_module_from_env('product_search', 'PRODUCT_SEARCH_CODE')
    # Inject product_search's dependencies.
    product_search_module.api_utils = api_utils
    product_search_module.text_utils = text_utils
    product_search = product_search_module
    # 5. keyword_processor module
    keyword_processor_module = load_module_from_env('keyword_processor', 'KEYWORD_PROCESSOR_CODE')
    # Inject keyword_processor's dependencies.
    keyword_processor_module.text_utils = text_utils
    keyword_processor_module.keyword_search = keyword_search
    keyword_processor_module.product_search = product_search
    keyword_processor = keyword_processor_module
    # 6. export_utils module
    export_utils = load_module_from_env('export_utils', 'EXPORT_UTILS_CODE')
    # 7. category_analysis module (depends on all of the above)
    category_analysis_module = load_module_from_env('category_analysis', 'CATEGORY_ANALYSIS_CODE')
    # Inject category_analysis's dependencies.
    category_analysis_module.text_utils = text_utils
    category_analysis_module.product_search = product_search
    category_analysis_module.keyword_search = keyword_search
    category_analysis = category_analysis_module
    logger.info("β λͺ¨λ λͺ¨λ λ‘λ μλ£")
except Exception as e:
    # Fatal: the app cannot run without these modules — log which env vars
    # are required, then propagate.
    logger.error(f"β λͺ¨λ λ‘λ μ€ μΉλͺ μ μ€λ₯: {e}")
    logger.error("νμν νκ²½λ³μλ€μ΄ μ€μ λμλμ§ νμΈνμΈμ:")
    logger.error("- API_UTILS_CODE")
    logger.error("- TEXT_UTILS_CODE")
    logger.error("- KEYWORD_SEARCH_CODE")
    logger.error("- PRODUCT_SEARCH_CODE")
    logger.error("- KEYWORD_PROCESSOR_CODE")
    logger.error("- EXPORT_UTILS_CODE")
    logger.error("- CATEGORY_ANALYSIS_CODE")
    raise
# Global registries for per-session temp-file management.
# session_temp_files: session_id -> list of temp file paths owned by that session.
# session_data: session_id -> metadata dict (currently only 'last_activity' timestamp).
# NOTE(review): mutated from daemon threads without a lock — presumably relying
# on CPython dict-op atomicity; confirm if stronger guarantees are needed.
session_temp_files = {}
session_data = {}
def cleanup_huggingface_temp_folders():
    """Initial sweep of Hugging Face / system temp folders.

    Deletes leftover session/keyword/tmp spreadsheet files older than one hour
    and removes the Gradio example cache. All failures are logged and ignored —
    this is best-effort housekeeping, never fatal.
    """
    try:
        # Candidate temp directories (system + cwd-local + Spaces-specific).
        temp_dirs = [
            tempfile.gettempdir(),
            "/tmp",
            "/var/tmp",
            os.path.join(os.getcwd(), "temp"),
            os.path.join(os.getcwd(), "tmp"),
            "/gradio_cached_examples",
            "/flagged"
        ]
        cleanup_count = 0
        for temp_dir in temp_dirs:
            if os.path.exists(temp_dir):
                try:
                    # Collect stale session/export files by known name patterns.
                    session_files = glob.glob(os.path.join(temp_dir, "session_*.xlsx"))
                    session_files.extend(glob.glob(os.path.join(temp_dir, "session_*.csv")))
                    session_files.extend(glob.glob(os.path.join(temp_dir, "*keyword*.xlsx")))
                    session_files.extend(glob.glob(os.path.join(temp_dir, "*keyword*.csv")))
                    session_files.extend(glob.glob(os.path.join(temp_dir, "tmp*.xlsx")))
                    session_files.extend(glob.glob(os.path.join(temp_dir, "tmp*.csv")))
                    for file_path in session_files:
                        try:
                            # Only delete files older than one hour, so files from
                            # currently active sessions are spared.
                            if os.path.getmtime(file_path) < time.time() - 3600:
                                os.remove(file_path)
                                cleanup_count += 1
                                logger.info(f"μ΄κΈ° μ 리: μ€λλ μμ νμΌ μμ  - {file_path}")
                        except Exception as e:
                            logger.warning(f"νμΌ μμ  μ€ν¨ (무μλ¨): {file_path} - {e}")
                except Exception as e:
                    logger.warning(f"μμ λλ ν 리 μ 리 μ€ν¨ (무μλ¨): {temp_dir} - {e}")
        logger.info(f"β νκΉνμ΄μ€ μμ ν΄λ μ΄κΈ° μ 리 μλ£ - {cleanup_count}κ° νμΌ μμ ")
        # Also remove the Gradio example cache folder.
        try:
            gradio_temp_dir = os.path.join(os.getcwd(), "gradio_cached_examples")
            if os.path.exists(gradio_temp_dir):
                shutil.rmtree(gradio_temp_dir, ignore_errors=True)
                logger.info("Gradio μΊμ ν΄λ μ 리 μλ£")
        except Exception as e:
            logger.warning(f"Gradio μΊμ ν΄λ μ 리 μ€ν¨ (무μλ¨): {e}")
    except Exception as e:
        logger.error(f"μ΄κΈ° μμ ν΄λ μ 리 μ€ μ€λ₯ (κ³μ μ§ν): {e}")
def setup_clean_temp_environment():
    """Set up a clean, app-private temp environment.

    Sweeps old temp files, (re)creates a dedicated app temp directory, and
    publishes its path via the KEYWORD_APP_TEMP environment variable.
    Returns the directory path; falls back to the system temp dir on failure.
    """
    try:
        # 1. Sweep leftover temp files first.
        cleanup_huggingface_temp_folders()
        # 2. Recreate the application-private temp directory from scratch.
        app_temp_dir = os.path.join(tempfile.gettempdir(), "keyword_app")
        if os.path.exists(app_temp_dir):
            shutil.rmtree(app_temp_dir, ignore_errors=True)
        os.makedirs(app_temp_dir, exist_ok=True)
        # 3. Publish the directory so get_app_temp_dir() can find it.
        os.environ['KEYWORD_APP_TEMP'] = app_temp_dir
        logger.info(f"β μ ν리μΌμ΄μ μ μ© μμ λλ ν 리 μ€μ : {app_temp_dir}")
        return app_temp_dir
    except Exception as e:
        logger.error(f"μμ νκ²½ μ€μ μ€ν¨: {e}")
        return tempfile.gettempdir()
def get_app_temp_dir():
    """Return the application-private temp directory.

    Reads the KEYWORD_APP_TEMP environment variable (published by
    setup_clean_temp_environment); falls back to the system temp dir
    when it has not been set.
    """
    fallback = tempfile.gettempdir()
    return os.environ.get('KEYWORD_APP_TEMP', fallback)
def get_session_id():
    """Generate a fresh session identifier (random UUID4 as a string)."""
    fresh_id = uuid.uuid4()
    return f"{fresh_id}"
def cleanup_session_files(session_id, delay=300):
    """Schedule deletion of a session's temp files after *delay* seconds.

    Spawns a daemon thread that sleeps, then removes every file registered
    for *session_id* and drops the registry entry. Deletion errors are
    logged and ignored.
    """
    def cleanup():
        time.sleep(delay)
        if session_id in session_temp_files:
            # Copy before deleting the registry entry so the iteration is safe
            # even if the session re-registers files concurrently.
            files_to_remove = session_temp_files[session_id].copy()
            del session_temp_files[session_id]
            for file_path in files_to_remove:
                try:
                    if os.path.exists(file_path):
                        os.remove(file_path)
                        logger.info(f"μΈμ {session_id[:8]}... μμ νμΌ μμ : {file_path}")
                except Exception as e:
                    logger.error(f"μΈμ {session_id[:8]}... νμΌ μμ  μ€λ₯: {e}")
    threading.Thread(target=cleanup, daemon=True).start()
def register_session_file(session_id, file_path):
    """Record *file_path* as owned by *session_id* so cleanup can remove it later."""
    session_temp_files.setdefault(session_id, []).append(file_path)
def cleanup_old_sessions():
    """Purge sessions idle for more than one hour.

    For each stale session: delete its registered temp files (errors logged
    and ignored), then drop it from both registries. Called periodically by
    the scheduler thread.
    """
    current_time = time.time()
    sessions_to_remove = []
    # Collect first, then mutate — avoids changing the dict while iterating it.
    for session_id, data in session_data.items():
        if current_time - data.get('last_activity', 0) > 3600:  # idle > 1 hour
            sessions_to_remove.append(session_id)
    for session_id in sessions_to_remove:
        # Remove the session's temp files.
        if session_id in session_temp_files:
            for file_path in session_temp_files[session_id]:
                try:
                    if os.path.exists(file_path):
                        os.remove(file_path)
                        logger.info(f"μ€λλ μΈμ {session_id[:8]}... νμΌ μμ : {file_path}")
                except Exception as e:
                    logger.error(f"μ€λλ μΈμ νμΌ μμ  μ€λ₯: {e}")
            del session_temp_files[session_id]
        # Drop the session metadata.
        if session_id in session_data:
            del session_data[session_id]
            logger.info(f"μ€λλ μΈμ λ°μ΄ν° μμ : {session_id[:8]}...")
def update_session_activity(session_id):
    """Stamp the session's last-activity time with the current clock."""
    entry = session_data.setdefault(session_id, {})
    entry['last_activity'] = time.time()
def create_session_temp_file(session_id, suffix='.xlsx'):
    """Create an empty, uniquely-named temp file for this session.

    The file lives in the app-private temp directory, is registered for
    later cleanup, and its path is returned.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    nonce = str(random.randint(1000, 9999))
    # Place the file inside the application-private temp directory.
    base_dir = get_app_temp_dir()
    temp_file_path = os.path.join(
        base_dir, f"session_{session_id[:8]}_{stamp}_{nonce}{suffix}"
    )
    # Touch the file so it exists on disk immediately.
    open(temp_file_path, 'w').close()
    register_session_file(session_id, temp_file_path)
    return temp_file_path
def wrapper_modified(keyword, korean_only, apply_main_keyword_option, exclude_zero_volume, session_id):
    """Keyword search & processing pipeline (session-aware).

    Returns a 9-tuple consumed positionally by the UI: (table html update,
    category-filter choices, volume-range choices, keywords DataFrame,
    analysis-category dropdown update, excel path, keyword-section visibility,
    category-section visibility, keyword state). Order is load-bearing.
    """
    update_session_activity(session_id)
    # Current keyword, tracked per session via the returned state slot.
    current_keyword = keyword
    # Empty keyword: reset the UI to its blank state.
    if not keyword:
        return (gr.update(value=""), gr.update(choices=["μ 체 보기"]), gr.update(choices=["μ 체"]),
                None, gr.update(choices=["μ 체 보기"], value="μ 체 보기"), None,
                gr.update(visible=False), gr.update(visible=False), current_keyword)
    # Query the Naver Shopping API (third arg: whether to apply the main keyword).
    search_results = product_search.fetch_naver_shopping_data(keyword, korean_only, apply_main_keyword_option == "λ©μΈν€μλ μ μ©")
    # No products found.
    if not search_results.get("product_list"):
        return (gr.update(value="<p>κ²μ κ²°κ³Όκ° μμ΅λλ€. λ€λ₯Έ ν€μλλ‘ μλν΄λ³΄μΈμ.</p>"),
                gr.update(choices=["μ 체 보기"]), gr.update(choices=["μ 체"]),
                None, gr.update(choices=["μ 체 보기"], value="μ 체 보기"), None,
                gr.update(visible=False), gr.update(visible=False), current_keyword)
    # Process results — forwards the keyword and the zero-volume exclusion flag.
    result = keyword_processor.process_search_results(search_results, current_keyword, exclude_zero_volume)
    df_products = result["products_df"]  # NOTE(review): not used further here
    df_keywords = result["keywords_df"]
    category_list = result["categories"]
    if df_keywords.empty:
        return (gr.update(value="<p>μΆμΆλ ν€μλκ° μμ΅λλ€. λ€λ₯Έ μ΅μ μΌλ‘ μλν΄λ³΄μΈμ.</p>"),
                gr.update(choices=["μ 체 보기"]), gr.update(choices=["μ 체"]),
                df_keywords, gr.update(choices=["μ 체 보기"], value="μ 체 보기"), None,
                gr.update(visible=False), gr.update(visible=False), current_keyword)
    # Render the results table as HTML.
    html = export_utils.create_table_without_checkboxes(df_keywords)
    # Distinct search-volume buckets for the filter dropdown.
    volume_range_choices = ["μ 체"] + sorted(df_keywords["κ²μλꡬκ°"].unique().tolist())
    # Default the analysis-category dropdown to the first available category.
    first_category = category_list[0] if category_list else "μ 체 보기"
    # Session-scoped Excel file for the download component.
    excel_path = create_session_excel_file(df_keywords, session_id)
    # Reveal the analysis sections.
    return (gr.update(value=html), gr.update(choices=category_list), gr.update(choices=volume_range_choices),
            df_keywords, gr.update(choices=category_list, value=first_category), excel_path,
            gr.update(visible=True), gr.update(visible=True), current_keyword)
def create_session_excel_file(df, session_id):
    """Persist *df* as a session-scoped .xlsx file.

    Returns the file path, or None on any failure (logged, never raised) —
    callers pass the result straight to the download component.
    """
    try:
        excel_path = create_session_temp_file(session_id, '.xlsx')
        df.to_excel(excel_path, index=False, engine='openpyxl')
        logger.info(f"μΈμ {session_id[:8]}... μμ νμΌ μμ±: {excel_path}")
        return excel_path
    except Exception as e:
        logger.error(f"μΈμ λ³ μμ νμΌ μμ± μ€λ₯: {e}")
        return None
def analyze_with_auto_download(analysis_keywords, selected_category, state_df, session_id):
    """Run the category-match analysis and prepare the auto-download file.

    Returns (analysis summary, excel path, result-section visibility update).
    """
    update_session_activity(session_id)
    # Guard: both the keywords and a category are required.
    missing_input = not analysis_keywords or not selected_category
    if missing_input:
        return "ν€μλμ μΉ΄ν κ³ λ¦¬λ₯Ό λͺ¨λ μ νν΄μ£ΌμΈμ.", None, gr.update(visible=False)
    # Run the analysis via the dynamically-loaded category_analysis module.
    summary = category_analysis.analyze_keywords_by_category(analysis_keywords, selected_category, state_df)
    # Build the session-scoped Excel download.
    download_path = create_session_excel_file(state_df, session_id)
    # Reveal the analysis-result output section.
    return summary, download_path, gr.update(visible=True)
def filter_and_sort_table(df, selected_cat, keyword_sort, total_volume_sort, usage_count_sort, selected_volume_range, exclude_zero_volume, session_id):
    """Filter and sort the keyword table; returns the rendered HTML (or "")."""
    update_session_activity(session_id)
    if df is None or df.empty:
        return ""
    # Work on a copy so the cached DataFrame in state stays untouched.
    filtered_df = df.copy()
    # --- Category filter ---
    if selected_cat and selected_cat != "μ 체 보기":
        # Dropdown labels look like "Name (count)"; strip the trailing count.
        cat_name_to_filter = selected_cat.rsplit(" (", 1)[0]
        filtered_df = filtered_df[filtered_df["κ΄λ ¨ μΉ΄ν κ³ λ¦¬"].astype(str).str.contains(cat_name_to_filter, case=False, na=False)]
        def get_filtered_category_display(current_categories_str):
            # Keep only the newline-separated category entries matching the
            # filter; fall back to the original text when nothing matches.
            if pd.isna(current_categories_str):
                return ""
            categories = str(current_categories_str).split('\n')
            matched_categories = [cat for cat in categories if cat_name_to_filter.lower() in cat.lower()]
            if matched_categories:
                return "\n".join(matched_categories)
            return current_categories_str
        filtered_df['κ΄λ ¨ μΉ΄ν κ³ λ¦¬'] = filtered_df['κ΄λ ¨ μΉ΄ν κ³ λ¦¬'].apply(get_filtered_category_display)
    # --- Search-volume bucket filter ---
    if selected_volume_range and selected_volume_range != "μ 체":
        filtered_df = filtered_df[filtered_df["κ²μλꡬκ°"] == selected_volume_range]
    # --- Exclude zero-volume keywords ---
    if exclude_zero_volume:
        filtered_df = filtered_df[filtered_df["μ΄κ²μλ"] > 0]
        logger.info(f"μΈμ {session_id[:8]}... κ²μλ 0 μ μΈ νν° μ μ© - λ¨μ ν€μλ μ: {len(filtered_df)}")
    # --- Sorting ---
    # NOTE(review): each sort_values call fully re-orders the frame, so when
    # several sort options are active the LAST applied one wins
    # (usage count > total volume > keyword).
    if keyword_sort != "μ λ ¬ μμ":
        is_ascending = keyword_sort == "μ€λ¦μ°¨μ"
        filtered_df = filtered_df.sort_values(by="μ‘°ν© ν€μλ", ascending=is_ascending)
    if total_volume_sort != "μ λ ¬ μμ":
        is_ascending = total_volume_sort == "μ€λ¦μ°¨μ"
        filtered_df = filtered_df.sort_values(by="μ΄κ²μλ", ascending=is_ascending)
    # Keyword usage-count sort.
    if usage_count_sort != "μ λ ¬ μμ":
        is_ascending = usage_count_sort == "μ€λ¦μ°¨μ"
        filtered_df = filtered_df.sort_values(by="ν€μλ μ¬μ©νμ", ascending=is_ascending)
    # Reset the index so row numbers render sequentially from 1.
    filtered_df = filtered_df.reset_index(drop=True)
    # Render the numbered HTML table.
    html = export_utils.create_table_without_checkboxes(filtered_df)
    return html
def update_category_selection(selected_cat, session_id):
    """Mirror the category-filter choice into the analysis-category dropdown."""
    update_session_activity(session_id)
    logger.debug(f"μΈμ {session_id[:8]}... μΉ΄ν κ³ λ¦¬ μ ν λ³κ²½: {selected_cat}")
    return gr.update(value=selected_cat)
def reset_interface(session_id):
    """Reset the interface: delete the session's temp files and return the
    default value for every output component.

    The returned tuple is consumed positionally by the reset button's outputs
    list — keep the order in sync with that wiring.
    """
    update_session_activity(session_id)
    # Delete this session's temp files before clearing the UI.
    if session_id in session_temp_files:
        for file_path in session_temp_files[session_id]:
            try:
                if os.path.exists(file_path):
                    os.remove(file_path)
                    logger.info(f"μΈμ {session_id[:8]}... 리μ μ νμΌ μμ : {file_path}")
            except Exception as e:
                logger.error(f"μΈμ {session_id[:8]}... 리μ μ νμΌ μμ  μ€λ₯: {e}")
        session_temp_files[session_id] = []
    return (
        "",                        # search keyword
        True,                      # Korean-only extraction
        False,                     # exclude zero-volume keywords
        "λ©μΈν€μλ μ μ©",          # combination mode
        "",                        # HTML table
        ["μ 체 보기"],             # category filter choices
        "μ 체 보기",               # category filter selection
        ["μ 체"],                  # volume-range filter choices
        "μ 체",                    # volume-range selection
        "μ λ ¬ μμ",               # total-volume sort
        "μ λ ¬ μμ",               # usage-count sort
        None,                      # state DataFrame
        ["μ 체 보기"],             # analysis-category choices
        "μ 체 보기",               # analysis-category selection
        "",                        # keyword input
        "",                        # analysis result
        None,                      # download file
        gr.update(visible=False),  # keyword analysis section
        gr.update(visible=False),  # analysis result output section
        ""                         # keyword state
    )
# Wrapper callbacks also carry the session id.
def search_with_loading(keyword, korean_only, apply_main_keyword, exclude_zero_volume, session_id):
    """First stage of the search click: show the progress panel, hide the placeholder."""
    update_session_activity(session_id)
    show_progress = gr.update(visible=True)
    hide_placeholder = gr.update(visible=False)
    return show_progress, hide_placeholder
def process_search_results(keyword, korean_only, apply_main_keyword, exclude_zero_volume, session_id):
    """Second stage of the search click: run the full pipeline and fan the
    results out to the UI components.

    Fixes vs. previous revision:
    - ``df is not None`` replaces the ``isinstance(df, type(None))`` anti-idiom.
    - ``cat_section_vis`` — already a ``gr.update(visible=...)`` object produced
      by ``wrapper_modified`` — is passed through unchanged. Previously it was
      re-wrapped as ``gr.update(visible=cat_section_vis)``, and since a dict is
      truthy the category section became visible even for empty results.
    """
    update_session_activity(session_id)
    result = wrapper_modified(keyword, korean_only, apply_main_keyword, exclude_zero_volume, session_id)
    (table_html, cat_choices, vol_choices, df, selected_cat, excel,
     keyword_section_vis, cat_section_vis, new_keyword_state) = result
    # Sections are shown only when the pipeline actually produced rows.
    has_rows = df is not None and not df.empty
    return (
        table_html, cat_choices, vol_choices, df, selected_cat, excel,
        gr.update(visible=has_rows),      # keyword analysis section
        cat_section_vis,                  # category analysis section (already an update)
        gr.update(visible=False),         # hide the progress panel
        gr.update(visible=not has_rows),  # empty placeholder table
        gr.update(visible=has_rows),      # execution section
        new_keyword_state
    )
def analyze_with_loading(analysis_keywords, selected_category, state_df, session_id):
    """First stage of the analyze click: reveal the progress panel."""
    update_session_activity(session_id)
    return gr.update(visible=True)
def process_analyze_results(analysis_keywords, selected_category, state_df, session_id):
    """Second stage of the analyze click: run the analysis, then hide the progress panel."""
    update_session_activity(session_id)
    outputs = analyze_with_auto_download(analysis_keywords, selected_category, state_df, session_id)
    hide_progress = gr.update(visible=False)
    return outputs + (hide_progress,)
# Session cleanup scheduler.
def start_session_cleanup_scheduler():
    """Start a daemon thread that purges stale sessions every 10 minutes."""
    def cleanup_scheduler():
        while True:
            time.sleep(600)  # run every 10 minutes
            cleanup_old_sessions()
            # Also periodically sweep the Hugging Face temp folders.
            cleanup_huggingface_temp_folders()
    threading.Thread(target=cleanup_scheduler, daemon=True).start()
def cleanup_on_startup():
    """Full cleanup at application start; returns the app-private temp directory."""
    logger.info("π§Ή μ ν리μΌμ΄μ μμ - μ΄κΈ° μ 리 μμ μμ...")
    # 1. Sweep Hugging Face temp folders.
    cleanup_huggingface_temp_folders()
    # 2. Set up a clean temp environment (also re-runs the sweep internally).
    app_temp_dir = setup_clean_temp_environment()
    # 3. Reset the global session registries.
    #    NOTE(review): `global` is not required for in-place .clear(); kept as-is.
    global session_temp_files, session_data
    session_temp_files.clear()
    session_data.clear()
    logger.info(f"β μ΄κΈ° μ 리 μμ μλ£ - μ± μ μ© λλ ν 리: {app_temp_dir}")
    return app_temp_dir
# Build the Gradio interface.
def create_app():
    """Construct the Gradio Blocks UI and wire all events; returns the app.

    Layout: search input → progress panel → results table (+ filters) →
    category analysis → execution buttons → analysis output. All callbacks
    receive the per-user ``session_id`` state as their last input.
    """
    fontawesome_html = """
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/orioncactus/pretendard/dist/web/static/pretendard.css">
    <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@300;400;500;700&display=swap">
    """
    # Load the external stylesheet; fall back to the inline CSS on any failure.
    try:
        with open('style.css', 'r', encoding='utf-8') as f:
            custom_css = f.read()
    except:  # NOTE(review): bare except — consider narrowing to OSError
        custom_css = """
        :root {
            --primary-color: #FB7F0D;
            --secondary-color: #ff9a8b;
        }
        .custom-button {
            background: linear-gradient(135deg, var(--primary-color), var(--secondary-color)) !important;
            color: white !important;
            border-radius: 30px !important;
            height: 45px !important;
            font-size: 16px !important;
            font-weight: bold !important;
            width: 100% !important;
            text-align: center !important;
            display: flex !important;
            align-items: center !important;
            justify-content: center !important;
        }
        .reset-button {
            background: linear-gradient(135deg, #6c757d, #495057) !important;
            color: white !important;
            border-radius: 30px !important;
            height: 45px !important;
            font-size: 16px !important;
            font-weight: bold !important;
            width: 100% !important;
            text-align: center !important;
            display: flex !important;
            align-items: center !important;
            justify-content: center !important;
        }
        .section-title {
            border-bottom: 2px solid #FB7F0D;
            font-weight: bold;
            padding-bottom: 5px;
            margin-bottom: 15px;
        }
        .loading-indicator {
            display: flex;
            align-items: center;
            justify-content: center;
            padding: 15px;
            background-color: #f8f9fa;
            border-radius: 5px;
            margin: 10px 0;
            border: 1px solid #ddd;
        }
        .loading-spinner {
            border: 4px solid rgba(0, 0, 0, 0.1);
            width: 24px;
            height: 24px;
            border-radius: 50%;
            border-left-color: #FB7F0D;
            animation: spin 1s linear infinite;
            margin-right: 10px;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        .progress-bar {
            height: 10px;
            background-color: #FB7F0D;
            border-radius: 5px;
            width: 0%;
            animation: progressAnim 2s ease-in-out infinite;
        }
        @keyframes progressAnim {
            0% { width: 10%; }
            50% { width: 70%; }
            100% { width: 10%; }
        }
        .empty-table {
            width: 100%;
            border-collapse: collapse;
            font-size: 14px;
            margin-top: 20px;
        }
        .empty-table th {
            background-color: #FB7F0D;
            color: white;
            text-align: left;
            padding: 12px;
            border: 1px solid #ddd;
        }
        .empty-table td {
            padding: 10px;
            border: 1px solid #ddd;
            text-align: center;
            color: #999;
        }
        .button-container {
            margin-top: 20px;
            display: flex;
            gap: 15px;
        }
        .execution-section {
            margin-top: 20px;
            background-color: #f9f9f9;
            border-radius: 8px;
            padding: 15px;
            border: 1px solid #e5e5e5;
        }
        .session-info {
            background-color: #e8f4f8;
            padding: 8px 12px;
            border-radius: 4px;
            font-size: 12px;
            color: #0c5460;
            margin-bottom: 10px;
            text-align: center;
        }
        """
    with gr.Blocks(css=custom_css, theme=gr.themes.Default(
        primary_hue="orange",
        secondary_hue="orange",
        font=[gr.themes.GoogleFont("Noto Sans KR"), "ui-sans-serif", "system-ui"]
    )) as demo:
        gr.HTML(fontawesome_html)
        # Session id state — unique per user/browser tab.
        session_id = gr.State(get_session_id)
        # Keyword state (last searched keyword).
        keyword_state = gr.State("")
        # --- Input section ---
        with gr.Column(elem_classes="custom-frame fade-in"):
            gr.HTML('<div class="section-title"><i class="fas fa-search"></i> κ²μ μ λ ₯</div>')
            with gr.Row():
                with gr.Column(scale=1):
                    keyword = gr.Textbox(
                        label="λ©μΈ ν€μλ",
                        placeholder="μ: μ€μ§μ΄"
                    )
                with gr.Column(scale=1):
                    search_btn = gr.Button(
                        "λ©μΈν€μλ λΆμ",
                        elem_classes="custom-button"
                    )
            with gr.Accordion("μ΅μ μ€μ ", open=False):
                with gr.Row():
                    with gr.Column(scale=1):
                        korean_only = gr.Checkbox(
                            label="νκΈλ§ μΆμΆ",
                            value=True
                        )
                    with gr.Column(scale=1):
                        exclude_zero_volume = gr.Checkbox(
                            label="κ²μλ 0 ν€μλ μ μΈ",
                            value=False
                        )
                with gr.Row():
                    with gr.Column(scale=1):
                        apply_main_keyword = gr.Radio(
                            ["λ©μΈν€μλ μ μ©", "λ©μΈν€μλ λ―Έμ μ©"],
                            label="μ‘°ν© λ°©μ",
                            value="λ©μΈν€μλ μ μ©"
                        )
                    with gr.Column(scale=1):
                        gr.HTML("")  # spacer column
        # --- Progress indicator section (hidden until a search starts) ---
        with gr.Column(elem_classes="custom-frame fade-in", visible=False) as progress_section:
            gr.HTML('<div class="section-title"><i class="fas fa-spinner"></i> λΆμ μ§ν μν</div>')
            progress_html = gr.HTML("""
            <div style="padding: 15px; background-color: #f9f9f9; border-radius: 5px; margin: 10px 0; border: 1px solid #ddd;">
                <div style="margin-bottom: 10px; display: flex; align-items: center;">
                    <i class="fas fa-spinner fa-spin" style="color: #FB7F0D; margin-right: 10px;"></i>
                    <span>ν€μλ λ°μ΄ν°λ₯Ό λΆμμ€μ λλ€. μ μλ§ κΈ°λ€λ €μ£ΌμΈμ...</span>
                </div>
                <div style="background-color: #e9ecef; height: 10px; border-radius: 5px; overflow: hidden;">
                    <div class="progress-bar"></div>
                </div>
            </div>
            """)
        # --- Main-keyword results section ---
        with gr.Column(elem_classes="custom-frame fade-in") as main_keyword_section:
            gr.HTML('<div class="section-title"><i class="fas fa-table"></i> λ©μΈν€μλ λΆμ κ²°κ³Ό</div>')
            # Placeholder table shown before the first search.
            empty_table_html = gr.HTML("""
            <table class="empty-table">
                <thead>
                    <tr>
                        <th>μλ²</th>
                        <th>μ‘°ν© ν€μλ</th>
                        <th>PCκ²μλ</th>
                        <th>λͺ¨λ°μΌκ²μλ</th>
                        <th>μ΄κ²μλ</th>
                        <th>κ²μλꡬκ°</th>
                        <th>ν€μλ μ¬μ©μμμ</th>
                        <th>ν€μλ μ¬μ©νμ</th>
                        <th>μν λ±λ‘ μΉ΄ν κ³ λ¦¬</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td colspan="9" style="padding: 30px; text-align: center;">
                            κ²μμ μ€ννλ©΄ μ¬κΈ°μ κ²°κ³Όκ° νμλ©λλ€
                        </td>
                    </tr>
                </tbody>
            </table>
            """)
            # Filter/sort controls, revealed once results exist.
            with gr.Column(visible=False) as keyword_analysis_section:
                with gr.Row():
                    with gr.Column(scale=1):
                        category_filter = gr.Dropdown(
                            choices=["μ 체 보기"],
                            label="μΉ΄ν κ³ λ¦¬ νν°",
                            value="μ 체 보기",
                            interactive=True
                        )
                    with gr.Column(scale=1):
                        total_volume_sort = gr.Dropdown(
                            choices=["μ λ ¬ μμ", "μ€λ¦μ°¨μ", "λ΄λ¦Όμ°¨μ"],
                            label="μ΄κ²μλ μ λ ¬",
                            value="μ λ ¬ μμ",
                            interactive=True
                        )
                with gr.Row():
                    with gr.Column(scale=1):
                        search_volume_filter = gr.Dropdown(
                            choices=["μ 체"],
                            label="κ²μλ κ΅¬κ° νν°",
                            value="μ 체",
                            interactive=True
                        )
                    with gr.Column(scale=1):
                        usage_count_sort = gr.Dropdown(
                            choices=["μ λ ¬ μμ", "μ€λ¦μ°¨μ", "λ΄λ¦Όμ°¨μ"],
                            label="ν€μλ μ¬μ©νμ μ λ ¬",
                            value="μ λ ¬ μμ",
                            interactive=True
                        )
                gr.HTML("<div class='data-container' id='table_container'></div>")
                table_output = gr.HTML(elem_classes="fade-in")
        # --- Category analysis section ---
        with gr.Column(elem_classes="custom-frame fade-in", visible=False) as category_analysis_section:
            gr.HTML('<div class="section-title"><i class="fas fa-chart-bar"></i> ν€μλ λΆμ</div>')
            with gr.Row():
                with gr.Column(scale=1):
                    analysis_keywords = gr.Textbox(
                        label="ν€μλ μ λ ₯ (μ΅λ 20κ°, μΌν λλ μν°λ‘ ꡬλΆ)",
                        placeholder="μ: μ€μ§μ΄λ³Άμ, μ€μ§μ΄ μμ§, μ€μ§μ΄ μ리...",
                        lines=5
                    )
                with gr.Column(scale=1):
                    selected_category = gr.Dropdown(
                        label="λΆμν μΉ΄ν κ³ λ¦¬(λΆμ μ λ°λμ μ νν΄μ£ΌμΈμ)",
                        choices=["μ 체 보기"],
                        value="μ 체 보기",
                        interactive=True
                    )
        # --- Execution section ---
        with gr.Column(elem_classes="execution-section", visible=False) as execution_section:
            gr.HTML('<div class="section-title"><i class="fas fa-play-circle"></i> μ€ν</div>')
            with gr.Row():
                with gr.Column(scale=1):
                    analyze_btn = gr.Button(
                        "μΉ΄ν κ³ λ¦¬ μΌμΉ λΆμ",
                        elem_classes=["execution-button", "primary-button"]
                    )
                with gr.Column(scale=1):
                    reset_btn = gr.Button(
                        "λͺ¨λ μ λ ₯ μ΄κΈ°ν",
                        elem_classes=["execution-button", "secondary-button"]
                    )
        # --- Analysis result output section ---
        with gr.Column(elem_classes="custom-frame fade-in", visible=False) as analysis_output_section:
            gr.HTML('<div class="section-title"><i class="fas fa-list-ul"></i> λΆμ κ²°κ³Ό μμ½</div>')
            analysis_result = gr.HTML(elem_classes="fade-in")
            with gr.Row():
                download_output = gr.File(
                    label="ν€μλ λͺ©λ‘ λ€μ΄λ‘λ",
                    visible=True
                )
        # State holding the current keywords DataFrame.
        state_df = gr.State()
        # --- Event wiring (every callback receives session_id) ---
        # Search: stage 1 shows progress, stage 2 runs the pipeline.
        search_btn.click(
            fn=search_with_loading,
            inputs=[keyword, korean_only, apply_main_keyword, exclude_zero_volume, session_id],
            outputs=[progress_section, empty_table_html]
        ).then(
            fn=process_search_results,
            inputs=[keyword, korean_only, apply_main_keyword, exclude_zero_volume, session_id],
            outputs=[
                table_output, category_filter, search_volume_filter,
                state_df, selected_category, download_output,
                keyword_analysis_section, category_analysis_section,
                progress_section, empty_table_html, execution_section,
                keyword_state
            ]
        )
        # Filter/sort change events all funnel into filter_and_sort_table.
        # NOTE(review): the inline gr.Textbox(value="μ λ ¬ μμ", visible=False)
        # creates a hidden component per wiring, pinning the keyword-sort
        # argument to its default.
        category_filter.change(
            fn=filter_and_sort_table,
            inputs=[
                state_df, category_filter, gr.Textbox(value="μ λ ¬ μμ", visible=False),
                total_volume_sort, usage_count_sort,
                search_volume_filter, exclude_zero_volume, session_id
            ],
            outputs=[table_output]
        )
        category_filter.change(
            fn=update_category_selection,
            inputs=[category_filter, session_id],
            outputs=[selected_category]
        )
        total_volume_sort.change(
            fn=filter_and_sort_table,
            inputs=[
                state_df, category_filter, gr.Textbox(value="μ λ ¬ μμ", visible=False),
                total_volume_sort, usage_count_sort,
                search_volume_filter, exclude_zero_volume, session_id
            ],
            outputs=[table_output]
        )
        usage_count_sort.change(
            fn=filter_and_sort_table,
            inputs=[
                state_df, category_filter, gr.Textbox(value="μ λ ¬ μμ", visible=False),
                total_volume_sort, usage_count_sort,
                search_volume_filter, exclude_zero_volume, session_id
            ],
            outputs=[table_output]
        )
        search_volume_filter.change(
            fn=filter_and_sort_table,
            inputs=[
                state_df, category_filter, gr.Textbox(value="μ λ ¬ μμ", visible=False),
                total_volume_sort, usage_count_sort,
                search_volume_filter, exclude_zero_volume, session_id
            ],
            outputs=[table_output]
        )
        exclude_zero_volume.change(
            fn=filter_and_sort_table,
            inputs=[
                state_df, category_filter, gr.Textbox(value="μ λ ¬ μμ", visible=False),
                total_volume_sort, usage_count_sort,
                search_volume_filter, exclude_zero_volume, session_id
            ],
            outputs=[table_output]
        )
        # Category analysis: stage 1 shows progress, stage 2 runs + hides it.
        analyze_btn.click(
            fn=analyze_with_loading,
            inputs=[analysis_keywords, selected_category, state_df, session_id],
            outputs=[progress_section]
        ).then(
            fn=process_analyze_results,
            inputs=[analysis_keywords, selected_category, state_df, session_id],
            outputs=[analysis_result, download_output, analysis_output_section, progress_section]
        )
        # Reset button — outputs must match reset_interface's tuple order.
        # NOTE(review): some components appear twice in this list (e.g.
        # category_filter) and therefore receive two successive tuple slots.
        reset_btn.click(
            fn=reset_interface,
            inputs=[session_id],
            outputs=[
                keyword, korean_only, exclude_zero_volume, apply_main_keyword,
                table_output, category_filter, category_filter,
                search_volume_filter, search_volume_filter,
                total_volume_sort, usage_count_sort,
                state_df, selected_category, selected_category,
                analysis_keywords, analysis_result, download_output,
                keyword_analysis_section, analysis_output_section,
                keyword_state
            ]
        )
    return demo
if __name__ == "__main__":
    # ========== Startup-time full initialization ==========
    logger.info("π λ©μΈν€μλ λΆμ μ ν리μΌμ΄μ μμ...")
    # 1. Clean the Hugging Face temp folders and set up the temp environment.
    app_temp_dir = cleanup_on_startup()
    # 2. Start the periodic session-cleanup scheduler.
    start_session_cleanup_scheduler()
    # 3. Initialize API configuration (non-fatal on failure).
    try:
        api_utils.initialize_api_configs()
    except Exception as e:
        logger.warning(f"API μ€μ  μ΄κΈ°ν μ€ μ€λ₯ (κ³μ μ§ν): {e}")
    # 4. Initialize the Gemini model (non-fatal on failure).
    try:
        gemini_model = text_utils.get_gemini_model()
    except Exception as e:
        logger.warning(f"Gemini λͺ¨λΈ μ΄κΈ°ν μ€ μ€λ₯ (κ³μ μ§ν): {e}")
    logger.info("===== λ©ν°μ μ  λ©μΈν€μλ λΆμ Application Startup at %s =====", time.strftime("%Y-%m-%d %H:%M:%S"))
    logger.info(f"π μμ νμΌ μ μ₯ μμΉ: {app_temp_dir}")
    # ========== Run the app ==========
    try:
        app = create_app()
        app.launch(
            share=False,            # sharing disabled for security
            server_name="0.0.0.0",  # accept connections from any interface
            server_port=7860,       # fixed port
            max_threads=40,         # raised for multi-user load
            auth=None,              # authentication can be added if needed
            show_error=True,        # surface errors in the UI
            quiet=False,            # keep logs visible
            favicon_path=None,      # default favicon
            ssl_verify=False        # SSL verification off (development)
        )
    except Exception as e:
        logger.error(f"μ ν리μΌμ΄μ μ€ν μ€ν¨: {e}")
        raise
    finally:
        # Final cleanup on shutdown.
        logger.info("π§Ή μ ν리μΌμ΄μ μ’ λ£ - μ΅μ’ μ 리 μμ ...")
        try:
            cleanup_huggingface_temp_folders()
            if os.path.exists(app_temp_dir):
                shutil.rmtree(app_temp_dir, ignore_errors=True)
            logger.info("β μ΅μ’ μ 리 μλ£")
        except Exception as e:
            logger.error(f"μ΅μ’ μ 리 μ€ μ€λ₯: {e}")