"""
ํ‚ค์›Œ๋“œ ๊ฒ€์ƒ‰๋Ÿ‰ ์กฐํšŒ ๊ด€๋ จ ๊ธฐ๋Šฅ
- ๋„ค์ด๋ฒ„ API๋ฅผ ํ†ตํ•œ ํ‚ค์›Œ๋“œ ๊ฒ€์ƒ‰๋Ÿ‰ ์กฐํšŒ
- ๊ฒ€์ƒ‰๋Ÿ‰ ๋ฐฐ์น˜ ์ฒ˜๋ฆฌ
"""
import logging
import random
import time
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests

import api_utils

# ๋กœ๊น… ์„ค์ •
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)


def exponential_backoff_sleep(retry_count, base_delay=0.3, max_delay=5.0):
    """Sleep for an exponentially backed-off delay based on the retry count."""
    delay = min(base_delay * (2 ** retry_count), max_delay)
    # Add some randomness (jitter) to avoid synchronized retries
    jitter = random.uniform(0, 0.5) * delay
    time.sleep(delay + jitter)
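
# With the defaults (base_delay=0.3, max_delay=5.0) the base delays before
# jitter are: retry 0 -> 0.3s, 1 -> 0.6s, 2 -> 1.2s, 3 -> 2.4s, 4 -> 4.8s,
# and 5+ -> capped at 5.0s; jitter then adds up to 50% on top of each delay.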


def fetch_search_volume_batch(keywords_batch):
    """Fetch Naver search volumes for one batch of keywords."""
    # 1. Preprocess the batch: trim each keyword and strip internal spaces
    keywords_batch = [kw.strip().replace(" ", "") if kw else "" for kw in keywords_batch]
    result = {}
    max_retries = 3
    retry_count = 0
    while retry_count < max_retries:
        try:
            # Get the next API config in rotation (called once per batch)
            api_config = api_utils.get_next_api_config()
            API_KEY = api_config["API_KEY"]
            SECRET_KEY = api_config["SECRET_KEY"]
            CUSTOMER_ID_STR = api_config["CUSTOMER_ID"]
            logger.debug(f"=== API config check (attempt #{retry_count+1}) ===")
            logger.info(f"Batch size: {len(keywords_batch)} keywords")
            # Validate the API config
            is_valid, message = api_utils.validate_api_config(api_config)
            if not is_valid:
                logger.error(f"❌ {message}")
                retry_count += 1
                exponential_backoff_sleep(retry_count)
                continue
# CUSTOMER_ID๋ฅผ ์ •์ˆ˜๋กœ ๋ณ€ํ™˜
try:
CUSTOMER_ID = int(CUSTOMER_ID_STR)
except ValueError:
logger.error(f"โŒ CUSTOMER_ID ๋ณ€ํ™˜ ์˜ค๋ฅ˜: '{CUSTOMER_ID_STR}'๋Š” ์œ ํšจํ•œ ์ˆซ์ž๊ฐ€ ์•„๋‹™๋‹ˆ๋‹ค.")
retry_count += 1
exponential_backoff_sleep(retry_count)
continue
            BASE_URL = "https://api.naver.com"
            uri = "/keywordstool"
            method = "GET"
            headers = api_utils.get_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID)
            # Send the whole keyword batch in one request; passing a list makes
            # requests encode hintKeywords as repeated query parameters
            params = {
                "hintKeywords": keywords_batch,
                "showDetail": "1"
            }
            logger.debug(f"Request parameters: {len(keywords_batch)} keywords")
            # Call the API
            response = requests.get(BASE_URL + uri, params=params, headers=headers, timeout=10)
            logger.debug(f"Response status code: {response.status_code}")
            if response.status_code != 200:
                logger.error(f"❌ API error response (attempt #{retry_count+1}):")
                logger.error(f"    Body: {response.text}")
                retry_count += 1
                exponential_backoff_sleep(retry_count)
                continue
# ์‘๋‹ต ๋ฐ์ดํ„ฐ ํŒŒ์‹ฑ
result_data = response.json()
logger.debug(f"์‘๋‹ต ๋ฐ์ดํ„ฐ ๊ตฌ์กฐ:")
logger.debug(f" ํƒ€์ž…: {type(result_data)}")
logger.debug(f" ํ‚ค๋“ค: {result_data.keys() if isinstance(result_data, dict) else 'N/A'}")
if isinstance(result_data, dict) and "keywordList" in result_data:
logger.debug(f" keywordList ๊ธธ์ด: {len(result_data['keywordList'])}")
# ๋ฐฐ์น˜ ๋‚ด ๊ฐ ํ‚ค์›Œ๋“œ์™€ ๋งค์นญ
for keyword in keywords_batch:
found = False
for item in result_data["keywordList"]:
rel_keyword = item.get("relKeyword", "")
if rel_keyword == keyword:
pc_count = item.get("monthlyPcQcCnt", 0)
mobile_count = item.get("monthlyMobileQcCnt", 0)
# ์ˆซ์ž ๋ณ€ํ™˜
try:
if isinstance(pc_count, str):
pc_count_converted = int(pc_count.replace(",", ""))
else:
pc_count_converted = int(pc_count)
except:
pc_count_converted = 0
try:
if isinstance(mobile_count, str):
mobile_count_converted = int(mobile_count.replace(",", ""))
else:
mobile_count_converted = int(mobile_count)
except:
mobile_count_converted = 0
total_count = pc_count_converted + mobile_count_converted
result[keyword] = {
"PC๊ฒ€์ƒ‰๋Ÿ‰": pc_count_converted,
"๋ชจ๋ฐ”์ผ๊ฒ€์ƒ‰๋Ÿ‰": mobile_count_converted,
"์ด๊ฒ€์ƒ‰๋Ÿ‰": total_count
}
logger.debug(f"โœ… '{keyword}': PC={pc_count_converted}, Mobile={mobile_count_converted}, Total={total_count}")
found = True
break
if not found:
logger.warning(f"โŒ '{keyword}': ๋งค์นญ๋˜๋Š” ๋ฐ์ดํ„ฐ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Œ")
# ์„ฑ๊ณต์ ์œผ๋กœ ๋ฐ์ดํ„ฐ๋ฅผ ๊ฐ€์ ธ์™”์œผ๋ฏ€๋กœ ๋ฃจํ”„ ์ข…๋ฃŒ
break
            else:
                logger.error(f"❌ keywordList missing from response (attempt #{retry_count+1})")
                logger.error(f"Full response: {result_data}")
                retry_count += 1
                exponential_backoff_sleep(retry_count)
        except Exception as e:
            logger.error(f"❌ Error while processing batch (attempt #{retry_count+1}): {str(e)}")
            logger.error(traceback.format_exc())
            retry_count += 1
            exponential_backoff_sleep(retry_count)
logger.info(f"\n=== ๋ฐฐ์น˜ ์ฒ˜๋ฆฌ ์™„๋ฃŒ ===")
logger.info(f"์„ฑ๊ณต์ ์œผ๋กœ ์ฒ˜๋ฆฌ๋œ ํ‚ค์›Œ๋“œ ์ˆ˜: {len(result)}")
return result
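
# For reference, a successful lookup maps each keyword to a dict with the
# Korean result keys used above; illustrative values only:
#   {"캠핑의자": {"PC검색량": 1200, "모바일검색량": 3400, "총검색량": 4600}}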


def fetch_all_search_volumes(keywords, batch_size=5):
    """Fetch Naver search volumes for a list of keywords in parallel."""
    results = {}
    batches = []
    # Group the keywords into batches of batch_size (default 5)
    for i in range(0, len(keywords), batch_size):
        batch = keywords[i:i + batch_size]
        batches.append(batch)
    logger.info(f"Processing {len(keywords)} keywords in {len(batches)} batches…")
    logger.info(f"Batch size: {batch_size}, parallel workers: 3, API accounts used in rotation: {len(api_utils.NAVER_API_CONFIGS)}")
    with ThreadPoolExecutor(max_workers=3) as executor:  # cap the worker count
        futures = {executor.submit(fetch_search_volume_batch, batch): batch for batch in batches}
        for future in as_completed(futures):
            batch = futures[future]
            try:
                batch_results = future.result()
                results.update(batch_results)
                logger.info(f"Batch finished: {len(batch)} keywords ({len(batch_results)} succeeded)")
            except Exception as e:
                logger.error(f"Batch processing error: {e}")
            # Brief pause between completed batches to ease API rate limits
            exponential_backoff_sleep(0)  # retry_count 0 -> base delay only
    logger.info(f"Search volume lookup finished: {len(results)} keywords")
    return results
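

# A minimal usage sketch, assuming api_utils is configured with at least one
# account in NAVER_API_CONFIGS; the sample keywords below are placeholders.
if __name__ == "__main__":
    sample_keywords = ["캠핑의자", "노트북거치대", "텀블러"]
    volumes = fetch_all_search_volumes(sample_keywords, batch_size=5)
    for kw, counts in volumes.items():
        print(kw, counts)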