|
""" |
|
ํค์๋ ๊ฒ์๋ ์กฐํ ๊ด๋ จ ๊ธฐ๋ฅ |
|
- ๋ค์ด๋ฒ API๋ฅผ ํตํ ํค์๋ ๊ฒ์๋ ์กฐํ |
|
- ๊ฒ์๋ ๋ฐฐ์น ์ฒ๋ฆฌ |
|
""" |
|
|
|
import requests |
|
import time |
|
import random |
|
from concurrent.futures import ThreadPoolExecutor, as_completed |
|
import api_utils |
|
import logging |
|
|
|
|
|
# Module-level logger with its own stream handler so this module's output is
# visible even when the application has not configured the root logger.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Guard against attaching duplicate handlers when the module is imported more
# than once (e.g. under an auto-reloader) — without this, every log line would
# be emitted once per extra handler.
if not logger.handlers:
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)
|
|
|
def exponential_backoff_sleep(retry_count, base_delay=0.3, max_delay=5.0):
    """Sleep for an exponentially growing, jittered delay.

    The delay doubles with each retry (``base_delay * 2**retry_count``),
    capped at ``max_delay``; a random jitter of up to 50% of the capped
    delay is then added to spread out concurrent retriers.

    Args:
        retry_count: zero-based retry attempt number.
        base_delay: delay in seconds for the first attempt.
        max_delay: upper bound on the pre-jitter delay, in seconds.
    """
    capped = min(base_delay * (2 ** retry_count), max_delay)
    jitter = capped * random.uniform(0, 0.5)
    time.sleep(capped + jitter)
|
|
|
def _parse_count(raw):
    """Convert an API count value to int, tolerating comma-grouped strings.

    Naver returns counts either as ints or as strings (possibly with
    thousands separators, or non-numeric placeholders such as "< 10").
    Any value that cannot be parsed yields 0.
    """
    try:
        if isinstance(raw, str):
            return int(raw.replace(",", ""))
        return int(raw)
    except (ValueError, TypeError):
        # Narrowed from a bare `except:` — only conversion failures map to 0.
        return 0


def fetch_search_volume_batch(keywords_batch):
    """Fetch Naver search volumes for one batch of keywords.

    Keywords are normalized (stripped, inner spaces removed) before the
    request, since the keyword tool matches space-less keywords. The request
    is retried up to 3 times with exponential backoff, rotating credentials
    via api_utils.get_next_api_config() on each attempt.

    Args:
        keywords_batch: list of raw keyword strings (may be empty/None items).

    Returns:
        dict mapping each matched normalized keyword to a dict with keys
        "PC๊ฒ์๋", "๋ชจ๋ฐ์ผ๊ฒ์๋", "์ด๊ฒ์๋" (int counts). Keywords the API
        did not return data for are omitted; an empty dict on total failure.
    """
    # Normalize keywords: the keyword tool API matches keywords without spaces.
    keywords_batch = [(kw.strip().replace(" ", "") if kw else "") for kw in keywords_batch]

    result = {}
    max_retries = 3
    retry_count = 0

    while retry_count < max_retries:
        try:
            # Rotate across configured API credentials each attempt.
            api_config = api_utils.get_next_api_config()
            API_KEY = api_config["API_KEY"]
            SECRET_KEY = api_config["SECRET_KEY"]
            CUSTOMER_ID_STR = api_config["CUSTOMER_ID"]

            logger.debug(f"=== ํ๊ฒฝ ๋ณ์ ์ฒดํฌ (์๋ #{retry_count+1}) ===")
            logger.info(f"๋ฐฐ์น ํฌ๊ธฐ: {len(keywords_batch)}๊ฐ ํค์๋")

            is_valid, message = api_utils.validate_api_config(api_config)
            if not is_valid:
                logger.error(f"โ {message}")
                retry_count += 1
                exponential_backoff_sleep(retry_count)
                continue

            try:
                CUSTOMER_ID = int(CUSTOMER_ID_STR)
            except ValueError:
                logger.error(f"โ CUSTOMER_ID ๋ณํ ์ค๋ฅ: '{CUSTOMER_ID_STR}'๋ ์ ํจํ ์ซ์๊ฐ ์๋๋๋ค.")
                retry_count += 1
                exponential_backoff_sleep(retry_count)
                continue

            BASE_URL = "https://api.naver.com"
            uri = "/keywordstool"
            method = "GET"
            headers = api_utils.get_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID)

            params = {
                "hintKeywords": keywords_batch,
                "showDetail": "1"
            }

            logger.debug(f"์์ฒญ ํ๋ผ๋ฏธํฐ: {len(keywords_batch)}๊ฐ ํค์๋")

            response = requests.get(BASE_URL + uri, params=params, headers=headers, timeout=10)

            logger.debug(f"์๋ต ์ํ ์ฝ๋: {response.status_code}")

            if response.status_code != 200:
                logger.error(f"โ API ์ค๋ฅ ์๋ต (์๋ #{retry_count+1}):")
                logger.error(f" ๋ณธ๋ฌธ: {response.text}")
                retry_count += 1
                exponential_backoff_sleep(retry_count)
                continue

            result_data = response.json()

            logger.debug(f"์๋ต ๋ฐ์ดํฐ ๊ตฌ์กฐ:")
            logger.debug(f" ํ์: {type(result_data)}")
            logger.debug(f" ํค๋ค: {result_data.keys() if isinstance(result_data, dict) else 'N/A'}")

            if isinstance(result_data, dict) and "keywordList" in result_data:
                keyword_list = result_data["keywordList"]
                logger.debug(f" keywordList ๊ธธ์ด: {len(keyword_list)}")

                # Index API rows by relKeyword for O(1) lookups instead of the
                # original O(n*m) nested scan; setdefault keeps the FIRST row
                # per keyword, matching the original first-hit-then-break scan.
                rows_by_keyword = {}
                for item in keyword_list:
                    rows_by_keyword.setdefault(item.get("relKeyword", ""), item)

                for keyword in keywords_batch:
                    item = rows_by_keyword.get(keyword)
                    if item is None:
                        logger.warning(f"โ '{keyword}': ๋งค์นญ๋๋ ๋ฐ์ดํฐ๋ฅผ ์ฐพ์ ์ ์์")
                        continue

                    pc_count = _parse_count(item.get("monthlyPcQcCnt", 0))
                    mobile_count = _parse_count(item.get("monthlyMobileQcCnt", 0))
                    total_count = pc_count + mobile_count

                    result[keyword] = {
                        "PC๊ฒ์๋": pc_count,
                        "๋ชจ๋ฐ์ผ๊ฒ์๋": mobile_count,
                        "์ด๊ฒ์๋": total_count
                    }
                    logger.debug(f"โ '{keyword}': PC={pc_count}, Mobile={mobile_count}, Total={total_count}")

                # Successful response processed — stop retrying.
                break
            else:
                logger.error(f"โ keywordList๊ฐ ์์ (์๋ #{retry_count+1})")
                logger.error(f"์ ์ฒด ์๋ต: {result_data}")
                retry_count += 1
                exponential_backoff_sleep(retry_count)

        except Exception as e:
            # Network errors, JSON decode failures, etc. — log with traceback
            # and retry with backoff rather than aborting the whole run.
            logger.error(f"โ ๋ฐฐ์น ์ฒ๋ฆฌ ์ค ์ค๋ฅ (์๋ #{retry_count+1}): {str(e)}")
            import traceback
            logger.error(traceback.format_exc())
            retry_count += 1
            exponential_backoff_sleep(retry_count)

    logger.info(f"\n=== ๋ฐฐ์น ์ฒ๋ฆฌ ์๋ฃ ===")
    logger.info(f"์ฑ๊ณต์ ์ผ๋ก ์ฒ๋ฆฌ๋ ํค์๋ ์: {len(result)}")

    return result
|
|
|
def fetch_all_search_volumes(keywords, batch_size=5, max_workers=3):
    """Fetch Naver search volumes for all keywords using parallel batches.

    Splits `keywords` into batches of `batch_size` and runs
    fetch_search_volume_batch for each batch on a thread pool.

    Args:
        keywords: list of keyword strings to look up.
        batch_size: number of keywords per API request batch.
        max_workers: number of parallel worker threads (previously hard-coded
            to 3 both in the executor and, duplicated, in the log message;
            the default keeps the old behavior).

    Returns:
        dict merging the per-batch results (keyword -> volume dict).
        Batches that raised are logged and skipped.
    """
    results = {}

    # Slice the keyword list into consecutive fixed-size batches.
    batches = [keywords[i:i + batch_size] for i in range(0, len(keywords), batch_size)]

    logger.info(f"์ด {len(batches)}๊ฐ ๋ฐฐ์น๋ก {len(keywords)}๊ฐ ํค์๋ ์ฒ๋ฆฌ ์คโฆ")
    logger.info(f"๋ฐฐ์น ํฌ๊ธฐ: {batch_size}, ๋ณ๋ ฌ ์์ปค: {max_workers}๊ฐ, API ๊ณ์ : {len(api_utils.NAVER_API_CONFIGS)}๊ฐ ์์ฐจ ์ฌ์ฉ")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(fetch_search_volume_batch, batch): batch for batch in batches}
        for future in as_completed(futures):
            batch = futures[future]
            try:
                batch_results = future.result()
                results.update(batch_results)
                logger.info(f"๋ฐฐ์น ์ฒ๋ฆฌ ์๋ฃ: {len(batch)}๊ฐ ํค์๋ (์ฑ๊ณต: {len(batch_results)}๊ฐ)")
            except Exception as e:
                # A failed batch is logged and skipped; other batches still run.
                logger.error(f"๋ฐฐ์น ์ฒ๋ฆฌ ์ค๋ฅ: {e}")

    # Short post-run pause to stay gentle on API rate limits.
    exponential_backoff_sleep(0)

    logger.info(f"๊ฒ์๋ ์กฐํ ์๋ฃ: {len(results)}๊ฐ ํค์๋")
    return results