# Naver blog scraper exposed through a Gradio web UI.
import requests
from bs4 import BeautifulSoup
import gradio as gr
def convert_to_mobile_url(url):
    """Convert a desktop Naver blog URL into its mobile equivalent.

    A URL that is already mobile, is not a Naver blog URL, or lacks
    both a user id and a post id is returned unchanged.
    """
    # Already the mobile host — nothing to do.
    if "m.blog.naver.com" in url:
        return url
    # Not a Naver blog URL at all — leave untouched.
    if "blog.naver.com" not in url:
        return url
    parts = url.split("/")
    # Expect at least scheme, '', host, user id, post id.
    if len(parts) < 5:
        return url
    return f"https://m.blog.naver.com/{parts[3]}/{parts[4]}"
def scrape_naver_blog(url):
    """Scrape the title and text-only body of a Naver blog post.

    Parameters
    ----------
    url : str
        A desktop or mobile Naver blog post URL.

    Returns
    -------
    str
        A formatted "title + content" string on success, or an
        "Error: ..." message string on any failure.
    """
    try:
        # The mobile page has simpler markup, so scrape that variant.
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")
        # BUG FIX: the original call had no timeout, so a stalled
        # connection could hang the app forever.
        response = requests.get(mobile_url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Title: the SmartEditor title text module.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "์ ๋ชฉ์ ์ฐพ์ ์ ์์"
        # Body: every SmartEditor text module. NOTE(review): this selector
        # also matches the title module, so the title is repeated in the
        # content — presumably acceptable; confirm if exact output matters.
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "๋ด์ฉ์ ์ฐพ์ ์ ์์"
        # Debug output.
        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")
        result = f"์ ๋ชฉ: {title}\n\n๋ด์ฉ: {content}"
        return result
    except Exception as e:
        # Broad catch is intentional: any failure (network, parsing) is
        # reported back to the UI as a plain text message.
        print(f"Error: {e}")
        return f"Error: {e}"
# Gradio interface entry point.
def run_scraper(url):
    """Thin wrapper exposing ``scrape_naver_blog`` to the Gradio UI."""
    return scrape_naver_blog(url)
# Gradio UI: a single URL textbox in, the scraped text out.
interface = gr.Interface(
    fn=run_scraper,
    inputs=gr.Textbox(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ URL"),
    outputs=gr.Textbox(label="์คํฌ๋ํ ๊ฒฐ๊ณผ"),
    title="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ์คํฌ๋ํ",
    description="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ์ ์ ๋ชฉ๊ณผ ๋ด์ฉ์ ์คํฌ๋ํํฉ๋๋ค."
)

# Launch the web server only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()