# Source: Hugging Face Spaces - Rooobert / MOMO_PCHOME_Pytrend_R (Space status: Sleeping)
# File size: 8,621 bytes

import json
import os
import time
from urllib.parse import quote

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
from pytrends.request import TrendReq
from tenacity import retry, wait_exponential, stop_after_attempt

# Page heading shown at the top of the app.
st.title("🐣MOMO 🆚 PCHOME 商品搜索和 Google Trends 分析👁️‍🗨️")

# Search keyword; reused by the MOMO query, the PCHOME query and the
# Google Trends lookup further down.
keyword = st.text_input("請輸入要搜索的關鍵字: ", "筆電")

# Date pickers bounding the Google Trends window, followed by the page
# count (sent to MOMO as the page to fetch, and used by the PCHOME loop
# as the last page to fetch).
default_start = pd.to_datetime("2024-08-01")
default_end = pd.to_datetime("2024-08-11")
start_date = st.date_input("請選擇開始日期", value=default_start)
end_date = st.date_input("請選擇結束日期", value=default_end)
page_number = st.number_input(
    "請輸入要搜索的頁數: ",
    min_value=1,
    max_value=100,
    value=1,
    step=1,
)

# The two dates rendered as text and joined by a single space; this string
# is later passed to pytrends as the `timeframe` argument.
search_timeframe = " ".join(map(str, (start_date, end_date)))

# Everything below runs only when the search button is clicked.
if st.button("開始搜索"):
    # Wall-clock start; the elapsed time is reported at the end of the run.
    start_time = time.time()

    # MOMO scraping: POST the keyword to MOMO's text-search endpoint and
    # turn the returned goods list into a title / price / platform table.
    momo_url = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
    momo_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
    }
    momo_payload = {
        "host": "momoshop",
        "flag": "searchEngine",
        "data": {
            "searchValue": keyword,
            "curPage": str(page_number),
            "priceS": "0",
            "priceE": "9999999",
            "searchType": "1"
        }
    }

    # Bind momo_df before the request so the merge step further down always
    # has a frame to read, even when the request fails or returns nothing.
    momo_df = pd.DataFrame(columns=['title', 'price', 'platform'])
    momo_data = []
    try:
        # The timeout keeps a stalled connection from hanging the app.
        momo_response = requests.post(momo_url, headers=momo_headers, json=momo_payload, timeout=15)
        if momo_response.status_code == 200:
            # Either level may be missing or null; fall back to "no goods".
            momo_search_data = momo_response.json().get('rtnSearchData') or {}
            momo_data = momo_search_data.get('goodsInfoList') or []
        else:
            st.error(f"MOMO 請求失敗，狀態碼: {momo_response.status_code}")
    except (requests.RequestException, ValueError) as exc:
        # Network failures and non-JSON bodies are reported, not raised.
        st.error(f"MOMO 請求失敗: {exc}")

    if momo_data:
        momo_product_list = []
        for product in momo_data:
            name = product.get('goodsName', '')
            price = product.get('goodsPrice', '')
            # Keep the text before any '(' and strip thousands separators
            # and the currency sign before converting to a number.
            price_str = str(price).split('(')[0].replace(',', '').replace('$', '')
            try:
                product_price = float(price_str)
            except ValueError:
                # Unparseable prices are kept as 0; note that these
                # placeholders take part in the statistics below.
                product_price = 0
            momo_product_list.append({'title': name, 'price': product_price, 'platform': 'MOMO'})

        momo_df = pd.DataFrame(momo_product_list)
        st.write("MOMO 商品數據:", momo_df)

        # MOMO data analysis
        momo_avg_price = momo_df['price'].mean()
        st.write(f"MOMO 平均價格: {momo_avg_price:.2f}")
        st.write(f"MOMO 最高價格: {momo_df['price'].max():.2f}")
        st.write(f"MOMO 最低價格: {momo_df['price'].min():.2f}")

        # MOMO visualization with Plotly (first 70 products only)
        fig = px.scatter(momo_df[:70], x='title', y='price', hover_data=['title'],
                         title=f'MOMO 電商網站上 "{keyword}" 的銷售價格',
                         labels={'title': '商品名稱', 'price': '價格'})
        fig.update_xaxes(tickangle=45, tickmode='array', tickvals=list(range(len(momo_df[:70]))), ticktext=momo_df['title'][:70])
        # Dashed red reference line at the average price.
        fig.add_hline(y=momo_avg_price, line_dash="dash", line_color="red",
                      annotation_text=f"參考價格: {momo_avg_price:.2f}",
                      annotation_position="bottom right")
        fig.update_layout(height=600)
        st.plotly_chart(fig)

        # MOMO Sunburst Chart
        momo_sunburst_data = momo_df.copy()
        # include_lowest=True puts a price of exactly 0 (the parse-failure
        # placeholder above) into the first bin. Without it pd.cut leaves
        # such rows with a missing price_range, which px.sunburst does not
        # accept as the parent of a named leaf.
        momo_sunburst_data['price_range'] = pd.cut(momo_sunburst_data['price'],
                                                   bins=[0, 1000, 5000, 10000, 50000, float('inf')],
                                                   labels=['0-1000', '1001-5000', '5001-10000', '10001-50000', '50000+'],
                                                   include_lowest=True)
        fig = px.sunburst(momo_sunburst_data, path=['price_range', 'title'], values='price',
                          title=f'MOMO "{keyword}" 價格分佈 (Sunburst 圖)')
        fig.update_layout(height=800)
        st.plotly_chart(fig)
    else:
        # Nothing to analyse or plot: say so instead of failing on an
        # empty table.
        st.warning("MOMO 沒有取得任何商品數據")
    
    # PCHOME scraping: fetch result pages 1..page_number from PCHOME's
    # search endpoint and stack the products into one table.
    pchome_base_url = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q='
    # Percent-encode the keyword so characters such as '&', '#', '+' or
    # spaces cannot change the meaning of the query string.
    encoded_keyword = quote(keyword, safe='')
    pchome_pages = []

    for i in range(1, page_number + 1):
        pchome_url = f'{pchome_base_url}{encoded_keyword}&page={i}&sort=sale/dc'
        try:
            # The timeout keeps a stalled connection from hanging the app.
            pchome_response = requests.get(pchome_url, timeout=15)
        except requests.RequestException as exc:
            st.error(f"PCHOME 請求失敗: {exc}")
            continue
        if pchome_response.status_code != 200:
            st.error(f"PCHOME 請求失敗，狀態碼: {pchome_response.status_code}")
            continue
        try:
            pchome_json_data = pchome_response.json()
        except ValueError as exc:
            st.error(f"PCHOME 回應解析失敗: {exc}")
            continue

        # A response without a usable 'prods' list means there is nothing
        # (more) to collect, so stop paging.
        prods = pchome_json_data.get('prods') if isinstance(pchome_json_data, dict) else None
        if not prods:
            break
        pchome_df = pd.DataFrame(prods)

        # Safely select only available columns
        available_columns = ['name', 'describe', 'price', 'orig']
        selected_columns = [col for col in available_columns if col in pchome_df.columns]
        # Without a price column this page cannot feed the statistics.
        if 'price' not in selected_columns:
            continue
        # .copy() so the column assignments below act on an independent
        # frame rather than on a slice of the original.
        pchome_df = pchome_df[selected_columns].copy()
        if 'orig' in pchome_df.columns:
            pchome_df = pchome_df.rename(columns={'orig': 'original_price'})
        pchome_df['platform'] = 'PCHOME'  # Add platform identifier
        pchome_df['price'] = pchome_df['price'].astype(float)  # Ensure price is float
        pchome_pages.append(pchome_df)
        # Pause between consecutive page requests; not needed after the last.
        if i < page_number:
            time.sleep(1)

    # Concatenate once, with a fresh 0..n-1 index, instead of growing the
    # frame page by page with repeated row labels.
    pchome_data = pd.concat(pchome_pages, ignore_index=True) if pchome_pages else pd.DataFrame()

    if not pchome_data.empty:
        st.write("PCHOME 商品數據:", pchome_data)

        # PCHOME data analysis
        pchome_avg_price = pchome_data['price'].mean()
        st.write(f"PCHOME 平均價格: {pchome_avg_price:.2f}")
        st.write(f"PCHOME 最高價格: {pchome_data['price'].max():.2f}")
        st.write(f"PCHOME 最低價格: {pchome_data['price'].min():.2f}")

        # PCHOME visualization with Plotly (first 70 products only)
        fig = px.scatter(pchome_data[:70], x='name', y='price', hover_data=['name'],
                         title=f'PCHOME 電商網站上 "{keyword}" 的銷售價格',
                         labels={'name': '商品名稱', 'price': '價格'})
        fig.update_xaxes(tickangle=45, tickmode='array', tickvals=list(range(len(pchome_data[:70]))), ticktext=pchome_data['name'][:70])
        # Dashed red reference line at the average price.
        fig.add_hline(y=pchome_avg_price, line_dash="dash", line_color="red",
                      annotation_text=f"參考價格: {pchome_avg_price:.2f}",
                      annotation_position="bottom right")
        fig.update_layout(height=600)
        st.plotly_chart(fig)

        # PCHOME Sunburst Chart
        pchome_sunburst_data = pchome_data.copy()
        # include_lowest=True keeps a price of exactly 0 inside the first
        # bin; otherwise pd.cut would leave it without a price_range.
        pchome_sunburst_data['price_range'] = pd.cut(pchome_sunburst_data['price'],
                                                     bins=[0, 1000, 5000, 10000, 50000, float('inf')],
                                                     labels=['0-1000', '1001-5000', '5001-10000', '10001-50000', '50000+'],
                                                     include_lowest=True)
        fig = px.sunburst(pchome_sunburst_data, path=['price_range', 'name'], values='price',
                          title=f'PCHOME "{keyword}" 價格分佈 (Sunburst 圖)')
        fig.update_layout(height=800)
        st.plotly_chart(fig)
    else:
        st.warning("PCHOME 沒有取得任何商品數據")
    
    # Combine MOMO and PCHOME data into one table and report the overall
    # average price.
    try:
        momo_part = momo_df
    except NameError:
        # momo_df may never have been assigned if the MOMO request did not
        # succeed; treat that as "no MOMO rows" instead of crashing here.
        momo_part = pd.DataFrame()
    # The MOMO table stores product names under 'title' while the PCHOME
    # table uses 'name'; align them so the merged table has one name column.
    pchome_part = pchome_data.rename(columns={'name': 'title'})
    # Skip empty frames so they do not take part in the concatenation.
    non_empty_frames = [frame for frame in (momo_part, pchome_part) if not frame.empty]
    combined_data = pd.concat(non_empty_frames, ignore_index=True) if non_empty_frames else pd.DataFrame()

    st.write("合併的商品數據:", combined_data)

    # Data analysis on combined data
    combined_avg_price = float('nan')
    if not combined_data.empty and 'price' in combined_data.columns:
        combined_avg_price = combined_data['price'].mean()
        st.write(f"合併後的平均價格: {combined_avg_price:.2f}")
    else:
        st.warning("沒有可合併的商品數據，無法計算平均價格")

    # Google Trends analysis: plot the interest-over-time series for the
    # keyword within the selected date window.
    st.subheader("Google趨勢分析")

    # Retry mechanism with exponential backoff (waits clamped to 4-60 s,
    # at most 5 attempts). reraise=True makes the last underlying exception
    # propagate, so the error shown to the user names the real cause
    # rather than tenacity's RetryError wrapper.
    @retry(wait=wait_exponential(multiplier=1, min=4, max=60), stop=stop_after_attempt(5), reraise=True)
    def fetch_trends_data(pytrend):
        """Return the interest-over-time DataFrame for the payload already built on *pytrend*."""
        return pytrend.interest_over_time()

    if start_date > end_date:
        # A reversed window is not a valid timeframe; skip the lookup.
        st.error("開始日期不能晚於結束日期，已略過 Google 趨勢分析")
    else:
        try:
            # Client construction and payload building also live inside the
            # try block, so failures there are reported like any other
            # Trends error instead of aborting the app.
            pytrend = TrendReq(hl="zh-TW", tz=-480)
            # NOTE(review): cat=3 appears to be "Arts & Entertainment" in
            # Google Trends' category list - confirm this restriction is
            # intended for product keywords.
            pytrend.build_payload(
                kw_list=[keyword],
                cat=3,
                timeframe=search_timeframe,
                geo="TW",
                gprop=""
            )

            trends_df = fetch_trends_data(pytrend)
            if trends_df.empty or keyword not in trends_df.columns:
                # No series to plot for this keyword / window.
                st.warning("Google 趨勢沒有返回任何數據")
            else:
                # Drop the partial-data flag column when present so that
                # only the interest series remains.
                trends_df = trends_df.drop(columns=["isPartial"], errors="ignore")

                # Build the trend line chart with Plotly
                fig = px.line(trends_df, x=trends_df.index, y=keyword,
                              title=f"Google趨勢 - '{keyword}' 的趨勢分析")
                fig.update_traces(mode='lines+markers')
                fig.update_layout(xaxis_title="時間", yaxis_title="興趣指數", height=600)
                st.plotly_chart(fig)

                # Show summary statistics of the trend data
                st.write("趨勢數據統計:")
                st.write(trends_df.describe())

        except Exception as e:
            # Top-level boundary for the Trends section: report and carry on.
            st.error(f"獲取Google趨勢數據時出錯: {e}")

    # Report how long the whole search run took, measured from the moment
    # the button handler started.
    elapsed_seconds = time.time() - start_time
    st.write(f"執行時間: {elapsed_seconds:.2f} 秒")