File size: 8,621 Bytes
253a096
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
943ec55
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import os
import requests
import pandas as pd
import streamlit as st
import json
import time
from pytrends.request import TrendReq
import plotly.express as px
import plotly.graph_objects as go
from tenacity import retry, wait_exponential, stop_after_attempt

# ---------------------------------------------------------------------------
# Settings shared by the scrapers and the charts
# ---------------------------------------------------------------------------
REQUEST_TIMEOUT_SECONDS = 15  # fail instead of hanging the Streamlit run forever
REQUEST_PAUSE_SECONDS = 1     # pause between result pages of the same shop
MAX_CHART_ITEMS = 70          # the scatter plots only show the first 70 products

MOMO_URL = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
MOMO_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
}
MOMO_COLUMNS = ['title', 'price', 'platform']

# The sort option stays in the URL so its "/" is sent exactly as before; the
# user-supplied keyword and the page number go through ``params`` so that
# requests URL-encodes them.
PCHOME_URL = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?sort=sale/dc'
PCHOME_COLUMNS = ['name', 'describe', 'price', 'orig']

PRICE_BINS = [0, 1000, 5000, 10000, 50000, float('inf')]
PRICE_LABELS = ['0-1000', '1001-5000', '5001-10000', '10001-50000', '50000+']


# ---------------------------------------------------------------------------
# Pure data helpers (no network, no Streamlit)
# ---------------------------------------------------------------------------
def parse_momo_price(raw_price) -> float:
    """Convert a MOMO price value to a float.

    Everything from the first "(" on is discarded, then "," and "$" are
    removed. Values that still cannot be parsed yield 0.0.
    """
    price_text = str(raw_price).split('(')[0].replace(',', '').replace('$', '')
    try:
        return float(price_text)
    except ValueError:
        return 0.0


def momo_products_to_frame(payload: dict) -> pd.DataFrame:
    """Build a title/price/platform DataFrame from a MOMO search response.

    Missing or null ``rtnSearchData`` / ``goodsInfoList`` entries produce an
    empty frame that still carries the three expected columns.
    """
    products = (payload.get('rtnSearchData') or {}).get('goodsInfoList') or []
    rows = [
        {
            'title': product.get('goodsName', ''),
            'price': parse_momo_price(product.get('goodsPrice', '')),
            'platform': 'MOMO',
        }
        for product in products
    ]
    return pd.DataFrame(rows, columns=MOMO_COLUMNS)


def pchome_page_to_frame(payload: dict) -> pd.DataFrame:
    """Build a DataFrame from one page of a PCHOME search response.

    Keeps whichever of name/describe/price/orig are present, renames ``orig``
    to ``original_price`` and tags every row with platform "PCHOME". Returns
    an empty frame when the page has no products or lacks the ``name`` or
    ``price`` column the report needs.
    """
    page = pd.DataFrame(payload.get('prods') or [])
    if page.empty or not {'name', 'price'} <= set(page.columns):
        return pd.DataFrame()

    selected_columns = [col for col in PCHOME_COLUMNS if col in page.columns]
    page = page[selected_columns].rename(columns={'orig': 'original_price'})
    # Non-numeric prices become NaN instead of aborting the whole search.
    return page.assign(
        price=pd.to_numeric(page['price'], errors='coerce').astype(float),
        platform='PCHOME',
    )


def add_price_range(products: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *products* with a categorical ``price_range`` column.

    ``include_lowest=True`` puts a price of exactly 0 (the fallback used for
    unparsable MOMO prices) into the first bucket. Rows whose price falls in
    no bucket (NaN or negative) are dropped, because a missing parent level
    makes ``px.sunburst`` raise.
    """
    bucketed = products.copy()
    bucketed['price_range'] = pd.cut(
        bucketed['price'], bins=PRICE_BINS, labels=PRICE_LABELS, include_lowest=True
    )
    return bucketed.dropna(subset=['price_range'])


def combine_products(momo_df: pd.DataFrame, pchome_df: pd.DataFrame) -> pd.DataFrame:
    """Stack the MOMO and PCHOME results into one table.

    PCHOME's ``name`` column is renamed to ``title`` so both shops share a
    single product-name column. Empty inputs are skipped; the result is an
    empty frame when both are empty.
    """
    frames = []
    if not momo_df.empty:
        frames.append(momo_df)
    if not pchome_df.empty:
        frames.append(pchome_df.rename(columns={'name': 'title'}))
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def collect_pages(page_count: int, fetch_page) -> pd.DataFrame:
    """Call ``fetch_page(page)`` for pages 1..page_count and stack the results.

    ``fetch_page`` must return a DataFrame (possibly empty). The function
    sleeps between pages, not after the last one. Returns an empty frame when
    no page yielded rows.
    """
    frames = []
    for page in range(1, page_count + 1):
        page_frame = fetch_page(page)
        if not page_frame.empty:
            frames.append(page_frame)
        if page < page_count:
            time.sleep(REQUEST_PAUSE_SECONDS)
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


# ---------------------------------------------------------------------------
# Network access
# ---------------------------------------------------------------------------
def request_json(platform: str, method: str, url: str, **request_kwargs):
    """Send one HTTP request and return the decoded JSON object.

    Any failure (network error, non-200 status, body that is not a JSON
    object) is reported with ``st.error`` and results in ``None``.
    """
    try:
        response = requests.request(
            method, url, timeout=REQUEST_TIMEOUT_SECONDS, **request_kwargs
        )
    except requests.RequestException as exc:
        st.error(f"{platform} 請求失敗: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"{platform} 請求失敗,狀態碼: {response.status_code}")
        return None

    try:
        payload = response.json()
    except ValueError:
        payload = None
    if not isinstance(payload, dict):
        st.error(f"{platform} 回應不是有效的 JSON")
        return None
    return payload


def fetch_momo_products(keyword: str, page_count: int) -> pd.DataFrame:
    """Fetch MOMO search results for pages 1..page_count.

    Pages are fetched from 1 up to *page_count*, the same way the PCHOME
    search does, so both shops cover the same number of result pages.
    """
    def fetch_page(page):
        body = {
            "host": "momoshop",
            "flag": "searchEngine",
            "data": {
                "searchValue": keyword,
                "curPage": str(page),
                "priceS": "0",
                "priceE": "9999999",
                "searchType": "1"
            }
        }
        payload = request_json('MOMO', 'post', MOMO_URL, headers=MOMO_HEADERS, json=body)
        return momo_products_to_frame(payload or {})

    return collect_pages(page_count, fetch_page)


def fetch_pchome_products(keyword: str, page_count: int) -> pd.DataFrame:
    """Fetch PCHOME search results for pages 1..page_count."""
    def fetch_page(page):
        payload = request_json('PCHOME', 'get', PCHOME_URL,
                               params={'q': keyword, 'page': page})
        return pchome_page_to_frame(payload or {})

    return collect_pages(page_count, fetch_page)


# ---------------------------------------------------------------------------
# Streamlit rendering
# ---------------------------------------------------------------------------
def render_platform_report(products: pd.DataFrame, platform: str,
                           name_column: str, keyword: str) -> None:
    """Show the table, price statistics, scatter plot and sunburst for one shop.

    *name_column* is the column holding the product name ('title' for MOMO,
    'name' for PCHOME). An empty *products* frame only produces a warning.
    """
    if products.empty:
        st.warning(f"{platform} 沒有取得任何商品數據")
        return

    st.write(f"{platform} 商品數據:", products)

    # Price statistics
    average_price = products['price'].mean()
    st.write(f"{platform} 平均價格: {average_price:.2f}")
    st.write(f"{platform} 最高價格: {products['price'].max():.2f}")
    st.write(f"{platform} 最低價格: {products['price'].min():.2f}")

    # Scatter plot of the first products, with the average as reference line
    charted = products[:MAX_CHART_ITEMS]
    fig = px.scatter(charted, x=name_column, y='price', hover_data=[name_column],
                     title=f'{platform} 電商網站上 "{keyword}" 的銷售價格',
                     labels={name_column: '商品名稱', 'price': '價格'})
    fig.update_xaxes(tickangle=45, tickmode='array',
                     tickvals=list(range(len(charted))),
                     ticktext=charted[name_column].tolist())
    fig.add_hline(y=average_price, line_dash="dash", line_color="red",
                  annotation_text=f"參考價格: {average_price:.2f}",
                  annotation_position="bottom right")
    fig.update_layout(height=600)
    st.plotly_chart(fig)

    # Sunburst chart: price bucket -> product
    bucketed = add_price_range(products)
    if bucketed.empty:
        return
    fig = px.sunburst(bucketed, path=['price_range', name_column], values='price',
                      title=f'{platform} "{keyword}" 價格分佈 (Sunburst 圖)')
    fig.update_layout(height=800)
    st.plotly_chart(fig)


def render_combined_report(momo_df: pd.DataFrame, pchome_df: pd.DataFrame) -> None:
    """Show the merged product table and its average price."""
    combined_data = combine_products(momo_df, pchome_df)
    if combined_data.empty:
        st.warning("兩個平台都沒有取得商品數據")
        return
    st.write("合併的商品數據:", combined_data)
    st.write(f"合併後的平均價格: {combined_data['price'].mean():.2f}")


def render_google_trends(keyword: str, start_date, end_date) -> None:
    """Fetch and plot Google Trends interest for *keyword* in Taiwan.

    *start_date* and *end_date* are the values returned by ``st.date_input``;
    they are formatted as "<start> <end>" for the pytrends ``timeframe``.
    """
    st.subheader("Google趨勢分析")

    if start_date > end_date:
        st.error("開始日期不能晚於結束日期")
        return
    search_timeframe = f"{start_date} {end_date}"

    # Retry with exponential backoff; reraise=True surfaces the real error
    # instead of tenacity's RetryError once the attempts are used up.
    @retry(wait=wait_exponential(multiplier=1, min=4, max=60),
           stop=stop_after_attempt(5), reraise=True)
    def fetch_trends_data(pytrend):
        return pytrend.interest_over_time()

    try:
        # Creating the client and building the payload both talk to Google,
        # so they are guarded together with the actual fetch.
        pytrend = TrendReq(hl="zh-TW", tz=-480)
        pytrend.build_payload(
            kw_list=[keyword],
            # NOTE(review): cat=3 restricts results to a single Google Trends
            # category; confirm this is the intended one for shopping keywords.
            cat=3,
            timeframe=search_timeframe,
            geo="TW",
            gprop=""
        )
        trends_df = fetch_trends_data(pytrend)
    except Exception as e:  # UI boundary: report any failure to the user
        st.error(f"獲取Google趨勢數據時出錯: {e}")
        return

    if trends_df.empty:
        st.warning("Google趨勢沒有返回任何數據")
        return
    trends_df = trends_df.drop(columns=["isPartial"], errors="ignore")

    # Trend line chart built with Plotly
    fig = px.line(trends_df, x=trends_df.index, y=keyword,
                  title=f"Google趨勢 - '{keyword}' 的趨勢分析")
    fig.update_traces(mode='lines+markers')
    fig.update_layout(xaxis_title="時間", yaxis_title="興趣指數", height=600)
    st.plotly_chart(fig)

    # Summary statistics of the trend data
    st.write("趨勢數據統計:")
    st.write(trends_df.describe())


def main() -> None:
    """Render the input form and, once the button is pressed, run the analysis."""
    # Set up Streamlit app title
    st.title("🐣MOMO 🆚 PCHOME 商品搜索和 Google Trends 分析👁️‍🗨️")

    # Get user input for keyword
    keyword = st.text_input("請輸入要搜索的關鍵字: ", "筆電")

    # Get date range input for Google Trends
    start_date = st.date_input("請選擇開始日期", value=pd.to_datetime("2024-08-01"))
    end_date = st.date_input("請選擇結束日期", value=pd.to_datetime("2024-08-11"))
    page_number = st.number_input("請輸入要搜索的頁數: ", min_value=1, max_value=100, value=1, step=1)

    # Nothing else happens until the user starts the search
    if not st.button("開始搜索"):
        return
    if not keyword.strip():
        st.warning("請先輸入關鍵字")
        return

    start_time = time.time()
    page_count = int(page_number)

    momo_df = fetch_momo_products(keyword, page_count)
    render_platform_report(momo_df, 'MOMO', 'title', keyword)

    pchome_data = fetch_pchome_products(keyword, page_count)
    render_platform_report(pchome_data, 'PCHOME', 'name', keyword)

    render_combined_report(momo_df, pchome_data)
    render_google_trends(keyword, start_date, end_date)

    st.write(f"執行時間: {time.time() - start_time:.2f} 秒")


# Streamlit executes the script as "__main__", so the app still runs on every
# rerun, while importing this module (e.g. from tests) has no side effects.
if __name__ == "__main__":
    main()