File size: 6,409 Bytes
20ea5d1
 
 
 
 
 
 
 
 
 
 
68a4670
20ea5d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39d6306
20ea5d1
 
 
 
 
 
 
39d6306
 
 
 
 
 
 
 
 
 
20ea5d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c336718
078863a
 
 
 
 
 
20ea5d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c336718
20ea5d1
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import os
import requests
import pandas as pd
import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import streamlit as st
import json
import time

# Page title shown at the top of the Streamlit app.
st.title("🐣MOMO 🆚 PCHOME 商品搜索和價格分析👁️‍🗨️")

# User inputs, read on every script run:
#   search_keyword - free-text search term (pre-filled with "筆電").
#   page_number    - whole number constrained to 1..100 (default 1, step 1).
search_keyword = st.text_input("請輸入要搜索的關鍵字: ", "筆電")
page_number = st.number_input("請輸入要搜索的頁數: ", min_value=1, max_value=100, value=1, step=1)

# Seconds to wait on any single HTTP request before giving up, so a stalled
# remote host cannot hang the script run indefinitely.
_REQUEST_TIMEOUT = 15

# Local cache path and download location of the font used for chart text.
_FONT_PATH = "TaipeiSansTCBeta-Regular.ttf"
_FONT_URL = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"


def _ensure_chart_font():
    """Register the Taipei Sans TC font with matplotlib and make it the default.

    The font file is downloaded only when it is not already on disk. Any
    failure is reported as a warning instead of aborting the search; the
    charts are then drawn with matplotlib's current default font.
    """
    if not os.path.exists(_FONT_PATH):
        try:
            font_response = requests.get(_FONT_URL, timeout=_REQUEST_TIMEOUT)
            font_response.raise_for_status()
        except requests.RequestException as exc:
            st.warning(f"字型下載失敗,圖表中的中文可能無法正常顯示: {exc}")
            return
        with open(_FONT_PATH, "wb") as font_file:
            font_file.write(font_response.content)

    try:
        fm.fontManager.addfont(_FONT_PATH)
    except (OSError, RuntimeError) as exc:
        # A corrupt download would otherwise be reused on every later run;
        # remove it so the next run downloads a fresh copy.
        if os.path.exists(_FONT_PATH):
            os.remove(_FONT_PATH)
        st.warning(f"字型載入失敗,圖表中的中文可能無法正常顯示: {exc}")
        return
    mpl.rc('font', family='Taipei Sans TC Beta')


def _parse_momo_price(raw_price):
    """Convert a MOMO price value such as ``"$1,234(...)"`` to a float.

    Everything from the first ``(`` onward is dropped, then thousands
    separators and the ``$`` sign are stripped. Values that still cannot be
    parsed yield ``0`` (the original best-effort behaviour).
    """
    price_str = str(raw_price).split('(')[0].replace(',', '').replace('$', '')
    try:
        return float(price_str)
    except ValueError:
        return 0


def _fetch_momo_products(keyword, page):
    """Query the MOMO search API for one result page.

    Args:
        keyword: Search term sent as ``searchValue``.
        page: Page number sent as ``curPage``.

    Returns:
        DataFrame with columns ``title``, ``price`` and ``platform``; empty
        (but with those columns) when the request fails or nothing is found.
        Failures are reported through ``st.error``.
    """
    columns = ['title', 'price', 'platform']
    momo_url = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
    momo_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
    }
    momo_payload = {
        "host": "momoshop",
        "flag": "searchEngine",
        "data": {
            "searchValue": keyword,
            "curPage": str(page),
            "priceS": "0",
            "priceE": "9999999",
            "searchType": "1"
        }
    }
    try:
        momo_response = requests.post(
            momo_url, headers=momo_headers, json=momo_payload, timeout=_REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        st.error(f"MOMO 請求失敗: {exc}")
        return pd.DataFrame(columns=columns)

    if momo_response.status_code != 200:
        st.error(f"MOMO 請求失敗,狀態碼: {momo_response.status_code}")
        return pd.DataFrame(columns=columns)

    try:
        body = momo_response.json()
    except ValueError as exc:
        st.error(f"MOMO 回應無法解析: {exc}")
        return pd.DataFrame(columns=columns)

    # Tolerate a missing or null level anywhere in the response structure.
    if not isinstance(body, dict):
        body = {}
    goods = (body.get('rtnSearchData') or {}).get('goodsInfoList') or []
    rows = [
        {
            'title': product.get('goodsName', ''),
            'price': _parse_momo_price(product.get('goodsPrice', '')),
            'platform': 'MOMO',
        }
        for product in goods
    ]
    return pd.DataFrame(rows, columns=columns)


def _fetch_pchome_products(keyword, pages):
    """Query the PCHOME search API for result pages ``1..pages``.

    Args:
        keyword: Search term sent as the ``q`` query parameter.
        pages: Number of result pages to request.

    Returns:
        DataFrame of the collected products with a fresh 0..n-1 index and a
        ``platform`` column set to ``"PCHOME"``; empty when nothing could be
        fetched. Failures are reported through ``st.error``.
    """
    # ``sort`` stays in the URL so its "/" is sent literally; the keyword and
    # page go through ``params`` so requests escapes characters such as "&",
    # "#" or "+" that would otherwise corrupt the query string.
    pchome_url = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?sort=sale/dc'
    wanted_columns = ['name', 'describe', 'price', 'orig']
    frames = []

    for page in range(1, pages + 1):
        try:
            pchome_response = requests.get(
                pchome_url, params={'q': keyword, 'page': page}, timeout=_REQUEST_TIMEOUT
            )
        except requests.RequestException as exc:
            st.error(f"PCHOME 請求失敗: {exc}")
            continue

        if pchome_response.status_code != 200:
            st.error(f"PCHOME 請求失敗,狀態碼: {pchome_response.status_code}")
            continue

        try:
            body = json.loads(pchome_response.content)
        except ValueError as exc:
            st.error(f"PCHOME 回應無法解析: {exc}")
            continue

        prods = body.get('prods') if isinstance(body, dict) else None
        if not prods:
            # No products on this page: stop paging instead of requesting more.
            break

        page_df = pd.DataFrame(prods)
        # Keep only the columns of interest that the response actually has.
        page_df = page_df[[col for col in wanted_columns if col in page_df.columns]]
        page_df = page_df.rename(columns={'orig': 'original_price'})
        page_df['platform'] = 'PCHOME'  # Add platform identifier
        frames.append(page_df)
        time.sleep(1)  # pause between successive page requests

    if not frames:
        return pd.DataFrame()
    # ignore_index gives each product a unique row label; without it every
    # page restarts at 0 and the chart draws pages on top of each other.
    return pd.concat(frames, ignore_index=True)


def _show_price_report(products, platform, keyword):
    """Write mean/max/min price lines and draw the price chart for one platform.

    Args:
        products: DataFrame with a ``price`` column.
        platform: Display name used as the prefix of every label.
        keyword: Search keyword shown in the chart title.
    """
    prices = pd.to_numeric(products['price'], errors='coerce')
    avg_price = prices.mean()
    st.write(f"{platform} 平均價格: {avg_price:.2f}")
    st.write(f"{platform} 最高價格: {prices.max():.2f}")
    st.write(f"{platform} 最低價格: {prices.min():.2f}")

    fig, ax = plt.subplots(figsize=(15, 8))
    # Only the first 70 rows are plotted.
    ax.plot(products.index[:70], prices[:70], 'o', color='skyblue', markersize=8)
    ax.set_title(f'{platform} 電商網站上 "{keyword}" 的銷售價格', fontsize=20, fontweight='bold')
    ax.axhline(y=avg_price, color='red', linestyle='--', linewidth=2, label=f'參考價格: {avg_price:.2f}')
    ax.set_xlabel('商品索引', fontsize=14)
    ax.set_ylabel('價格', fontsize=14)
    ax.tick_params(axis='x', rotation=45, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)
    ax.legend(fontsize=12, loc='upper left')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    st.pyplot(fig)
    # Release the figure; otherwise pyplot keeps every figure from every rerun.
    plt.close(fig)


# Create a button to start the scraping process
if st.button("開始搜索"):
    start_time = time.time()

    # Set the chart font up once, before either chart is drawn, so the PCHOME
    # chart does not depend on the MOMO request having succeeded.
    _ensure_chart_font()

    # MOMO scraping
    # NOTE(review): MOMO fetches only page `page_number`, while PCHOME below
    # fetches pages 1..page_number - confirm whether that difference is intended.
    momo_df = _fetch_momo_products(search_keyword, int(page_number))
    if momo_df.empty:
        st.warning("MOMO 沒有找到相關商品")
    else:
        st.write("MOMO 商品數據:", momo_df)
        _show_price_report(momo_df, 'MOMO', search_keyword)

    # PCHOME scraping
    pchome_data = _fetch_pchome_products(search_keyword, int(page_number))
    if pchome_data.empty:
        st.warning("PCHOME 沒有找到相關商品")
    else:
        st.write("PCHOME 商品數據:", pchome_data)
        if 'price' in pchome_data.columns:
            _show_price_report(pchome_data, 'PCHOME', search_keyword)

    # Combine MOMO and PCHOME data (skipping platforms that returned nothing)
    non_empty = [df for df in (momo_df, pchome_data) if not df.empty]
    if non_empty:
        combined_data = pd.concat(non_empty, ignore_index=True)

        # Add a button to download the combined data as CSV.
        # to_csv() without a path returns a str, so the BOM-prefixed encoding
        # is applied once here.
        csv = combined_data.to_csv(index=False).encode('utf-8-sig')
        st.download_button(
            label="下載CSV檔案",
            data=csv,
            file_name=f'{search_keyword}_combined_data.csv',
            mime='text/csv'
        )
    else:
        st.warning("沒有可下載的資料")

    end_time = time.time()
    st.write(f"Execution time: {end_time - start_time:.2f} seconds")