File size: 6,409 Bytes
20ea5d1 68a4670 20ea5d1 39d6306 20ea5d1 39d6306 20ea5d1 c336718 078863a 20ea5d1 c336718 20ea5d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import json
import os
import time
from urllib.parse import quote

import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import pandas as pd
import requests
import streamlit as st
# Page heading for the MOMO vs. PCHOME search and price-analysis app.
st.title("🐣MOMO 🆚 PCHOME 商品搜索和價格分析👁️🗨️")

# Search parameters entered by the user. The keyword defaults to "筆電";
# the page value is an integer constrained to 1..100 and defaults to 1.
search_keyword = st.text_input(label="請輸入要搜索的關鍵字: ", value="筆電")
page_number = st.number_input(
    label="請輸入要搜索的頁數: ",
    min_value=1,
    max_value=100,
    value=1,
    step=1,
)
# ---------------------------------------------------------------------------
# Constants and private helpers used by the search handler at the bottom.
# ---------------------------------------------------------------------------

# Seconds to wait for any single HTTP request before giving up, so a stalled
# server cannot hang the app forever.
REQUEST_TIMEOUT = 15

MOMO_URL = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
MOMO_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
}
PCHOME_BASE_URL = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q='
PCHOME_COLUMNS = ['name', 'describe', 'price', 'orig']

FONT_URL = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
FONT_PATH = "TaipeiSansTCBeta-Regular.ttf"
FONT_FAMILY = 'Taipei Sans TC Beta'

# Upper bound on the number of products drawn in each scatter chart.
MAX_PLOT_POINTS = 70


def _parse_momo_price(raw):
    """Return a MOMO ``goodsPrice`` value as a float, or NaN if unparsable.

    Only the text before the first ``(`` is kept, and ``,`` / ``$`` are
    removed before conversion. NaN (rather than 0) is used for values that
    cannot be parsed so they are ignored by the mean/min/max statistics
    instead of dragging them towards zero.
    """
    cleaned = str(raw).split('(')[0].replace(',', '').replace('$', '')
    try:
        return float(cleaned)
    except ValueError:
        return float('nan')


def _fetch_momo_products(keyword, page):
    """Query one MOMO search result page.

    Args:
        keyword: Search text sent as ``searchValue``.
        page: Page number sent as ``curPage``.

    Returns:
        A DataFrame with ``title``, ``price`` and ``platform`` columns. It is
        empty when the request fails or no products are returned; failures
        are reported to the user with ``st.error``.
    """
    empty = pd.DataFrame(columns=['title', 'price', 'platform'])
    payload = {
        "host": "momoshop",
        "flag": "searchEngine",
        "data": {
            "searchValue": keyword,
            "curPage": str(page),
            "priceS": "0",
            "priceE": "9999999",
            "searchType": "1",
        },
    }
    try:
        response = requests.post(
            MOMO_URL, headers=MOMO_HEADERS, json=payload, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        st.error(f"MOMO 請求失敗: {exc}")
        return empty
    if response.status_code != 200:
        st.error(f"MOMO 請求失敗,狀態碼: {response.status_code}")
        return empty
    try:
        body = response.json()
    except ValueError:
        st.error("MOMO 回應不是有效的 JSON")
        return empty
    if not isinstance(body, dict):
        return empty

    # Either level may be missing or null in the response; treat both as
    # "no products".
    goods = (body.get('rtnSearchData') or {}).get('goodsInfoList') or []
    rows = [
        {
            'title': product.get('goodsName', ''),
            'price': _parse_momo_price(product.get('goodsPrice', '')),
            'platform': 'MOMO',
        }
        for product in goods
    ]
    return pd.DataFrame(rows, columns=['title', 'price', 'platform'])


def _fetch_pchome_products(keyword, pages):
    """Fetch PCHOME search result pages 1..``pages`` and stack them.

    Args:
        keyword: Search text; percent-encoded before being put in the URL so
            characters such as ``&`` or ``#`` cannot corrupt the query string.
        pages: Number of result pages to request, starting at page 1.

    Returns:
        A DataFrame with whichever of ``name``, ``describe``, ``price`` and
        ``original_price`` (renamed from ``orig``) the response provided, plus
        a ``platform`` column. Rows are numbered consecutively across pages.
        Empty when nothing could be fetched; failures are reported with
        ``st.error`` and the remaining pages are still attempted.
    """
    encoded_keyword = quote(keyword, safe='')
    frames = []
    for page in range(1, pages + 1):
        if page > 1:
            # Pause between consecutive requests to avoid hammering the site.
            time.sleep(1)
        url = f'{PCHOME_BASE_URL}{encoded_keyword}&page={page}&sort=sale/dc'
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            st.error(f"PCHOME 請求失敗: {exc}")
            continue
        if response.status_code != 200:
            st.error(f"PCHOME 請求失敗,狀態碼: {response.status_code}")
            continue
        try:
            body = json.loads(response.content)
        except ValueError:
            st.error("PCHOME 回應不是有效的 JSON")
            continue

        products = body.get('prods') if isinstance(body, dict) else None
        if not products:
            continue
        frame = pd.DataFrame(products)
        # Keep only the columns of interest that this response contains.
        selected = [col for col in PCHOME_COLUMNS if col in frame.columns]
        frame = frame[selected].rename(columns={'orig': 'original_price'})
        frame['platform'] = 'PCHOME'
        frames.append(frame)

    if not frames:
        return pd.DataFrame()
    # Concatenate once, with a fresh 0..n-1 index, so products from different
    # pages do not share index values (and x positions in the chart).
    return pd.concat(frames, ignore_index=True)


def _ensure_cjk_font():
    """Register the Taipei Sans TC font with matplotlib and select it.

    The font file is downloaded only when it is not already on disk, and it
    is added to the font manager only when no font with that family name is
    registered yet.

    Returns:
        True when the font is selected, False when it could not be
        downloaded or loaded (matplotlib's current font is left unchanged).
    """
    registered = any(
        entry.name == FONT_FAMILY for entry in fm.fontManager.ttflist
    )
    if not registered:
        if not os.path.exists(FONT_PATH):
            try:
                font_response = requests.get(FONT_URL, timeout=REQUEST_TIMEOUT)
                font_response.raise_for_status()
            except requests.RequestException:
                return False
            with open(FONT_PATH, "wb") as font_file:
                font_file.write(font_response.content)
        try:
            fm.fontManager.addfont(FONT_PATH)
        except (OSError, RuntimeError):
            # The file on disk is not a loadable font (e.g. an HTML error
            # page was saved); remove it so a later run downloads it again.
            if os.path.exists(FONT_PATH):
                os.remove(FONT_PATH)
            return False
    mpl.rc('font', family=FONT_FAMILY)
    return True


def _show_price_report(platform, df, keyword):
    """Display the product table, price statistics and scatter chart.

    Args:
        platform: Label used in every message and in the chart title
            (``"MOMO"`` or ``"PCHOME"``).
        df: Product rows; must contain a ``price`` column to be reported.
        keyword: Search text shown in the chart title.
    """
    if df.empty or 'price' not in df.columns:
        st.info(f"{platform} 沒有可顯示的商品數據")
        return

    st.write(f"{platform} 商品數據:", df)

    # Non-numeric prices become NaN and are skipped by the statistics below.
    prices = pd.to_numeric(df['price'], errors='coerce')
    if not prices.notna().any():
        st.info(f"{platform} 沒有有效的價格數據")
        return

    avg_price = prices.mean()
    st.write(f"{platform} 平均價格: {avg_price:.2f}")
    st.write(f"{platform} 最高價格: {prices.max():.2f}")
    st.write(f"{platform} 最低價格: {prices.min():.2f}")

    shown = prices.iloc[:MAX_PLOT_POINTS]
    fig, ax = plt.subplots(figsize=(15, 8))
    ax.plot(shown.index, shown, 'o', color='skyblue', markersize=8)
    ax.set_title(f'{platform} 電商網站上 "{keyword}" 的銷售價格', fontsize=20, fontweight='bold')
    ax.axhline(y=avg_price, color='red', linestyle='--', linewidth=2, label=f'參考價格: {avg_price:.2f}')
    ax.set_xlabel('商品索引', fontsize=14)
    ax.set_ylabel('價格', fontsize=14)
    ax.tick_params(axis='x', rotation=45, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)
    ax.legend(fontsize=12, loc='upper left')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    st.pyplot(fig)
    # Release the figure; otherwise every click leaves another one open.
    plt.close(fig)


# Run the scrape, analysis and export when the user presses the button.
if st.button("開始搜索"):
    start_time = time.time()

    if not search_keyword.strip():
        st.warning("請輸入要搜索的關鍵字")
        st.stop()

    # Set the chart font up once, before either chart is drawn, so both
    # platforms render Chinese labels regardless of which request succeeds.
    if not _ensure_cjk_font():
        st.warning("無法載入中文字型,圖表文字可能無法正確顯示。")

    # NOTE(review): MOMO is queried for the single page `page_number`, while
    # PCHOME fetches pages 1..page_number — confirm which one is intended.
    momo_df = _fetch_momo_products(search_keyword, int(page_number))
    _show_price_report("MOMO", momo_df, search_keyword)

    pchome_data = _fetch_pchome_products(search_keyword, int(page_number))
    _show_price_report("PCHOME", pchome_data, search_keyword)

    # Combine whatever was fetched; skip empty frames so a failed platform
    # does not break the export of the other one.
    non_empty = [frame for frame in (momo_df, pchome_data) if not frame.empty]
    if non_empty:
        combined_data = pd.concat(non_empty, ignore_index=True)
        # 'utf-8-sig' prepends a BOM to the encoded CSV bytes.
        csv = combined_data.to_csv(index=False).encode('utf-8-sig')
        # NOTE(review): clicking this button presumably triggers a Streamlit
        # rerun in which st.button() above is False, clearing the results —
        # confirm; keeping the data in st.session_state would avoid that.
        st.download_button(
            label="下載CSV檔案",
            data=csv,
            file_name=f'{search_keyword}_combined_data.csv',
            mime='text/csv'
        )
    else:
        st.warning("沒有可下載的數據")

    end_time = time.time()
    st.write(f"Execution time: {end_time - start_time:.2f} seconds")
|