Commit
·
7602f66
1
Parent(s):
402be75
Update app.py
Browse files
app.py
CHANGED
@@ -122,14 +122,88 @@ def scrape_tokped(nama_barang, num_items):
|
|
122 |
products = products[:num_items]
|
123 |
break
|
124 |
|
125 |
-
except requests.exceptions.RequestException as e:
|
126 |
-
st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
127 |
-
break
|
128 |
-
except requests.exceptions.HTTPError as e:
|
129 |
-
st.error(f"HTTP Error: {e}")
|
130 |
-
break
|
131 |
except Exception as e:
|
132 |
st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
break
|
134 |
page += 1
|
135 |
return products
|
|
|
122 |
products = products[:num_items]
|
123 |
break
|
124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
except Exception as e:
|
126 |
st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
127 |
+
st.write("Jalankan script ini di IDE/colab.research.google.com Anda :")
|
128 |
+
code = '''!pip install beautifulsoup4
|
129 |
+
!pip install requests
|
130 |
+
!pip install streamlit
|
131 |
+
from bs4 import BeautifulSoup
|
132 |
+
import requests
|
133 |
+
from urllib.parse import quote
|
134 |
+
import pandas as pd
|
135 |
+
import streamlit as st
|
136 |
+
def scrape_tokped(nama_barang, num_items):
|
137 |
+
products = []
|
138 |
+
page = 1
|
139 |
+
query = quote(nama_barang)
|
140 |
+
while len(products) < num_items :
|
141 |
+
url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
|
142 |
+
|
143 |
+
headers = {
|
144 |
+
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
145 |
+
}
|
146 |
+
timeout = 10
|
147 |
+
try :
|
148 |
+
response = requests.get(url, headers = headers, timeout = timeout)
|
149 |
+
response.raise_for_status()
|
150 |
+
|
151 |
+
soup = BeautifulSoup(response.text, 'html.parser')
|
152 |
+
|
153 |
+
product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href = True)
|
154 |
+
|
155 |
+
for product_info in product_container_list:
|
156 |
+
link = product_info['href']
|
157 |
+
title_element = product_info.find('div', class_="prd_link-product-name css-3um8ox")
|
158 |
+
title = title_element.text.strip() if title_element else None
|
159 |
+
|
160 |
+
harga_element = product_info.find('div', class_="prd_link-product-price css-h66vau")
|
161 |
+
harga = harga_element.text.strip() if harga_element else None
|
162 |
+
|
163 |
+
terjual_element = product_info.find('span', class_="prd_label-integrity css-1sgek4h")
|
164 |
+
terjual = terjual_element.text if terjual_element else None
|
165 |
+
|
166 |
+
rating_element = product_info.find('span', class_='prd_rating-average-text css-t70v7i')
|
167 |
+
rating = rating_element.text if rating_element else None
|
168 |
+
|
169 |
+
toko_element = product_info.find('span', class_="prd_link-shop-name css-1kdc32b flip")
|
170 |
+
toko = toko_element.text.strip() if toko_element else None
|
171 |
+
|
172 |
+
asal_product_element = product_info.find('span', class_="prd_link-shop-loc css-1kdc32b flip")
|
173 |
+
asal_product = asal_product_element.text.strip() if asal_product_element else None
|
174 |
+
|
175 |
+
products.append({
|
176 |
+
'link': link,
|
177 |
+
'produk' : title,
|
178 |
+
'harga' : harga,
|
179 |
+
'terjual' : terjual,
|
180 |
+
'rating' : rating,
|
181 |
+
'toko' : toko,
|
182 |
+
'asal_product' : asal_product,
|
183 |
+
})
|
184 |
+
if len(products) >= num_items:
|
185 |
+
products = products[:num_items]
|
186 |
+
break
|
187 |
+
|
188 |
+
except requests.exceptions.RequestException as e:
|
189 |
+
logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
190 |
+
break
|
191 |
+
except requests.exceptions.HTTPError as e:
|
192 |
+
logging.error(f"HTTP Error: {e}")
|
193 |
+
break
|
194 |
+
except Exception as e:
|
195 |
+
logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
196 |
+
break
|
197 |
+
page += 1
|
198 |
+
return products
|
199 |
+
|
200 |
+
nama_barang = input("Masukkan nama barang: ")
|
201 |
+
num_items = int(input("Masukkan jumlah barang yang ingin diambil: "))
|
202 |
+
|
203 |
+
# Melakukan scraping menggunakan fungsi scrape_tokped
|
204 |
+
hasil = scrape_tokped(nama_barang, num_items)
|
205 |
+
pd.DataFrame(hasil)'''
|
206 |
+
st.code(code, language='python')
|
207 |
break
|
208 |
page += 1
|
209 |
return products
|