Commit
·
a6136fe
1
Parent(s):
c3e21c7
Update app.py
Browse files
app.py
CHANGED
@@ -109,10 +109,16 @@ def scrape_shopee(nama_barang, num_items):
|
|
109 |
|
110 |
except requests.exceptions.RequestException as e:
|
111 |
logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
|
|
|
|
112 |
except requests.exceptions.HTTPError as e:
|
113 |
logging.error(f"HTTP Error: {e}")
|
|
|
|
|
114 |
except Exception as e:
|
115 |
-
logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
|
|
|
|
116 |
except WebDriverException as e:
|
117 |
st.error(f"An error occurred: {e}")
|
118 |
finally:
|
@@ -182,18 +188,100 @@ def scrape_tokped(nama_barang, num_items):
|
|
182 |
|
183 |
except requests.exceptions.RequestException as e:
|
184 |
logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
185 |
-
st.
|
186 |
break
|
187 |
except requests.exceptions.HTTPError as e:
|
188 |
logging.error(f"HTTP Error: {e}")
|
189 |
-
st.
|
190 |
break
|
191 |
except Exception as e:
|
192 |
logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
193 |
-
st.
|
194 |
break
|
195 |
page += 1
|
196 |
return products
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
#---------------------------------------------------User Interface----------------------------------------------------------------------
|
198 |
|
199 |
# Streamlit UI
|
@@ -201,7 +289,7 @@ st.title("Scraping E-Commerce")
|
|
201 |
|
202 |
with st.expander("Settings :"):
|
203 |
# Pilihan untuk memilih situs web
|
204 |
-
selected_site = st.selectbox("Pilih Situs Web :", ["klikindomaret.com", "shopee.co.id", "tokopedia.com"])
|
205 |
|
206 |
nama_barang = st.text_input("Masukkan Nama Barang :")
|
207 |
num_items = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 1, step = 1, placeholder="Type a number...")
|
@@ -224,16 +312,16 @@ if selected_site == "klikindomaret.com":
|
|
224 |
scraping_done = True # Set scraping_done menjadi True
|
225 |
|
226 |
if selected_site == "shopee.co.id":
|
227 |
-
st.error("
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
|
236 |
-
if selected_site == "tokopedia.com":
|
237 |
#st.error("Sedang dalam pengembangan. Silahkan pilih situs yang lain")
|
238 |
if st.button("Mulai Scraping"):
|
239 |
if not nama_barang:
|
|
|
109 |
|
110 |
except requests.exceptions.RequestException as e:
|
111 |
logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
112 |
+
st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
113 |
+
break
|
114 |
except requests.exceptions.HTTPError as e:
|
115 |
logging.error(f"HTTP Error: {e}")
|
116 |
+
st.error(f"HTTP Error: {e}")
|
117 |
+
break
|
118 |
except Exception as e:
|
119 |
+
logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
120 |
+
st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
121 |
+
break
|
122 |
except WebDriverException as e:
|
123 |
st.error(f"An error occurred: {e}")
|
124 |
finally:
|
|
|
188 |
|
189 |
except requests.exceptions.RequestException as e:
|
190 |
logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
191 |
+
st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
192 |
break
|
193 |
except requests.exceptions.HTTPError as e:
|
194 |
logging.error(f"HTTP Error: {e}")
|
195 |
+
st.error(f"HTTP Error: {e}")
|
196 |
break
|
197 |
except Exception as e:
|
198 |
logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
199 |
+
st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
200 |
break
|
201 |
page += 1
|
202 |
return products
|
203 |
+
|
204 |
+
def _tokped_text(parent, tag, css_class, strip=True):
    """Return the text of the first ``tag`` with ``css_class`` inside ``parent``.

    Args:
        parent: Parsed element to search in (anything exposing ``find``).
        tag: HTML tag name to look for.
        css_class: Exact ``class`` attribute string to match.
        strip: When true, surrounding whitespace is removed from the text.

    Returns:
        The element text, or ``None`` when no matching element exists.
    """
    element = parent.find(tag, class_=css_class)
    if element is None:
        return None
    return element.text.strip() if strip else element.text


@st.cache_data
def scrape_tokped_with_selenium(nama_barang, num_items):
    """Scrape Tokopedia search results for ``nama_barang`` with headless Chrome.

    Only the first result page is fetched, so fewer than ``num_items``
    products may be returned.

    Args:
        nama_barang: Search keyword; it is URL-quoted before use.
        num_items: Maximum number of products to collect.

    Returns:
        A list of dicts with the keys ``link``, ``produk``, ``harga``,
        ``terjual``, ``rating``, ``toko`` and ``asal_product``. Fields whose
        element is missing on the page are ``None``. Errors are logged and
        shown through ``st.error`` instead of being raised; whatever was
        collected before the error is returned.
    """
    products = []
    page = 1
    query = quote(nama_barang)

    # The options must exist before the driver is created; the previous
    # order read the local name before assignment (UnboundLocalError).
    options = webdriver.ChromeOptions()
    for flag in (
        '--no-sandbox',
        '--headless',
        '--disable-notifications',
        '--disable-infobars',
        '--disable-dev-shm-usage',
    ):
        options.add_argument(flag)

    driver = None
    try:
        # Created inside the try so a browser start-up failure is reported
        # through the handlers below.
        driver = webdriver.Chrome(options=options)

        url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
        driver.get(url)
        # Fixed wait before reading the page source.
        # NOTE(review): presumably gives the page time to render - confirm.
        time.sleep(10)

        soup = BeautifulSoup(driver.page_source, "html.parser")
        product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href=True)

        for product_info in product_container_list:
            link = product_info['href']
            # NOTE(review): looks like leftover debug output - confirm it is wanted.
            st.write(link)

            products.append({
                'link': link,
                'produk': _tokped_text(product_info, 'div', "prd_link-product-name css-3um8ox"),
                'harga': _tokped_text(product_info, 'div', "prd_link-product-price css-h66vau"),
                # Sold count and rating are kept unstripped, as before.
                'terjual': _tokped_text(product_info, 'span', "prd_label-integrity css-1sgek4h", strip=False),
                'rating': _tokped_text(product_info, 'span', 'prd_rating-average-text css-t70v7i', strip=False),
                'toko': _tokped_text(product_info, 'span', "prd_link-shop-name css-1kdc32b flip"),
                'asal_product': _tokped_text(product_info, 'span', "prd_link-shop-loc css-1kdc32b flip"),
            })
            if len(products) >= num_items:
                products = products[:num_items]
                break

    # Handlers run from most specific to most general so each one is
    # reachable. They no longer contain `break`: there is no enclosing loop,
    # so the statement was a SyntaxError.
    except requests.exceptions.HTTPError as e:
        logging.error(f"HTTP Error: {e}")
        st.error(f"HTTP Error: {e}")
    except requests.exceptions.RequestException as e:
        logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
        st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
    except WebDriverException as e:
        logging.error(f"An error occurred: {e}")
        st.error(f"An error occurred: {e}")
    except Exception as e:
        logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
        st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
    finally:
        # Always release the browser, even after an error.
        if driver is not None:
            driver.quit()

    return products
|
285 |
#---------------------------------------------------User Interface----------------------------------------------------------------------
|
286 |
|
287 |
# Streamlit UI
|
|
|
289 |
|
290 |
with st.expander("Settings :"):
|
291 |
# Pilihan untuk memilih situs web
|
292 |
+
selected_site = st.selectbox("Pilih Situs Web :", ["klikindomaret.com", "shopee.co.id", "tokopedia.com", "tokopedia.com(selenium)"])
|
293 |
|
294 |
nama_barang = st.text_input("Masukkan Nama Barang :")
|
295 |
num_items = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 1, step = 1, placeholder="Type a number...")
|
|
|
312 |
scraping_done = True # Set scraping_done menjadi True
|
313 |
|
314 |
# Shopee branch: show the work-in-progress warning, then scrape on demand.
if selected_site == "shopee.co.id":
    st.error("Jika error, karena sedang dalam pengembangan. Silahkan pilih situs yang lain")
    if st.button("Mulai Scraping"):
        if nama_barang:
            scraped_products = scrape_shopee(nama_barang, num_items)
            # Keep the result in the hidden variable.
            hidden_data = scraped_products
            # Mark scraping as done.
            scraping_done = True
        else:
            # An item name is required before scraping can start.
            st.error("Mohon isi Nama Barang.")
|
323 |
|
324 |
+
if selected_site == "tokopedia.com"|"tokopedia.com(selenium)":
|
325 |
#st.error("Sedang dalam pengembangan. Silahkan pilih situs yang lain")
|
326 |
if st.button("Mulai Scraping"):
|
327 |
if not nama_barang:
|