naufalnashif committed on
Commit
98dbc9e
·
1 Parent(s): f585df9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -11
app.py CHANGED
@@ -219,15 +219,18 @@ def scrape_tokped_with_selenium(nama_barang, num_items):
219
  while len(products) < num_items :
220
  try :
221
  url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
222
- headers = {
223
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
224
- 'Accept-Language': 'en-US,en;q=0.5',
225
- 'Accept-Encoding': 'gzip, deflate, br',
226
- 'Connection': 'keep-alive'
227
- }
228
- driver.get(url, headers = headers)
229
- # Tunggu hingga elemen tertentu muncul (gunakan sesuai kebutuhan)
230
- WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "pcv3__info-content css-gwkf0u")))
 
 
 
231
 
232
  # Dapatkan sumber halaman setelah eksekusi JavaScript
233
  html = driver.page_source
@@ -238,7 +241,6 @@ def scrape_tokped_with_selenium(nama_barang, num_items):
238
  # Cari semua elemen yang sesuai
239
  product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href=True)
240
 
241
-
242
  for product_info in product_container_list:
243
  link = product_info['href']
244
  st.write(link)
@@ -333,7 +335,7 @@ if selected_site == "shopee.co.id":
333
  scraping_done = True # Set scraping_done menjadi True
334
 
335
  if selected_site in ["tokopedia.com", "tokopedia.com(selenium)"]:
336
- st.error("Sedang dalam pengembangan. Silahkan pilih situs yang lain")
337
  if st.button("Mulai Scraping"):
338
  if not nama_barang:
339
  st.error("Mohon isi Nama Barang.")
 
219
  while len(products) < num_items :
220
  try :
221
  url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
222
+
223
+ driver.get(url)
224
+ # Eksekusi JavaScript untuk mengatur header
225
+ driver.execute_script(
226
+ """
227
+ var xhr = new XMLHttpRequest();
228
+ xhr.open('GET', arguments[0], false);
229
+ xhr.setRequestHeader('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36');
230
+ xhr.send(null);
231
+ """
232
+ , url
233
+ )
234
 
235
  # Dapatkan sumber halaman setelah eksekusi JavaScript
236
  html = driver.page_source
 
241
  # Cari semua elemen yang sesuai
242
  product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href=True)
243
 
 
244
  for product_info in product_container_list:
245
  link = product_info['href']
246
  st.write(link)
 
335
  scraping_done = True # Set scraping_done menjadi True
336
 
337
  if selected_site in ["tokopedia.com", "tokopedia.com(selenium)"]:
338
+ st.error("Jika mengalami error karena sedang dalam pengembangan. Silahkan pilih situs yang lain")
339
  if st.button("Mulai Scraping"):
340
  if not nama_barang:
341
  st.error("Mohon isi Nama Barang.")