Commit 98dbc9e
Parent(s): f585df9
Update app.py

app.py CHANGED
@@ -219,15 +219,18 @@ def scrape_tokped_with_selenium(nama_barang, num_items):
     while len(products) < num_items :
         try :
             url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
-
-
-
-
-
-
-
-
-
+
+            driver.get(url)
+            # Execute JavaScript to set the request header
+            driver.execute_script(
+                """
+                var xhr = new XMLHttpRequest();
+                xhr.open('GET', arguments[0], false);
+                xhr.setRequestHeader('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36');
+                xhr.send(null);
+                """
+            , url
+            )

             # Get the page source after the JavaScript has executed
             html = driver.page_source
@@ -238,7 +241,6 @@ def scrape_tokped_with_selenium(nama_barang, num_items):
             # Find all matching elements
             product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href=True)

-
             for product_info in product_container_list:
                 link = product_info['href']
                 st.write(link)
@@ -333,7 +335,7 @@ if selected_site == "shopee.co.id":
         scraping_done = True  # Set scraping_done to True

 if selected_site in ["tokopedia.com", "tokopedia.com(selenium)"]:
-    st.error("
+    st.error("Jika mengalami error karena sedang dalam pengembangan. Silahkan pilih situs yang lain")
     if st.button("Mulai Scraping"):
         if not nama_barang:
             st.error("Mohon isi Nama Barang.")
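
For readers skimming the commit: the first two hunks load the Tokopedia search URL in Selenium, fire a synchronous in-page XMLHttpRequest with a spoofed User-Agent, and then parse driver.page_source with BeautifulSoup to collect product links. The sketch below reproduces that pattern as a rough, stand-alone illustration, not the app's actual code; it assumes Chrome plus the selenium and beautifulsoup4 packages are installed, the helper name fetch_product_links is invented here, and the CSS class is copied from the diff and may change on Tokopedia's side. Note that the XHR header applies only to that one request, not to the navigation whose HTML is read afterwards.

# Rough stand-alone sketch (assumptions above); not the app's actual code.
from selenium import webdriver
from bs4 import BeautifulSoup

def fetch_product_links(query, page=1):
    url = (
        "https://www.tokopedia.com/search"
        f"?navsource=&page={page}&q={query}"
        "&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st="
    )
    options = webdriver.ChromeOptions()
    options.add_argument("--headless=new")  # no visible browser window
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        # Synchronous in-page XHR with a custom User-Agent, mirroring the hunk.
        # The header affects only this XHR, not the page load above.
        driver.execute_script(
            """
            var xhr = new XMLHttpRequest();
            xhr.open('GET', arguments[0], false);
            xhr.setRequestHeader('User-Agent', 'Mozilla/5.0');
            xhr.send(null);
            """,
            url,
        )
        soup = BeautifulSoup(driver.page_source, "html.parser")
        anchors = soup.find_all("a", class_="pcv3__info-content css-gwkf0u", href=True)
        return [a["href"] for a in anchors]
    finally:
        driver.quit()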
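
The last hunk completes a previously unterminated st.error( call with an under-development notice for the Tokopedia options. A rough sketch of that Streamlit guard flow follows; only the strings inside the if block come from the diff, the widget labels are assumptions, and the scraper call is stubbed.

# Rough sketch of the guard flow; widget labels are assumptions, the scraper is stubbed.
import streamlit as st

selected_site = st.selectbox("Pilih situs", ["shopee.co.id", "tokopedia.com", "tokopedia.com(selenium)"])
nama_barang = st.text_input("Nama Barang")

if selected_site in ["tokopedia.com", "tokopedia.com(selenium)"]:
    # Warn that the Tokopedia scrapers are still under development.
    st.error("Jika mengalami error karena sedang dalam pengembangan. Silahkan pilih situs yang lain")
    if st.button("Mulai Scraping"):
        if not nama_barang:
            st.error("Mohon isi Nama Barang.")
        else:
            st.write(f"Scraping {selected_site} for '{nama_barang}' ...")  # stub for the real scraper call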