Commit
·
29ad273
1
Parent(s):
897f2a4
Update app.py
Browse files
app.py
CHANGED
@@ -147,7 +147,7 @@ def scrape_tokped_with_selenium(nama_barang, num_items):
|
|
147 |
|
148 |
options = webdriver.ChromeOptions()
|
149 |
options.add_argument('--no-sandbox')
|
150 |
-
|
151 |
options.add_argument('--disable-notifications')
|
152 |
options.add_argument('--disable-infobars')
|
153 |
options.add_argument('--disable-dev-shm-usage')
|
@@ -160,18 +160,25 @@ def scrape_tokped_with_selenium(nama_barang, num_items):
|
|
160 |
|
161 |
driver.get(url)
|
162 |
# Eksekusi JavaScript untuk mengatur header
|
163 |
-
driver.execute_script(
|
164 |
-
"""
|
165 |
-
var xhr = new XMLHttpRequest();
|
166 |
-
xhr.open('GET', arguments[0], false);
|
167 |
-
xhr.setRequestHeader('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36');
|
168 |
-
xhr.send(null);
|
169 |
-
"""
|
170 |
-
|
171 |
-
)
|
172 |
|
173 |
# Dapatkan sumber halaman setelah eksekusi JavaScript
|
174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
|
176 |
# Gunakan BeautifulSoup untuk melakukan parsing HTML
|
177 |
soup = BeautifulSoup(html, "html.parser")
|
|
|
147 |
|
148 |
options = webdriver.ChromeOptions()
|
149 |
options.add_argument('--no-sandbox')
|
150 |
+
options.add_argument('--headless')
|
151 |
options.add_argument('--disable-notifications')
|
152 |
options.add_argument('--disable-infobars')
|
153 |
options.add_argument('--disable-dev-shm-usage')
|
|
|
160 |
|
161 |
driver.get(url)
|
162 |
# Eksekusi JavaScript untuk mengatur header
|
163 |
+
#driver.execute_script(
|
164 |
+
#"""
|
165 |
+
#var xhr = new XMLHttpRequest();
|
166 |
+
#xhr.open('GET', arguments[0], false);
|
167 |
+
#xhr.setRequestHeader('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36');
|
168 |
+
#xhr.send(null);
|
169 |
+
#"""
|
170 |
+
#, url
|
171 |
+
#)
|
172 |
|
173 |
# Dapatkan sumber halaman setelah eksekusi JavaScript
|
174 |
+
# Tunggu hingga halaman selesai dimuat (opsional, tergantung kebutuhan)
|
175 |
+
driver.implicitly_wait(10) # Tunggu maksimal 10 detik
|
176 |
+
|
177 |
+
# Temukan elemen kontainer produk berdasarkan XPath atau CSS selector
|
178 |
+
# Di sini, saya menggunakan XPath sebagai contoh:
|
179 |
+
product_container_xpath = "//div[@class='css-llwpbs']" # Ganti dengan XPath yang sesuai
|
180 |
+
html = driver.find_element(By.XPATH, product_container_xpath)
|
181 |
+
html = driver.find
|
182 |
|
183 |
# Gunakan BeautifulSoup untuk melakukan parsing HTML
|
184 |
soup = BeautifulSoup(html, "html.parser")
|