Commit
·
46125ea
1
Parent(s):
74d0899
Update app.py
Browse files
app.py
CHANGED
@@ -121,62 +121,60 @@ def scrape_shopee(nama_barang, num_items):
|
|
121 |
|
122 |
@st.cache_data
|
123 |
def scrape_tokped(nama_barang, num_items):
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
|
|
139 |
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
|
|
171 |
break
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
except requests.exceptions.HTTPError as e:
|
177 |
-
st.error("HTTP Error :", str(e))
|
178 |
-
break
|
179 |
-
page += 1
|
180 |
return products
|
181 |
#---------------------------------------------------User Interface----------------------------------------------------------------------
|
182 |
|
|
|
121 |
|
122 |
def _tokped_text(node, tag, css_class):
    """Return the stripped text of the first matching descendant, or None.

    Args:
        node: A BeautifulSoup element to search within.
        tag: Tag name to look for (e.g. ``'div'`` or ``'span'``).
        css_class: Exact ``class`` attribute string to match.

    Returns:
        The element's text with surrounding whitespace removed, or ``None``
        when no matching element exists.
    """
    element = node.find(tag, class_=css_class)
    return element.text.strip() if element else None


@st.cache_data
def scrape_tokped(nama_barang, num_items):
    """Scrape Tokopedia search results for a product name.

    Search result pages are fetched one at a time, starting at page 1, until
    ``num_items`` products have been collected, a page yields no product
    cards, or a request fails.

    Args:
        nama_barang: Search keyword; it is URL-quoted before being placed in
            the query string.
        num_items: Maximum number of products to return.

    Returns:
        A list of at most ``num_items`` dicts with the keys ``link``,
        ``produk``, ``harga``, ``terjual``, ``rating``, ``toko`` and
        ``asal_product``. A field is ``None`` when the corresponding element
        is missing from the product card.
    """
    products = []
    page = 1
    query = quote(nama_barang)

    # Loop-invariant request settings are built once, outside the paging loop.
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
        'Accept-Language': 'en-US, en;q=0.5',
        # Only advertise encodings that requests decodes without extra packages.
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
    }
    timeout = 10

    while len(products) < num_items:
        url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='

        try:
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            # HTTPError is a subclass of RequestException, so it has to be
            # handled first or this branch can never run.
            st.error(f"HTTP Error : {e}")
            break
        except requests.exceptions.RequestException:
            st.error("Terjadi kesalahan")
            break

        soup = BeautifulSoup(response.text, 'html.parser')
        product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href=True)

        # No cards on this page (end of results, changed markup, or a blocked
        # request): stop instead of requesting further pages forever.
        if not product_container_list:
            break

        for product_info in product_container_list:
            link = product_info['href']
            # NOTE(review): this writes every scraped link to the page; it
            # looks like leftover debug output - confirm before removing.
            st.write(link)

            title = _tokped_text(product_info, 'div', "prd_link-product-name css-3um8ox")
            harga = _tokped_text(product_info, 'div', "prd_link-product-price css-h66vau")
            # Sold count: a missing or empty label becomes None.
            terjual = _tokped_text(product_info, 'span', "prd_label-integrity css-1sgek4h") or None
            # Rating: the text is kept unstripped.
            rating_element = product_info.find('span', class_='prd_rating-average-text css-t70v7i')
            rating = rating_element.text if rating_element else None

            toko = _tokped_text(product_info, 'span', "prd_link-shop-name css-1kdc32b flip")
            asal_product = _tokped_text(product_info, 'span', "prd_link-shop-loc css-1kdc32b flip")

            products.append({
                'link': link,
                'produk': title,
                'harga': harga,
                'terjual': terjual,
                'rating': rating,
                'toko': toko,
                'asal_product': asal_product,
            })
            if len(products) >= num_items:
                break

        page += 1

    return products
|
179 |
#---------------------------------------------------User Interface----------------------------------------------------------------------
|
180 |
|