yusufenes committed on
Commit
3b250a9
·
verified ·
1 Parent(s): 12116ca

Update get_real_home_listing.py

Browse files
Files changed (1) hide show
  1. get_real_home_listing.py +138 -111
get_real_home_listing.py CHANGED
@@ -1,111 +1,138 @@
1
- from selenium import webdriver
2
- from selenium.webdriver.chrome.service import Service
3
- from selenium.webdriver.chrome.options import Options
4
- from selenium.webdriver.common.by import By
5
- from selenium.webdriver.support.ui import WebDriverWait
6
- from selenium.webdriver.support import expected_conditions as EC
7
- from selenium.common.exceptions import NoSuchElementException
8
- from selenium.webdriver.chrome.service import Service
9
- from webdriver_manager.chrome import ChromeDriverManager
10
- import json
11
- import pandas as pd
12
- import requests
13
- from bs4 import BeautifulSoup
14
-
15
def format_price(price):
    """Convert a formatted price into an integer.

    The value is turned into text, its last two characters are dropped,
    every ``.`` is removed and the remainder is parsed with ``int``.

    NOTE(review): presumably the two dropped characters are a ``TL`` suffix
    or a trailing ``.0`` -- confirm against the caller.
    """
    trimmed = str(price)[:-2]
    digits = "".join(trimmed.split('.'))
    return int(digits)
19
-
20
-
21
def get_home_listings(selected_il, price_value):
    """Scrape up to ten emlakjet.com listings priced near ``price_value``.

    Opens the site in headless Chromium, searches for ``selected_il`` with a
    price window of +/- 500,000 around the parsed price, then visits the first
    ten result links and collects the details of each listing.

    Args:
        selected_il: Text typed into the site's search box.
        price_value: Price accepted by ``format_price``.

    Returns:
        A list of dicts, one per listing that was read successfully. Each
        dict holds the key/value pairs found in the listing's detail list
        plus ``url``, ``title``, ``resim_url`` and ``price`` (an int).

    Raises:
        selenium.common.exceptions.TimeoutException: if the search form
            never appears, or if no listing at all could be collected.
        ValueError: if ``price_value`` cannot be parsed by ``format_price``.
    """
    # Local import: the module's import block only brings in NoSuchElementException.
    from selenium.common.exceptions import TimeoutException

    # Parse the price before launching a browser so bad input fails fast.
    # The search covers 500,000 TL below and above the predicted value.
    price_value = format_price(price_value)
    lower_bound = price_value - 500000
    upper_bound = price_value + 500000

    chrome_options = Options()
    for flag in (
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--remote-debugging-port=9222',
    ):
        chrome_options.add_argument(flag)
    chrome_options.binary_location = "/usr/bin/chromium"

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)

    data = []
    # try/finally guarantees the browser process is closed even when a wait
    # times out or the page layout changes.
    try:
        driver.get('https://www.emlakjet.com/')

        # NOTE(review): the headlessui ids below look auto-generated and may
        # change between site builds -- confirm they are still valid.
        search_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="headlessui-tabs-panel-:r9:"]/div/div[2]/div/div/div/input'))
        )
        search_input.send_keys(f'{selected_il}')
        dropdown_button = driver.find_element(By.XPATH, '//*[@id="headlessui-listbox-button-:rh:"]')
        dropdown_button.click()

        first_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="headlessui-listbox-options-:ri:"]/ul[1]/div[1]/div/div[1]/input'))
        )
        first_input.clear()
        first_input.send_keys(str(lower_bound))

        second_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="headlessui-listbox-options-:ri:"]/ul[2]/div[1]/div/div[1]/input'))
        )
        second_input.clear()
        second_input.send_keys(str(upper_bound))

        find_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="headlessui-tabs-panel-:r9:"]/div/div[5]/div/button'))
        )
        find_button.click()

        for i in range(1, 11):
            link_xpath = f'//*[@id="content-wrapper"]/div[1]/div[4]/div[2]/div[3]/div[{i}]/div/a'
            try:
                link = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, link_xpath))
                )
            except TimeoutException:
                if not data:
                    # Nothing collected at all: surface the failure as before.
                    raise
                # Fewer results than expected: keep what was already scraped.
                break

            driver.get(link.get_attribute('href'))
            detail_url = driver.current_url

            try:
                # Inside the try so one slow detail page does not abort the run.
                WebDriverWait(driver, 3).until(
                    EC.presence_of_element_located((By.ID, "ilan-hakkinda"))
                )
                ul = driver.find_element(By.XPATH, '//*[@id="ilan-hakkinda"]/div/div/ul')
                list_items = ul.find_elements(By.TAG_NAME, 'li')

                details = {}
                for item in list_items:
                    try:
                        key = item.find_element(By.CLASS_NAME, 'styles_key__VqMhC').text
                        value = item.find_element(By.CLASS_NAME, 'styles_value__3QmL3').text
                    except NoSuchElementException:
                        # Items without both a key and a value element are skipped.
                        continue
                    details[key] = value

                title = driver.find_element(By.XPATH, '//*[@id="content-wrapper"]/div[2]/div[1]/div/h1').text
                resim_url = driver.find_element(By.XPATH, '//*[@id="content-wrapper"]/div[2]/div[2]/div[2]/img').get_attribute('src')
                fiyat = driver.find_element(By.XPATH, '//*[@id="genel-bakis"]/div[1]/div[1]/div[1]/div/span').text
                fiyat = int(fiyat.replace('.', '').replace('TL', ''))

                details['url'] = detail_url
                details['title'] = title
                details['resim_url'] = resim_url
                details['price'] = fiyat
                data.append(details)
            except NoSuchElementException as e:
                print(f"Element not found: {e}")
            except Exception as e:
                # Best-effort scraping: report the problem and move on.
                print(f"An error occurred: {e}")

            # Return to the result list for the next listing.
            driver.execute_script("window.history.go(-1)")
    finally:
        driver.quit()

    return data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from selenium import webdriver
2
+ from selenium.webdriver.chrome.service import Service
3
+ from selenium.webdriver.chrome.options import Options
4
+ from selenium.webdriver.common.by import By
5
+ from selenium.webdriver.support.ui import WebDriverWait
6
+ from selenium.webdriver.support import expected_conditions as EC
7
+ from selenium.common.exceptions import NoSuchElementException
8
+ from selenium.webdriver.chrome.service import Service
9
+ from webdriver_manager.chrome import ChromeDriverManager
10
+ import json
11
+ import pandas as pd
12
+ import requests
13
+ from bs4 import BeautifulSoup
14
+
15
def install_chrome():
    """Install Google Chrome through apt when it is not already present.

    Does nothing if ``/usr/bin/google-chrome`` exists. Otherwise runs a shell
    pipeline that registers Google's signing key and apt source and installs
    ``google-chrome-stable``. The pipeline invokes ``sudo``.

    Raises:
        subprocess.CalledProcessError: if the shell pipeline exits non-zero.
    """
    # Imported locally because the module's import block does not provide
    # ``os`` or ``subprocess``; without these the function raises NameError.
    import os
    import subprocess

    if os.path.exists("/usr/bin/google-chrome"):
        return

    print("Installing Google Chrome...")
    # The command needs shell pipes and contains no external input, so it is
    # passed as one fixed string with shell=True.
    subprocess.run(
        "wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | sudo apt-key add - && "
        "echo 'deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main' | sudo tee /etc/apt/sources.list.d/google-chrome.list && "
        "sudo apt update && sudo apt install -y google-chrome-stable",
        shell=True,
        check=True,
    )
25
+
26
def install_chromedriver(version="133.0.6943.98"):
    """Download ChromeDriver and place the binary at /usr/local/bin/chromedriver.

    Does nothing when a usable file already exists at the target path. A file
    at that path that is itself a zip archive is treated as a broken install
    and replaced.

    Args:
        version: Chrome for Testing version to download. The default is the
            version this module was written against.
            NOTE(review): the browser is installed from the ``stable``
            channel, so this pinned version may drift from the installed
            Chrome -- confirm they match.

    Raises:
        subprocess.CalledProcessError: if ``wget`` or ``unzip`` fails.
    """
    # Imported locally because the module's import block does not provide them.
    import os
    import subprocess
    import tempfile
    import zipfile

    target = "/usr/local/bin/chromedriver"
    # An earlier revision saved the downloaded zip archive directly at the
    # target path; such a file exists but is not executable as a driver.
    if os.path.exists(target) and not zipfile.is_zipfile(target):
        return

    print("Installing ChromeDriver...")
    url = (
        "https://storage.googleapis.com/chrome-for-testing-public/"
        f"{version}/linux64/chromedriver-linux64.zip"
    )
    with tempfile.TemporaryDirectory() as workdir:
        archive = os.path.join(workdir, "chromedriver-linux64.zip")
        subprocess.run(["wget", "-O", archive, url], check=True)
        # The archive stores the binary under chromedriver-linux64/; -j drops
        # that directory so the file lands directly in /usr/local/bin, and -o
        # overwrites a stale file without prompting.
        subprocess.run(
            [
                "unzip", "-o", "-j", archive,
                "chromedriver-linux64/chromedriver",
                "-d", "/usr/local/bin/",
            ],
            check=True,
        )
    os.chmod(target, 0o755)
36
+
37
+
38
def format_price(price):
    """Return ``price`` as an int after removing its suffix and dot separators.

    The text form of ``price`` loses its final two characters, all ``.``
    characters are removed, and the result is passed to ``int``.

    NOTE(review): presumably the two trailing characters are ``TL`` or a
    float's ``.0`` -- confirm with the caller.
    """
    return int(str(price)[:-2].replace('.', ''))
42
+
43
+
44
def get_home_listings(selected_il, price_value):
    """Scrape up to ten emlakjet.com listings priced near ``price_value``.

    Ensures Chrome and ChromeDriver are installed, opens the site in headless
    Chrome, searches for ``selected_il`` with a price window of +/- 500,000
    around the parsed price, then visits the first ten result links and
    collects the details of each listing.

    Args:
        selected_il: Text typed into the site's search box.
        price_value: Price accepted by ``format_price``.

    Returns:
        A list of dicts, one per listing that was read successfully. Each
        dict holds the key/value pairs found in the listing's detail list
        plus ``url``, ``title``, ``resim_url`` and ``price`` (an int).

    Raises:
        selenium.common.exceptions.TimeoutException: if the search form
            never appears, or if no listing at all could be collected.
        ValueError: if ``price_value`` cannot be parsed by ``format_price``.
    """
    # Local import: the module's import block only brings in NoSuchElementException.
    from selenium.common.exceptions import TimeoutException

    # Parse the price before installing or launching anything so bad input
    # fails fast. The search covers 500,000 TL below and above the predicted
    # value.
    price_value = format_price(price_value)
    lower_bound = price_value - 500000
    upper_bound = price_value + 500000

    install_chrome()
    install_chromedriver()

    chrome_options = Options()
    for flag in (
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--remote-debugging-port=9222',
    ):
        chrome_options.add_argument(flag)

    service = Service("/usr/local/bin/chromedriver")
    driver = webdriver.Chrome(service=service, options=chrome_options)

    data = []
    # try/finally guarantees the browser process is closed even when a wait
    # times out or the page layout changes.
    try:
        driver.get('https://www.emlakjet.com/')

        # NOTE(review): the headlessui ids below look auto-generated and may
        # change between site builds -- confirm they are still valid.
        search_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="headlessui-tabs-panel-:r9:"]/div/div[2]/div/div/div/input'))
        )
        search_input.send_keys(f'{selected_il}')
        dropdown_button = driver.find_element(By.XPATH, '//*[@id="headlessui-listbox-button-:rh:"]')
        dropdown_button.click()

        first_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="headlessui-listbox-options-:ri:"]/ul[1]/div[1]/div/div[1]/input'))
        )
        first_input.clear()
        first_input.send_keys(str(lower_bound))

        second_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="headlessui-listbox-options-:ri:"]/ul[2]/div[1]/div/div[1]/input'))
        )
        second_input.clear()
        second_input.send_keys(str(upper_bound))

        find_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="headlessui-tabs-panel-:r9:"]/div/div[5]/div/button'))
        )
        find_button.click()

        for i in range(1, 11):
            link_xpath = f'//*[@id="content-wrapper"]/div[1]/div[4]/div[2]/div[3]/div[{i}]/div/a'
            try:
                link = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, link_xpath))
                )
            except TimeoutException:
                if not data:
                    # Nothing collected at all: surface the failure as before.
                    raise
                # Fewer results than expected: keep what was already scraped.
                break

            driver.get(link.get_attribute('href'))
            detail_url = driver.current_url

            try:
                # Inside the try so one slow detail page does not abort the run.
                WebDriverWait(driver, 3).until(
                    EC.presence_of_element_located((By.ID, "ilan-hakkinda"))
                )
                ul = driver.find_element(By.XPATH, '//*[@id="ilan-hakkinda"]/div/div/ul')
                list_items = ul.find_elements(By.TAG_NAME, 'li')

                details = {}
                for item in list_items:
                    try:
                        key = item.find_element(By.CLASS_NAME, 'styles_key__VqMhC').text
                        value = item.find_element(By.CLASS_NAME, 'styles_value__3QmL3').text
                    except NoSuchElementException:
                        # Items without both a key and a value element are skipped.
                        continue
                    details[key] = value

                title = driver.find_element(By.XPATH, '//*[@id="content-wrapper"]/div[2]/div[1]/div/h1').text
                resim_url = driver.find_element(By.XPATH, '//*[@id="content-wrapper"]/div[2]/div[2]/div[2]/img').get_attribute('src')
                fiyat = driver.find_element(By.XPATH, '//*[@id="genel-bakis"]/div[1]/div[1]/div[1]/div/span').text
                fiyat = int(fiyat.replace('.', '').replace('TL', ''))

                details['url'] = detail_url
                details['title'] = title
                details['resim_url'] = resim_url
                details['price'] = fiyat
                data.append(details)
            except NoSuchElementException as e:
                print(f"Element not found: {e}")
            except Exception as e:
                # Best-effort scraping: report the problem and move on.
                print(f"An error occurred: {e}")

            # Return to the result list for the next listing.
            driver.execute_script("window.history.go(-1)")
    finally:
        driver.quit()

    return data