yusufenes committed on
Commit
8c32736
·
verified ·
1 Parent(s): 17ccda1

Upload 2 files

Browse files
Files changed (2) hide show
  1. get_real_home_listing.py +111 -0
  2. requirements.txt +0 -0
get_real_home_listing.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from selenium import webdriver
2
+ from selenium.webdriver.chrome.service import Service
3
+ from selenium.webdriver.chrome.options import Options
4
+ from selenium.webdriver.common.by import By
5
+ from selenium.webdriver.support.ui import WebDriverWait
6
+ from selenium.webdriver.support import expected_conditions as EC
7
+ from selenium.common.exceptions import NoSuchElementException
8
+ from selenium.webdriver.chrome.service import Service
9
+ from webdriver_manager.chrome import ChromeDriverManager
10
+ import json
11
+ import pandas as pd
12
+ import requests
13
+ from bs4 import BeautifulSoup
14
+
15
def format_price(price):
    """Convert a price into a whole number of Turkish lira.

    Two input shapes are handled:

    * Real numbers (``int``, ``float`` and anything else registered as
      ``numbers.Real``) are truncated towards zero with ``int()``.
      Previously these went through the string path below, which corrupted
      any float whose repr did not end in exactly ``".0"`` (for example
      ``3456789.123456`` became ``34567891234``) and dropped the last two
      digits of plain integers.
    * Anything else is converted with ``str()``; its last two characters
      are dropped and every ``'.'`` is removed before parsing, so
      ``"3.500.000 TL"`` yields ``3500000``.

    Args:
        price: A real number, or an object whose string form is a
            dot-grouped amount followed by a two-character suffix.

    Returns:
        The price as an ``int``.

    Raises:
        ValueError: If the remaining text is not a valid integer, or if
            ``price`` is NaN.
        OverflowError: If ``price`` is an infinite float.
    """
    # Local import keeps this block self-contained with respect to the
    # module-level import list.
    import numbers

    if isinstance(price, numbers.Real):
        return int(price)

    # String path: behaviour is intentionally identical to the original.
    without_suffix = str(price)[0:-2]
    return int(without_suffix.replace('.', ''))
19
+
20
+
21
def get_home_listings(selected_il, price_value):
    """Scrape up to ten emlakjet.com listings priced near ``price_value``.

    A headless Chrome session opens the emlakjet.com home page, types
    ``selected_il`` into the search box, fills the minimum/maximum price
    inputs with ``price_value`` -/+ 500000 and submits the search. It then
    visits the first ten result slots one by one and reads each detail page.

    Args:
        selected_il: Text typed into the site's search input (converted
            with ``str()``).
        price_value: Reference price; passed through ``format_price``
            before the search bounds are computed.

    Returns:
        A list of dicts, one per successfully scraped listing. Each dict
        holds the key/value pairs read from the listing's
        ``#ilan-hakkinda`` list plus the keys ``'url'``, ``'title'``,
        ``'resim_url'`` and ``'price'`` (an ``int``). Listings whose detail
        page cannot be read are reported with ``print`` and skipped.

    Raises:
        selenium.common.exceptions.TimeoutException: If one of the search
            form elements does not appear within 10 seconds.
        ValueError: If ``format_price`` cannot parse ``price_value``.
    """
    # Imported locally so the block does not rely on the module import list.
    from selenium.common.exceptions import TimeoutException

    search_panel = '//*[@id="headlessui-tabs-panel-:r9:"]'
    price_options = '//*[@id="headlessui-listbox-options-:ri:"]'
    results_container = '//*[@id="content-wrapper"]/div[1]/div[4]/div[2]/div[3]'

    # Parse the price before starting a browser so bad input fails fast.
    # The search covers 500,000 TL below and above the predicted value;
    # the lower bound is clamped so a negative price is never typed in.
    price_value = format_price(price_value)
    lower_bound = max(price_value - 500000, 0)
    upper_bound = price_value + 500000

    chrome_options = Options()
    for flag in (
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--remote-debugging-port=9222',
    ):
        chrome_options.add_argument(flag)
    chrome_options.binary_location = "/usr/bin/chromium"

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)

    data = []
    try:
        driver.get('https://www.emlakjet.com/')
        wait = WebDriverWait(driver, 10)

        search_input = wait.until(
            EC.presence_of_element_located(
                (By.XPATH, f'{search_panel}/div/div[2]/div/div/div/input')
            )
        )
        search_input.send_keys(str(selected_il))

        # Open the price drop-down so its min/max inputs are rendered.
        driver.find_element(
            By.XPATH, '//*[@id="headlessui-listbox-button-:rh:"]'
        ).click()

        min_price_input = wait.until(
            EC.presence_of_element_located(
                (By.XPATH, f'{price_options}/ul[1]/div[1]/div/div[1]/input')
            )
        )
        min_price_input.clear()
        min_price_input.send_keys(str(lower_bound))

        max_price_input = wait.until(
            EC.presence_of_element_located(
                (By.XPATH, f'{price_options}/ul[2]/div[1]/div/div[1]/input')
            )
        )
        max_price_input.clear()
        max_price_input.send_keys(str(upper_bound))

        find_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f'{search_panel}/div/div[5]/div/button')
            )
        )
        find_button.click()

        for position in range(1, 11):
            link_xpath = f'{results_container}/div[{position}]/div/a'
            try:
                link = wait.until(
                    EC.presence_of_element_located((By.XPATH, link_xpath))
                )
            except TimeoutException:
                # No link in this result slot: stop and keep what has been
                # collected instead of discarding it with an exception.
                break

            driver.get(link.get_attribute('href'))
            detail_url = driver.current_url

            try:
                WebDriverWait(driver, 3).until(
                    EC.presence_of_element_located((By.ID, "ilan-hakkinda"))
                )

                ul = driver.find_element(
                    By.XPATH, '//*[@id="ilan-hakkinda"]/div/div/ul'
                )
                details = {}
                for item in ul.find_elements(By.TAG_NAME, 'li'):
                    try:
                        key = item.find_element(
                            By.CLASS_NAME, 'styles_key__VqMhC'
                        ).text
                        value = item.find_element(
                            By.CLASS_NAME, 'styles_value__3QmL3'
                        ).text
                    except NoSuchElementException:
                        # Rows without a key/value pair are ignored.
                        continue
                    details[key] = value

                title = driver.find_element(
                    By.XPATH, '//*[@id="content-wrapper"]/div[2]/div[1]/div/h1'
                ).text
                resim_url = driver.find_element(
                    By.XPATH,
                    '//*[@id="content-wrapper"]/div[2]/div[2]/div[2]/img',
                ).get_attribute('src')
                fiyat = driver.find_element(
                    By.XPATH,
                    '//*[@id="genel-bakis"]/div[1]/div[1]/div[1]/div/span',
                ).text
                fiyat = int(fiyat.replace('.', '').replace('TL', ''))

                details['url'] = detail_url
                details['title'] = title
                details['resim_url'] = resim_url
                details['price'] = fiyat
                data.append(details)
            except TimeoutException:
                print(f"Listing details did not load: {detail_url}")
            except NoSuchElementException as e:
                print(f"Element not found: {e}")
            except Exception as e:
                # Best effort: one broken listing must not abort the rest.
                print(f"An error occurred: {e}")

            # Return to the result list before looking up the next slot.
            driver.execute_script("window.history.go(-1)")
    finally:
        # Always release the browser, even when the scrape fails midway.
        driver.quit()

    return data
requirements.txt ADDED
Binary file (5.46 kB). View file