mgokg commited on
Commit
6c2fae1
·
verified ·
1 Parent(s): 9e7dfc2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py CHANGED
@@ -26,6 +26,52 @@ def llm(message):
26
  except Exception as e:
27
  return f"Error in response generation: {str(e)}"
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def google_search(query):
30
  headers = {
31
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
@@ -42,6 +88,10 @@ def google_search(query):
42
 
43
  return first_div.text.strip()
44
 
 
 
 
 
45
  demo = gr.Interface(
46
  fn=google_search,
47
  inputs=gr.Textbox(lines=1, placeholder="Geben Sie Ihre Suchanfrage ein..."),
 
26
  except Exception as e:
27
  return f"Error in response generation: {str(e)}"
28
 
29
def list_of_clubs(ort):
    """Scrape club names for a Bavarian town from vereine-in-deutschland.net.

    Walks every pagination page under ``/vereine/Bayern/<ort>`` and collects
    the anchor texts found in the listing container on each page.

    Args:
        ort: Town name exactly as it appears in the site's URL path.

    Returns:
        On success: a list of club-name strings (possibly empty).
        On failure: an ``(error_message, [])`` tuple — inconsistent with the
        success path's list, but preserved for backward compatibility with
        existing callers.
    """
    base_url = "https://vereine-in-deutschland.net"
    all_links_text = []
    initial_url = f"{base_url}/vereine/Bayern/{ort}"

    try:
        # Timeout prevents the request from hanging indefinitely.
        response = requests.get(initial_url, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Determine the last page from the pagination widget; fall back to
        # 10 pages when the selector misses or the href is not numeric.
        last_page = 10
        link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
        if link_element and 'href' in link_element.attrs:
            try:
                last_page = int(link_element['href'].split('/')[-1])
            except ValueError:
                pass  # non-numeric href — keep the default page count

        # Loop through all pages and collect the link texts.
        for page_number in range(1, last_page + 1):
            page_url = f"{base_url}/vereine/Bayern/{ort}/p/{page_number}"
            response = requests.get(page_url, timeout=15)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            target_div = soup.select_one('div.row-cols-1:nth-child(4)')

            if target_div:
                texts = [a.text for a in target_div.find_all('a', href=True)]
                all_links_text.extend(texts)
            else:
                print(f"Target div not found on page {page_number}")

    except Exception as e:
        # NOTE(review): returning a tuple here while the success path returns
        # a list is inconsistent; kept as-is so existing callers don't break.
        return str(e), []

    # Presumably each club contributes two anchors (name + detail link), so
    # every other entry is kept — TODO confirm against the live markup.
    all_links_text = all_links_text[0::2]

    return all_links_text
68
+
69
def extract_vereinsname(url):
    """Derive a readable club name from a club-detail URL.

    Takes the final path segment of *url* and turns its dashes into spaces.
    """
    slug = url.rsplit('/', 1)[-1]
    return slug.replace("-", " ")
74
+
75
  def google_search(query):
76
  headers = {
77
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
 
88
 
89
  return first_div.text.strip()
90
 
91
def process_ort(ort):
    """Gradio handler: forward *ort* to list_of_clubs and return its result."""
    return list_of_clubs(ort)
94
+
95
  demo = gr.Interface(
96
  fn=google_search,
97
  inputs=gr.Textbox(lines=1, placeholder="Geben Sie Ihre Suchanfrage ein..."),