Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -26,6 +26,52 @@ def llm(message):
|
|
26 |
except Exception as e:
|
27 |
return f"Error in response generation: {str(e)}"
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
def google_search(query):
|
30 |
headers = {
|
31 |
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
@@ -42,6 +88,10 @@ def google_search(query):
|
|
42 |
|
43 |
return first_div.text.strip()
|
44 |
|
|
|
|
|
|
|
|
|
45 |
demo = gr.Interface(
|
46 |
fn=google_search,
|
47 |
inputs=gr.Textbox(lines=1, placeholder="Geben Sie Ihre Suchanfrage ein..."),
|
|
|
26 |
except Exception as e:
|
27 |
return f"Error in response generation: {str(e)}"
|
28 |
|
29 |
+
def list_of_clubs(ort, bundesland="Bayern", timeout=10, default_last_page=10):
    """Collect club link texts for a town from vereine-in-deutschland.net.

    Fetches the town's listing page, reads the number of result pages from
    the pagination bar, then walks every page and gathers the text of the
    anchors inside the result container.

    Args:
        ort: Town name used as the last path segment of the listing URL.
        bundesland: Federal state path segment (defaults to "Bayern").
        timeout: Seconds to wait for each HTTP request before giving up.
        default_last_page: Page count assumed when the pagination link is
            missing or its href does not end in a number.

    Returns:
        A list of anchor texts on success. On any failure the tuple
        ``(error_message, [])`` is returned instead; this mixed shape is
        kept so existing callers see the same values as before.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    base_url = "https://vereine-in-deutschland.net"
    # Percent-encode the user-supplied segments so characters such as "/",
    # "?" or "#" cannot change which path is requested.
    state_segment = quote(str(bundesland), safe="")
    town_segment = quote(str(ort).strip(), safe="")
    listing_url = f"{base_url}/vereine/{state_segment}/{town_segment}"

    club_names = []

    try:
        response = requests.get(listing_url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Determine the last page from the 8th pagination item.
        last_page = default_last_page
        link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
        href = link_element.get('href') if link_element is not None else None
        if href:
            # Tolerate a trailing slash; fall back to the default when the
            # final segment is not a page number instead of aborting.
            try:
                last_page = int(href.rstrip('/').rsplit('/', 1)[-1])
            except ValueError:
                pass

        # Loop through all pages and collect the anchor texts.
        for page_number in range(1, last_page + 1):
            page_url = f"{listing_url}/p/{page_number}"
            response = requests.get(page_url, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            target_div = soup.select_one('div.row-cols-1:nth-child(4)')

            if target_div is None:
                print(f"Target div not found on page {page_number}")
                continue

            texts = [a.text for a in target_div.find_all('a', href=True)]
            # Keep every second anchor text, starting with the first
            # (presumably each entry renders two anchors - confirm against
            # the page markup). Slicing per page stops an odd anchor count
            # on one page from shifting the selection on later pages.
            club_names.extend(texts[0::2])

    except Exception as e:
        # Deliberate best-effort boundary: report the failure as a value
        # rather than raising.
        return str(e), []

    return club_names
|
68 |
+
|
69 |
+
def extract_vereinsname(url):
    """Derive a readable club name from the last path segment of a URL.

    Args:
        url: URL (or path) whose final segment is a hyphen-separated slug.

    Returns:
        The final segment with every "-" replaced by a space. Trailing
        slashes are ignored, so ".../fc-bayern/" yields "fc bayern" rather
        than an empty string.
    """
    # Strip trailing slashes first; otherwise the last split piece is "".
    slug = url.rstrip("/").rsplit("/", 1)[-1]
    return slug.replace("-", " ")
|
74 |
+
|
75 |
def google_search(query):
|
76 |
headers = {
|
77 |
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
|
|
88 |
|
89 |
return first_div.text.strip()
|
90 |
|
91 |
+
def process_ort(ort):
    """Return whatever list_of_clubs produces for the given town.

    Thin pass-through: the result of list_of_clubs(ort) is handed back
    unchanged.
    """
    return list_of_clubs(ort)
|
94 |
+
|
95 |
demo = gr.Interface(
|
96 |
fn=google_search,
|
97 |
inputs=gr.Textbox(lines=1, placeholder="Geben Sie Ihre Suchanfrage ein..."),
|