File size: 6,172 Bytes
ca5f84f
62040f2
93f2984
b2ba4fb
9e7dfc2
f5d9f24
9e7dfc2
 
b2ba4fb
dc25526
b2ba4fb
9edd3cc
 
 
 
 
6cd0567
 
9edd3cc
6cd0567
9edd3cc
6cd0567
 
 
304b184
6cd0567
 
91487cc
4948ad8
 
 
 
 
 
d95af9b
4948ad8
6cd0567
9edd3cc
 
 
b2ba4fb
9edd3cc
 
 
 
 
 
 
 
2b93b91
8e85569
95503e4
 
 
 
cf15a41
e654979
 
cf15a41
e654979
 
cf15a41
e654979
95503e4
 
 
 
 
 
cf15a41
b764634
95503e4
b2ba4fb
 
09e15cd
b2ba4fb
 
1e4e902
b2ba4fb
 
 
 
 
ca5f84f
9edd3cc
 
 
2cdddfd
9edd3cc
 
 
 
 
 
6c2fae1
 
 
 
 
 
 
 
 
af406c2
6c2fae1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fea63a0
cd0f9aa
7b69f4d
d95af9b
e654979
 
8354006
3b22068
e6fea94
18d1c3d
7b69f4d
9edd3cc
fbd0747
 
f68fbd1
2d583fc
 
 
f68fbd1
2d583fc
99c69ca
 
2139770
ba251cc
99c69ca
 
 
 
10ae401
2d583fc
f68fbd1
1d394b2
 
 
 
ca5f84f
6cd0567
 
 
e6fea94
e4cd67b
6cd0567
db8fb45
6cd0567
 
 
 
f6d2b74
6cd0567
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import gradio as gr
import requests
from bs4 import BeautifulSoup
from gradio_client import Client
from urllib.parse import urljoin
import pandas as pd
from io import StringIO
import json
import groq
import os

# API credentials are read from environment variables (HF Space secrets).
# NOTE(review): google_api_key is never used in this file — presumably kept
# for a search feature elsewhere; confirm before removing.
google_api_key = os.getenv('google_search')
# Flowise prediction endpoint used by query() / google_search().
API_URL = "https://blavken-flowiseblav.hf.space/api/v1/prediction/fbc118dc-ec00-4b59-acff-600648958be3"
api_key = os.getenv('groq')
# Module-level Groq client used by llm().  Several functions below shadow the
# name ``client`` with local gradio_client.Client instances — intentional but
# easy to misread.
client = groq.Client(api_key=api_key)

# CSS injected into the Blocks app.  Targets elem_id="md", which is currently
# only referenced by the commented-out DataFrame in the UI section below.
custom_css = """
#md {
    height: 200px;
    font-size: 30px;
    background: #121212;
    padding: 20px;
    color: white;
    border: 1px solid white;  /* was "1 px" — invalid CSS, border never rendered */
    font-size: 10px;  /* duplicate kept on purpose: the later declaration wins */
}
"""

def perplexica_search(payloads):
    """Send *payloads* to the PerplexicaApi Gradio Space and return its answer.

    Uses the Space's "/question" endpoint in balanced optimization mode.
    """
    search_client = Client("mgokg/PerplexicaApi")
    answer = search_client.predict(
        prompt=f"{payloads}",
        optimization_mode="balanced",
        api_name="/question",
    )
    return answer

def query(payload):
    """POST *payload* as JSON to the Flowise endpoint and return the decoded reply."""
    resp = requests.post(API_URL, json=payload)
    return resp.json()

def google_search(payloads):
    """Ask the Flowise endpoint the question in *payloads* and return its JSON reply.

    The original body also accumulated ``texte`` by string concatenation in a
    loop and then discarded it (returning ``output`` instead) — that dead code
    has been removed; the returned value is unchanged.
    """
    output = query({
        "question": f"{payloads}",
    })
    return output

# JSON template embedded in LLM prompts so the model fills in exactly these keys.
scheme = """
{"name":"","email":"","website":""}
"""

def llama(messages):
    """Send *messages* to the selenium-screenshot-gradio Space and return its reply.

    Note: the original contained a second, unreachable Client call after the
    ``return`` (it also referenced an undefined name ``message``); that dead
    code has been removed with no behavior change.
    """
    llama_client = Client("mgokg/selenium-screenshot-gradio")
    result = llama_client.predict(
        message=f"{messages}",
        api_name="/predict",
    )
    return result

def llm(message):
    """Extract contact details from *message* via the Groq llama3 chat API.

    Wraps *message* in a JSON-extraction prompt and returns the model's raw
    response text, or an ``"Error in response generation: ..."`` string if the
    API call fails.
    """
    message = f'return a json object with the keys: name,email,phone,website \n the values can be found here, leave blank if value is not available:\n {message} \n return a json object only. no text, no explanaition'
    chat_history = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"{message}"},
    ]
    try:
        completion = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=chat_history,
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"

def qwen(jsondata):
    """Ask the Qwen2.5-72B Space to turn *jsondata* into per-club JSON records."""
    qwen_client = Client("Qwen/Qwen2.5-72B-Instruct")
    extraction_query = f'return a json object with the keys: name,email,phone,website for each verein \n the values can be found here, leave blank if value is not available:\n {jsondata} \n return a json object only. no text, no explanaition'
    return qwen_client.predict(
        query=extraction_query,
        history=[],
        system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
        api_name="/model_chat",
    )

def list_of_clubs(ort, bundesland="Bayern"):
    """Scrape vereine-in-deutschland.net and return the club names for *ort*.

    Reads the pagination widget on the first results page to find the last
    page number (fallback: 10), then walks every page and collects the anchor
    texts inside the results container.  ``bundesland`` is a new keyword
    parameter defaulting to "Bayern" (the previously hard-coded value), so
    existing callers are unaffected.

    Returns a list of club-name strings; an empty list on any request or
    parsing error.  (The original returned a ``(error_message, [])`` tuple on
    error, breaking the list contract the caller iterates over.)
    """
    base_url = "https://vereine-in-deutschland.net"
    all_links_text = []
    initial_url = f"{base_url}/vereine/{bundesland}/{ort}"

    try:
        response = requests.get(initial_url)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Determine the last page from the pagination widget; fall back to 10
        # if the selector no longer matches the site's markup.
        link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
        last_page = 10
        if link_element and 'href' in link_element.attrs:
            href = link_element['href']
            last_page = int(href.split('/')[-1])

        # Loop through all pages and collect the club links.
        for page_number in range(1, last_page + 1):
            page_url = f"{base_url}/vereine/{bundesland}/{ort}/p/{page_number}"
            response = requests.get(page_url)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            target_div = soup.select_one('div.row-cols-1:nth-child(4)')

            if target_div:
                texts = [a.text for a in target_div.find_all('a', href=True)]
                all_links_text.extend(texts)
            else:
                print(f"Target div not found on page {page_number}")

    except Exception as e:
        print(f"Error while scraping club list: {e}")
        return []

    # Each club appears twice in the markup (name + detail link), so keep
    # every other entry.
    return all_links_text[0::2]

def process_ort(ort):
    """Fetch the clubs for *ort* and run each club name through llama().

    Returns a list with one LLM result per club.

    Fixes vs. the original:
    - ``prompt = f"{verein}",`` had a trailing comma, making ``prompt`` a
      one-element tuple, so llama() was formatted with the tuple's repr
      (``('Name',)``) instead of the club name itself.
    - Everything after the first ``return vereine`` (a second loop calling a
      gemini Space, plus commented-out scraping code) was unreachable dead
      code and has been removed.
    """
    links_text = list_of_clubs(ort)
    vereine = []

    for verein in links_text:
        result = llama(verein)
        vereine.append(result)
        print(result)
    return vereine

# Create the Gradio interface: one output textbox, one input textbox, and a
# submit button wired to process_ort().
with gr.Blocks(css=custom_css) as demo:
    with gr.Row():
        #details_output = gr.DataFrame(label="Ausgabe", elem_id="md")        
        details_output = gr.Textbox(label="Ausgabe")  # shows the list returned by process_ort
    with gr.Row():
        ort_input = gr.Textbox(label="Ort eingeben", placeholder="ask anything...")      
    with gr.Row():         
        button = gr.Button("Senden")    

    # Connect the button to the function
    button.click(fn=process_ort, inputs=ort_input, outputs=details_output)   

# Launch the Gradio application
demo.launch()