# Hugging Face Space: Gradio app that scrapes a page's title and meta description.
import gradio as gr
import requests
from bs4 import BeautifulSoup
# Function to scrape a specific website with a User-Agent header
def scrape_website(url):
    """Fetch *url* and return its title and meta description as text.

    Never raises: request failures, non-200 responses, and parse
    problems are all returned as human-readable message strings so
    the Gradio UI can display them directly.
    """
    headers = {
        # Pretend to be a desktop Chrome browser; some sites reject
        # the default python-requests User-Agent outright.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # timeout keeps the UI from hanging forever on an unresponsive host.
        response = requests.get(url, headers=headers, allow_redirects=True, timeout=10)
        # Check if the response was successful
        if response.status_code != 200:
            return f"Failed to access {url} (status code: {response.status_code})"

        # Parse the page content
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract the title of the webpage; <title> may be missing on
        # malformed pages, so guard against find() returning None.
        title_tag = soup.find('title')
        title = title_tag.get_text() if title_tag else "No title available"

        # Extract the meta description if available
        meta_description = soup.find('meta', attrs={'name': 'description'})
        if meta_description:
            meta_description = meta_description.get('content')
        else:
            meta_description = "No meta description available"

        return f"Title: {title}\nMeta Description: {meta_description}"
    except Exception as e:
        # Boundary handler: surface any failure as text in the UI
        # instead of crashing the app.
        return f"An error occurred: {str(e)}"
# Gradio interface to input URL and display scraped content
with gr.Blocks() as demo:
    url_input = gr.Textbox(value="https://chatgpt.com", label="URL", placeholder="Enter URL")
    output = gr.Textbox(label="Scraped Data")
    submit_btn = gr.Button("Scrape Website")
    # Set the button action: url_input -> scrape_website -> output
    submit_btn.click(scrape_website, url_input, output)

# Guard the launch so importing this module doesn't start a server.
if __name__ == "__main__":
    demo.launch()