Spaces:
Sleeping
Sleeping
File size: 1,770 Bytes
0a6201c 1c57ff6 0a6201c 1dfc2a4 1c57ff6 1dfc2a4 1c57ff6 e5f7529 1c57ff6 e5f7529 1c57ff6 e5f7529 1c57ff6 0a6201c 1c57ff6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import gradio as gr
import requests
from bs4 import BeautifulSoup
# Function to scrape a specific website with a User-Agent header
def scrape_website(url):
    """Fetch *url* and return its page title and meta description.

    Returns a human-readable string for display in the UI: the title and
    description on success, or an error message on HTTP failure or any
    raised exception.
    """
    try:
        # Spoof a desktop browser UA: many sites block the default
        # python-requests User-Agent.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # Send a request to the website with the headers.
        # timeout prevents the UI from hanging forever on a dead host.
        response = requests.get(url, headers=headers, allow_redirects=True, timeout=15)
        # Check if the response was successful
        if response.status_code == 200:
            # Parse the page content
            soup = BeautifulSoup(response.content, 'html.parser')
            # Extract the title of the webpage; pages without a <title>
            # tag would otherwise raise AttributeError on .get_text().
            title_tag = soup.find('title')
            title = title_tag.get_text() if title_tag else "No title available"
            # Extract the meta description if available
            meta_description = soup.find('meta', attrs={'name': 'description'})
            if meta_description:
                meta_description = meta_description.get('content')
            else:
                meta_description = "No meta description available"
            return f"Title: {title}\nMeta Description: {meta_description}"
        else:
            return f"Failed to access {url} (status code: {response.status_code})"
    except Exception as e:
        # Broad catch is deliberate: this string is rendered directly in
        # the Gradio output box instead of crashing the app.
        return f"An error occurred: {str(e)}"
# Gradio interface to input URL and display scraped content.
with gr.Blocks() as demo:
    # Components render in definition order: URL box, result box, button.
    target_url = gr.Textbox(value="https://chatgpt.com", label="URL", placeholder="Enter URL")
    result_box = gr.Textbox(label="Scraped Data")
    scrape_button = gr.Button("Scrape Website")
    # Wire the button to the scraper: URL in, scraped summary out.
    scrape_button.click(scrape_website, target_url, result_box)

demo.launch()
|