Spaces:

blazingbunny
/

free-seo-headers-audit-tool-advertools

Running

Update app.py

c2d57f1 verified 4 months ago

1.24 kB

	import streamlit as st
	import advertools as adv
	import pandas as pd

	def extract_headers(url):
	try:
	# Define the output file path
	output_file = "crawl_output.jl"

	# Perform the crawl
	adv.crawl(url, output_file=output_file, follow_links=True)

	# Load the crawl data
	crawl_df = pd.read_json(output_file, lines=True)

	# Extract headers from h1 to h6
	headers_columns = [col for col in crawl_df.columns if col.startswith('h')]
	headers = crawl_df[headers_columns].apply(lambda x: x.str.split('@@').explode()).dropna().reset_index(drop=True)

	return headers

	except Exception as e:
	return str(e)

	def main():
	st.title("Web Page Header Extractor")

	url = st.text_input("Enter the URL of the web page:")
	if st.button("Extract Headers"):
	if url:
	headers = extract_headers(url)
	if isinstance(headers, pd.DataFrame) and not headers.empty:
	st.write("Extracted Headers:")
	st.write(headers)
	else:
	st.error("No headers found or an error occurred.")
	else:
	st.error("Please enter a valid URL.")

	if __name__ == "__main__":
	main()