"""Streamlit app that charts publishing activity from XML sitemaps.

The sidebar accepts one client sitemap URL and up to three competitor
sitemap URLs.  For every URL entered, the page shows the parsed sitemap
table, a monthly bar chart of ``lastmod`` dates, a day-of-week bar chart of
``lastmod`` dates, and the total number of URLs.
"""

# Importing libraries
import html

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from advertools import sitemap_to_df

# Colors for sitemaps, matched by position to the four sidebar inputs
colors = ['green', 'blue', 'red', 'brown']

# Labels for pandas' dayofweek codes 0..6 (Monday is 0)
WEEKDAY_LABELS = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']


def _monthly_counts(sitemap_data: pd.DataFrame) -> pd.Series:
    """Return the number of URLs per calendar month of ``lastmod``."""
    dated = sitemap_data.dropna(subset=['lastmod'])
    return dated.resample('M', on='lastmod').size()


def _weekday_counts(lastmod: pd.Series) -> pd.Series:
    """Return the number of URLs per weekday, always Mon..Sun.

    Weekdays with no URLs are filled with 0.  Without the reindex, a sitemap
    that never published on some weekday yields fewer than seven rows and
    assigning the seven labels raises a length-mismatch ``ValueError``.
    """
    counts = lastmod.dropna().dt.dayofweek.value_counts()
    counts = counts.reindex(range(7), fill_value=0)
    counts.index = WEEKDAY_LABELS
    return counts


def _plot_trend(trend_name: str, sitemap_url: str, trends: pd.Series, color: str) -> None:
    """Render one bar chart of ``trends`` under its own subheader."""
    st.subheader(f"{trend_name} Trends for {sitemap_url}")
    fig, ax = plt.subplots(figsize=(10, 6))  # Wider figure for readable charts
    try:
        ax.bar(trends.index, trends.values, color=color)
        ax.set_ylabel("Count")
        ax.set_title(f"{trend_name} Trends")
        st.pyplot(fig)
    finally:
        # Streamlit re-runs the script on every interaction; close the
        # figure so pyplot does not keep accumulating open figures.
        plt.close(fig)


# Title
st.title("Sitemap Analyzer")

# Sidebar instructions
st.sidebar.markdown("### How to use this Sitemap Analyzer")
st.sidebar.markdown("""
This sitemap analyzer shows you how many pages each domain has published over a period of time.
To use it, input the client's sitemap on "Input client sitemap here" and put up to 3 competitor sitemaps below it,
pressing enter after every time you put the sitemap URL.
""")
st.sidebar.markdown("You can use this tool to detect or guess where the sitemap of each domain can be: [Free Sitemap Finder & Checker Tool](https://seomator.com/sitemap-finder)")
st.sidebar.markdown("## Tool uploaded and maintained by: [Blazing SEO](http://blazing-seo.com/)")

# Sidebar inputs for up to 4 sitemaps (surrounding whitespace is ignored)
sitemap_urls = [
    url.strip()
    for url in (
        st.sidebar.text_input("Input client sitemap here:", ""),
        st.sidebar.text_input("Enter the competitor sitemap URL 1:", ""),
        st.sidebar.text_input("Enter the competitor sitemap URL 2:", ""),
        st.sidebar.text_input("Enter the competitor sitemap URL 3:", ""),
    )
]

# Displaying legend in the sidebar: each entered URL next to its chart color
st.sidebar.markdown("### Legend")
for sitemap_url, color in zip(sitemap_urls, colors):
    if sitemap_url:
        # The URL is user input rendered with unsafe_allow_html, so escape it.
        st.sidebar.markdown(
            f"{html.escape(sitemap_url)}: <span style='color:{color}'>&#9632;</span>",
            unsafe_allow_html=True,
        )

# Prompt only when nothing was entered at all
if not any(sitemap_urls):
    st.write("Please enter at least one sitemap URL to analyze.")

for sitemap_url, color in zip(sitemap_urls, colors):
    if not sitemap_url:
        continue
    # Broad catch on purpose: this is the UI boundary, and one failing
    # sitemap should be reported without stopping the remaining ones.
    try:
        # Fetching the sitemap data
        sitemap_data = sitemap_to_df(sitemap_url)
        has_lastmod = 'lastmod' in sitemap_data.columns
        if has_lastmod:
            sitemap_data['lastmod'] = pd.to_datetime(sitemap_data['lastmod'])

        # Displaying the DataFrame
        st.subheader(f"{sitemap_url}")
        st.dataframe(sitemap_data)

        # Plotting trends (excluding yearly trends)
        if has_lastmod:
            _plot_trend("Monthly", sitemap_url, _monthly_counts(sitemap_data), color)
            _plot_trend("Weekly", sitemap_url, _weekday_counts(sitemap_data['lastmod']), color)
        else:
            st.write("This sitemap has no lastmod dates, so trends cannot be plotted.")

        # Total number of URLs
        st.subheader(f"Total Number of URLs for {sitemap_url}")
        total_urls = len(sitemap_data)
        st.write(f"The total number of URLs in the sitemap is {total_urls}.")
    except Exception as e:
        st.write(f"An error occurred for {sitemap_url}:", str(e))

st.sidebar.markdown("Credits")
st.sidebar.markdown("[semrush.com/blog/content-analysis-xml-sitemaps-python](https://www.semrush.com/blog/content-analysis-xml-sitemaps-python/)")
st.sidebar.markdown("[Using XML Sitemaps for Content Analysis with Python](https://github.com/eliasdabbas/semrush_tutorial_sitemap_analysis/)")