"""Streamlit app for exploring address similarity with character shingles.

Each address is broken into overlapping character k-grams ("shingles") and
two addresses are compared with the Jaccard index of their shingle sets.
"""

from collections import defaultdict
from typing import Sequence, Set

import numpy as np
import plotly.graph_objects as go
import streamlit as st
from sklearn.manifold import MDS

# NOTE(review): `defaultdict` and `MDS` are not referenced by the code in this
# file; they are kept in case something outside this view relies on them.

# Page configuration
st.set_page_config(
    page_title="Address Similarity Explorer",
    page_icon="🏠",
    layout="wide",
)

# Scores strictly above this value are reported as the same location.
# Shared by the explanatory text and the comparison so they cannot drift apart.
SAME_LOCATION_THRESHOLD = 0.4


class ShingleSimilarity:
    """Jaccard similarity over the character k-shingles of two strings."""

    def __init__(self, k: int = 3) -> None:
        """Store the shingle length.

        Args:
            k: Number of characters per shingle; must be at least 1.

        Raises:
            ValueError: If ``k`` is smaller than 1. A non-positive length
                would yield empty-string shingles and a meaningless score.
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def get_shingles(self, text: str) -> Set[str]:
        """Return the set of k-character substrings of the normalized text.

        The text is lower-cased and stripped of surrounding whitespace first.
        Text shorter than ``k`` (including the empty string) produces a
        single-element set holding the whole normalized text.
        """
        normalized = text.lower().strip()
        if len(normalized) < self.k:
            return {normalized}
        last_start = len(normalized) - self.k
        return {
            normalized[start:start + self.k]
            for start in range(last_start + 1)
        }

    def similarity(self, text1: str, text2: str) -> float:
        """Return ``|A ∩ B| / |A ∪ B|`` for the shingle sets of both texts.

        The result lies in ``[0.0, 1.0]``; 0.0 is returned if the union is
        empty.
        """
        shingles1 = self.get_shingles(text1)
        shingles2 = self.get_shingles(text2)
        union_size = len(shingles1 | shingles2)
        if union_size == 0:
            return 0.0
        return len(shingles1 & shingles2) / union_size


def create_similarity_visualization(
    texts: Sequence[str], shingle_sim: ShingleSimilarity
) -> go.Figure:
    """Build a heatmap of the pairwise similarity of ``texts``.

    Args:
        texts: Strings to compare; also used as the axis labels.
        shingle_sim: Object whose ``similarity(a, b)`` scores a pair.

    Returns:
        A Plotly figure with one annotated cell per ordered pair of texts.
    """
    n = len(texts)
    similarity_matrix = np.zeros((n, n))
    for i, row_text in enumerate(texts):
        for j, col_text in enumerate(texts):
            similarity_matrix[i, j] = shingle_sim.similarity(row_text, col_text)

    # Create visualization
    fig = go.Figure(
        data=go.Heatmap(
            z=similarity_matrix,
            x=texts,
            y=texts,
            colorscale='Viridis',
            # Show the rounded score inside every cell.
            text=np.round(similarity_matrix, 4),
            texttemplate='%{text}',
            textfont={"size": 12},
        )
    )
    fig.update_layout(
        title="Address Similarity Matrix",
        height=600,
        width=800,
    )
    return fig


def main() -> None:
    """Render the page: similarity heatmap plus an interactive comparison."""
    st.title("🏠 Address Similarity Analyzer")

    # Example addresses: several spellings of one address and one other.
    example_addresses = [
        "123 North Hampton Blvd",
        "123 N Hampton Blvd",
        "123 North Hampton Boulevard",
        "123 N. Hampton Blvd",
        "123 N. Hampton Boulevard",
        "65 South Hampton Blvd",
    ]

    st.markdown("### Address Similarity Analysis")
    st.info(
        f"If the similarity score is over {SAME_LOCATION_THRESHOLD}, "
        "addresses are considered to be the same location"
    )

    # Initialize shingle similarity with k=3
    shingle_sim = ShingleSimilarity(k=3)

    # Display similarity matrix
    fig = create_similarity_visualization(example_addresses, shingle_sim)
    st.plotly_chart(fig)

    # Interactive comparison
    st.markdown("### Compare Two Addresses")
    col1, col2 = st.columns(2)
    with col1:
        addr1 = st.selectbox("Select first address:", example_addresses)
    with col2:
        # Default to the second entry so the initial pair is not identical.
        addr2 = st.selectbox(
            "Select second address:", example_addresses, index=1
        )

    if st.button("Compare Addresses"):
        similarity = shingle_sim.similarity(addr1, addr2)
        st.metric(label="Similarity Score", value=f"{similarity:.4f}")

        is_same = similarity > SAME_LOCATION_THRESHOLD
        status = "✅ Same Location" if is_same else "❌ Different Locations"
        # Streamlit's `:color[text]` markdown only accepts real color names
        # (e.g. green, red); "success"/"error" are not among them.
        color = "green" if is_same else "red"
        st.markdown(f"**Status:** :{color}[{status}]")

        # Show shingles comparison
        shingles_col1, shingles_col2 = st.columns(2)
        with shingles_col1:
            st.markdown("**Shingles for Address 1:**")
            st.write(sorted(shingle_sim.get_shingles(addr1)))
        with shingles_col2:
            st.markdown("**Shingles for Address 2:**")
            st.write(sorted(shingle_sim.get_shingles(addr2)))


if __name__ == "__main__":
    main()