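# NOTE: the next three lines are non-code page-header text (avatar caption,
# commit message, commit hash); kept as comments so the module parses.
# DexterSptizu's picture
# Update app.py
# 47be852 verified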
import streamlit as st
import numpy as np
import plotly.graph_objects as go
from sklearn.manifold import MDS
from collections import defaultdict
# Page configuration. Kept at module top so it runs before any other
# Streamlit call made by this script.
st.set_page_config(
    page_title="Address Similarity Explorer",
    # The icon was a mis-decoded byte sequence; restored to the house emoji.
    page_icon="🏠",
    layout="wide",
)
class ShingleSimilarity:
    """Jaccard similarity between strings based on character k-shingles.

    A k-shingle is a contiguous substring of ``k`` characters. Two strings
    are compared through the Jaccard index (intersection size divided by
    union size) of their shingle sets.
    """

    def __init__(self, k: int = 3) -> None:
        """Store the shingle length.

        Args:
            k: Number of characters per shingle; must be at least 1.

        Raises:
            ValueError: If ``k`` is smaller than 1. A non-positive length
                would make every text collapse to meaningless shingles.
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def get_shingles(self, text: str) -> set:
        """Return the set of k-character shingles of ``text``.

        The text is lower-cased and stripped of surrounding whitespace
        before shingling. Text shorter than ``k`` characters yields a
        single shingle: the whole normalized text (possibly ``""``).

        Args:
            text: The string to split into shingles.

        Returns:
            A non-empty set of substrings.
        """
        normalized = text.lower().strip()
        if len(normalized) < self.k:
            return {normalized}
        last_start = len(normalized) - self.k
        return {normalized[i:i + self.k] for i in range(last_start + 1)}

    def similarity(self, text1: str, text2: str) -> float:
        """Return the Jaccard index of the shingle sets of two texts.

        Args:
            text1: First string to compare.
            text2: Second string to compare.

        Returns:
            A float in ``[0.0, 1.0]``; ``0.0`` if both shingle sets are empty.
        """
        first = self.get_shingles(text1)
        second = self.get_shingles(text2)
        union = first | second
        if not union:
            # Defensive guard against division by zero.
            return 0.0
        return len(first & second) / len(union)
def create_similarity_visualization(texts, shingle_sim):
    """Build a Plotly heatmap of the pairwise similarities between texts.

    Args:
        texts: Sequence of strings; used both as row and column labels.
        shingle_sim: Object exposing ``similarity(a, b)`` that returns a
            numeric score for a pair of strings.

    Returns:
        A ``go.Figure`` holding a single annotated heatmap trace.
    """
    count = len(texts)

    # Score every ordered pair in row-major order, then fold the flat
    # stream into a square matrix (rows and columns follow ``texts``).
    pair_scores = (
        shingle_sim.similarity(row_text, col_text)
        for row_text in texts
        for col_text in texts
    )
    scores = np.fromiter(pair_scores, dtype=float, count=count * count)
    scores = scores.reshape(count, count)

    # Each cell is annotated with its score rounded to four decimals.
    heatmap = go.Heatmap(
        z=scores,
        x=texts,
        y=texts,
        colorscale='Viridis',
        text=np.round(scores, 4),
        texttemplate='%{text}',
        textfont={"size": 12},
    )

    fig = go.Figure(data=heatmap)
    fig.update_layout(title="Address Similarity Matrix", height=600, width=800)
    return fig
def main():
    """Render the Streamlit page.

    Shows a similarity heatmap for a fixed list of example addresses and an
    interactive widget that compares two selected addresses, reporting the
    score, a same/different verdict, and the shingles of each address.
    """
    # Single source of truth for the verdict cut-off, used both in the
    # explanatory message and in the comparison below.
    same_location_threshold = 0.4

    st.title("🏠 Address Similarity Analyzer")

    # Example addresses shown in the heatmap and offered in the selectors.
    example_addresses = [
        "123 North Hampton Blvd",
        "123 N Hampton Blvd",
        "123 North Hampton Boulevard",
        "123 N. Hampton Blvd",
        "123 N. Hampton Boulevard",
        "65 South Hampton Blvd",
    ]

    st.markdown("### Address Similarity Analysis")
    st.info(
        f"If the similarity score is over {same_location_threshold}, "
        "addresses are considered to be the same location"
    )

    # Character 3-shingles are used for every comparison on the page.
    shingle_sim = ShingleSimilarity(k=3)

    # Pairwise similarity matrix for all example addresses.
    fig = create_similarity_visualization(example_addresses, shingle_sim)
    st.plotly_chart(fig)

    # Interactive comparison of two chosen addresses.
    st.markdown("### Compare Two Addresses")
    first_col, second_col = st.columns(2)
    with first_col:
        addr1 = st.selectbox("Select first address:", example_addresses)
    with second_col:
        # Default to the second entry so the initial pair is not identical.
        addr2 = st.selectbox("Select second address:", example_addresses, index=1)

    if st.button("Compare Addresses"):
        similarity = shingle_sim.similarity(addr1, addr2)
        st.metric(label="Similarity Score", value=f"{similarity:.4f}")

        # Streamlit's ":color[text]" markdown directive only accepts colour
        # names such as "green" and "red"; "success"/"error" are not
        # recognised and would be rendered as literal text.
        if similarity > same_location_threshold:
            color, status = "green", "✅ Same Location"
        else:
            color, status = "red", "❌ Different Locations"
        st.markdown(f"**Status:** :{color}[{status}]")

        # Side-by-side listing of the shingles behind the score.
        left, right = st.columns(2)
        with left:
            st.markdown("**Shingles for Address 1:**")
            st.write(sorted(shingle_sim.get_shingles(addr1)))
        with right:
            st.markdown("**Shingles for Address 2:**")
            st.write(sorted(shingle_sim.get_shingles(addr2)))
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()