# acl-spectrum / app.py
# Hugging Face Space file view: last commit by ehsk, "Update app.py" (e18eaf4), 1.97 kB.
import os
import pandas as pd
import plotly.express as px
import streamlit as st
DATA_FILE = "data/anthology-2020-23_specter2_base.json"
def load_df(data_file: os.PathLike):
    """Read the JSON records in *data_file* into a plot-ready DataFrame.

    The ``point2d`` column is split into separate ``x`` (element 0) and ``y``
    (element 1) columns. When a ``publication_type`` column is present it is
    copied to ``type``. The source columns ``point2d`` and
    ``publication_type`` are removed from the returned frame.
    """
    frame = pd.read_json(data_file, orient="records")

    # Unpack the 2-D coordinate into one scalar column per axis.
    for position, axis_name in enumerate(("x", "y")):
        frame[axis_name] = frame["point2d"].apply(
            lambda point, position=position: point[position]
        )

    obsolete = ["point2d"]
    if "publication_type" in frame.columns:
        frame["type"] = frame["publication_type"]
        obsolete.append("publication_type")

    return frame.drop(columns=obsolete)
@st.cache_data
def load_dataframe():
    """Load the dataset stored at ``DATA_FILE`` through ``load_df``.

    Decorated with ``st.cache_data`` so Streamlit can reuse the loaded frame
    instead of re-reading the file on every script rerun.
    """
    frame = load_df(DATA_FILE)
    return frame
# ---------------------------------------------------------------------------
# Streamlit page: sidebar filters (venue, year range, authors) applied to the
# loaded papers, followed by a scatter plot of the remaining points.
# ---------------------------------------------------------------------------
DF = load_dataframe()

# Single source of truth for the venue filter: used as the options, as the
# default selection, and to detect "everything selected" below.
ALL_VENUES = ["ACL", "EMNLP", "NAACL", "TACL"]
YEAR_OPTIONS = ("2020", "2021", "2022", "2023")

with st.sidebar:
    venues = st.multiselect("Venues", ALL_VENUES, ALL_VENUES)
    start_year, end_year = st.select_slider(
        "Publication year",
        options=YEAR_OPTIONS,
        value=(YEAR_OPTIONS[0], YEAR_OPTIONS[-1]),
    )
    # NOTE(review): indentation was lost in the reviewed copy of this file;
    # the author box is assumed to sit in the sidebar with the other filters.
    author_names = st.text_input('Author names (separated by comma)')

# The slider yields the option strings; the "year" column is compared as int.
start_year = int(start_year)
end_year = int(end_year)
df = DF[(DF["year"] >= start_year) & (DF["year"] <= end_year)]

# Only filter by venue when the user has deselected at least one of them.
if len(venues) < len(ALL_VENUES):
    selected_venues = [v.lower() for v in venues]
    df = df[df["source"].isin(selected_venues)]

# Normalise the requested names and drop blank entries, so a trailing comma or
# whitespace-only input does not turn into an empty name that must "match".
authors = [a.strip().lower() for a in author_names.split(",")]
authors = [a for a in authors if a]
if authors:
    # NOTE(review): the query is lower-cased but the "authors" cells are used
    # as stored; this presumes the data is already lower-case -- confirm.
    author_mask = df["authors"].apply(lambda x: all(a in x for a in authors))
    # On an already-empty frame ``apply`` returns an object-dtype Series that
    # pandas would not treat as a boolean mask; force the dtype so the
    # selection always filters rows and keeps the columns.
    df = df[author_mask.astype(bool)]

st.write(f"Number of points: {df.shape[0]}")

# "type" only exists when the data carried it, so restrict the hover fields to
# the columns that are actually present instead of failing in plotly.
hover_columns = [
    column
    for column in ("title", "authors", "year", "source", "type")
    if column in df.columns
]

fig = px.scatter(
    df,
    x="x",
    y="y",
    color="cluster",
    width=1200,
    height=750,
    hover_data=hover_columns,
    color_continuous_scale="fall",
)
fig.update_layout(
    showlegend=False,
    font=dict(
        family="Times New Roman",
        size=30,
    ),
)
# The axes carry no meaning beyond position, so their titles are blanked.
fig.update_xaxes(title="")
fig.update_yaxes(title="")
st.plotly_chart(fig, use_container_width=True)