# Spaces: Sleeping
from collections import Counter

import gradio as gr
import pandas as pd
from datasets import load_dataset
from nltk.util import ngrams
# Fetch the dataset named by ``sotu_dataset`` and turn its "train" split into
# a pandas DataFrame that the rest of the script works from.
sotu_dataset = 'jsulz/state-of-the-union-addresses'
dataset = load_dataset(sotu_dataset)
df = dataset['train'].to_pandas()

# Per-speech size: number of whitespace-separated tokens in ``speech_html``.
df['word_count'] = df['speech_html'].map(lambda speech: len(speech.split()))

# Row subsets keyed on the ``categories`` column.
written = df.loc[df['categories'] == 'Written']
spoken = df.loc[df['categories'] == 'Spoken']
def _top_trigrams(speeches, limit=20):
    """Return the ``limit`` most frequent trigrams across ``speeches``.

    Args:
        speeches: DataFrame with a ``tokens-nostop`` column whose cells are
            iterables of tokens.
        limit: Maximum number of ``(trigram, count)`` pairs to return.

    Returns:
        A list of ``(trigram_tuple, count)`` pairs, most frequent first.
        The list is empty when ``speeches`` has no rows or no speech has at
        least three tokens.
    """
    # One Counter updated in place; summing per-speech Counters would build a
    # fresh Counter for every addition and yields the int 0 on an empty frame.
    tally = Counter()
    for tokens in speeches["tokens-nostop"]:
        tally.update(ngrams(tokens, 3))
    return tally.most_common(limit)


# Create a Gradio interface with blocks
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # A Dashboard to Analyze the State of the Union Addresses
        """)
    # Unique president names, as a plain list so it can be concatenated with
    # the "All" choice below.
    presidents = df['potus'].unique().tolist()
    # Dropdown that drives every dynamic section of the dashboard.
    president = gr.Dropdown(label="Select a President", choices=["All"] + presidents)

    # Each function below builds components in its body, so it has to be
    # registered with ``gr.render`` to be run with the dropdown's value;
    # a plain nested ``def`` is never called and would render nothing.
    # NOTE(review): the source lost its indentation, so which functions sit
    # inside which ``gr.Row()`` is reconstructed from statement order only.
    with gr.Row():
        @gr.render(inputs=president)
        def show_text(potus):
            """Render a one-line blurb for the selected president.

            Nothing is rendered while no president is selected (``None``).
            """
            # NOTE(review): the sentence below is emitted for every selection
            # (including "All"); it reads like placeholder copy. Left
            # unchanged pending real content.
            if potus is not None:
                gr.Markdown(f"{potus} was the first president of the United States.")

        @gr.render(inputs=president)
        def word_length_bar(potus):
            """Render a bar plot of ``word_count`` by ``date``.

            "All" plots every speech; any other value plots only the rows
            whose ``potus`` equals the selection. Nothing is rendered while
            no president is selected (``None``).
            """
            if potus is None:
                return
            speeches = df if potus == "All" else df[df['potus'] == potus]
            gr.BarPlot(speeches, x="date", y="word_count", title="Total Number of Words in the Speeches")

    with gr.Row():
        @gr.render(inputs=president)
        def ngram_bar(potus):
            """Render a bar plot of the 20 most common trigrams.

            "All" counts trigrams over every speech; any other value counts
            only the selected president's speeches. Nothing is rendered when
            no president is selected or when no trigrams are found.
            """
            if potus is None:
                return
            speeches = df if potus == "All" else df[df["potus"] == potus]
            common_trigrams = _top_trigrams(speeches)
            if not common_trigrams:
                # zip(*[]) cannot be unpacked into two names; skip the plot.
                return
            # Split the (trigram, count) pairs into parallel sequences.
            trigrams, counts = zip(*common_trigrams)
            trigrams_df = pd.DataFrame(
                {
                    # Join each token triple into one readable axis label.
                    "trigrams": [" ".join(trigram) for trigram in trigrams],
                    "counts": counts,
                }
            )
            gr.BarPlot(trigrams_df, x="trigrams", y="counts", title="Most Common Trigrams")

demo.launch()