Spaces:
Sleeping
Sleeping
minor cleanup
Browse files
app.py
CHANGED
@@ -14,22 +14,22 @@ def load_transform_dataset():
|
|
14 |
# Load the dataset and convert it to a Pandas dataframe
|
15 |
sotu_dataset = "jsulz/state-of-the-union-addresses"
|
16 |
dataset = load_dataset(sotu_dataset)
|
17 |
-
|
18 |
# Do some on-the-fly calculations
|
19 |
# calculate the number of words in each address
|
20 |
-
|
21 |
# calculate the automated readability index reading ease score for each address
|
22 |
# automated readability index = 4.71 * (characters/words) + 0.5 * (words/sentences) - 21.43
|
23 |
-
|
24 |
lambda x: (4.71 * (len(x.replace(" ", "")) / len(x.split())))
|
25 |
+ (0.5 * (len(x.split()) / len(x.split("."))))
|
26 |
- 21.43
|
27 |
)
|
28 |
# Sort the dataframe by date because Plotly doesn't do any of this automatically
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
return
|
33 |
|
34 |
|
35 |
"""
|
@@ -234,6 +234,7 @@ with gr.Blocks() as demo:
|
|
234 |
minimum=1, maximum=4, step=1, label="N-grams", interactive=True, value=1
|
235 |
)
|
236 |
|
|
|
237 |
df_state = gr.State(df)
|
238 |
|
239 |
# show a bar chart of the top n-grams for a selected president
|
|
|
14 |
# Load the dataset and convert it to a Pandas dataframe
|
15 |
sotu_dataset = "jsulz/state-of-the-union-addresses"
|
16 |
dataset = load_dataset(sotu_dataset)
|
17 |
+
_df = dataset["train"].to_pandas()
|
18 |
# Do some on-the-fly calculations
|
19 |
# calculate the number of words in each address
|
20 |
+
_df["word_count"] = _df["speech_html"].apply(lambda x: len(x.split()))
|
21 |
# calculate the automated readability index reading ease score for each address
|
22 |
# automated readability index = 4.71 * (characters/words) + 0.5 * (words/sentences) - 21.43
|
23 |
+
_df["ari"] = _df["no-contractions"].apply(
|
24 |
lambda x: (4.71 * (len(x.replace(" ", "")) / len(x.split())))
|
25 |
+ (0.5 * (len(x.split()) / len(x.split("."))))
|
26 |
- 21.43
|
27 |
)
|
28 |
# Sort the dataframe by date because Plotly doesn't do any of this automatically
|
29 |
+
_df = _df.sort_values(by="date")
|
30 |
+
_written = _df[_df["categories"] == "Written"]
|
31 |
+
_spoken = _df[_df["categories"] == "Spoken"]
|
32 |
+
return _df, _written, _spoken
|
33 |
|
34 |
|
35 |
"""
|
|
|
234 |
minimum=1, maximum=4, step=1, label="N-grams", interactive=True, value=1
|
235 |
)
|
236 |
|
237 |
+
# store the dataframe in a state object before passing to plots
|
238 |
df_state = gr.State(df)
|
239 |
|
240 |
# show a bar chart of the top n-grams for a selected president
|