Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -169,17 +169,18 @@ sdg_colors = {
|
|
169 |
}
|
170 |
|
171 |
# Function to plot SDG dominant bar graphs using Plotly
|
172 |
-
|
173 |
-
def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
|
174 |
"""
|
175 |
-
Plots a horizontal bar graph of SDG predictions and superimposes the icon of the most frequent SDG.
|
176 |
-
|
177 |
Args:
|
178 |
df (pd.DataFrame): DataFrame containing SDG predictions.
|
179 |
title (str): Title of the plot.
|
180 |
pred_column (str): Column name to use for plotting (e.g., 'pred1').
|
|
|
|
|
181 |
icons_folder (str): Path to the folder containing SDG icons.
|
182 |
-
|
183 |
Returns:
|
184 |
plotly.graph_objs._figure.Figure: The Plotly figure object.
|
185 |
"""
|
@@ -209,6 +210,9 @@ def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
|
|
209 |
textfont=dict(size=10)
|
210 |
)
|
211 |
|
|
|
|
|
|
|
212 |
# Adjust layout for better visibility
|
213 |
fig.update_layout(
|
214 |
title=dict(
|
@@ -219,14 +223,15 @@ def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
|
|
219 |
title=None,
|
220 |
tickfont=dict(size=12)
|
221 |
),
|
222 |
-
|
|
|
|
|
|
|
|
|
223 |
height=600,
|
224 |
#width=800,
|
225 |
showlegend=False,
|
226 |
template="simple_white",
|
227 |
-
xaxis=dict(
|
228 |
-
tickfont=dict(size=12) # Reduce x-axis font size
|
229 |
-
),
|
230 |
)
|
231 |
|
232 |
# Identify the most frequent SDG
|
@@ -250,10 +255,10 @@ def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
|
|
250 |
dict(
|
251 |
source='data:image/png;base64,' + encoded_image,
|
252 |
xref="paper", yref="paper",
|
253 |
-
x=0.
|
254 |
-
sizex=0.2, sizey=0.2, #
|
255 |
-
xanchor="
|
256 |
-
yanchor="
|
257 |
layer="above" # Ensure the icon is above other plot elements
|
258 |
)
|
259 |
)
|
@@ -306,9 +311,9 @@ def generate_page_report(df_pages, report_file_name):
|
|
306 |
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_page.jpeg"
|
307 |
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_page.jpeg"
|
308 |
|
309 |
-
plot_sdg(df_doc, "Primary SDGs", 'pred1').write_image(
|
310 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
311 |
-
plot_sdg(df_doc, "Secondary SDGs", 'pred2').write_image(
|
312 |
second_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
313 |
|
314 |
# Add plots to the Word document
|
@@ -358,9 +363,9 @@ def generate_sentence_report(df_sentences, report_file_name):
|
|
358 |
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_sentence.jpeg"
|
359 |
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_sentence.jpeg"
|
360 |
|
361 |
-
plot_sdg(df_doc, "Primary SDGs", 'pred1').write_image(
|
362 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
363 |
-
plot_sdg(df_doc, "Secondary SDGs", 'pred2').write_image(
|
364 |
second_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
365 |
|
366 |
# Add plots to the Word document
|
@@ -521,9 +526,10 @@ def launch_interface():
|
|
521 |
outputs=[start_page, end_page]
|
522 |
)
|
523 |
|
524 |
-
#
|
525 |
gr.Markdown("## SDG Analysis Type")
|
526 |
-
|
|
|
527 |
with gr.Tab("π Page-Level Analysis"):
|
528 |
gr.Markdown(
|
529 |
"""
|
@@ -546,10 +552,10 @@ def launch_interface():
|
|
546 |
gr.Markdown(
|
547 |
"When the analysis is done, the Primary SDGs bar graph on the left will show "+
|
548 |
"the percentage of pages that strongly align with each SDG. The icon for the most frequent "+
|
549 |
-
"SDG will be highlighted above the graph. Download the Page Predictions
|
550 |
-
label
|
551 |
)
|
552 |
-
|
553 |
gr.Markdown("##### Download Results")
|
554 |
with gr.Row():
|
555 |
page_csv = gr.File(label="π Download Page Predictions CSV")
|
@@ -562,8 +568,8 @@ def launch_interface():
|
|
562 |
gr.Markdown(
|
563 |
"When the analysis is done, the Secondary SDGs bar graph on the left will show "+
|
564 |
"SDGs that are not the primary focus of the pages analysed. These SDGs are second to the "+
|
565 |
-
"Primary SDGs. Download the
|
566 |
-
label
|
567 |
)
|
568 |
|
569 |
gr.Markdown("##### Download Results")
|
@@ -594,10 +600,10 @@ def launch_interface():
|
|
594 |
gr.Markdown(
|
595 |
"When the analysis is done, the Primary SDGs bar graph on the left will show "+
|
596 |
"the percentage of sentences that strongly align with each SDG. The icon for the most frequent "+
|
597 |
-
"SDG will be highlighted above the graph. Download the Sentence Predictions
|
598 |
-
label
|
599 |
)
|
600 |
-
|
601 |
gr.Markdown("##### Download Results")
|
602 |
with gr.Row():
|
603 |
sentence_csv = gr.File(label="π Download Sentence Predictions CSV")
|
@@ -610,10 +616,10 @@ def launch_interface():
|
|
610 |
gr.Markdown(
|
611 |
"When the analysis is done, the Secondary SDGs bar graph on the left will show "+
|
612 |
"SDGs that are not the primary focus of the sentences analysed. These SDGs are second to the "+
|
613 |
-
"Primary SDGs. Download the Sentence Predictions
|
614 |
-
label
|
615 |
)
|
616 |
-
|
617 |
gr.Markdown("##### Download Results")
|
618 |
with gr.Row():
|
619 |
sentence_csv_secondary = gr.File(label="π Download Sentence Predictions CSV")
|
@@ -662,10 +668,18 @@ def launch_interface():
|
|
662 |
|
663 |
# Generate plots with icon overlay
|
664 |
first_plot = plot_sdg(
|
665 |
-
df_page_predictions,
|
|
|
|
|
|
|
|
|
666 |
)
|
667 |
second_plot = plot_sdg(
|
668 |
-
df_page_predictions,
|
|
|
|
|
|
|
|
|
669 |
)
|
670 |
|
671 |
# Define output file names
|
@@ -689,9 +703,14 @@ def launch_interface():
|
|
689 |
save_figure_as_jpeg(second_plot, secondary_page_jpeg)
|
690 |
|
691 |
return (
|
692 |
-
first_plot,
|
693 |
-
|
694 |
-
|
|
|
|
|
|
|
|
|
|
|
695 |
)
|
696 |
|
697 |
except Exception as e:
|
@@ -740,10 +759,18 @@ def launch_interface():
|
|
740 |
|
741 |
# Generate plots with icon overlay
|
742 |
first_plot = plot_sdg(
|
743 |
-
df_sentence_predictions,
|
|
|
|
|
|
|
|
|
744 |
)
|
745 |
second_plot = plot_sdg(
|
746 |
-
df_sentence_predictions,
|
|
|
|
|
|
|
|
|
747 |
)
|
748 |
|
749 |
# Define output file names
|
@@ -767,9 +794,14 @@ def launch_interface():
|
|
767 |
save_figure_as_jpeg(second_plot, secondary_sentence_jpeg)
|
768 |
|
769 |
return (
|
770 |
-
first_plot,
|
771 |
-
|
772 |
-
|
|
|
|
|
|
|
|
|
|
|
773 |
)
|
774 |
|
775 |
except Exception as e:
|
@@ -778,10 +810,10 @@ def launch_interface():
|
|
778 |
|
779 |
# Reset functions to clear the outputs
|
780 |
def reset_page_outputs():
|
781 |
-
return [None
|
782 |
|
783 |
def reset_sentence_outputs():
|
784 |
-
return [None
|
785 |
|
786 |
# Button actions for Page-Level Analysis
|
787 |
page_button.click(
|
@@ -793,7 +825,7 @@ def launch_interface():
|
|
793 |
page_csv, # π Download Page Predictions CSV
|
794 |
page_docx, # π Download Page Report DOCX
|
795 |
page_jpeg1, # 🖼️ Download Primary SDGs JPEG
|
796 |
-
|
797 |
page_report_file_secondary, # π Download Page Report DOCX
|
798 |
secondary_page_jpeg # 🖼️ Download Secondary SDGs JPEG
|
799 |
]
|
@@ -807,7 +839,7 @@ def launch_interface():
|
|
807 |
page_csv,
|
808 |
page_docx,
|
809 |
page_jpeg1,
|
810 |
-
|
811 |
page_report_file_secondary,
|
812 |
secondary_page_jpeg
|
813 |
]
|
@@ -823,7 +855,7 @@ def launch_interface():
|
|
823 |
sentence_csv, # π Download Sentence Predictions CSV
|
824 |
sentence_docx, # π Download Sentence Report DOCX
|
825 |
sentence_jpeg1, # 🖼️ Download Primary SDGs JPEG
|
826 |
-
|
827 |
sentence_report_file_secondary, # π Download Sentence Report DOCX
|
828 |
secondary_sentence_jpeg # 🖼️ Download Secondary SDGs JPEG
|
829 |
]
|
@@ -837,7 +869,7 @@ def launch_interface():
|
|
837 |
sentence_csv,
|
838 |
sentence_docx,
|
839 |
sentence_jpeg1,
|
840 |
-
|
841 |
sentence_report_file_secondary,
|
842 |
secondary_sentence_jpeg
|
843 |
]
|
|
|
169 |
}
|
170 |
|
171 |
# Function to plot SDG dominant bar graphs using Plotly
|
172 |
+
def plot_sdg(df, title, pred_column, analysis_level, sdg_type, icons_folder='assets/icons/'):
|
|
|
173 |
"""
|
174 |
+
Plots a horizontal bar graph of SDG predictions and superimposes the icon of the most frequent SDG at the center.
|
175 |
+
|
176 |
Args:
|
177 |
df (pd.DataFrame): DataFrame containing SDG predictions.
|
178 |
title (str): Title of the plot.
|
179 |
pred_column (str): Column name to use for plotting (e.g., 'pred1').
|
180 |
+
analysis_level (str): Level of analysis ('pages' or 'sentences').
|
181 |
+
sdg_type (str): Type of SDG analysis ('primary' or 'secondary').
|
182 |
icons_folder (str): Path to the folder containing SDG icons.
|
183 |
+
|
184 |
Returns:
|
185 |
plotly.graph_objs._figure.Figure: The Plotly figure object.
|
186 |
"""
|
|
|
210 |
textfont=dict(size=10)
|
211 |
)
|
212 |
|
213 |
+
# Construct dynamic x-axis title
|
214 |
+
xaxis_title = f"Percentage of {analysis_level} aligned with {sdg_type.capitalize()} SDGs"
|
215 |
+
|
216 |
# Adjust layout for better visibility
|
217 |
fig.update_layout(
|
218 |
title=dict(
|
|
|
223 |
title=None,
|
224 |
tickfont=dict(size=12)
|
225 |
),
|
226 |
+
xaxis=dict(
|
227 |
+
title=xaxis_title, # Dynamic x-axis title
|
228 |
+
tickfont=dict(size=12) # Reduce x-axis font size
|
229 |
+
),
|
230 |
+
margin=dict(l=20, r=30, t=100, b=20), # Adjusted margins
|
231 |
height=600,
|
232 |
#width=800,
|
233 |
showlegend=False,
|
234 |
template="simple_white",
|
|
|
|
|
|
|
235 |
)
|
236 |
|
237 |
# Identify the most frequent SDG
|
|
|
255 |
dict(
|
256 |
source='data:image/png;base64,' + encoded_image,
|
257 |
xref="paper", yref="paper",
|
258 |
+
x=0.3, y=1.2, # Center of the plot
|
259 |
+
sizex=0.2, sizey=0.2, # Adjust size as needed
|
260 |
+
xanchor="center",
|
261 |
+
yanchor="middle",
|
262 |
layer="above" # Ensure the icon is above other plot elements
|
263 |
)
|
264 |
)
|
|
|
311 |
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_page.jpeg"
|
312 |
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_page.jpeg"
|
313 |
|
314 |
+
plot_sdg(df_doc, "Primary SDGs", 'pred1', analysis_level='pages', sdg_type='primary').write_image(
|
315 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
316 |
+
plot_sdg(df_doc, "Secondary SDGs", 'pred2', analysis_level='pages', sdg_type='secondary').write_image(
|
317 |
second_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
318 |
|
319 |
# Add plots to the Word document
|
|
|
363 |
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_sentence.jpeg"
|
364 |
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_sentence.jpeg"
|
365 |
|
366 |
+
plot_sdg(df_doc, "Primary SDGs", 'pred1', analysis_level='sentences', sdg_type='primary').write_image(
|
367 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
368 |
+
plot_sdg(df_doc, "Secondary SDGs", 'pred2', analysis_level='sentences', sdg_type='secondary').write_image(
|
369 |
second_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
370 |
|
371 |
# Add plots to the Word document
|
|
|
526 |
outputs=[start_page, end_page]
|
527 |
)
|
528 |
|
529 |
+
# SDG Analysis Type Section
|
530 |
gr.Markdown("## SDG Analysis Type")
|
531 |
+
|
532 |
+
# Main Tabs for Page-Level and Sentence-Level Analysis
|
533 |
with gr.Tab("π Page-Level Analysis"):
|
534 |
gr.Markdown(
|
535 |
"""
|
|
|
552 |
gr.Markdown(
|
553 |
"When the analysis is done, the Primary SDGs bar graph on the left will show "+
|
554 |
"the percentage of pages that strongly align with each SDG. The icon for the most frequent "+
|
555 |
+
"SDG will be highlighted above the graph. Download the Page Predictions CSV for further details.",
|
556 |
+
label="Note", container=True
|
557 |
)
|
558 |
+
|
559 |
gr.Markdown("##### Download Results")
|
560 |
with gr.Row():
|
561 |
page_csv = gr.File(label="π Download Page Predictions CSV")
|
|
|
568 |
gr.Markdown(
|
569 |
"When the analysis is done, the Secondary SDGs bar graph on the left will show "+
|
570 |
"SDGs that are not the primary focus of the pages analysed. These SDGs are second to the "+
|
571 |
+
"Primary SDGs. Download the Page Predictions CSV for further details.",
|
572 |
+
label="Note", container=True
|
573 |
)
|
574 |
|
575 |
gr.Markdown("##### Download Results")
|
|
|
600 |
gr.Markdown(
|
601 |
"When the analysis is done, the Primary SDGs bar graph on the left will show "+
|
602 |
"the percentage of sentences that strongly align with each SDG. The icon for the most frequent "+
|
603 |
+
"SDG will be highlighted above the graph. Download the Sentence Predictions CSV for further details.",
|
604 |
+
label="Note", container=True
|
605 |
)
|
606 |
+
|
607 |
gr.Markdown("##### Download Results")
|
608 |
with gr.Row():
|
609 |
sentence_csv = gr.File(label="π Download Sentence Predictions CSV")
|
|
|
616 |
gr.Markdown(
|
617 |
"When the analysis is done, the Secondary SDGs bar graph on the left will show "+
|
618 |
"SDGs that are not the primary focus of the sentences analysed. These SDGs are second to the "+
|
619 |
+
"Primary SDGs. Download the Sentence Predictions CSV for further details.",
|
620 |
+
label="Note", container=True
|
621 |
)
|
622 |
+
|
623 |
gr.Markdown("##### Download Results")
|
624 |
with gr.Row():
|
625 |
sentence_csv_secondary = gr.File(label="π Download Sentence Predictions CSV")
|
|
|
668 |
|
669 |
# Generate plots with icon overlay
|
670 |
first_plot = plot_sdg(
|
671 |
+
df_page_predictions,
|
672 |
+
"π Primary SDGs",
|
673 |
+
'pred1',
|
674 |
+
analysis_level='pages', # Specify analysis level
|
675 |
+
sdg_type='primary' # Specify SDG type
|
676 |
)
|
677 |
second_plot = plot_sdg(
|
678 |
+
df_page_predictions,
|
679 |
+
"π Secondary SDGs",
|
680 |
+
'pred2',
|
681 |
+
analysis_level='pages', # Specify analysis level
|
682 |
+
sdg_type='secondary' # Specify SDG type
|
683 |
)
|
684 |
|
685 |
# Define output file names
|
|
|
703 |
save_figure_as_jpeg(second_plot, secondary_page_jpeg)
|
704 |
|
705 |
return (
|
706 |
+
first_plot,
|
707 |
+
second_plot,
|
708 |
+
page_csv_file,
|
709 |
+
page_report_file,
|
710 |
+
primary_page_jpeg,
|
711 |
+
page_csv_file_secondary,
|
712 |
+
page_report_file_secondary,
|
713 |
+
secondary_page_jpeg
|
714 |
)
|
715 |
|
716 |
except Exception as e:
|
|
|
759 |
|
760 |
# Generate plots with icon overlay
|
761 |
first_plot = plot_sdg(
|
762 |
+
df_sentence_predictions,
|
763 |
+
"π Primary SDGs",
|
764 |
+
'pred1',
|
765 |
+
analysis_level='sentences', # Specify analysis level
|
766 |
+
sdg_type='primary' # Specify SDG type
|
767 |
)
|
768 |
second_plot = plot_sdg(
|
769 |
+
df_sentence_predictions,
|
770 |
+
"π Secondary SDGs",
|
771 |
+
'pred2',
|
772 |
+
analysis_level='sentences', # Specify analysis level
|
773 |
+
sdg_type='secondary' # Specify SDG type
|
774 |
)
|
775 |
|
776 |
# Define output file names
|
|
|
794 |
save_figure_as_jpeg(second_plot, secondary_sentence_jpeg)
|
795 |
|
796 |
return (
|
797 |
+
first_plot,
|
798 |
+
second_plot,
|
799 |
+
sentence_csv_file,
|
800 |
+
sentence_report_file,
|
801 |
+
primary_sentence_jpeg,
|
802 |
+
sentence_csv_file_secondary,
|
803 |
+
sentence_report_file_secondary,
|
804 |
+
secondary_sentence_jpeg
|
805 |
)
|
806 |
|
807 |
except Exception as e:
|
|
|
810 |
|
811 |
# Reset functions to clear the outputs
|
812 |
def reset_page_outputs():
|
813 |
+
return [None] * 8
|
814 |
|
815 |
def reset_sentence_outputs():
|
816 |
+
return [None] * 8
|
817 |
|
818 |
# Button actions for Page-Level Analysis
|
819 |
page_button.click(
|
|
|
825 |
page_csv, # π Download Page Predictions CSV
|
826 |
page_docx, # π Download Page Report DOCX
|
827 |
page_jpeg1, # 🖼️ Download Primary SDGs JPEG
|
828 |
+
page_csv_file_secondary, # π Download Page Predictions CSV
|
829 |
page_report_file_secondary, # π Download Page Report DOCX
|
830 |
secondary_page_jpeg # 🖼️ Download Secondary SDGs JPEG
|
831 |
]
|
|
|
839 |
page_csv,
|
840 |
page_docx,
|
841 |
page_jpeg1,
|
842 |
+
page_csv_file_secondary,
|
843 |
page_report_file_secondary,
|
844 |
secondary_page_jpeg
|
845 |
]
|
|
|
855 |
sentence_csv, # π Download Sentence Predictions CSV
|
856 |
sentence_docx, # π Download Sentence Report DOCX
|
857 |
sentence_jpeg1, # 🖼️ Download Primary SDGs JPEG
|
858 |
+
sentence_csv_file_secondary, # π Download Sentence Predictions CSV
|
859 |
sentence_report_file_secondary, # π Download Sentence Report DOCX
|
860 |
secondary_sentence_jpeg # 🖼️ Download Secondary SDGs JPEG
|
861 |
]
|
|
|
869 |
sentence_csv,
|
870 |
sentence_docx,
|
871 |
sentence_jpeg1,
|
872 |
+
sentence_csv_file_secondary,
|
873 |
sentence_report_file_secondary,
|
874 |
secondary_sentence_jpeg
|
875 |
]
|