Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -169,18 +169,17 @@ sdg_colors = {
|
|
169 |
}
|
170 |
|
171 |
# Function to plot SDG dominant bar graphs using Plotly
|
172 |
-
|
|
|
173 |
"""
|
174 |
-
Plots a horizontal bar graph of SDG predictions and superimposes the icon of the most frequent SDG
|
175 |
-
|
176 |
Args:
|
177 |
df (pd.DataFrame): DataFrame containing SDG predictions.
|
178 |
title (str): Title of the plot.
|
179 |
pred_column (str): Column name to use for plotting (e.g., 'pred1').
|
180 |
-
analysis_level (str): Level of analysis ('pages' or 'sentences').
|
181 |
-
sdg_type (str): Type of SDG analysis ('primary' or 'secondary').
|
182 |
icons_folder (str): Path to the folder containing SDG icons.
|
183 |
-
|
184 |
Returns:
|
185 |
plotly.graph_objs._figure.Figure: The Plotly figure object.
|
186 |
"""
|
@@ -210,9 +209,6 @@ def plot_sdg(df, title, pred_column, analysis_level, sdg_type, icons_folder='ass
|
|
210 |
textfont=dict(size=10)
|
211 |
)
|
212 |
|
213 |
-
# Construct dynamic x-axis title
|
214 |
-
xaxis_title = f"Percentage of {analysis_level} aligned with {sdg_type.capitalize()} SDGs"
|
215 |
-
|
216 |
# Adjust layout for better visibility
|
217 |
fig.update_layout(
|
218 |
title=dict(
|
@@ -223,15 +219,14 @@ def plot_sdg(df, title, pred_column, analysis_level, sdg_type, icons_folder='ass
|
|
223 |
title=None,
|
224 |
tickfont=dict(size=12)
|
225 |
),
|
226 |
-
|
227 |
-
title=xaxis_title, # Dynamic x-axis title
|
228 |
-
tickfont=dict(size=12) # Reduce x-axis font size
|
229 |
-
),
|
230 |
-
margin=dict(l=20, r=30, t=100, b=20), # Adjusted margins
|
231 |
height=600,
|
232 |
#width=800,
|
233 |
showlegend=False,
|
234 |
template="simple_white",
|
|
|
|
|
|
|
235 |
)
|
236 |
|
237 |
# Identify the most frequent SDG
|
@@ -255,10 +250,10 @@ def plot_sdg(df, title, pred_column, analysis_level, sdg_type, icons_folder='ass
|
|
255 |
dict(
|
256 |
source='data:image/png;base64,' + encoded_image,
|
257 |
xref="paper", yref="paper",
|
258 |
-
x=0.
|
259 |
-
sizex=0.2, sizey=0.2, #
|
260 |
-
xanchor="
|
261 |
-
yanchor="
|
262 |
layer="above" # Ensure the icon is above other plot elements
|
263 |
)
|
264 |
)
|
@@ -311,9 +306,9 @@ def generate_page_report(df_pages, report_file_name):
|
|
311 |
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_page.jpeg"
|
312 |
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_page.jpeg"
|
313 |
|
314 |
-
plot_sdg(df_doc, "Primary SDGs", 'pred1'
|
315 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
316 |
-
plot_sdg(df_doc, "Secondary SDGs", 'pred2'
|
317 |
second_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
318 |
|
319 |
# Add plots to the Word document
|
@@ -363,9 +358,9 @@ def generate_sentence_report(df_sentences, report_file_name):
|
|
363 |
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_sentence.jpeg"
|
364 |
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_sentence.jpeg"
|
365 |
|
366 |
-
plot_sdg(df_doc, "Primary SDGs", 'pred1'
|
367 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
368 |
-
plot_sdg(df_doc, "Secondary SDGs", 'pred2'
|
369 |
second_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
370 |
|
371 |
# Add plots to the Word document
|
@@ -526,10 +521,9 @@ def launch_interface():
|
|
526 |
outputs=[start_page, end_page]
|
527 |
)
|
528 |
|
529 |
-
# SDG Analysis Type Section
|
530 |
-
gr.Markdown("## SDG Analysis Type")
|
531 |
-
|
532 |
# Main Tabs for Page-Level and Sentence-Level Analysis
|
|
|
|
|
533 |
with gr.Tab("π Page-Level Analysis"):
|
534 |
gr.Markdown(
|
535 |
"""
|
@@ -552,10 +546,10 @@ def launch_interface():
|
|
552 |
gr.Markdown(
|
553 |
"When the analysis is done, the Primary SDGs bar graph on the left will show "+
|
554 |
"the percentage of pages that strongly align with each SDG. The icon for the most frequent "+
|
555 |
-
"SDG will be highlighted above the graph. Download the Page Predictions
|
556 |
-
label="Note", container=True
|
557 |
)
|
558 |
-
|
559 |
gr.Markdown("##### Download Results")
|
560 |
with gr.Row():
|
561 |
page_csv = gr.File(label="π Download Page Predictions CSV")
|
@@ -568,8 +562,8 @@ def launch_interface():
|
|
568 |
gr.Markdown(
|
569 |
"When the analysis is done, the Secondary SDGs bar graph on the left will show "+
|
570 |
"SDGs that are not the primary focus of the pages analysed. These SDGs are second to the "+
|
571 |
-
"Primary SDGs. Download the
|
572 |
-
label="Note", container=True
|
573 |
)
|
574 |
|
575 |
gr.Markdown("##### Download Results")
|
@@ -600,10 +594,10 @@ def launch_interface():
|
|
600 |
gr.Markdown(
|
601 |
"When the analysis is done, the Primary SDGs bar graph on the left will show "+
|
602 |
"the percentage of sentences that strongly align with each SDG. The icon for the most frequent "+
|
603 |
-
"SDG will be highlighted above the graph. Download the Sentence Predictions
|
604 |
-
label="Note", container=True
|
605 |
)
|
606 |
-
|
607 |
gr.Markdown("##### Download Results")
|
608 |
with gr.Row():
|
609 |
sentence_csv = gr.File(label="π Download Sentence Predictions CSV")
|
@@ -616,10 +610,10 @@ def launch_interface():
|
|
616 |
gr.Markdown(
|
617 |
"When the analysis is done, the Secondary SDGs bar graph on the left will show "+
|
618 |
"SDGs that are not the primary focus of the sentences analysed. These SDGs are second to the "+
|
619 |
-
"Primary SDGs. Download the Sentence Predictions
|
620 |
-
label="Note", container=True
|
621 |
)
|
622 |
-
|
623 |
gr.Markdown("##### Download Results")
|
624 |
with gr.Row():
|
625 |
sentence_csv_secondary = gr.File(label="π Download Sentence Predictions CSV")
|
@@ -631,7 +625,7 @@ def launch_interface():
|
|
631 |
def process_pages(file, extraction_mode, start_page, end_page):
|
632 |
if not file:
|
633 |
# Return None for each output component
|
634 |
-
return [None
|
635 |
|
636 |
try:
|
637 |
if hasattr(file, 'name'):
|
@@ -668,18 +662,10 @@ def launch_interface():
|
|
668 |
|
669 |
# Generate plots with icon overlay
|
670 |
first_plot = plot_sdg(
|
671 |
-
df_page_predictions,
|
672 |
-
"π Primary SDGs",
|
673 |
-
'pred1',
|
674 |
-
analysis_level='pages', # Specify analysis level
|
675 |
-
sdg_type='primary' # Specify SDG type
|
676 |
)
|
677 |
second_plot = plot_sdg(
|
678 |
-
df_page_predictions,
|
679 |
-
"π Secondary SDGs",
|
680 |
-
'pred2',
|
681 |
-
analysis_level='pages', # Specify analysis level
|
682 |
-
sdg_type='secondary' # Specify SDG type
|
683 |
)
|
684 |
|
685 |
# Define output file names
|
@@ -687,7 +673,7 @@ def launch_interface():
|
|
687 |
page_report_file = f"{sanitized_file_name}_SDG-Page_report.docx"
|
688 |
primary_page_jpeg = f"{sanitized_file_name}_SDG-Page_primary_graph.jpeg"
|
689 |
|
690 |
-
|
691 |
page_report_file_secondary = f"{sanitized_file_name}_SDG-Page_report.docx"
|
692 |
secondary_page_jpeg = f"{sanitized_file_name}_SDG-Page_secondary_graph.jpeg"
|
693 |
|
@@ -703,26 +689,21 @@ def launch_interface():
|
|
703 |
save_figure_as_jpeg(second_plot, secondary_page_jpeg)
|
704 |
|
705 |
return (
|
706 |
-
first_plot,
|
707 |
-
|
708 |
-
|
709 |
-
page_report_file,
|
710 |
-
primary_page_jpeg,
|
711 |
-
page_csv_secondary,
|
712 |
-
page_report_file_secondary,
|
713 |
-
secondary_page_jpeg
|
714 |
)
|
715 |
|
716 |
except Exception as e:
|
717 |
print(f"Error: {e}")
|
718 |
-
return [None
|
719 |
|
720 |
# Function to process sentence-level analysis
|
721 |
@spaces.GPU
|
722 |
def process_sentences(file, extraction_mode, start_page, end_page):
|
723 |
if not file:
|
724 |
# Return None for each output component
|
725 |
-
return [None
|
726 |
|
727 |
try:
|
728 |
if hasattr(file, 'name'):
|
@@ -759,18 +740,10 @@ def launch_interface():
|
|
759 |
|
760 |
# Generate plots with icon overlay
|
761 |
first_plot = plot_sdg(
|
762 |
-
df_sentence_predictions,
|
763 |
-
"π Primary SDGs",
|
764 |
-
'pred1',
|
765 |
-
analysis_level='sentences', # Specify analysis level
|
766 |
-
sdg_type='primary' # Specify SDG type
|
767 |
)
|
768 |
second_plot = plot_sdg(
|
769 |
-
df_sentence_predictions,
|
770 |
-
"π Secondary SDGs",
|
771 |
-
'pred2',
|
772 |
-
analysis_level='sentences', # Specify analysis level
|
773 |
-
sdg_type='secondary' # Specify SDG type
|
774 |
)
|
775 |
|
776 |
# Define output file names
|
@@ -778,7 +751,7 @@ def launch_interface():
|
|
778 |
sentence_report_file = f"{sanitized_file_name}_SDG-Sentence_report.docx"
|
779 |
primary_sentence_jpeg = f"{sanitized_file_name}_SDG-Sentence_primary_graph.jpeg"
|
780 |
|
781 |
-
|
782 |
sentence_report_file_secondary = f"{sanitized_file_name}_SDG-Sentence_report.docx"
|
783 |
secondary_sentence_jpeg = f"{sanitized_file_name}_SDG-Sentence_secondary_graph.jpeg"
|
784 |
|
@@ -794,26 +767,21 @@ def launch_interface():
|
|
794 |
save_figure_as_jpeg(second_plot, secondary_sentence_jpeg)
|
795 |
|
796 |
return (
|
797 |
-
first_plot,
|
798 |
-
|
799 |
-
|
800 |
-
sentence_report_file,
|
801 |
-
primary_sentence_jpeg,
|
802 |
-
sentence_csv_secondary,
|
803 |
-
sentence_report_file_secondary,
|
804 |
-
secondary_sentence_jpeg
|
805 |
)
|
806 |
|
807 |
except Exception as e:
|
808 |
print(f"Error: {e}")
|
809 |
-
return [None
|
810 |
|
811 |
# Reset functions to clear the outputs
|
812 |
def reset_page_outputs():
|
813 |
-
return [None
|
814 |
|
815 |
def reset_sentence_outputs():
|
816 |
-
return [None
|
817 |
|
818 |
# Button actions for Page-Level Analysis
|
819 |
page_button.click(
|
@@ -877,4 +845,4 @@ def launch_interface():
|
|
877 |
|
878 |
demo.queue().launch()
|
879 |
|
880 |
-
launch_interface()
|
|
|
169 |
}
|
170 |
|
171 |
# Function to plot SDG dominant bar graphs using Plotly
|
172 |
+
# Draws a horizontal bar graph of SDG predictions and overlays the icon of the most frequent SDG
|
173 |
+
def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
|
174 |
"""
|
175 |
+
Plots a horizontal bar graph of SDG predictions and superimposes the icon of the most frequent SDG.
|
176 |
+
|
177 |
Args:
|
178 |
df (pd.DataFrame): DataFrame containing SDG predictions.
|
179 |
title (str): Title of the plot.
|
180 |
pred_column (str): Column name to use for plotting (e.g., 'pred1').
|
|
|
|
|
181 |
icons_folder (str): Path to the folder containing SDG icons.
|
182 |
+
|
183 |
Returns:
|
184 |
plotly.graph_objs._figure.Figure: The Plotly figure object.
|
185 |
"""
|
|
|
209 |
textfont=dict(size=10)
|
210 |
)
|
211 |
|
|
|
|
|
|
|
212 |
# Adjust layout for better visibility
|
213 |
fig.update_layout(
|
214 |
title=dict(
|
|
|
219 |
title=None,
|
220 |
tickfont=dict(size=12)
|
221 |
),
|
222 |
+
margin=dict(l=20, r=30, t=100, b=20), # Increased right margin for icon
|
|
|
|
|
|
|
|
|
223 |
height=600,
|
224 |
#width=800,
|
225 |
showlegend=False,
|
226 |
template="simple_white",
|
227 |
+
xaxis=dict(
|
228 |
+
tickfont=dict(size=12) # Reduce x-axis font size
|
229 |
+
),
|
230 |
)
|
231 |
|
232 |
# Identify the most frequent SDG
|
|
|
250 |
dict(
|
251 |
source='data:image/png;base64,' + encoded_image,
|
252 |
xref="paper", yref="paper",
|
253 |
+
x=0.4, y=1.2, # Positioning: slightly to the right and top
|
254 |
+
sizex=0.2, sizey=0.2, # Size of the icon
|
255 |
+
xanchor="left",
|
256 |
+
yanchor="top",
|
257 |
layer="above" # Ensure the icon is above other plot elements
|
258 |
)
|
259 |
)
|
|
|
306 |
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_page.jpeg"
|
307 |
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_page.jpeg"
|
308 |
|
309 |
+
plot_sdg(df_doc, "Primary SDGs", 'pred1').write_image(
|
310 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
311 |
+
plot_sdg(df_doc, "Secondary SDGs", 'pred2').write_image(
|
312 |
second_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
313 |
|
314 |
# Add plots to the Word document
|
|
|
358 |
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_sentence.jpeg"
|
359 |
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_sentence.jpeg"
|
360 |
|
361 |
+
plot_sdg(df_doc, "Primary SDGs", 'pred1').write_image(
|
362 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
363 |
+
plot_sdg(df_doc, "Secondary SDGs", 'pred2').write_image(
|
364 |
second_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
365 |
|
366 |
# Add plots to the Word document
|
|
|
521 |
outputs=[start_page, end_page]
|
522 |
)
|
523 |
|
|
|
|
|
|
|
524 |
# Main Tabs for Page-Level and Sentence-Level Analysis
|
525 |
+
gr.Markdown("## SDG Analysis Type")
|
526 |
+
|
527 |
with gr.Tab("π Page-Level Analysis"):
|
528 |
gr.Markdown(
|
529 |
"""
|
|
|
546 |
gr.Markdown(
|
547 |
"When the analysis is done, the Primary SDGs bar graph on the left will show "+
|
548 |
"the percentage of pages that strongly align with each SDG. The icon for the most frequent "+
|
549 |
+
"SDG will be highlighted above the graph. Download the Page Predictions CSV for further details.",
|
550 |
+
label = "Note", container=True
|
551 |
)
|
552 |
+
|
553 |
gr.Markdown("##### Download Results")
|
554 |
with gr.Row():
|
555 |
page_csv = gr.File(label="π Download Page Predictions CSV")
|
|
|
562 |
gr.Markdown(
|
563 |
"When the analysis is done, the Secondary SDGs bar graph on the left will show "+
|
564 |
"SDGs that are not the primary focus of the pages analysed. These SDGs are second to the "+
|
565 |
+
"Primary SDGs. Download the Page Predictions CSV for further details.",
|
566 |
+
label = "Note", container=True
|
567 |
)
|
568 |
|
569 |
gr.Markdown("##### Download Results")
|
|
|
594 |
gr.Markdown(
|
595 |
"When the analysis is done, the Primary SDGs bar graph on the left will show "+
|
596 |
"the percentage of sentences that strongly align with each SDG. The icon for the most frequent "+
|
597 |
+
"SDG will be highlighted above the graph. Download the Sentence Predictions CSV for further details.",
|
598 |
+
label = "Note", container=True
|
599 |
)
|
600 |
+
|
601 |
gr.Markdown("##### Download Results")
|
602 |
with gr.Row():
|
603 |
sentence_csv = gr.File(label="π Download Sentence Predictions CSV")
|
|
|
610 |
gr.Markdown(
|
611 |
"When the analysis is done, the Secondary SDGs bar graph on the left will show "+
|
612 |
"SDGs that are not the primary focus of the sentences analysed. These SDGs are second to the "+
|
613 |
+
"Primary SDGs. Download the Sentence Predictions CSV for further details.",
|
614 |
+
label = "Note", container=True
|
615 |
)
|
616 |
+
|
617 |
gr.Markdown("##### Download Results")
|
618 |
with gr.Row():
|
619 |
sentence_csv_secondary = gr.File(label="π Download Sentence Predictions CSV")
|
|
|
625 |
def process_pages(file, extraction_mode, start_page, end_page):
|
626 |
if not file:
|
627 |
# Return None for each output component
|
628 |
+
return [None, None, None, None, None, None, None, None]
|
629 |
|
630 |
try:
|
631 |
if hasattr(file, 'name'):
|
|
|
662 |
|
663 |
# Generate plots with icon overlay
|
664 |
first_plot = plot_sdg(
|
665 |
+
df_page_predictions, "π Primary SDGs", 'pred1'
|
|
|
|
|
|
|
|
|
666 |
)
|
667 |
second_plot = plot_sdg(
|
668 |
+
df_page_predictions, "π Secondary SDGs", 'pred2'
|
|
|
|
|
|
|
|
|
669 |
)
|
670 |
|
671 |
# Define output file names
|
|
|
673 |
page_report_file = f"{sanitized_file_name}_SDG-Page_report.docx"
|
674 |
primary_page_jpeg = f"{sanitized_file_name}_SDG-Page_primary_graph.jpeg"
|
675 |
|
676 |
+
page_csv_file_secondary = f"{sanitized_file_name}_SDG-Page_predictions.csv"
|
677 |
page_report_file_secondary = f"{sanitized_file_name}_SDG-Page_report.docx"
|
678 |
secondary_page_jpeg = f"{sanitized_file_name}_SDG-Page_secondary_graph.jpeg"
|
679 |
|
|
|
689 |
save_figure_as_jpeg(second_plot, secondary_page_jpeg)
|
690 |
|
691 |
return (
|
692 |
+
first_plot, second_plot,
|
693 |
+
page_csv_file, page_report_file, primary_page_jpeg,
|
694 |
+
page_csv_file_secondary, page_report_file_secondary, secondary_page_jpeg
|
|
|
|
|
|
|
|
|
|
|
695 |
)
|
696 |
|
697 |
except Exception as e:
|
698 |
print(f"Error: {e}")
|
699 |
+
return [None, None, None, None, None, None, None, None]
|
700 |
|
701 |
# Function to process sentence-level analysis
|
702 |
@spaces.GPU
|
703 |
def process_sentences(file, extraction_mode, start_page, end_page):
|
704 |
if not file:
|
705 |
# Return None for each output component
|
706 |
+
return [None, None, None, None, None, None, None, None]
|
707 |
|
708 |
try:
|
709 |
if hasattr(file, 'name'):
|
|
|
740 |
|
741 |
# Generate plots with icon overlay
|
742 |
first_plot = plot_sdg(
|
743 |
+
df_sentence_predictions, "π Primary SDGs", 'pred1'
|
|
|
|
|
|
|
|
|
744 |
)
|
745 |
second_plot = plot_sdg(
|
746 |
+
df_sentence_predictions, "π Secondary SDGs", 'pred2'
|
|
|
|
|
|
|
|
|
747 |
)
|
748 |
|
749 |
# Define output file names
|
|
|
751 |
sentence_report_file = f"{sanitized_file_name}_SDG-Sentence_report.docx"
|
752 |
primary_sentence_jpeg = f"{sanitized_file_name}_SDG-Sentence_primary_graph.jpeg"
|
753 |
|
754 |
+
sentence_csv_file_secondary = f"{sanitized_file_name}_SDG-Sentence_predictions.csv"
|
755 |
sentence_report_file_secondary = f"{sanitized_file_name}_SDG-Sentence_report.docx"
|
756 |
secondary_sentence_jpeg = f"{sanitized_file_name}_SDG-Sentence_secondary_graph.jpeg"
|
757 |
|
|
|
767 |
save_figure_as_jpeg(second_plot, secondary_sentence_jpeg)
|
768 |
|
769 |
return (
|
770 |
+
first_plot, second_plot,
|
771 |
+
sentence_csv_file, sentence_report_file, primary_sentence_jpeg,
|
772 |
+
sentence_csv_file_secondary, sentence_report_file_secondary, secondary_sentence_jpeg
|
|
|
|
|
|
|
|
|
|
|
773 |
)
|
774 |
|
775 |
except Exception as e:
|
776 |
print(f"Error: {e}")
|
777 |
+
return [None, None, None, None, None, None, None, None]
|
778 |
|
779 |
# Reset functions to clear the outputs
|
780 |
def reset_page_outputs():
|
781 |
+
return [None, None, None, None, None, None, None, None]
|
782 |
|
783 |
def reset_sentence_outputs():
|
784 |
+
return [None, None, None, None, None, None, None, None]
|
785 |
|
786 |
# Button actions for Page-Level Analysis
|
787 |
page_button.click(
|
|
|
845 |
|
846 |
demo.queue().launch()
|
847 |
|
848 |
+
launch_interface()
|