Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -282,14 +282,14 @@ def generate_page_report(df_pages, report_file_name):
|
|
282 |
'(Primary and Secondary) for each page with a probability score greater than zero.'
|
283 |
)
|
284 |
|
285 |
-
doc.add_heading("
|
286 |
doc.add_paragraph(
|
287 |
'This graph displays the most essential SDG the AI model associates with pages. The bars '
|
288 |
'represent the percentage of pages most strongly aligned with each SDG. This offers insight into the dominant '
|
289 |
'sustainable development theme within the document.'
|
290 |
)
|
291 |
|
292 |
-
doc.add_heading("
|
293 |
doc.add_paragraph(
|
294 |
'This graph shows the second most relevant SDGs for pages. Although these SDGs are '
|
295 |
'not the primary focus, the text has some relevance to these goals.'
|
@@ -334,14 +334,14 @@ def generate_sentence_report(df_sentences, report_file_name):
|
|
334 |
'(Primary and Secondary) for each sentence with a probability score greater than zero.'
|
335 |
)
|
336 |
|
337 |
-
doc.add_heading("
|
338 |
doc.add_paragraph(
|
339 |
'This graph displays the most essential SDG the AI model associates with sentences. The bars '
|
340 |
'represent the percentage of sentences most strongly aligned with each SDG. This offers more profound insight '
|
341 |
'into the dominant sustainable development theme within the document.'
|
342 |
)
|
343 |
|
344 |
-
doc.add_heading("
|
345 |
doc.add_paragraph(
|
346 |
'This graph shows the second most relevant SDGs for sentences. Although these SDGs are not '
|
347 |
'the primary focus, the text has some relevance to these goals.'
|
@@ -483,6 +483,7 @@ def launch_interface():
|
|
483 |
)
|
484 |
|
485 |
# Shared PDF file input for both analyses
|
|
|
486 |
with gr.Row():
|
487 |
file_input = gr.File(
|
488 |
label="π Upload PDF File for Analysis", file_types=[".pdf"]
|
@@ -492,7 +493,8 @@ def launch_interface():
|
|
492 |
gr.Markdown(
|
493 |
"""
|
494 |
## PDF Text Extraction Mode
|
495 |
-
Choose whether to analyze all pages or a specific range of pages. If you want to exclude certain pages from the analysis, select
|
|
|
496 |
"""
|
497 |
)
|
498 |
with gr.Row():
|
@@ -503,7 +505,7 @@ def launch_interface():
|
|
503 |
)
|
504 |
|
505 |
with gr.Row():
|
506 |
-
start_page = gr.Number(value=1, label="🔢 Start Page", visible=False)
|
507 |
end_page = gr.Number(value=1, label="🔢 End Page", visible=False)
|
508 |
|
509 |
# Function to update visibility of start_page and end_page
|
@@ -531,27 +533,44 @@ def launch_interface():
|
|
531 |
It provides **high-level SDG mapping** of documents at the page level.
|
532 |
"""
|
533 |
)
|
|
|
|
|
|
|
|
|
|
|
534 |
# Nested Tabs for Primary and Secondary SDGs
|
535 |
with gr.Tabs():
|
536 |
with gr.TabItem("π Primary SDGs"):
|
537 |
-
|
538 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
539 |
with gr.Row():
|
540 |
page_csv = gr.File(label="π Download Page Predictions CSV")
|
541 |
page_docx = gr.File(label="π Download Page Report DOCX")
|
542 |
page_jpeg1 = gr.File(label="🖼️ Download Primary SDGs JPEG")
|
543 |
|
544 |
with gr.TabItem("π Secondary SDGs"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
545 |
secondary_page_plot = gr.Plot(label="π Secondary SDGs [Page-Level]")
|
546 |
|
547 |
with gr.Row():
|
548 |
page_csv_secondary = gr.File(label="π Download Page Predictions CSV")
|
549 |
page_report_file_secondary = gr.File(label="π Download Page Report DOCX")
|
550 |
-
secondary_page_jpeg = gr.File(label="🖼️ Download Secondary SDGs JPEG")
|
551 |
-
|
552 |
-
with gr.Row():
|
553 |
-
page_button = gr.Button("🏃‍♂️ Run Page-Level Analysis")
|
554 |
-
reset_page_button = gr.Button("π Reset Page-Level Analysis", elem_classes="reset-button")
|
555 |
|
556 |
with gr.Tab("βοΈ Sentence-Level Analysis"):
|
557 |
gr.Markdown(
|
@@ -562,11 +581,23 @@ def launch_interface():
|
|
562 |
It provides **detailed SDG mapping** at the sentence level.
|
563 |
"""
|
564 |
)
|
|
|
|
|
|
|
|
|
|
|
565 |
# Nested Tabs for Primary and Secondary SDGs
|
566 |
with gr.Tabs():
|
567 |
with gr.TabItem("π Primary SDGs"):
|
568 |
-
|
569 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
570 |
with gr.Row():
|
571 |
sentence_csv = gr.File(label="π Download Sentence Predictions CSV")
|
572 |
sentence_docx = gr.File(label="π Download Sentence Report DOCX")
|
@@ -579,11 +610,7 @@ def launch_interface():
|
|
579 |
sentence_csv_secondary = gr.File(label="π Download Sentence Predictions CSV")
|
580 |
sentence_report_file_secondary = gr.File(label="π Download Sentence Report DOCX")
|
581 |
secondary_sentence_jpeg = gr.File(label="🖼️ Download Secondary SDGs JPEG")
|
582 |
-
|
583 |
-
with gr.Row():
|
584 |
-
sentence_button = gr.Button("🏃‍♂️ Run Sentence-Level Analysis")
|
585 |
-
reset_sentence_button = gr.Button("π Reset Sentence-Level Analysis", elem_classes="reset-button")
|
586 |
-
|
587 |
# Function to process page-level analysis
|
588 |
@spaces.GPU
|
589 |
def process_pages(file, extraction_mode, start_page, end_page):
|
@@ -633,13 +660,13 @@ def launch_interface():
|
|
633 |
)
|
634 |
|
635 |
# Define output file names
|
636 |
-
page_csv_file = f"{sanitized_file_name}
|
637 |
-
page_report_file = f"{sanitized_file_name}
|
638 |
-
primary_page_jpeg = f"{sanitized_file_name}
|
639 |
|
640 |
-
page_csv_file_secondary = f"{sanitized_file_name}
|
641 |
-
page_report_file_secondary = f"{sanitized_file_name}
|
642 |
-
secondary_page_jpeg = f"{sanitized_file_name}
|
643 |
|
644 |
# Save CSV and reports
|
645 |
df_page_predictions.to_csv(page_csv_file, index=False)
|
@@ -711,13 +738,13 @@ def launch_interface():
|
|
711 |
)
|
712 |
|
713 |
# Define output file names
|
714 |
-
sentence_csv_file = f"{sanitized_file_name}
|
715 |
-
sentence_report_file = f"{sanitized_file_name}
|
716 |
-
primary_sentence_jpeg = f"{sanitized_file_name}
|
717 |
|
718 |
-
sentence_csv_file_secondary = f"{sanitized_file_name}
|
719 |
-
sentence_report_file_secondary = f"{sanitized_file_name}
|
720 |
-
secondary_sentence_jpeg = f"{sanitized_file_name}
|
721 |
|
722 |
# Save CSV and reports
|
723 |
df_sentence_predictions.to_csv(sentence_csv_file, index=False)
|
@@ -757,8 +784,8 @@ def launch_interface():
|
|
757 |
page_csv, # π Download Page Predictions CSV
|
758 |
page_docx, # π Download Page Report DOCX
|
759 |
page_jpeg1, # 🖼️ Download Primary SDGs JPEG
|
760 |
-
page_csv_secondary, # π Download Page Predictions CSV
|
761 |
-
page_report_file_secondary, # π Download Page Report DOCX
|
762 |
secondary_page_jpeg # 🖼️ Download Secondary SDGs JPEG
|
763 |
]
|
764 |
)
|
@@ -787,8 +814,8 @@ def launch_interface():
|
|
787 |
sentence_csv, # π Download Sentence Predictions CSV
|
788 |
sentence_docx, # π Download Sentence Report DOCX
|
789 |
sentence_jpeg1, # 🖼️ Download Primary SDGs JPEG
|
790 |
-
sentence_csv_secondary, # π Download Sentence Predictions CSV
|
791 |
-
sentence_report_file_secondary, # π Download Sentence Report DOCX
|
792 |
secondary_sentence_jpeg # 🖼️ Download Secondary SDGs JPEG
|
793 |
]
|
794 |
)
|
|
|
282 |
'(Primary and Secondary) for each page with a probability score greater than zero.'
|
283 |
)
|
284 |
|
285 |
+
doc.add_heading("Primary SDGs Bar Graph", level=3)
|
286 |
doc.add_paragraph(
|
287 |
'This graph displays the most essential SDG the AI model associates with pages. The bars '
|
288 |
'represent the percentage of pages most strongly aligned with each SDG. This offers insight into the dominant '
|
289 |
'sustainable development theme within the document.'
|
290 |
)
|
291 |
|
292 |
+
doc.add_heading("Secondary SDGs Bar Graph", level=3)
|
293 |
doc.add_paragraph(
|
294 |
'This graph shows the second most relevant SDGs for pages. Although these SDGs are '
|
295 |
'not the primary focus, the text has some relevance to these goals.'
|
|
|
334 |
'(Primary and Secondary) for each sentence with a probability score greater than zero.'
|
335 |
)
|
336 |
|
337 |
+
doc.add_heading("Primary SDGs Bar Graph", level=3)
|
338 |
doc.add_paragraph(
|
339 |
'This graph displays the most essential SDG the AI model associates with sentences. The bars '
|
340 |
'represent the percentage of sentences most strongly aligned with each SDG. This offers more profound insight '
|
341 |
'into the dominant sustainable development theme within the document.'
|
342 |
)
|
343 |
|
344 |
+
doc.add_heading("Secondary SDGs Bar Graph", level=3)
|
345 |
doc.add_paragraph(
|
346 |
'This graph shows the second most relevant SDGs for sentences. Although these SDGs are not '
|
347 |
'the primary focus, the text has some relevance to these goals.'
|
|
|
483 |
)
|
484 |
|
485 |
# Shared PDF file input for both analyses
|
486 |
+
gr.Markdown("## Upload PDF File")
|
487 |
with gr.Row():
|
488 |
file_input = gr.File(
|
489 |
label="π Upload PDF File for Analysis", file_types=[".pdf"]
|
|
|
493 |
gr.Markdown(
|
494 |
"""
|
495 |
## PDF Text Extraction Mode
|
496 |
+
Choose whether to analyze all pages or a specific range of pages. If you want to exclude certain pages from the analysis, select
|
497 |
+
"Range of Pages" and specify the start and end pages.
|
498 |
"""
|
499 |
)
|
500 |
with gr.Row():
|
|
|
505 |
)
|
506 |
|
507 |
with gr.Row():
|
508 |
+
start_page = gr.Number(value=1, label="🔢 Start Page", visible=False, info="The cover page is page 1")
|
509 |
end_page = gr.Number(value=1, label="🔢 End Page", visible=False)
|
510 |
|
511 |
# Function to update visibility of start_page and end_page
|
|
|
533 |
It provides **high-level SDG mapping** of documents at the page level.
|
534 |
"""
|
535 |
)
|
536 |
+
|
537 |
+
with gr.Row():
|
538 |
+
page_button = gr.Button("🏃‍♂️ Run Page-Level Analysis")
|
539 |
+
reset_page_button = gr.Button("π Reset Page-Level Analysis", elem_classes="reset-button")
|
540 |
+
|
541 |
# Nested Tabs for Primary and Secondary SDGs
|
542 |
with gr.Tabs():
|
543 |
with gr.TabItem("π Primary SDGs"):
|
544 |
+
with gr.Row():
|
545 |
+
primary_page_plot = gr.Plot(label="π Primary SDGs [Page-Level]", scale=2)
|
546 |
+
gr.Textbox(
|
547 |
+
"When the analysis is completed, the Primary SDGs bar graph on the left will show "+
|
548 |
+
"the percentage of pages that strongly align with each SDG. The icon for the most frequent "+
|
549 |
+
"SDG is highlighted above the graph. Download the Page Predictions CSV for further details.",
|
550 |
+
interactive=False, scale=1
|
551 |
+
)
|
552 |
+
gr.Markdown("##### Download results")
|
553 |
with gr.Row():
|
554 |
page_csv = gr.File(label="π Download Page Predictions CSV")
|
555 |
page_docx = gr.File(label="π Download Page Report DOCX")
|
556 |
page_jpeg1 = gr.File(label="🖼️ Download Primary SDGs JPEG")
|
557 |
|
558 |
with gr.TabItem("π Secondary SDGs"):
|
559 |
+
with gr.Row():
|
560 |
+
primary_page_plot = gr.Plot(label="π Primary SDGs [Page-Level]", scale=2)
|
561 |
+
gr.Textbox(
|
562 |
+
"When the analysis is completed, the Secondary SDGs bar graph on the left will show "+
|
563 |
+
"SDGs that are not the primary focus of the pages analysed. These SDGs are second to the "+
|
564 |
+
"Primary SDGs.",
|
565 |
+
interactive=False, scale=1
|
566 |
+
)
|
567 |
+
gr.Markdown("##### Download results")
|
568 |
secondary_page_plot = gr.Plot(label="π Secondary SDGs [Page-Level]")
|
569 |
|
570 |
with gr.Row():
|
571 |
page_csv_secondary = gr.File(label="π Download Page Predictions CSV")
|
572 |
page_report_file_secondary = gr.File(label="π Download Page Report DOCX")
|
573 |
+
secondary_page_jpeg = gr.File(label="🖼️ Download Secondary SDGs JPEG")
|
|
|
|
|
|
|
|
|
574 |
|
575 |
with gr.Tab("βοΈ Sentence-Level Analysis"):
|
576 |
gr.Markdown(
|
|
|
581 |
It provides **detailed SDG mapping** at the sentence level.
|
582 |
"""
|
583 |
)
|
584 |
+
|
585 |
+
with gr.Row():
|
586 |
+
sentence_button = gr.Button("🏃‍♂️ Run Sentence-Level Analysis")
|
587 |
+
reset_sentence_button = gr.Button("π Reset Sentence-Level Analysis", elem_classes="reset-button")
|
588 |
+
|
589 |
# Nested Tabs for Primary and Secondary SDGs
|
590 |
with gr.Tabs():
|
591 |
with gr.TabItem("π Primary SDGs"):
|
592 |
+
with gr.Row():
|
593 |
+
primary_sentence_plot = gr.Plot(label="π Primary SDGs [Sentence-Level]", scale=2)
|
594 |
+
gr.Textbox(
|
595 |
+
"When the analysis is completed, the Primary SDGs bar graph on the left will show "+
|
596 |
+
"the percentage of sentences that strongly align with each SDG. The icon for the most frequent "+
|
597 |
+
"SDG is highlighted above the graph. Download the Sentence Predictions CSV for further details.",
|
598 |
+
interactive=False, scale=1
|
599 |
+
)
|
600 |
+
gr.Markdown("##### Download results")
|
601 |
with gr.Row():
|
602 |
sentence_csv = gr.File(label="π Download Sentence Predictions CSV")
|
603 |
sentence_docx = gr.File(label="π Download Sentence Report DOCX")
|
|
|
610 |
sentence_csv_secondary = gr.File(label="π Download Sentence Predictions CSV")
|
611 |
sentence_report_file_secondary = gr.File(label="π Download Sentence Report DOCX")
|
612 |
secondary_sentence_jpeg = gr.File(label="🖼️ Download Secondary SDGs JPEG")
|
613 |
+
|
|
|
|
|
|
|
|
|
614 |
# Function to process page-level analysis
|
615 |
@spaces.GPU
|
616 |
def process_pages(file, extraction_mode, start_page, end_page):
|
|
|
660 |
)
|
661 |
|
662 |
# Define output file names
|
663 |
+
page_csv_file = f"{sanitized_file_name}_SDG-Page_predictions.csv"
|
664 |
+
page_report_file = f"{sanitized_file_name}_SDG-Page_report.docx"
|
665 |
+
primary_page_jpeg = f"{sanitized_file_name}_SDG-Page_primary_graph.jpeg"
|
666 |
|
667 |
+
page_csv_file_secondary = f"{sanitized_file_name}_SDG-Page_predictions.csv"
|
668 |
+
page_report_file_secondary = f"{sanitized_file_name}_SDG-Page_report.docx"
|
669 |
+
secondary_page_jpeg = f"{sanitized_file_name}_SDG-Page_secondary_graph.jpeg"
|
670 |
|
671 |
# Save CSV and reports
|
672 |
df_page_predictions.to_csv(page_csv_file, index=False)
|
|
|
738 |
)
|
739 |
|
740 |
# Define output file names
|
741 |
+
sentence_csv_file = f"{sanitized_file_name}_SDG-Sentence_predictions.csv"
|
742 |
+
sentence_report_file = f"{sanitized_file_name}_SDG-Sentence_report.docx"
|
743 |
+
primary_sentence_jpeg = f"{sanitized_file_name}_SDG-Sentence_primary_graph.jpeg"
|
744 |
|
745 |
+
sentence_csv_file_secondary = f"{sanitized_file_name}_SDG-Sentence_predictions.csv"
|
746 |
+
sentence_report_file_secondary = f"{sanitized_file_name}_SDG-Sentence_report.docx"
|
747 |
+
secondary_sentence_jpeg = f"{sanitized_file_name}_SDG-Sentence_secondary_graph.jpeg"
|
748 |
|
749 |
# Save CSV and reports
|
750 |
df_sentence_predictions.to_csv(sentence_csv_file, index=False)
|
|
|
784 |
page_csv, # π Download Page Predictions CSV
|
785 |
page_docx, # π Download Page Report DOCX
|
786 |
page_jpeg1, # 🖼️ Download Primary SDGs JPEG
|
787 |
+
page_csv_secondary, # π Download Page Predictions CSV
|
788 |
+
page_report_file_secondary, # π Download Page Report DOCX
|
789 |
secondary_page_jpeg # 🖼️ Download Secondary SDGs JPEG
|
790 |
]
|
791 |
)
|
|
|
814 |
sentence_csv, # π Download Sentence Predictions CSV
|
815 |
sentence_docx, # π Download Sentence Report DOCX
|
816 |
sentence_jpeg1, # 🖼️ Download Primary SDGs JPEG
|
817 |
+
sentence_csv_secondary, # π Download Sentence Predictions CSV
|
818 |
+
sentence_report_file_secondary, # π Download Sentence Report DOCX
|
819 |
secondary_sentence_jpeg # 🖼️ Download Secondary SDGs JPEG
|
820 |
]
|
821 |
)
|