sadickam committed on
Commit
8dbd1eb
·
verified ·
1 Parent(s): d7675d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -62
app.py CHANGED
@@ -168,6 +168,7 @@ sdg_colors = {
168
  "SDG16_Peace, Justice and Strong Institutions": "#00689D"
169
  }
170
 
 
171
  # Function to plot SDG dominant bar graphs using Plotly
172
  def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
173
  """
@@ -218,9 +219,9 @@ def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
218
  title=None,
219
  tickfont=dict(size=12)
220
  ),
221
- margin=dict(l=20, r=150, t=30, b=20), # Increased right margin for icon
222
  height=600,
223
- width=600, # Increased width to accommodate the icon
224
  showlegend=False,
225
  template="simple_white",
226
  xaxis=dict(
@@ -249,7 +250,7 @@ def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
249
  dict(
250
  source='data:image/png;base64,' + encoded_image,
251
  xref="paper", yref="paper",
252
- x=0.5, y=1.05, # Positioning: slightly to the right and top
253
  sizex=0.2, sizey=0.2, # Size of the icon
254
  xanchor="left",
255
  yanchor="top",
@@ -263,7 +264,7 @@ def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
263
 
264
  def save_figure_as_jpeg(fig, filename):
265
  """Saves the Plotly figure as a high-resolution JPEG."""
266
- pio.write_image(fig, filename, format='jpeg', width=1000, height=600, scale=5)
267
 
268
  # Generate reports (page and sentence levels)
269
  def generate_page_report(df_pages, report_file_name):
@@ -471,12 +472,12 @@ def df_to_csv_bytes(df):
471
  raise RuntimeError(f"Error during CSV conversion: {e}")
472
 
473
  def launch_interface():
474
- with gr.Blocks(title="SDG Document Analysis App", css=custom_css) as demo:
475
 
476
  # Title as a visible heading at the top of the page with an icon
477
  gr.Markdown(
478
  """
479
- # 🌍 SDG Document Analysis App
480
  Analyze documents to map Sustainable Development Goals (SDGs) at both page and sentence levels.
481
  """
482
  )
@@ -490,7 +491,7 @@ def launch_interface():
490
  # Extraction mode selection with explanatory text
491
  gr.Markdown(
492
  """
493
- ### 📋 PDF Text Extraction Mode
494
  Choose whether to analyze all pages or a specific range of pages. If you want to exclude certain pages from the analysis, select "Range of Pages" and specify the start and end pages.
495
  """
496
  )
@@ -518,61 +519,77 @@ def launch_interface():
518
  outputs=[start_page, end_page]
519
  )
520
 
521
- # Tabs for page-level and sentence-level analysis
 
 
522
  with gr.Tab("📄 Page-Level Analysis"):
523
  gr.Markdown(
524
  """
525
- ### 📄 Page-Level SDG Analysis
526
  This section conducts Sustainable Development Goals (SDG) mapping
527
  of documents using the [sdgBERT model](https://huggingface.co/sadickam/sdgBERT).
528
  It provides **high-level SDG mapping** of documents at the page level.
529
  """
530
  )
531
- with gr.Row():
532
- with gr.Column():
 
533
  primary_page_plot = gr.Plot(label="📊 Primary SDGs [Page-Level]")
534
- with gr.Column():
 
 
 
 
 
 
535
  secondary_page_plot = gr.Plot(label="📈 Secondary SDGs [Page-Level]")
536
 
537
- with gr.Row():
538
- page_csv = gr.File(label="📊 Download Page Predictions CSV")
539
- page_docx = gr.File(label="📄 Download Page Report DOCX")
540
- page_jpeg1 = gr.File(label="🖼️ Download Primary SDGs JPEG")
541
- page_jpeg2 = gr.File(label="🖼️ Download Secondary SDGs JPEG")
542
 
543
- page_button = gr.Button("🏃‍♂️ Run Page-Level Analysis")
544
- reset_page_button = gr.Button("🔄 Reset Page-Level Analysis")
 
545
 
546
  with gr.Tab("✍️ Sentence-Level Analysis"):
547
  gr.Markdown(
548
  """
549
- ### ✍️ Sentence-Level SDG Analysis
550
  This section conducts Sustainable Development Goals (SDG) mapping
551
  using the [sdgBERT model](https://huggingface.co/sadickam/sdgBERT).
552
- It provides **detailed SDG mapping** at the sentence level. **Note that
553
- all sentences under 70 characters are excluded from the analysis.**
554
  """
555
  )
556
- with gr.Row():
557
- with gr.Column():
 
558
  primary_sentence_plot = gr.Plot(label="📊 Primary SDGs [Sentence-Level]")
559
- with gr.Column():
 
 
 
 
 
 
560
  secondary_sentence_plot = gr.Plot(label="📈 Secondary SDGs [Sentence-Level]")
561
 
562
- with gr.Row():
563
- sentence_csv = gr.File(label="📊 Download Sentence Predictions CSV")
564
- sentence_docx = gr.File(label="📄 Download Sentence Report DOCX")
565
- sentence_jpeg1 = gr.File(label="🖼️ Download Primary SDGs JPEG")
566
- sentence_jpeg2 = gr.File(label="🖼️ Download Secondary SDGs JPEG")
567
 
568
- sentence_button = gr.Button("🏃‍♂️ Run Sentence-Level Analysis")
569
- reset_sentence_button = gr.Button("🔄 Reset Sentence-Level Analysis")
 
570
 
571
  # Function to process page-level analysis
572
  @spaces.GPU
573
  def process_pages(file, extraction_mode, start_page, end_page):
574
  if not file:
575
- return None, None, None, None, None, None
 
576
 
577
  try:
578
  if hasattr(file, 'name'):
@@ -607,6 +624,7 @@ def launch_interface():
607
  # Predict SDGs at page level
608
  df_page_predictions = predict_pages(page_df)
609
 
 
610
  first_plot = plot_sdg(
611
  df_page_predictions, "📊 Primary SDGs", 'pred1'
612
  )
@@ -615,31 +633,41 @@ def launch_interface():
615
  )
616
 
617
  # Define output file names
618
- page_csv_file = f"{sanitized_file_name}_page_predictions.csv"
619
- page_report_file = f"{sanitized_file_name}_page_report.docx"
620
  primary_page_jpeg = f"{sanitized_file_name}_primary_page.jpeg"
 
 
 
621
  secondary_page_jpeg = f"{sanitized_file_name}_secondary_page.jpeg"
622
 
 
623
  df_page_predictions.to_csv(page_csv_file, index=False)
624
- page_report = generate_page_report(df_page_predictions, page_report_file)
 
 
 
625
 
626
  # Save figures as JPEG
627
  save_figure_as_jpeg(first_plot, primary_page_jpeg)
628
  save_figure_as_jpeg(second_plot, secondary_page_jpeg)
629
 
630
  return (
631
- first_plot, second_plot, page_csv_file, page_report_file,
632
- primary_page_jpeg, secondary_page_jpeg)
 
 
633
 
634
  except Exception as e:
635
  print(f"Error: {e}")
636
- return None, None, None, None, None, None
637
 
638
  # Function to process sentence-level analysis
639
  @spaces.GPU
640
  def process_sentences(file, extraction_mode, start_page, end_page):
641
  if not file:
642
- return None, None, None, None, None, None
 
643
 
644
  try:
645
  if hasattr(file, 'name'):
@@ -674,6 +702,7 @@ def launch_interface():
674
  # Predict SDGs at sentence level
675
  df_sentence_predictions = predict_sentences(sentence_df)
676
 
 
677
  first_plot = plot_sdg(
678
  df_sentence_predictions, "📊 Primary SDGs", 'pred1'
679
  )
@@ -682,59 +711,100 @@ def launch_interface():
682
  )
683
 
684
  # Define output file names
685
- sentence_csv_file = f"{sanitized_file_name}_sentence_predictions.csv"
686
- sentence_report_file = f"{sanitized_file_name}_sentence_report.docx"
687
  primary_sentence_jpeg = f"{sanitized_file_name}_primary_sentence.jpeg"
 
 
 
688
  secondary_sentence_jpeg = f"{sanitized_file_name}_secondary_sentence.jpeg"
689
 
 
690
  df_sentence_predictions.to_csv(sentence_csv_file, index=False)
691
- sentence_report = generate_sentence_report(df_sentence_predictions, sentence_report_file)
 
 
 
692
 
693
  # Save figures as JPEG
694
  save_figure_as_jpeg(first_plot, primary_sentence_jpeg)
695
  save_figure_as_jpeg(second_plot, secondary_sentence_jpeg)
696
 
697
  return (
698
- first_plot, second_plot, sentence_csv_file, sentence_report_file,
699
- primary_sentence_jpeg, secondary_sentence_jpeg)
 
 
700
 
701
  except Exception as e:
702
  print(f"Error: {e}")
703
- return None, None, None, None, None, None
704
 
705
  # Reset functions to clear the outputs
706
  def reset_page_outputs():
707
- return None, None, None, None, None, None
708
 
709
  def reset_sentence_outputs():
710
- return None, None, None, None, None, None
711
 
712
- # Button actions for each tab
713
  page_button.click(
714
  process_pages,
715
  inputs=[file_input, extraction_mode, start_page, end_page],
716
- outputs=[primary_page_plot, secondary_page_plot, page_csv, page_docx,
717
- page_jpeg1, page_jpeg2]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
718
  )
719
 
 
720
  sentence_button.click(
721
  process_sentences,
722
  inputs=[file_input, extraction_mode, start_page, end_page],
723
- outputs=[primary_sentence_plot, secondary_sentence_plot, sentence_csv, sentence_docx,
724
- sentence_jpeg1, sentence_jpeg2]
725
- )
726
-
727
- # Reset button actions to clear outputs
728
- reset_page_button.click(
729
- reset_page_outputs,
730
- outputs=[primary_page_plot, secondary_page_plot, page_csv, page_docx,
731
- page_jpeg1, page_jpeg2]
 
732
  )
733
 
734
  reset_sentence_button.click(
735
  reset_sentence_outputs,
736
- outputs=[primary_sentence_plot, secondary_sentence_plot, sentence_csv, sentence_docx,
737
- sentence_jpeg1, sentence_jpeg2]
 
 
 
 
 
 
 
 
738
  )
739
 
740
  demo.queue().launch()
 
168
  "SDG16_Peace, Justice and Strong Institutions": "#00689D"
169
  }
170
 
171
+ # Function to plot SDG dominant bar graphs using Plotly
172
  # Function to plot SDG dominant bar graphs using Plotly
173
  def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
174
  """
 
219
  title=None,
220
  tickfont=dict(size=12)
221
  ),
222
+ margin=dict(l=20, r=30, t=100, b=20), # Increased top margin to make room for the icon above the plot
223
  height=600,
224
+ width=800,
225
  showlegend=False,
226
  template="simple_white",
227
  xaxis=dict(
 
250
  dict(
251
  source='data:image/png;base64,' + encoded_image,
252
  xref="paper", yref="paper",
253
+ x=0.4, y=1.2, # Positioning: slightly to the right and top
254
  sizex=0.2, sizey=0.2, # Size of the icon
255
  xanchor="left",
256
  yanchor="top",
 
264
 
265
  def save_figure_as_jpeg(fig, filename):
266
  """Saves the Plotly figure as a high-resolution JPEG."""
267
+ pio.write_image(fig, filename, format='jpeg', width=900, height=600, scale=6)
268
 
269
  # Generate reports (page and sentence levels)
270
  def generate_page_report(df_pages, report_file_name):
 
472
  raise RuntimeError(f"Error during CSV conversion: {e}")
473
 
474
  def launch_interface():
475
+ with gr.Blocks(css=custom_css) as demo:
476
 
477
  # Title as a visible heading at the top of the page with an icon
478
  gr.Markdown(
479
  """
480
+ # 🌍 SDG Document Analysis App - CPU
481
  Analyze documents to map Sustainable Development Goals (SDGs) at both page and sentence levels.
482
  """
483
  )
 
491
  # Extraction mode selection with explanatory text
492
  gr.Markdown(
493
  """
494
+ ## PDF Text Extraction Mode
495
  Choose whether to analyze all pages or a specific range of pages. If you want to exclude certain pages from the analysis, select "Range of Pages" and specify the start and end pages.
496
  """
497
  )
 
519
  outputs=[start_page, end_page]
520
  )
521
 
522
+ # Main Tabs for Page-Level and Sentence-Level Analysis
523
+ gr.Markdown("## SDG Analysis Type")
524
+
525
  with gr.Tab("📄 Page-Level Analysis"):
526
  gr.Markdown(
527
  """
528
+ ### Page-Level SDG Analysis
529
  This section conducts Sustainable Development Goals (SDG) mapping
530
  of documents using the [sdgBERT model](https://huggingface.co/sadickam/sdgBERT).
531
  It provides **high-level SDG mapping** of documents at the page level.
532
  """
533
  )
534
+ # Nested Tabs for Primary and Secondary SDGs
535
+ with gr.Tabs():
536
+ with gr.TabItem("📊 Primary SDGs"):
537
  primary_page_plot = gr.Plot(label="📊 Primary SDGs [Page-Level]")
538
+
539
+ with gr.Row():
540
+ page_csv = gr.File(label="📊 Download Page Predictions CSV")
541
+ page_docx = gr.File(label="📄 Download Page Report DOCX")
542
+ page_jpeg1 = gr.File(label="🖼️ Download Primary SDGs JPEG")
543
+
544
+ with gr.TabItem("📈 Secondary SDGs"):
545
  secondary_page_plot = gr.Plot(label="📈 Secondary SDGs [Page-Level]")
546
 
547
+ with gr.Row():
548
+ page_csv_secondary = gr.File(label="📊 Download Page Predictions CSV")
549
+ page_report_file_secondary = gr.File(label="📄 Download Page Report DOCX")
550
+ secondary_page_jpeg = gr.File(label="🖼️ Download Secondary SDGs JPEG")
 
551
 
552
+ with gr.Row():
553
+ page_button = gr.Button("🏃‍♂️ Run Page-Level Analysis")
554
+ reset_page_button = gr.Button("🔄 Reset Page-Level Analysis", elem_classes="reset-button")
555
 
556
  with gr.Tab("✍️ Sentence-Level Analysis"):
557
  gr.Markdown(
558
  """
559
+ ### Sentence-Level SDG Analysis
560
  This section conducts Sustainable Development Goals (SDG) mapping
561
  using the [sdgBERT model](https://huggingface.co/sadickam/sdgBERT).
562
+ It provides **detailed SDG mapping** at the sentence level.
 
563
  """
564
  )
565
+ # Nested Tabs for Primary and Secondary SDGs
566
+ with gr.Tabs():
567
+ with gr.TabItem("📊 Primary SDGs"):
568
  primary_sentence_plot = gr.Plot(label="📊 Primary SDGs [Sentence-Level]")
569
+
570
+ with gr.Row():
571
+ sentence_csv = gr.File(label="📊 Download Sentence Predictions CSV")
572
+ sentence_docx = gr.File(label="📄 Download Sentence Report DOCX")
573
+ sentence_jpeg1 = gr.File(label="🖼️ Download Primary SDGs JPEG")
574
+
575
+ with gr.TabItem("📈 Secondary SDGs"):
576
  secondary_sentence_plot = gr.Plot(label="📈 Secondary SDGs [Sentence-Level]")
577
 
578
+ with gr.Row():
579
+ sentence_csv_secondary = gr.File(label="📊 Download Sentence Predictions CSV")
580
+ sentence_report_file_secondary = gr.File(label="📄 Download Sentence Report DOCX")
581
+ secondary_sentence_jpeg = gr.File(label="🖼️ Download Secondary SDGs JPEG")
 
582
 
583
+ with gr.Row():
584
+ sentence_button = gr.Button("🏃‍♂️ Run Sentence-Level Analysis")
585
+ reset_sentence_button = gr.Button("🔄 Reset Sentence-Level Analysis", elem_classes="reset-button")
586
 
587
  # Function to process page-level analysis
588
  @spaces.GPU
589
  def process_pages(file, extraction_mode, start_page, end_page):
590
  if not file:
591
+ # Return None for each output component
592
+ return [None, None, None, None, None, None, None, None]
593
 
594
  try:
595
  if hasattr(file, 'name'):
 
624
  # Predict SDGs at page level
625
  df_page_predictions = predict_pages(page_df)
626
 
627
+ # Generate plots with icon overlay
628
  first_plot = plot_sdg(
629
  df_page_predictions, "📊 Primary SDGs", 'pred1'
630
  )
 
633
  )
634
 
635
  # Define output file names
636
+ page_csv_file = f"{sanitized_file_name}_page_predictions_primary.csv"
637
+ page_report_file = f"{sanitized_file_name}_page_report_primary.docx"
638
  primary_page_jpeg = f"{sanitized_file_name}_primary_page.jpeg"
639
+
640
+ page_csv_file_secondary = f"{sanitized_file_name}_page_predictions_secondary.csv"
641
+ page_report_file_secondary = f"{sanitized_file_name}_page_report_secondary.docx"
642
  secondary_page_jpeg = f"{sanitized_file_name}_secondary_page.jpeg"
643
 
644
+ # Save CSV and reports
645
  df_page_predictions.to_csv(page_csv_file, index=False)
646
+ page_report_primary = generate_page_report(df_page_predictions, page_report_file)
647
+
648
+ df_page_predictions.to_csv(page_csv_file_secondary, index=False)
649
+ page_report_secondary = generate_page_report(df_page_predictions, page_report_file_secondary)
650
 
651
  # Save figures as JPEG
652
  save_figure_as_jpeg(first_plot, primary_page_jpeg)
653
  save_figure_as_jpeg(second_plot, secondary_page_jpeg)
654
 
655
  return (
656
+ first_plot, second_plot,
657
+ page_csv_file, page_report_file, primary_page_jpeg,
658
+ page_csv_file_secondary, page_report_file_secondary, secondary_page_jpeg
659
+ )
660
 
661
  except Exception as e:
662
  print(f"Error: {e}")
663
+ return [None, None, None, None, None, None, None, None]
664
 
665
  # Function to process sentence-level analysis
666
  @spaces.GPU
667
  def process_sentences(file, extraction_mode, start_page, end_page):
668
  if not file:
669
+ # Return None for each output component
670
+ return [None, None, None, None, None, None, None, None]
671
 
672
  try:
673
  if hasattr(file, 'name'):
 
702
  # Predict SDGs at sentence level
703
  df_sentence_predictions = predict_sentences(sentence_df)
704
 
705
+ # Generate plots with icon overlay
706
  first_plot = plot_sdg(
707
  df_sentence_predictions, "📊 Primary SDGs", 'pred1'
708
  )
 
711
  )
712
 
713
  # Define output file names
714
+ sentence_csv_file = f"{sanitized_file_name}_sentence_predictions_primary.csv"
715
+ sentence_report_file = f"{sanitized_file_name}_sentence_report_primary.docx"
716
  primary_sentence_jpeg = f"{sanitized_file_name}_primary_sentence.jpeg"
717
+
718
+ sentence_csv_file_secondary = f"{sanitized_file_name}_sentence_predictions_secondary.csv"
719
+ sentence_report_file_secondary = f"{sanitized_file_name}_sentence_report_secondary.docx"
720
  secondary_sentence_jpeg = f"{sanitized_file_name}_secondary_sentence.jpeg"
721
 
722
+ # Save CSV and reports
723
  df_sentence_predictions.to_csv(sentence_csv_file, index=False)
724
+ sentence_report_primary = generate_sentence_report(df_sentence_predictions, sentence_report_file)
725
+
726
+ df_sentence_predictions.to_csv(sentence_csv_file_secondary, index=False)
727
+ sentence_report_secondary = generate_sentence_report(df_sentence_predictions, sentence_report_file_secondary)
728
 
729
  # Save figures as JPEG
730
  save_figure_as_jpeg(first_plot, primary_sentence_jpeg)
731
  save_figure_as_jpeg(second_plot, secondary_sentence_jpeg)
732
 
733
  return (
734
+ first_plot, second_plot,
735
+ sentence_csv_file, sentence_report_file, primary_sentence_jpeg,
736
+ sentence_csv_file_secondary, sentence_report_file_secondary, secondary_sentence_jpeg
737
+ )
738
 
739
  except Exception as e:
740
  print(f"Error: {e}")
741
+ return [None, None, None, None, None, None, None, None]
742
 
743
  # Reset functions to clear the outputs
744
  def reset_page_outputs():
745
+ return [None, None, None, None, None, None, None, None]
746
 
747
  def reset_sentence_outputs():
748
+ return [None, None, None, None, None, None, None, None]
749
 
750
+ # Button actions for Page-Level Analysis
751
  page_button.click(
752
  process_pages,
753
  inputs=[file_input, extraction_mode, start_page, end_page],
754
+ outputs=[
755
+ primary_page_plot, # 📊 Primary SDGs [Page-Level]
756
+ secondary_page_plot, # 📈 Secondary SDGs [Page-Level]
757
+ page_csv, # 📊 Download Page Predictions CSV
758
+ page_docx, # 📄 Download Page Report DOCX
759
+ page_jpeg1, # 🖼️ Download Primary SDGs JPEG
760
+ page_csv_secondary, # 📊 Download Page Predictions CSV (Secondary)
761
+ page_report_file_secondary, # 📄 Download Page Report DOCX (Secondary)
762
+ secondary_page_jpeg # 🖼️ Download Secondary SDGs JPEG
763
+ ]
764
+ )
765
+
766
+ reset_page_button.click(
767
+ reset_page_outputs,
768
+ outputs=[
769
+ primary_page_plot,
770
+ secondary_page_plot,
771
+ page_csv,
772
+ page_docx,
773
+ page_jpeg1,
774
+ page_csv_secondary,
775
+ page_report_file_secondary,
776
+ secondary_page_jpeg
777
+ ]
778
  )
779
 
780
+ # Button actions for Sentence-Level Analysis
781
  sentence_button.click(
782
  process_sentences,
783
  inputs=[file_input, extraction_mode, start_page, end_page],
784
+ outputs=[
785
+ primary_sentence_plot, # 📊 Primary SDGs [Sentence-Level]
786
+ secondary_sentence_plot, # 📈 Secondary SDGs [Sentence-Level]
787
+ sentence_csv, # 📊 Download Sentence Predictions CSV
788
+ sentence_docx, # 📄 Download Sentence Report DOCX
789
+ sentence_jpeg1, # 🖼️ Download Primary SDGs JPEG
790
+ sentence_csv_secondary, # 📊 Download Sentence Predictions CSV (Secondary)
791
+ sentence_report_file_secondary, # 📄 Download Sentence Report DOCX (Secondary)
792
+ secondary_sentence_jpeg # 🖼️ Download Secondary SDGs JPEG
793
+ ]
794
  )
795
 
796
  reset_sentence_button.click(
797
  reset_sentence_outputs,
798
+ outputs=[
799
+ primary_sentence_plot,
800
+ secondary_sentence_plot,
801
+ sentence_csv,
802
+ sentence_docx,
803
+ sentence_jpeg1,
804
+ sentence_csv_secondary,
805
+ sentence_report_file_secondary,
806
+ secondary_sentence_jpeg
807
+ ]
808
  )
809
 
810
  demo.queue().launch()