diff --git a/data/image_classification/class_accuracies.pkl b/data/image_classification/class_accuracies.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..2208031499937166e39cb4f42e2930a1c8f3dfa1
--- /dev/null
+++ b/data/image_classification/class_accuracies.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b404d7c242ec04dea8a73b950670aee544791c4eafe19376888d0c21b78ecf6d
+size 206
diff --git a/data/image_classification/diagonal.npy b/data/image_classification/diagonal.npy
new file mode 100644
index 0000000000000000000000000000000000000000..95cb28c8ffb2ee1f4b353936886762863305c4b3
--- /dev/null
+++ b/data/image_classification/diagonal.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:092072623a33151cd67b60123fd4e5f8e81d464e772721c17e4fbf307e800be5
+size 152
diff --git a/data/image_classification/diagonal.pkl b/data/image_classification/diagonal.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..11179f9af62adcf34dbfc0c0a74ff3c6988b9cd8
--- /dev/null
+++ b/data/image_classification/diagonal.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58b090912ec24da229e09882bfd7b13deef11eb1431b92ac0e9b4f2fab697a25
+size 171
diff --git a/data/image_classification/images/image1.jpg b/data/image_classification/images/image1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..10ffbb8a14e0e9f05df8bdf4eac356629d22401b
Binary files /dev/null and b/data/image_classification/images/image1.jpg differ
diff --git a/data/image_classification/images/image2.jpg b/data/image_classification/images/image2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..05a458a4bac00aa625a4226d6def5bdf94712a55
Binary files /dev/null and b/data/image_classification/images/image2.jpg differ
diff --git a/data/image_classification/images/image3.jpg b/data/image_classification/images/image3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e41eac813e426479b0f38dcec9671f437832534b
Binary files /dev/null and b/data/image_classification/images/image3.jpg differ
diff --git a/data/image_classification/images/image4.jpg b/data/image_classification/images/image4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2af2276f77f38c5234293c1b7ae1b94fcbe57cc2
Binary files /dev/null and b/data/image_classification/images/image4.jpg differ
diff --git a/data/image_classification/images/image5.jpg b/data/image_classification/images/image5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4bb37f69b72959782ca0775d2ad04de2c04d2069
Binary files /dev/null and b/data/image_classification/images/image5.jpg differ
diff --git a/data/image_classification/images/image6.jpg b/data/image_classification/images/image6.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9247863bfbc8f0f247f4c01777c73c0832c30b6f
Binary files /dev/null and b/data/image_classification/images/image6.jpg differ
diff --git a/data/image_classification/images/image7.jpg b/data/image_classification/images/image7.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..847b69475df5fe2e98e3fb5a8dc66e26f9390834
Binary files /dev/null and b/data/image_classification/images/image7.jpg differ
diff --git a/data/image_classification/images/image8.jpg b/data/image_classification/images/image8.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bae584bd6d1480e30b01787043fe3125ed2e022a
Binary files /dev/null and b/data/image_classification/images/image8.jpg differ
diff --git a/data/image_classification/images/image9.jpg b/data/image_classification/images/image9.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..472eeebb00050848edc69df0502193e61d9d6030
Binary files /dev/null and b/data/image_classification/images/image9.jpg differ
diff --git a/data/image_classification/results.pkl b/data/image_classification/results.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..c754ceaf7b71bdcf2f6669831ffca33f332ca86e
--- /dev/null
+++ b/data/image_classification/results.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0147d470ee6854e29a4d66577e820a3d76910c659c6c19ba09996d165385f2fb
+size 796
diff --git a/data/pinterest/image1.jpg b/data/pinterest/image1.jpg
deleted file mode 100644
index ca31f0a255798b86a9202016dfb51bda5d413e9b..0000000000000000000000000000000000000000
Binary files a/data/pinterest/image1.jpg and /dev/null differ
diff --git a/data/pinterest/image2.jpg b/data/pinterest/image2.jpg
deleted file mode 100644
index 1bc3e6de53b24f8616e3e2570096774a8fe64dc7..0000000000000000000000000000000000000000
Binary files a/data/pinterest/image2.jpg and /dev/null differ
diff --git a/data/pinterest/image3.jpg b/data/pinterest/image3.jpg
deleted file mode 100644
index 1088d9b5e1a741fbbe923ebc7b33554c5a27a7e0..0000000000000000000000000000000000000000
Binary files a/data/pinterest/image3.jpg and /dev/null differ
diff --git a/data/pinterest/image4.jpg b/data/pinterest/image4.jpg
deleted file mode 100644
index f76a417c5212a88ac695685b45dee78c75ed4710..0000000000000000000000000000000000000000
Binary files a/data/pinterest/image4.jpg and /dev/null differ
diff --git a/data/topic-modeling/data-tm-view.pkl b/data/topic-modeling/data-tm-view.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..8d4e2fd377e23f558f5e8eea32df55cc0f8e1f23
--- /dev/null
+++ b/data/topic-modeling/data-tm-view.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcdfe682dc519405f991deb929d2e7bd197711ebd39d366204dc31540088bfe7
+size 25643
diff --git a/data/topic-modeling/similarity_topic_df.pkl b/data/topic-modeling/similarity_topic_df.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..c73b95eb97c12decd71598dc878c4f4fda58eb0c
--- /dev/null
+++ b/data/topic-modeling/similarity_topic_df.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56ffc20bf88f4794175504142c145f009c30882cc8f9a4a2bd6685565d7b1031
+size 5702
diff --git a/data/topic-modeling/similarity_topic_scores.npy b/data/topic-modeling/similarity_topic_scores.npy
new file mode 100644
index 0000000000000000000000000000000000000000..a9869ef35fb4f3ec3da8e889876a4d2277664292
--- /dev/null
+++ b/data/topic-modeling/similarity_topic_scores.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0eae3be2576e7314e8972be092e5c983857b8630f72452656d5511161925ee0
+size 1848
diff --git a/data/topic-modeling/topic_info.pkl b/data/topic-modeling/topic_info.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..b7258264c6ed1a392029ec36f77dcae2ebb6278f
--- /dev/null
+++ b/data/topic-modeling/topic_info.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6641f3d3292ddf67301be1695a86d32dc1b4e50ce8300998a8f5ff4378006de
+size 67773
diff --git a/data/topic-modeling/topics_top_words.json b/data/topic-modeling/topics_top_words.json
new file mode 100644
index 0000000000000000000000000000000000000000..48f2ebae54bb2262cd38600aed5cfab1f30c80c9
--- /dev/null
+++ b/data/topic-modeling/topics_top_words.json
@@ -0,0 +1 @@
+{"Footwear": [["shoes", 0.38847787705376907], ["footwear", 0.35415016786395725], ["sole", 0.31010186887426383], ["heel", 0.3088388496929392], ["sandals", 0.3080144185111931]], "Stationery": [["poster", 0.4772449173738855], ["posters", 0.3532032467764592], ["paper", 0.33680800783349146], ["sticker", 0.32875394429105165], ["wall", 0.3267100065240568]], "Accessories": [["bag", 0.44865215755276705], ["bags", 0.34066237491127976], ["backpack", 0.32761906839012567], ["wallet", 0.3183404784655875], ["pouch", 0.3139648334711031]], "Casual Clothing": [["shirt", 0.4866213634648433], ["tshirt", 0.4388525801491516], ["cotton", 0.42030363825064176], ["hoodie", 0.3472116721637178], ["sweatshirt", 0.3252484971808718]], "Home Decor": [["bed", 0.3915837064596754], ["bedsheet", 0.3795293850199473], ["pillow", 0.3726331517956856], ["chair", 0.3620157706059908], ["cushion", 0.3371702503148896]], "Ethnic Wear": [["kurta", 0.5964118488674663], ["kurti", 0.46409279828742933], ["ethnic", 0.4271965752882015], ["lehenga", 0.35217693955328105], ["rayon", 0.32973478637750253]], "Kitchenware": [["food", 0.4053179773769792], ["baking", 0.36953324326216985], ["cooking", 0.33478563148016793], ["container", 0.33388107407524753], ["stainless", 0.328391980553913]], "Electronics": [["usb", 0.4370380638338913], ["remote", 0.3927290047743422], ["cable", 0.38592683873622896], ["battery", 0.37858979023970196], ["power", 0.35648534880129973]], "Mobile Accessories": [["multy", 0.8227986161311175], ["smartphone", 0.7174978731856773], ["sturdy", 0.614121692169791], ["designer", 0.6109682869843849], ["attractive", 0.6104716588340674]], "Toys and Games": [["toy", 0.49860348911927627], ["toys", 0.3988778432684865], ["kids", 0.3573877985545804], ["play", 0.32559893534296275], ["doll", 0.32138639583229955]], "Smartphone Cases": [["galaxy", 0.3735023765469636], ["samsung", 0.36692840942262006], ["tpu", 0.360454345644459], ["phone", 0.34470171090642315], ["case", 0.3323669244458833]], "Bathroom Essentials": 
[["towel", 0.47783909527995144], ["towels", 0.4756431172582276], ["bathroom", 0.45437940527046383], ["machine", 0.40002718019217415], ["washing", 0.39952622128733417]], "Fitness Apparel": [["shorts", 0.4030437177905839], ["yoga", 0.3508045282032731], ["fitness", 0.345809251125565], ["gym", 0.3372047840909218], ["exercise", 0.3255797401202898]], "Jewelry": [["beads", 0.5537295987387991], ["jewelry", 0.4715332431907011], ["necklace", 0.42530621186050926], ["sterling", 0.3859332057581718], ["jewellery", 0.37480366224575584]], "Tailoring": [["bust", 0.8067574906731596], ["length", 0.7507572954203788], ["hip", 0.7208533754339365], ["cuff", 0.6493257298350807], ["waist", 0.5680918306873547]], "Beauty Products": [["hair", 0.5233417349688945], ["skin", 0.49711579981104936], ["makeup", 0.4464886130813637], ["oil", 0.3695888525844108], ["powder", 0.33759452075830465]], "Automotive Parts": [["brake", 0.7679029085894177], ["ford", 0.538136002342594], ["caliper", 0.5149383886065275], ["rotors", 0.44156507901608216], ["remanufactured", 0.40996825453754265]], "Religious Items": [["pooja", 0.4592149148466313], ["puja", 0.440364978878493], ["ganesha", 0.4339401038570326], ["statue", 0.42572057559436643], ["lord", 0.4160582548834375]], "Rugs and Carpets": [["carpet", 0.7396822233119122], ["carpets", 0.6163891407029106], ["rug", 0.5466610489961798], ["pile", 0.4811751923719153], ["bedroomhall", 0.4774631207324265]], "Lighting": [["lamp", 0.572806507321936], ["lights", 0.5194199268441229], ["led", 0.4637118662205354], ["light", 0.42752197570640416], ["fog", 0.39030233935073244]], "Tools and Hardware": [["drill", 0.4090803554942639], ["wrench", 0.3887384996728029], ["brass", 0.37763561450770244], ["ratchet", 0.3576598563561437], ["welding", 0.34836291774213735]], "Gifts": [["gift", 0.49486738196492014], ["christmas", 0.4499909703416835], ["anniversary", 0.44211510176760416], ["birthday", 0.42329962363537077], ["holiday", 0.3769232700978505]], "Cups and Mugs": [["mug", 
0.78550527496083], ["mugs", 0.6407372094776862], ["coffee", 0.5251853727580017], ["ceramic", 0.49068900003067073], ["microwave", 0.45930169018934874]], "Car Accessories": [["car", 0.5608807013030145], ["mats", 0.40243127224340647], ["vehicle", 0.32527312147968], ["mud", 0.3120217917532408], ["floor", 0.28639120838100274]], "Bicycles and Motorcycles": [["bike", 0.672669310609492], ["motorcycle", 0.45862702253497417], ["bicycle", 0.41604743198493005], ["wheeler", 0.4111806808042964], ["bikes", 0.40182534484285193]], "Eyewear": [["lens", 0.6347206656220337], ["sunglasses", 0.5827648990585392], ["lenses", 0.48591645135463507], ["glasses", 0.44000442658697553], ["vision", 0.35746121468648506]], "Gardening": [["plant", 0.5003258672377139], ["plants", 0.48132127991787405], ["flowers", 0.4775975182063175], ["pot", 0.45391275371608636], ["planter", 0.4432884490358204]], "Dining": [["table", 0.6153329443740811], ["dining", 0.37217834623751783], ["cutlery", 0.3139821236350345], ["tray", 0.3097030379964209], ["tablecloths", 0.3006277593090895]], "Lingerie": [["bra", 0.8965852601121921], ["cups", 0.5285363570485941], ["breast", 0.46345090588793064], ["nonpadded", 0.4551225849177485], ["straps", 0.4371461075717605]], "Apple Products": [["iphone", 0.7503862987601692], ["pro", 0.37695667339429706], ["apple", 0.37312380912718573], ["xr", 0.3582173040481931], ["max", 0.3526350702674551]], "Screen Protectors": [["screen", 0.6153666239172285], ["protector", 0.5651942935308064], ["protectors", 0.507626361439129], ["tempered", 0.4671540612427298], ["hardness", 0.4561645454021592]], "Women's Tops": [["sleeve", 0.49572716077806755], ["womens", 0.44013142956242973], ["sweatyrocks", 0.4203211398934657], ["crop", 0.41809175044803937], ["sleeveless", 0.41643008352599814]], "Gardening Supplies": [["hose", 0.7841372177474352], ["pipe", 0.6327301802335703], ["hoses", 0.5641588317324012], ["filter", 0.5118209249698248], ["fittings", 0.46796012979048174]], "Pet Supplies": [["dog", 
0.6822708088198687], ["pet", 0.6400522687470505], ["dogs", 0.5160745818307858], ["collar", 0.42754412295633315], ["cats", 0.3647447770159879]], "Audio Equipment": [["sound", 0.7436954698780431], ["stereo", 0.6847399263959048], ["music", 0.6510439565263184], ["bluetooth", 0.6468335306088341], ["audio", 0.5323419441521797]], "Curtains": [["curtains", 0.7082112219740755], ["lxcm", 0.5903739907537208], ["curtain", 0.5406181796148612], ["sheer", 0.4277598568424003], ["panels", 0.419958307310006]], "Sarees": [["saree", 0.8783232110965399], ["sarees", 0.7321717143814415], ["sari", 0.5601859768503931], ["elegantly", 0.548955977226906], ["explore", 0.5321879512070764]], "Health and Wellness": [["coconut", 0.49630270118056097], ["dietary", 0.4819055711752624], ["ayurvedic", 0.46826858968976937], ["snack", 0.4369980524170574], ["supplement", 0.4248983967068878]], "Sportswear": [["hat", 0.7236122381999898], ["team", 0.5961027784061247], ["cap", 0.5198077659688286], ["jersey", 0.4724677801851194], ["nfl", 0.46235035258007345]], "Clocks and Alarms": [["clock", 0.9911350148825689], ["clocks", 0.5806141697627104], ["alarm", 0.4921721915993405], ["aa", 0.4199955227337318], ["wall", 0.41135071771777504]], "Watches": [["watch", 0.9877005762493802], ["watches", 0.6588330987551935], ["analogue", 0.6120074604752818], ["dial", 0.5425729098662936], ["band", 0.5008365915519408]], "Bar Accessories": [["wine", 0.7080736210501507], ["glasses", 0.6170645321866487], ["beer", 0.52017140239074], ["bottle", 0.5089754002398992], ["opener", 0.4765574582140716]], "Crafting Supplies": [["knitting", 0.7625115695769519], ["yarn", 0.624084654214461], ["needles", 0.6192608627071066], ["sewing", 0.5291732442509863], ["thread", 0.5207899838586767]]}
\ No newline at end of file
diff --git a/images/ML_domains.png b/images/ML_domains.png
new file mode 100644
index 0000000000000000000000000000000000000000..6011cc79861085196f2557025d0fe7e3b50909f9
Binary files /dev/null and b/images/ML_domains.png differ
diff --git a/images/ML_header.jpg b/images/ML_header.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e680d3666a034c8eae536ff7ccf1231d1abadbbc
Binary files /dev/null and b/images/ML_header.jpg differ
diff --git a/images/brain_tumor.jpg b/images/brain_tumor.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6b8c2456cb3287096bf8252d63897d11f4e5cd0b
Binary files /dev/null and b/images/brain_tumor.jpg differ
diff --git a/images/cnn_example.png b/images/cnn_example.png
new file mode 100644
index 0000000000000000000000000000000000000000..3eb951fe829597ce0328e759d68dc1d34f2f8fa0
Binary files /dev/null and b/images/cnn_example.png differ
diff --git a/images/customer-churn.webp b/images/customer-churn.webp
deleted file mode 100644
index 9d8c75a61b1a145752c0f35898f1a2379bcf485a..0000000000000000000000000000000000000000
Binary files a/images/customer-churn.webp and /dev/null differ
diff --git a/images/e-commerce.jpg b/images/e-commerce.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..97e64c1c61f21820c3e901e94dd5633aa6625021
Binary files /dev/null and b/images/e-commerce.jpg differ
diff --git a/images/fashion_ai.jpg b/images/fashion_ai.jpg
deleted file mode 100644
index e5b5057e5854b90966a5b46db2790475c17c14ea..0000000000000000000000000000000000000000
Binary files a/images/fashion_ai.jpg and /dev/null differ
diff --git a/images/fashion_od.jpg b/images/fashion_od.jpg
deleted file mode 100644
index 9ca9a9734c9a0920561298cd1aacb45cd349047f..0000000000000000000000000000000000000000
Binary files a/images/fashion_od.jpg and /dev/null differ
diff --git a/images/meningioma_tumor.png b/images/meningioma_tumor.png
new file mode 100644
index 0000000000000000000000000000000000000000..b13fa3d04dcaa277cc7bee3c3472529fde96c25b
Binary files /dev/null and b/images/meningioma_tumor.png differ
diff --git a/images/no_tumor.png b/images/no_tumor.png
new file mode 100644
index 0000000000000000000000000000000000000000..7b36e6b929b8c63ea984996fa4433349a2521f16
Binary files /dev/null and b/images/no_tumor.png differ
diff --git a/images/od_header.jpg b/images/od_header.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2f404328ba0cd375861bb4f233ea30ce4d137889
Binary files /dev/null and b/images/od_header.jpg differ
diff --git a/images/pituitary.png b/images/pituitary.png
new file mode 100644
index 0000000000000000000000000000000000000000..4a5a7e704e5ef69bb13a424f65458ffc4dc319b7
Binary files /dev/null and b/images/pituitary.png differ
diff --git a/images/reviews.jpg b/images/reviews.jpg
deleted file mode 100644
index d8a96a1e3d98f2dbd2dea236f5665aae15ad6c77..0000000000000000000000000000000000000000
Binary files a/images/reviews.jpg and /dev/null differ
diff --git a/images/reviews.png b/images/reviews.png
new file mode 100644
index 0000000000000000000000000000000000000000..6cb03e877991280afc1217414c6566fa72ea0858
Binary files /dev/null and b/images/reviews.png differ
diff --git a/images/topic_modeling.gif b/images/topic_modeling.gif
new file mode 100644
index 0000000000000000000000000000000000000000..e6e2f87e69500631c59281e4e0c81ed1639e3e06
Binary files /dev/null and b/images/topic_modeling.gif differ
diff --git a/images/tumor_image.jpg b/images/tumor_image.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..51759fbde4b031292359ef256301b70274778e56
Binary files /dev/null and b/images/tumor_image.jpg differ
diff --git a/images/tumor_types_class.png b/images/tumor_types_class.png
new file mode 100644
index 0000000000000000000000000000000000000000..dc6eff1c131e776093e61ee43386ea69bb624c14
Binary files /dev/null and b/images/tumor_types_class.png differ
diff --git a/images/tumors_types_class.png b/images/tumors_types_class.png
new file mode 100644
index 0000000000000000000000000000000000000000..dc6eff1c131e776093e61ee43386ea69bb624c14
Binary files /dev/null and b/images/tumors_types_class.png differ
diff --git a/images/unsupervised_learner.webp b/images/unsupervised_learner.webp
deleted file mode 100644
index f127dbed46b5dca203c5d88c0318de0dc4fb25d2..0000000000000000000000000000000000000000
Binary files a/images/unsupervised_learner.webp and /dev/null differ
diff --git a/main_page.py b/main_page.py
index afb47eb2951a8ed8324f997d42758c6e45c07cf1..06b67638029acf381076124c9e8b0666c84b016e 100644
--- a/main_page.py
+++ b/main_page.py
@@ -3,7 +3,7 @@ import streamlit as st
import pandas as pd
import numpy as np
-from st_pages import Page, show_pages
+from st_pages import Page, show_pages, Section, add_indentation
from PIL import Image
#from utils import authenticate_drive
@@ -14,6 +14,7 @@ from PIL import Image
##################################################################################
st.set_page_config(layout="wide")
+#add_indentation()
@@ -34,27 +35,41 @@ st.set_page_config(layout="wide")
# TITLE #
##################################################################################
+
st.image("images/AI.jpg")
-st.title("AI and Data Science Examples")
-st.subheader("HEC Paris, 2023-2024")
-st.markdown("""**Course provided by Shirish C. SRIVASTAVA**
- and developped by Hi! PARIS Engineers: *Laurène DAVID*, *Salma HOUIDI* and *Maeva N'GUESSAN*.
- """, unsafe_allow_html=True)
+st.markdown(" ")
+
+col1, col2 = st.columns([0.65,0.35], gap="medium")
+
+with col1:
+ st.title("AI and Data Science Examples")
+ st.subheader("HEC Paris, 2023-2024")
+ st.markdown("""**Course provided by Shirish C. SRIVASTAVA**
+ **Hi! PARIS Engineers**: Laurène DAVID, Salma HOUIDI and Maeva N'GUESSAN""", unsafe_allow_html=True)
#st.markdown("in collaboration with Hi! PARIS engineers: Laurène DAVID, Salma HOUIDI and Maeva N'GUESSAN")
-st.markdown(" ")
-st.info("""**About the app**: The AI and Data Science Examples app was created to introduce students to the field of Data Science by showcasing real-life applications of AI.
- It includes use cases using traditional Machine Learning algorithms on structured data, as well as Deep Learning models run on unstructured data (text, images,...).""")
+with col2:
+#Hi! PARIS collaboration mention
+ st.markdown(" ")
+ st.markdown(" ")
+ st.markdown(" ")
+ image_hiparis = Image.open('images/hi-paris.png')
+ st.image(image_hiparis, width=150)
+
+ url = "https://www.hi-paris.fr/"
+ st.markdown("""###### **Made in collaboration with [Hi! PARIS](%s)** """ % url, unsafe_allow_html=True)
+
+st.markdown(" ")
st.divider()
-#Hi! PARIS collaboration mention
-st.markdown(" ")
-image_hiparis = Image.open('images/hi-paris.png')
-st.image(image_hiparis, width=150)
-url = "https://www.hi-paris.fr/"
-st.markdown("**The app was made in collaboration with [Hi! PARIS](%s)**" % url)
+# #Hi! PARIS collaboration mention
+# st.markdown(" ")
+# image_hiparis = Image.open('images/hi-paris.png')
+# st.image(image_hiparis, width=150)
+# url = "https://www.hi-paris.fr/"
+# st.markdown("**The app was made in collaboration with [Hi! PARIS](%s)**" % url)
@@ -68,11 +83,21 @@ st.markdown("**The app was made in collaboration with [Hi! PARIS](%s)**" % url)
show_pages(
[
Page("main_page.py", "Home Page", "🏠"),
- Page("pages/supervised_unsupervised_page.py", "Supervised vs Unsupervised", "🔍"),
- Page("pages/timeseries_analysis.py", "Time Series Forecasting", "📈"),
- Page("pages/sentiment_analysis.py", "Sentiment Analysis", "👍"),
- Page("pages/object_detection.py", "Object Detection", "📹"),
- Page("pages/recommendation_system.py", "Recommendation system", "🛒")
+ Section(name=" ", icon=""),
+ Section(name=" ", icon=""),
+
+ Section(name="Machine Learning", icon="1️⃣"),
+ Page("pages/supervised_unsupervised_page.py", "1| Supervised vs Unsupervised 🔍", ""),
+ Page("pages/timeseries_analysis.py", "2| Time Series Forecasting 📈", ""),
+ Page("pages/recommendation_system.py", "3| Recommendation systems 🛒", ""),
+
+ Section(name="Natural Language Processing", icon="2️⃣"),
+ Page("pages/topic_modeling.py", "1| Topic Modeling 📚", ""),
+ Page("pages/sentiment_analysis.py", "2| Sentiment Analysis 👍", ""),
+
+ Section(name="Computer Vision", icon="3️⃣"),
+ Page("pages/image_classification.py", "1| Image Classification 🖼️", ""),
+ Page("pages/object_detection.py", "2| Object Detection 📹", "")
]
)
@@ -83,4 +108,30 @@ show_pages(
##################################################################################
+st.header("About the app")
+
+
+st.info("""The **AI and Data Science Examples** app was created as a tool to introduce students to the field of Data Science by showcasing real-life applications of AI.
+ It includes use cases using traditional Machine Learning algorithms on structured data, as well as models that analyze unstructured data (text, images,...).""")
+
+st.markdown(" ")
+st.markdown("""The app is structured into three sections:
+- 1️⃣ **Machine Learning**: This first section covers use cases where structured data (data in a tabular format) is fed to an AI model.
+ You will find pages on *Supervised/Unsupervised Learning*, *Time Series Forecasting* and AI powered *Recommendation Systems*.
+- 2️⃣ **Natural Language Processing** (NLP): This second section showcases AI applications where large amounts of text data is analyzed using Deep Learning models.
+ Pages on *Topic Modeling* and *Sentiment Analysis*, which are types of NLP models, can be found in this section.
+- 3️⃣ **Computer Vision**: This final section covers a sub-field of AI called Computer Vision which deals with image/video data.
+ The field of Computer Vision includes *Image classification* and *Object Detection*, which are both featured in this section.
+ """)
+
+st.image("images/ML_domains.png",
+ caption="""This figure showcases a selection of sub-fields in Artificial Intelligence, such as traditional
+ Machine Learning, NLP, Computer Vision and Robotics.""")
+
+
+# st.markdown(" ")
+# st.markdown(" ")
+# st.markdown("## Want to learn more about AI ?")
+# st.markdown("""**Hi! PARIS**, a multidisciplinary center on Data Analysis and AI founded by Institut Polytechnique de Paris and HEC Paris,
+# hosts every year a **Data Science Bootcamp** for students of all levels.""")
\ No newline at end of file
diff --git a/notebooks/Supervised-Unsupervised/supply_chain.ipynb b/notebooks/Supervised-Unsupervised/supply_chain.ipynb
deleted file mode 100644
index 2793c0d555769feaf50da361f19ca1435b94897c..0000000000000000000000000000000000000000
--- a/notebooks/Supervised-Unsupervised/supply_chain.ipynb
+++ /dev/null
@@ -1,55 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt \n",
- "import seaborn as sns"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "path_data = r\"C:\\Users\\LaurèneDAVID\\Documents\\Teaching\\Educational_apps\\app-hec-AI-DS\\data\\classification\\supply_chain_data.csv\"\n",
- "supply_data = pd.read_csv(path_data)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.0"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/notebooks/energy_consumption.ipynb b/notebooks/energy_consumption.ipynb
index eee2751034abaa389176ca92d73d32ebb265750e..278e96c1693e836742bf1f88d038bc05cef88f28 100644
--- a/notebooks/energy_consumption.ipynb
+++ b/notebooks/energy_consumption.ipynb
@@ -9,9 +9,18 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 1,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\LaurèneDAVID\\Documents\\Teaching\\Educational_apps\\app-ai-ds-hec\\venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ }
+ ],
"source": [
"import pandas as pd \n",
"import numpy as np\n",
diff --git a/notebooks/topic_modeling.ipynb b/notebooks/topic_modeling.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..732cdc560939478b87597e5fa560f1d6fad4186d
--- /dev/null
+++ b/notebooks/topic_modeling.ipynb
@@ -0,0 +1,101 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Topic Modeling on product descriptions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#py -m pip install bertopic"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\LaurèneDAVID\\Documents\\Teaching\\Educational_apps\\app-ai-ds-hec\\venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import pickle\n",
+ "import pandas as pd\n",
+ "from bertopic import BERTopic"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path_model = r\"C:\\Users\\LaurèneDAVID\\Documents\\Teaching\\Educational_apps\\data-hec-AI-DS\\model_topicmodeling.pkl\"\n",
+ "path_data = r\"C:\\Users\\LaurèneDAVID\\Documents\\Teaching\\Educational_apps\\data-hec-AI-DS\\data-topicmodeling.csv\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "TypeError",
+ "evalue": "_rebuild() got an unexpected keyword argument 'impl_kind'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mpickle\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mpath_model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[1;32mc:\\Users\\LaurèneDAVID\\Documents\\Teaching\\Educational_apps\\app-ai-ds-hec\\venv\\lib\\site-packages\\numba\\core\\serialize.py:152\u001b[0m, in \u001b[0;36mcustom_rebuild\u001b[1;34m(custom_pickled)\u001b[0m\n\u001b[0;32m 147\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Customized object deserialization.\u001b[39;00m\n\u001b[0;32m 148\u001b[0m \n\u001b[0;32m 149\u001b[0m \u001b[38;5;124;03mThis function is referenced internally by `custom_reduce()`.\u001b[39;00m\n\u001b[0;32m 150\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 151\u001b[0m \u001b[38;5;28mcls\u001b[39m, states \u001b[38;5;241m=\u001b[39m custom_pickled\u001b[38;5;241m.\u001b[39mctor, custom_pickled\u001b[38;5;241m.\u001b[39mstates\n\u001b[1;32m--> 152\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_rebuild(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstates)\n",
+ "\u001b[1;31mTypeError\u001b[0m: _rebuild() got an unexpected keyword argument 'impl_kind'"
+ ]
+ }
+ ],
+ "source": [
+ "model = pickle.load(open(path_model, 'rb'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pages/image_classification.py b/pages/image_classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..d28d4dfbd0a25ea6cf78b3930d45399d142ec9db
--- /dev/null
+++ b/pages/image_classification.py
@@ -0,0 +1,330 @@
+import streamlit as st
+import pickle
+import time
+import os
+import pandas as pd
+import plotly.express as px
+from PIL import Image
+from utils import load_data_pickle
+
+
+# import gradcam
+# from gradcam.utils import visualize_cam
+# from gradcam import GradCAM, GradCAMpp
+
+#add_indentation()
+st.set_page_config(layout="wide")
+
+# Chemin vers le dossier contenant les images et le modèle pré-entraîné
+DATA_DIR = r"data/image_classification/images"
+MODEL_PATH = r"pretrained_models/image_classification/resnet18_braintumor.pt"
+gradcam_images_paths = ["images/meningioma_tumor.png", "images/no_tumor.png", "images/pituitary.png"]
+
+ # PREPROCESSING
+
+# def preprocess(image):
+# # Il faut que l'image' est une image PIL. Si 'image' est un tableau numpy, on le convertit en image PIL.
+# if isinstance(image, np.ndarray):
+# image = Image.fromarray(image)
+
+# transform = transforms.Compose([
+# transforms.Resize((224, 224)),
+# transforms.ToTensor(),
+# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Normalisez l'image.
+# ])
+# # On applique les transformations définies sur l'image.
+# image = transform(image)
+# return image
+
+
+# Chargement du modèle pré-entraîné
+
+# def load_pretrained_model(num_classes=3):
+# model = models.resnet18(pretrained=False)
+# num_ftrs = model.fc.in_features
+# model.fc = torch.nn.Linear(num_ftrs, num_classes)
+
+# # Chargement des poids pré-entraînés tout en ignorant la dernière couche 'fc'
+# state_dict = torch.load(MODEL_PATH, map_location=torch.device('cpu'))
+# state_dict.pop('fc.weight', None)
+# state_dict.pop('fc.bias', None)
+# model.load_state_dict(state_dict, strict=False)
+
+# model.eval()
+# return model
+
+# model = load_pretrained_model(num_classes=3) #On a supprimés une des classes
+
+
+# # PREDICTION
+# def predict(image_preprocessed, model):
+# # Si image_preprocessed est déjà un tensor PyTorch, on doit s'assurer qu'il soit de dimension 3 : [batch_size, channels, height, width]
+# # La fonction unsqueeze(0) ajoute une dimension de batch_size au début pour le faire correspondre à cette attente
+# if image_preprocessed.dim() == 3:
+# image_preprocessed = image_preprocessed.unsqueeze(0)
+
+# with torch.no_grad():
+# output = model(image_preprocessed)
+# _, predicted = torch.max(output, 1)
+# return predicted, output
+
+
+
+###################################### TITLE ####################################
+
+st.markdown("# Image Classification 🖼️")
+
+st.markdown("### What is Image classification ?")
+st.info("""**Image classification** is a process in Machine Learning and Computer Vision where an algorithm is trained to recognize and categorize images into predefined classes. It involves analyzing the visual content of an image and assigning it to a specific label based on its features.""")
+ #unsafe_allow_html=True)
+st.markdown(" ")
+st.markdown("""State-of-the-art image classification models use **neural networks** to predict whether an image belongs to a specific class.
+Each of the possible predicted classes are given a probability then the class with the highest value is assigned to the input image.""",
+unsafe_allow_html=True)
+
+image_ts = Image.open('images/cnn_example.png')
+_, col, _ = st.columns([0.2,0.8,0.2])
+with col:
+ st.image(image_ts,
+ caption="An example of an image classification model, with the 'backbone model' as the neural network.")
+
+st.markdown(" ")
+
+st.markdown("""Real-life applications of image classification includes:
+- **Medical Imaging 👨⚕️**: Diagnose diseases and medical conditions from images such as X-rays, MRIs and CT scans to, for example, identify tumors and classify different types of cancers.
+- **Autonomous Vehicles** 🏎️: Classify objects such as pedestrians, vehicles, traffic signs, lane markings, and obstacles, which is crucial for navigation and collision avoidance.
+- **Satellite and Remote Sensing 🛰️**: Analyze satellite imagery to identify land use patterns, monitor vegetation health, assess environmental changes, and detect natural disasters such as wildfires and floods.
+- **Quality Control 🛂**: Inspect products and identify defects to ensure compliance with quality standards during the manufacturing process.
+ """)
+
+# st.markdown("""Real-life applications of Brain Tumor includes:
+# - **Research and development💰**: The technologies and methodologies developed for brain tumor classification can advance research in neuroscience, oncology, and the development of new diagnostic tools and treatments.
+# - **Healthcare👨⚕️**: Data derived from the classification and analysis of brain tumors can inform public health decisions, healthcare policies, and resource allocation, emphasizing areas with higher incidences of certain types of tumors.
+# - **Insurance Industry 🏬**: Predict future demand for products to optimize inventory levels, reduce holding costs, and improve supply chain efficiency.
+# """)
+
+
+ ###################################### USE CASE #######################################
+
+
+# BEGINNING OF USE CASE
+st.divider()
+st.markdown("# Brain Tumor Classification 🧠")
+
+st.info("""In this use case, a **brain tumor classification** model is leveraged to accurately identify the presence of tumors in MRI scans of the brain.
+    This application can be a great resource for healthcare professionals to facilitate early detection and consequently improve treatment outcomes for patients.""")
+
+st.markdown(" ")
+_, col, _ = st.columns([0.1,0.8,0.1])
+with col:
+ st.image("images/brain_tumor.jpg")
+
+st.markdown(" ")
+st.markdown(" ")
+
+### WHAT ARE BRAIN TUMORS ?
+st.markdown(" ### What is a Brain Tumor ?")
+st.markdown("""Before introducing the use case, let's give a short description on what a brain tumor is.
+ A brain tumor occurs when **abnormal cells form within the brain**. Two main types of tumors exist: **cancerous (malignant) tumors** and **benign tumors**.
+- **Cancerous tumors** are malignant tumors that have the ability to invade nearby tissues and spread to other parts of the body through a process called metastasis.
+- **Benign tumors** can become quite large but will not invade nearby tissue or spread to other parts of the body. They can still cause serious health problems depending on their size, location and rate of growth.
+ """, unsafe_allow_html=True)
+
+
+
+st.markdown(" ")
+st.markdown(" ")
+st.markdown("### About the data 📋")
+
+st.markdown("""You were provided with a large dataset which contains **anonymized patient MRI scans** categorized into three distinct classes: **pituitary tumor** (in most cases benign), **meningioma tumor** (cancerous) and **no tumor**.
+ This dataset will serve as the foundation for training our classification model, offering a comprehensive view of varied tumor presentations within the brain.""")
+
+_, col, _ = st.columns([0.15,0.7,0.15])
+with col:
+ st.image("images/tumors_types_class.png")
+
+# see_data = st.checkbox('**See the data**', key="image_class\seedata")
+# if see_data:
+# st.warning("You can view here a few examples of the MRI training data.")
+# # image selection
+# images = os.listdir(DATA_DIR)
+# selected_image1 = st.selectbox("Choose an image to visualize 🔎 :", images, key="selectionbox_key_2")
+
+# # show image
+# image_path = os.path.join(DATA_DIR, selected_image1)
+# image = Image.open(image_path)
+# st.image(image, caption="Image selected", width=450)
+
+# st.info("""**Note**: This dataset will serve as the foundation for training our classification model, offering a comprehensive view of varied tumor presentations within the brain.
+# By analyzing these images, the model learns to discern the subtle differences between each class, thereby enabling the precise identification of tumor types.""")
+
+st.markdown(" ")
+st.markdown(" ")
+
+
+
+st.markdown("### Train the algorithm ⚙️")
+st.markdown("""**Training an AI model** means feeding it data that contains multiple examples/images of each type of tumor to be detected.
+ By analyzing the provided MRI images, the model learns to discern the subtle differences between each classes, thereby enabling the precise identification of tumor types.""")
+
+
+### CONDITION ##
+
+# Initialisation de l'état du modèle
+if 'model_train' not in st.session_state:
+ st.session_state['model_train'] = False
+
+run_model = st.button("Train the model")
+
+if run_model:
+ # Simuler l'entraînement du modèle
+ st.session_state.model_train = True
+ with st.spinner('Training the model...'):
+ time.sleep(2)
+ st.success("The model has been trained.")
+else:
+ # Afficher le statut
+ st.info("The model hasn't been trained yet.")
+
+# Afficher les résultats
+if st.session_state.model_train:
+ st.markdown(" ")
+ st.markdown(" ")
+ st.markdown("### See the results ☑️")
+ tab1, tab2 = st.tabs(["Performance", "Explainability"])
+
+ with tab1:
+ #st.subheader("Performance")
+        st.info("""**Evaluating a model's performance** helps provide a quantitative measurement of its ability to make accurate predictions.
+ In this use case, the performance of the brain tumor classification model was measured by comparing the patient's true diagnosis with the class predicted by the trained model.""")
+
+ class_accuracy_path = "data/image_classification/class_accuracies.pkl"
+
+ # Charger les données depuis le fichier Pickle
+ try:
+ with open(class_accuracy_path, 'rb') as file:
+ class_accuracy = pickle.load(file)
+ except Exception as e:
+ st.error(f"Erreur lors du chargement du fichier : {e}")
+ class_accuracy = {}
+
+ if not isinstance(class_accuracy, dict):
+ st.error(f"Expected a dictionary, but got: {type(class_accuracy)}")
+ else:
+ # Conversion des données en DataFrame
+ df_accuracy = pd.DataFrame(list(class_accuracy.items()), columns=['Tumor Type', 'Accuracy'])
+ df_accuracy['Accuracy'] = ((df_accuracy['Accuracy'] * 100).round()).astype(int)
+
+ # Générer le graphique à barres avec Plotly
+ fig = px.bar(df_accuracy, x='Tumor Type', y='Accuracy',
+ text='Accuracy', color='Tumor Type',
+ title="Model Performance",
+ labels={'Accuracy': 'Accuracy (%)', 'Tumor Type': 'Tumor Type'})
+
+ fig.update_traces(texttemplate='%{text}%', textposition='outside')
+
+ # Afficher le graphique dans Streamlit
+ st.plotly_chart(fig, use_container_width=True)
+
+
+ st.markdown("""The model's accuracy was evaluated across two types of tumors (pituitary and meningioma) and no tumor type.
+ This evaluation is vital for determining if the model performs consistently across different tumor classifications, or if it encounters difficulties in accurately distinguishing between these two types of tumors.""",
+ unsafe_allow_html=True)
+
+ st.markdown(" ")
+
+ st.markdown("""**Interpretation**:
+ Our model demonstrates high accuracy in predicting cancerous type tumors (meningioma) as well as 'healthy' brain scans (no tumor) with a 98% accuracy for both.
+ It is observed that the model's performance is lower for pituitary type tumors, as it is around 81%.
+ This discrepancy may indicate that the model finds it more challenging to distinguish pituitary tumors from other tumor
+ types, possibly due to their unique characteristics or lower representation in the training data.
+ """, unsafe_allow_html=True)
+
+ with tab2:
+ #st.subheader("Model Explainability with Grad-CAM")
+ st.info("""**Explainability in AI** refers to the ability to **understand and interpret how AI systems make predictions** and how to quantify the impact of the provided data on its results.
+ In the case of image classification, explainability can be measured by analyzing which of the image's pixel had the most impact on the model's output.""")
+ st.markdown(" ")
+ st.markdown("""The following images show the output of image classification explainability applied on three images used during training.
+ Pixels that are colored in 'red' had a larger impact on the model's output and thus its ability to distinguish different tumor types (or none).
+
+ """, unsafe_allow_html=True)
+
+ st.markdown(" ")
+ gradcam_images_paths = ["images/meningioma_tumor.png", "images/no_tumor.png", "images/pituitary.png"]
+ class_names = ["Meningioma Tumor", "No Tumor", "Pituitary Tumor"]
+
+ for path, class_name in zip(gradcam_images_paths, class_names):
+ st.image(path, caption=f"Explainability for {class_name}")
+
+ # st.markdown("""
+ # Interpretation:
+
+ # ### Meningioma Tumors
+ # **Meningiomas** are tumors that originate from the meninges, the layers of tissue
+ # that envelop the brain and spinal cord. Although they are most often benign
+ # (noncancerous) and grow slowly, their location can cause significant issues by
+ # exerting pressure on the brain or spinal cord. Meningiomas can occur at various
+ # places around the brain and spinal cord and are more common in women than in men.
+
+ # ### Pituitary Tumors
+ # **Pituitary** are growths that develop in the pituitary gland, a small gland located at the
+ # base of the brain, behind the nose, and between the ears. Despite their critical location,
+ # the majority of pituitary tumors are benign and grow slowly. This gland regulates many of the
+ # hormones that control various body functions, so even a small tumor can affect hormone production,
+ # leading to a variety of symptoms.""", unsafe_allow_html=True)
+
+
+#################################################
+
+st.markdown(" ")
+st.markdown(" ")
+st.markdown("### Classify MRI scans 🆕")
+
+st.info("**Note**: The brain tumor classification model can classify new MRI images only if it has been previously trained.")
+
+st.markdown("""Here, you are provided the MRI scans of nine new patients.
+ Select an image and press 'run the model' to classify the MRI as either a pituitary tumor, a meningioma tumor or no tumor.""")
+
+
+# Définition des catégories de tumeurs
+categories = ["pituitary tumor", "no tumor", "meningioma tumor"]
+
+# Selection des images
+images = os.listdir(DATA_DIR)
+selected_image2 = st.selectbox("Choose an image", images, key="selectionbox_key_1")
+
+# show image
+image_path = os.path.join(DATA_DIR, selected_image2)
+image = Image.open(image_path)
+st.markdown("#### You've selected the following image.")
+st.image(image, caption="Image selected", width=300)
+
+
+if st.button('**Make predictions**', key='another_action_button'):
+ results_path = r"data/image_classification"
+ df_results = load_data_pickle(results_path, "results.pkl")
+ predicted_category = df_results.loc[df_results["image"]==selected_image2,"class"].to_numpy()
+
+ # # Prétraitement et prédiction
+ # image_preprocessed = preprocess(image)
+ # predicted_tensor, _ = predict(image_preprocessed, model)
+
+ # predicted_idx = predicted_tensor.item()
+ # predicted_category = categories[predicted_idx]
+
+ # Affichage de la prédiction avec la catégorie prédite
+ if predicted_category == "pituitary":
+ st.warning(f"**Results**: Pituitary tumor was detected. ")
+ elif predicted_category == "no tumor":
+ st.success(f"**Results**: No tumor was detected.")
+ elif predicted_category == "meningnoma":
+ st.error(f"**Results**: Meningioma was detected.")
+
+
+ # image_path = os.path.join(DATA_DIR, selected_image2)
+ # image = Image.open(image_path)
+ # st.image(image, caption="Image selected", width=450)
+
+
+
diff --git a/pages/object_detection.py b/pages/object_detection.py
index b334f0924ccd4a78595e0ae6c0143c1ac99bf23b..7357098470d2f6b83b32d19721c31c27094c3db8 100644
--- a/pages/object_detection.py
+++ b/pages/object_detection.py
@@ -12,6 +12,9 @@ from PIL import Image
from transformers import YolosFeatureExtractor, YolosForObjectDetection
from torchvision.transforms import ToTensor, ToPILImage
from annotated_text import annotated_text
+from st_pages import add_indentation
+
+#add_indentation()
st.set_page_config(layout="wide")
@@ -134,7 +137,8 @@ cats = ['shirt, blouse', 'top, t-shirt, sweatshirt', 'sweater', 'cardigan', 'jac
######################################################################################################################################
-st.markdown("# Object Detection")
+#st.image("images/od_header.jpg")
+st.markdown("# Object Detection 📹")
st.markdown("### What is Object Detection ?")
@@ -155,28 +159,38 @@ st.markdown("""Common applications of Object Detection include:
- **Retail** 🏬 : Implementing smart shelves and checkout systems that use object detection to track inventory and monitor stock levels.
- **Healthcare** 👨⚕️: Detecting and tracking anomalies in medical images, such as tumors or abnormalities, for diagnostic purposes or prevention.
- **Manufacturing** 🏭: Quality control on production lines by detecting defects or irregularities in manufactured products. Ensuring workplace safety by monitoring the movement of workers and equipment.
-- **Fashion and E-commerce** 🛍️ : Improving virtual try-on experiences by accurately detecting and placing virtual clothing items on users.
""")
+
+############################# USE CASE #############################
st.markdown(" ")
st.divider()
-st.markdown("## Fashion Object Detection 👗")
+st.markdown("# Fashion Object Detection 👗")
# st.info("""This use case showcases the application of **Object detection** to detect clothing items/features on images.
# The images used were gathered from Dior's""")
-st.info("""In this use case, we are going to identify and locate different articles of clothings, as well as finer details such as a collar or pocket using an object detection AI model.
- The images used were taken from **Dior's 2020 Fall Women Fashion Show**.""")
+st.info("""**Object detection models** can be very valuable for fashion retailers wishing to improve customer experience by providing, for example, **product recognition**, **visual search**
+ and even **virtual try-ons**.
+             In this use case, we are going to show an object detection model that was able to identify and locate different articles of clothing on fashion show images.
+ """)
st.markdown(" ")
+st.markdown(" ")
+
+# images_dior = [os.path.join("data/dior_show/images",url) for url in os.listdir("data/dior_show/images") if url != "results"]
+# columns_img = st.columns(4)
+# for img, col in zip(images_dior,columns_img):
+# with col:
+# st.image(img)
+
+_, col, _ = st.columns([0.1,0.8,0.1])
+with col:
+ st.image("images/fashion_od2.png")
-images_dior = [os.path.join("data/dior_show/images",url) for url in os.listdir("data/dior_show/images") if url != "results"]
-columns_img = st.columns(4)
-for img, col in zip(images_dior,columns_img):
- with col:
- st.image(img)
st.markdown(" ")
+st.markdown(" ")
st.markdown("### About the model 📚")
@@ -204,12 +218,12 @@ st.markdown("")
############## SELECT AN IMAGE ###############
st.markdown("### Select an image 🖼️")
-#st.markdown("""**Select an image that you wish to run the Object Detection model on.**""")
+st.markdown("""The images provided were taken from **Dior's 2020 Fall Women Fashion Show**""")
image_ = None
fashion_images_path = r"data/dior_show/images"
list_images = os.listdir(fashion_images_path)
-image_name = st.selectbox("Select the image you wish to run the model on", list_images)
+image_name = st.selectbox("Choose an image", list_images)
image_ = os.path.join(fashion_images_path, image_name)
st.image(image_, width=300)
@@ -278,13 +292,19 @@ dict_cats_final = {key:value for (key,value) in dict_cats.items() if value in se
############## SELECT A THRESHOLD ###############
st.markdown("### Define a threshold for predictions 🔎")
-st.markdown("""This section allows you to control how confident you want your model to be with its predictions.
- Objects that are given a lower score than the chosen threshold will be ignored in the final results.""", unsafe_allow_html=True)
-
-st.markdown(" Below is an example of probability scores given by object detection models for each element detected.")
+st.markdown("""In this section, you can select a threshold for the model's final predictions.
+ Objects that are given a lower score than the chosen threshold will be ignored in the final results""", unsafe_allow_html=True)
+st.info("""**Note**: Object detection models detect objects using bounding boxes as well as assign objects to specific classes.
+ Each object is given a class based on a probability score computed by the model. A high probability signals that the model is confident in its prediction.
+ On the contrary, a lower probability score signals a level of uncertainty.""")
+st.markdown(" ")
+#st.markdown("The images below are examples of probability scores given by object detection models for each element detected.")
-st.image("images/probability_od.png", caption="Example with bounding boxes and probability scores given by object detection models")
+_, col, _ = st.columns([0.2,0.6,0.2])
+with col:
+ st.image("images/probability_od.png",
+             caption="Examples of object detection with bounding boxes and probability scores")
st.markdown(" ")
@@ -346,7 +366,8 @@ if run_model:
# PLOT BOUNDING BOX AND BARS/PROBA
col1, col2 = st.columns(2)
with col1:
- #st.markdown("**Bounding box results**")
+ st.markdown(" ")
+ st.markdown("##### 1. Bounding box results")
bboxes_scaled = rescale_bboxes(outputs.pred_boxes[0, keep].cpu(), image.size)
colors_used = plot_results(image, probas[keep], bboxes_scaled)
@@ -356,7 +377,12 @@ if run_model:
st.error("""No objects were detected on the image.
Decrease your threshold or choose differents items to detect.""")
else:
+ st.markdown(" ")
+ st.markdown("##### 2. Probability score of each object")
+ st.info("""**Note**: Some items might have been detected more than once on the image.
+ For these items, we've computed the average probability score across all detections.""")
visualize_probas(probas, threshold, colors_used)
+
else:
diff --git a/pages/recommendation_system.py b/pages/recommendation_system.py
index 9ef93cc63523e185628943c23c1ff837de548797..b1bbf1c2224d5bf173a57e14561cd7edecda8e0e 100644
--- a/pages/recommendation_system.py
+++ b/pages/recommendation_system.py
@@ -6,21 +6,24 @@ import pickle
import os
import altair as alt
import plotly.express as px
+
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from annotated_text import annotated_text
from utils import load_data_pickle, load_model_pickle, load_data_csv
+from st_pages import add_indentation
+#add_indentation()
st.set_page_config(layout="wide")
-
-st.markdown("# Recommendation system")
+#st.image("images/recom_system_header.png")
+st.markdown("# Recommendation systems 🛒")
st.markdown("### What is a Recommendation System ?")
-st.info("""**Recommendation systems** are AI algorithms built to **suggest** or **recommend** **products** to consumers.
+st.info("""**Recommendation systems** are algorithms built to **suggest** or **recommend** **products** to consumers.
They are very common in social media platforms such as TikTok, Youtube or Instagram or e-commerce websites as they help improve and personalize a consumer's experience.""")
st.markdown("""There are two methods to build recommendation systems:
@@ -123,7 +126,7 @@ if select_usecase == "Movie recommendation system 📽️":
# Description of the use case
- st.markdown("""## Movie Recommendation System 📽️""")
+ st.markdown("""# Movie Recommendation System 📽️""")
#st.info(""" """)
@@ -349,7 +352,10 @@ From gleaming skyscrapers to vibrant neighborhoods, this cosmopolitan gem in Sou
if see_top_places:
st.markdown(top_places)
- st.markdown("""## Hotel Recommendation System 🛎️""")
+
+
+
+ st.markdown("""# Hotel Recommendation System 🛎️""")
st.info("""This use case shows how you can create personalized hotel recommendations using a recommendation system with **content-based Filtering**.
Analyzing location, amenities, price, and reviews, the model suggests tailored hotel recommendation based on the user's preference.
diff --git a/pages/sentiment_analysis.py b/pages/sentiment_analysis.py
index 4c2fb464960eed27c8a56e5f056477f10e163d5c..bc314a9798ca9e2ad4a831b2990a4ea57d5befa6 100644
--- a/pages/sentiment_analysis.py
+++ b/pages/sentiment_analysis.py
@@ -7,11 +7,14 @@ import pandas as pd
import numpy as np
import altair as alt
import plotly.express as px
+from st_pages import add_indentation
from pysentimiento import create_analyzer
from utils import load_data_pickle
st.set_page_config(layout="wide")
+#add_indentation()
+
def clean_text(text):
pattern_punct = r"[^\w\s.',:/]"
@@ -30,8 +33,8 @@ def load_sa_model():
-
-st.markdown("# Sentiment Analysis")
+#st.image("images/sa_header.jpg")
+st.markdown("# Sentiment Analysis 👍")
st.markdown("### What is Sentiment Analysis ?")
@@ -62,14 +65,14 @@ st.divider()
#use_case = st.selectbox("", sa_pages, label_visibility="collapsed")
-st.markdown("### Customer Reviews 📝")
+st.markdown("# Customer Review Analysis 📝")
st.info("""In this use case, **sentiment analysis** is used to predict the **polarity** (negative, neutral, positive) of customer reviews.
You can try the application by using the provided starbucks customer reviews, or by writing your own.""")
st.markdown(" ")
-_, col, _ = st.columns([0.25,0.5,0.25])
+_, col, _ = st.columns([0.2,0.6,0.2])
with col:
- st.image("images/reviews.jpg")
+ st.image("images/reviews.png",use_column_width=True)
st.markdown(" ")
diff --git a/pages/supervised_unsupervised_page.py b/pages/supervised_unsupervised_page.py
index 1c5d3d54a46d239390b9efd1ccdaecd1f159325a..af706c183f1941a3a1502b16d64a136c7ef96a17 100644
--- a/pages/supervised_unsupervised_page.py
+++ b/pages/supervised_unsupervised_page.py
@@ -7,12 +7,14 @@ import plotly.express as px
from PIL import Image
from utils import load_data_pickle, load_model_pickle
+from st_pages import add_indentation
from annotated_text import annotated_text
#####################################################################################
# PAGE CONFIG
#####################################################################################
+#add_indentation()
st.set_page_config(layout="wide")
@@ -21,20 +23,21 @@ st.set_page_config(layout="wide")
# INTRO
#####################################################################################
-
-st.markdown("# Supervised vs Unsupervised Learning")
+#st.image("images/ML_header.jpg", use_column_width=True)
+st.markdown("# Supervised vs Unsupervised Learning 🔍")
st.info("""There are two main types of models in the field of Data Science, **Supervised** and **Unsupervised learning** models.
Being able to distinguish which type of model fits your data is an essential step in building any AI project.""")
+st.markdown(" ")
st.markdown(" ")
#st.markdown("## What are the differences between both ?")
col1, col2 = st.columns(2, gap="large")
with col1:
- st.markdown("### Supervised Learning")
- st.markdown("""In supervised learning, models are trained by learning from **labeled data**.
+ st.markdown("## Supervised Learning")
+ st.markdown("""Supervised learning models are trained by learning from **labeled data**.
Labeled data provides to the model the desired output, which it will then use to learn relevant patterns and make predictions.
- A model is first **trained** to make predictions using labeled data
- The trained model can then be used to **predict values** for new data.
@@ -43,15 +46,15 @@ with col1:
st.image("images/supervised_learner.png", caption="An example of supervised learning")
with col2:
- st.markdown("### Unsupervised Learning")
- st.markdown("""In unsupervised learning, models **learn the data's inherent structure** without any explicit guidance on what to look for.
+ st.markdown("## Unsupervised Learning")
+ st.markdown("""Unsupervised learning models learn the data's inherent structure without any explicit guidance on what to look for.
The algorithm will identify any naturally occurring patterns in the dataset using **unlabeled data**.
- They can be useful for applications where the goal is to discover **unknown groupings** in the data.
- They are also used to identify unusual patterns or **outliers**.
""", unsafe_allow_html=True)
st.markdown(" ")
st.image("images/unsupervised_learning.png", caption="An example of unsupervised Learning",
- use_column_width=True)
+ use_column_width=True)
st.markdown(" ")
@@ -88,7 +91,7 @@ if learning_type == "Supervised Learning":
## Description of the use case
st.divider()
- st.markdown("## Credit score classification 💯")
+ st.markdown("# Credit score classification 💯")
st.info("""**Classification** is a type of supervised learning where the goal is to categorize input data into predefined classes or categories.
In this case, we will build a **credit score classification** model that predicts if a client will have a **'Bad'**, **'Standard'** or **'Good'** credit score.""")
st.markdown(" ")
@@ -347,7 +350,7 @@ if learning_type == "Supervised Learning":
## Description of the use case
st.divider()
- st.markdown("## Customer churn prediction ❌")
+ st.markdown("# Customer churn prediction ❌")
st.info(""" Classification is a type of supervised learning model whose goal is to categorize input data into predefined classes or categories.
In this example, we will build a **customer churn classification model** that can predict whether a customer is likely to leave a company's service in the future using historical data.
""")
@@ -670,7 +673,7 @@ if learning_type == "Unsupervised Learning":
# st.divider()
st.divider()
- st.markdown("## Customer Segmentation (Clustering) 🧑🤝🧑")
+ st.markdown("# Customer Segmentation 🧑🤝🧑")
st.info("""**Unsupervised learning** models are valulable tools for cases where you want your model to discover patterns by itself, without having to give it examples to learn from (especially if you don't have labeled data).
In this use case, we will show how they can be useful for **Customer Segmentation** to detect unknown groups of clients in a company's customer base.
diff --git a/pages/timeseries_analysis.py b/pages/timeseries_analysis.py
index 920aae44d552af3dd0c26ea89af0fc37db60d397..753f5b149c2e855144fc6ced72d7ee88558f25ed 100644
--- a/pages/timeseries_analysis.py
+++ b/pages/timeseries_analysis.py
@@ -9,9 +9,11 @@ from prophet import Prophet
from datetime import date
from utils import load_data_pickle
from sklearn.metrics import root_mean_squared_error
+from st_pages import add_indentation
st.set_page_config(layout="wide")
+#add_indentation()
@st.cache_data(ttl=3600, show_spinner=False)
def forecast_prophet(train, test, col=None):
@@ -25,10 +27,12 @@ def forecast_prophet(train, test, col=None):
return model, forecast
+
###################################### TITLE ####################################
#st.image("images/ts_header.png")
-st.markdown("# Time Series Forecasting")
+#st.image("images/ts_header.png", use_column_width=True)
+st.markdown("# Time Series Forecasting 📈")
st.markdown("### What is Time Series Forecasting ?")
st.info("""Time series forecasting models are AI models built to make accurate predictions about future values using historical data.
@@ -70,7 +74,7 @@ data_model["ds"] = pd.to_datetime(data_model["ds"])
# BEGINNING OF USE CASE
st.divider()
-st.markdown("### Power Consumption Forecasting ⚡")
+st.markdown("# Power Consumption Forecasting ⚡")
#st.markdown(" ")
st.info("""In this use case, a time series forecasting model is used to predict the **energy consumption** (or **Global Active Power**) of a household using historical data.
@@ -85,7 +89,7 @@ with col:
st.markdown(" ")
st.markdown(" ")
-st.markdown("#### About the data 📋")
+st.markdown("### About the data 📋")
st.markdown("""You were provided data from the **daily energy consumption** of a household between January 2007 and November 2010 (46 months).
The goal is to forecast the **Global active power** being produced daily by the household.
@@ -176,7 +180,7 @@ with tab5:
st.markdown(" ")
st.markdown(" ")
-st.markdown("#### Forecast model 📈")
+st.markdown("### Forecast model 📈")
st.markdown("""The forecasting model used in this use case allows **additional data** to be used for training.
Try adding more data to the model as it can help improve its performance and accuracy.""")
diff --git a/pages/topic_modeling.py b/pages/topic_modeling.py
new file mode 100644
index 0000000000000000000000000000000000000000..50074c4367cb23b9576f94cc4baecceb2cc40ae7
--- /dev/null
+++ b/pages/topic_modeling.py
@@ -0,0 +1,232 @@
+
+import json
+import os
+import streamlit as st
+import pandas as pd
+import matplotlib.pyplot as plt
+import plotly.express as px
+
+from utils import load_data_csv, load_data_pickle, load_model_pickle, load_numpy
+from st_pages import add_indentation
+
+# from wordcloud import WordCloud
+
+# Page configuration
+#st.set_page_config(layout="wide")
+#add_indentation()
+
+
+# Function to generate word clouds
+# def generate_wordcloud(text):
+# wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
+# fig, ax = plt.subplots()
+# ax.imshow(wordcloud, interpolation='bilinear')
+# ax.axis('off')
+# return fig
+
+
+st.set_page_config(layout="wide")
+
+# ---------------------------------------------------------------------------
+# Page header: title plus a short educational intro on topic modeling.
+# ---------------------------------------------------------------------------
+## Start of Streamlit app
+st.title("Topic Modeling 📚")
+
+
+st.markdown("### What is Topic Modeling ?")
+
+st.info("""
+Topic modeling is a text-mining technique used to **identify topics within a collection of documents**.
+It is a useful tool for organizing and summarizing vast amounts of textual data as well as automate the discovery of hidden thematic structures in a corpus of text data, without any prior knowledge.
+""")
+
+st.markdown(" ")
+# Center the illustrative GIF in the middle column (25% / 40% / 35% split).
+_, col, _ = st.columns([0.25,0.4,0.35])
+with col:
+    st.image("images/topic_modeling.gif", caption="An example of Topic Modeling", use_column_width=True)
+
+
+st.markdown("""Common applications of Topic Modeling include:
+- **Search Engine Optimization (SEO): 🔎** Determine the main topics/keywords present on a website to optimize content and improve search engine rankings.
+- **Customer Support** ✍️: Analyze customer support tickets, emails, and chat transcripts to identify common questions and complaints.
+- **Fraud Detection and Risk Management: 🏦** : Detect fraudulent activities, compliance violations, and operational risks by analyzing textual data such as transaction descriptions, audit reports and regulatory filings.
+- **Market Research 🌎**: Gain competitive intelligence and make informed decisions regarding product development, marketing strategies, and market positioning by analyzing research reports and industry news.
+""")
+
+
+st.markdown(" ")
+st.divider()
+
+# ---------------------------------------------------------------------------
+# Use case: topic modeling on ~20 000 e-commerce product descriptions.
+# NOTE(review): "products solds" in the info text below is a typo for
+# "products sold" — left unchanged here because it is a runtime string.
+# ---------------------------------------------------------------------------
+st.markdown("# Topic modeling on product descriptions 🛍️")
+st.info("""In this use case, we will use a topic model to categorize around 20 000 e-commerce products using text descriptions and identify
+        the main types of products solds.""")
+
+_, col, _ = st.columns([0.2,0.6,0.2])
+with col:
+    st.image("images/e-commerce.jpg")
+
+st.markdown(" ")
+
+# Load data
+# Root folder holding the pre-computed topic-model artifacts (pickles,
+# numpy arrays, JSON top-words) consumed further down this page.
+path_data = "data/topic-modeling"
+# data = load_data_csv(path_data,"data-topicmodeling.csv")
+
+# Load the topic data
+# One row per discovered topic: Title, Representation (top words), Percentage.
+topic_info = load_data_pickle(path_data, 'topic_info.pkl')
+
+
+
+##### ABOUT THE USE CASE
+st.markdown("#### About the data 📋")
+st.markdown("""You were provided a dataset with around 20 000 products from a large e-commerce retailer.
+            This dataset contains the products' title and description on the website.""", unsafe_allow_html=True)
+st.info("""**Note**: Some of the descriptions featured below are shown in their 'raw' form, meaning they contain unprocessed html code and special characters.
+        These descriptions were first 'cleaned' (by removing unwanted characters) before being used in the model.""")
+# NOTE(review): the widget key "credit_score_data" looks copy-pasted from the
+# credit-scoring page; a unique key (e.g. "topic_modeling_data") would avoid
+# any cross-page key collision — confirm before changing.
+see_data = st.checkbox('**See the data**', key="credit_score_data") # Corrected the key to use an underscore
+if see_data:
+    st.markdown(" ")
+    st.warning("This view only shows a subset of the 20 000 product description used.")
+    # Display-only subset of the corpus (title + description columns).
+    data = load_data_pickle(path_data,"data-tm-view.pkl")
+    data_show = data[["TITLE", "DESCRIPTION"]]
+    st.dataframe(data_show.reset_index(drop=True), use_container_width=True)
+
+
+st.markdown(" ")
+st.markdown(" ")
+
+def show_results():
+    """Render the topic-model results UI.
+
+    Two tabs: (1) an overall summary table of every discovered topic with
+    its top words and document share, and (2) a drill-down view for one
+    selected topic (top-word importances and most-similar topics).
+
+    Reads the module-level ``topic_info`` DataFrame and loads pre-computed
+    artifacts (top-words JSON, similarity pickle/npy) from ``path_data``.
+    Returns None; all output goes through Streamlit side effects.
+    """
+    st.markdown("#### See the results ☑️")
+    # A third "Search Similar Topics" tab was planned but is disabled (see
+    # the commented-out tab3 section at the bottom of this function).
+    tab1, tab2 = st.tabs(["Overall results", "Specific Topic Details", ])# "Search Similar Topics"])
+    st.markdown(" ")
+
+    # Tab 1: Summary Table
+    with tab1:
+        st.header("Overall results")
+        st.markdown("""This tab showcases all of the **topics identified** within the product dataset, each topic's most significant words (**top words**), as well as the **proportion**
+                    of products that were assigned to the specific topic.""")
+
+        # Keep only the five most significant words per topic for display.
+        summary_table = topic_info[['Title','Representation', 'Percentage']].copy()
+        summary_table['Top Words'] = summary_table['Representation'].apply(lambda x: x[:5]) #:5
+        summary_table = summary_table[["Title","Top Words","Percentage"]]
+        summary_table.rename({"Title":"Topic Title"}, axis=1, inplace=True)
+
+        # Percentage is rendered as a progress bar (0-100).
+        # NOTE(review): "Propotion" in the help text below is a typo for
+        # "Proportion" — left as-is because it is a runtime string.
+        st.data_editor(
+            summary_table, #.loc[df_results_tab1["Customer ID"].isin(filter_customers)],
+            column_config={
+                "Percentage": st.column_config.ProgressColumn(
+                    "Proportion %",
+                    help="Propotion of documents within each topic",
+                    format="%.1f%%",
+                    min_value=0,
+                    max_value=100)},
+            use_container_width=True
+        )
+
+        st.info("""**Note**: The topic 'titles' were not provided by the model but instead were generated by feeding the topic's top words to an LLM.
+                Traditional topic models define topics using representative/top words but weren't built to generate a specific title to each topic.""")
+
+    # Tab 2: Specific Topic Details
+    with tab2:
+
+        # Load top words
+        # JSON mapping: topic title -> list of (word, importance) pairs.
+        with open(os.path.join(path_data,"topics_top_words.json"), "r") as json_file:
+            top_words_dict = json.load(json_file)
+
+        # Load similarity df and scores
+        similarity_df = load_data_pickle(path_data, "similarity_topic_df.pkl")
+        similarity_scores = load_numpy(path_data, "similarity_topic_scores.npy")
+
+        #st.markdown(" ")
+        st.header("Learn more about each topic")
+        st.markdown("""You can **select a specific topic** to get more information on its **top words**, as well as the
+                    **other topics that are most similar to it**.""")
+        # st.info("""In this section, you can find more information on each of the topics identified by the model.
+        #         This includes the topic's a full list of its top words, the importance of each of these words, as well as the top five topics that are most similar to it.""")
+
+        st.markdown(" ")
+
+        # Select topic
+        topics = topic_info["Title"].sort_values().to_list()
+        selected_topic = st.selectbox('**Select a Topic**', topics)
+        # "+ 1" — presumably topic ids start at -1 (an outlier topic, as in
+        # BERTopic), so the shift converts a topic id into a row index of
+        # similarity_scores. TODO(review): confirm against the model export.
+        selected_topic_id = topic_info[topic_info['Title'] == selected_topic]["Topic"].to_numpy()[0] + 1
+
+        st.markdown(" ")
+        col1, col2 = st.columns(2)
+
+        # Top words
+        with col1:
+            # Bar chart of the selected topic's word importances (TF-IDF based).
+            top_words_df = pd.DataFrame(top_words_dict[selected_topic], columns=["Word", "Importance"])
+            top_words_df.sort_values(by=["Importance"], ascending=False, inplace=True)
+            top_words_df["Importance"] = top_words_df["Importance"].round(2)
+
+            fig = px.bar(top_words_df, x='Word', y='Importance', color="Importance", title="Top words", text_auto=True)
+            fig.update_layout(yaxis=dict(range=[0, 1]), xaxis_title="", showlegend=False)
+            st.plotly_chart(fig, use_container_width=True)
+            st.info("""**Note:** Each score was computed based on the words importance in the particular topic using
+                    a popular metric in NLP called TF-IDF (Term Frequency-Inverse Document Frequency). """)
+
+
+        # Similar topics to the selected topic
+        with col2:
+            # NOTE(review): adding a column to a boolean-filtered frame can
+            # trigger pandas' SettingWithCopyWarning; chaining a .copy() onto
+            # the .loc[...] selection would be safer. Behavior kept as-is.
+            similarity_df = similarity_df.loc[similarity_df["Topic"]==selected_topic]
+            # Scale raw similarity scores to percentages for display.
+            similarity_df["scores"] = 100*similarity_scores[selected_topic_id,:]
+            similarity_df.columns = ["Original Topic", "Rank", "Topic", "Similarity (%)"]
+
+            fig = px.bar(similarity_df, y='Similarity (%)', x='Topic', color="Topic", title="Five most similar topics", text_auto=True)
+            fig.update_layout(yaxis=dict(range=[0, 100]),
+                              xaxis_title="",
+                              showlegend=False)
+
+            st.plotly_chart(fig, use_container_width=True)
+            st.info("""**Note:** Topics with a high similarity score can be merged together as to reduce the number of topics, as
+                    well as improve the topics coherence.""")
+
+
+
+
+        # words_for_cloud = ' '.join(selected_topic_info.iloc[0]['Representation'])
+        # fig_wordcloud = generate_wordcloud(words_for_cloud)
+        # st.pyplot(fig_wordcloud)
+
+        # Display most representative document
+        # representative_doc = selected_topic_info.iloc[0]['Representative_Docs'][1]
+        # st.write(representative_doc)
+
+
+    # Tab 3: Search for similar topics
+    # with tab3:
+    #     st.header("Search for Similar Topics")
+    #     search_word = st.text_input("Enter a search word to find similar topics:")
+    #     if search_word:
+    #         st.write(f"Results for similar topics to '{search_word}' would be displayed here.")
+
+    return None
+
+if 'button_clicked' not in st.session_state:
+ st.session_state['button_clicked'] = False
+
+def run_model():
+ run_model = st.button("**Run the model**", type="primary")
+ st.markdown(" ")
+ st.markdown(" ")
+
+ if not st.session_state['button_clicked']:
+ if run_model:
+ show_results()
+ st.session_state['button_clicked'] = True
+ else:
+ show_results()
+
+run_model()
+
+
+
diff --git a/utils.py b/utils.py
index 5a5b9fa5548c41c2ed5099100ef636c11065cbc7..63857dc0eb5fff9e682273e3fbec228e8d8017b2 100644
--- a/utils.py
+++ b/utils.py
@@ -4,6 +4,7 @@ import pickle
import base64
import streamlit as st
import pandas as pd
+import numpy as np
from htbuilder import HtmlElement, div, hr, a, p, img, styles
from pathlib import Path
@@ -35,11 +36,16 @@ def load_model_pickle(path, file):
model = pickle.load(open(path_file, 'rb'))
return model
+@st.cache_data(ttl=3600, show_spinner=False)
+def load_numpy(path,file):
+ array = np.load(os.path.join(path,file))
+ return array
+
###################### LOAD MODEL HUGGINGFACE #############################
-st.cache_data(ttl=3600)
+st.cache_data(ttl=3600, show_spinner=False)
def load_model_huggingface(repo_id, token, task=None):
""" Load model using Huggingface's Inference API
"""
@@ -47,6 +53,8 @@ def load_model_huggingface(repo_id, token, task=None):
return model
+
+
#################### LOAD DATA FROM GOOGLE DRIVE ###################
# @st.cache_data(ttl=3600, show_spinner=False)
@@ -93,20 +101,6 @@ def load_model_huggingface(repo_id, token, task=None):
# return df
-# @st.cache_data(ttl=3600, show_spinner=False)
-# def load_model_drive(file_content):
-# """ Load model using file_content
-# """
-# model = pickle.load(file_content)
-# return model
-
-
-# def files_in_drive(folder_id, drive_service):
-# results = drive_service.files().list(q=f"'{folder_id}' in parents").execute()
-# files_dict= results.get('files', [])
-
-# return files_dict
-
#################### PASSWORD #####################