kmrmanish committed on
Commit
2482865
·
1 Parent(s): 84389e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -37
app.py CHANGED
@@ -2,38 +2,37 @@ import streamlit as st
2
  import difflib
3
  import pandas as pd
4
  import numpy as np
5
-
6
-
7
- # for text data preprocessing
8
  import re
9
  import nltk
10
- nltk.download('stopwords')
11
  from nltk.corpus import stopwords
12
  from nltk.stem.porter import PorterStemmer
13
  from sklearn.feature_extraction.text import TfidfVectorizer
14
  from sklearn.metrics.pairwise import cosine_similarity
15
 
 
 
16
 
 
17
  lpi_df = pd.read_csv('Learning Pathway Index.csv')
18
 
19
- lpi_df.rename(columns={"Course / Learning material": "Course_Learning_Material",
20
- "Course Level": "Course_Level",
21
- "Type (Free or Paid)":"Type",
22
- "Module / Sub-module \nDifficulty level": "Difficulty_Level",
23
- "Keywords / Tags / Skills / Interests / Categories":"Keywords"
24
- }, inplace=True)
25
-
26
- lpi_df['combined_features'] = lpi_df['Course_Learning_Material']+' '+lpi_df['Source']+' '+lpi_df['Course_Level']+' '+lpi_df['Type']+' '+lpi_df['Module']+' '+lpi_df['Difficulty_Level']+' '+lpi_df['Keywords']
27
-
28
 
 
 
29
 
 
30
  combined_features = lpi_df['combined_features']
31
-
32
  porter_stemmer = PorterStemmer()
33
 
34
-
35
  def stemming(content):
36
- stemmed_content = re.sub('[^a-zA-Z]',' ',content)
37
  stemmed_content = stemmed_content.lower()
38
  stemmed_content = stemmed_content.split()
39
  stemmed_content = [porter_stemmer.stem(word) for word in stemmed_content if not word in stopwords.words('english')]
@@ -42,20 +41,20 @@ def stemming(content):
42
 
43
  combined_features = combined_features.apply(stemming)
44
 
45
-
46
  vectorizer = TfidfVectorizer()
47
-
48
  vectorizer.fit(combined_features)
49
-
50
  combined_features = vectorizer.transform(combined_features)
51
-
52
  similarity = cosine_similarity(combined_features)
53
 
54
-
55
-
 
 
 
 
56
 
57
  st.title('Learning Pathway Index Course Recommendation')
58
-
59
  user_input = st.text_input('Enter What You Want to Learn : ')
60
 
61
  if user_input:
@@ -68,18 +67,20 @@ if user_input:
68
  similarity_score = list(enumerate(similarity[index_of_the_course]))
69
  sorted_similar_course = sorted(similarity_score, key=lambda x: x[1], reverse=True)
70
 
71
- st.write('Courses suggested for you :')
72
-
73
- i = 1
74
- for course in sorted_similar_course:
75
- index = course[0]
76
- title_from_index = lpi_df[lpi_df.index == index]['Module'].values[0]
77
- if i < 30:
78
- st.write(f"{i}. {title_from_index}")
79
- i += 1
80
-
81
- if i == 1:
82
- st.write('No close matches found.')
 
 
 
83
  else:
84
- st.write('No close matches found.')
85
-
 
2
  import difflib
3
  import pandas as pd
4
  import numpy as np
 
 
 
5
  import re
6
  import nltk
 
7
  from nltk.corpus import stopwords
8
  from nltk.stem.porter import PorterStemmer
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
  from sklearn.metrics.pairwise import cosine_similarity
11
 
12
+ # Download NLTK stopwords if not already done
13
+ nltk.download('stopwords')
14
 
15
+ # Read the data
16
  lpi_df = pd.read_csv('Learning Pathway Index.csv')
17
 
18
+ # Rename columns
19
+ lpi_df.rename(columns={
20
+ "Course / Learning material": "Course_Learning_Material",
21
+ "Course Level": "Course_Level",
22
+ "Type (Free or Paid)": "Type",
23
+ "Module / Sub-module \nDifficulty level": "Difficulty_Level",
24
+ "Keywords / Tags / Skills / Interests / Categories": "Keywords"
25
+ }, inplace=True)
 
26
 
27
+ # Combine features
28
+ lpi_df['combined_features'] = lpi_df['Course_Learning_Material'] + ' ' + lpi_df['Source'] + ' ' + lpi_df['Course_Level'] + ' ' + lpi_df['Type'] + ' ' + lpi_df['Module'] + ' ' + lpi_df['Difficulty_Level'] + ' ' + lpi_df['Keywords']
29
 
30
+ # Text preprocessing
31
  combined_features = lpi_df['combined_features']
 
32
  porter_stemmer = PorterStemmer()
33
 
 
34
  def stemming(content):
35
+ stemmed_content = re.sub('[^a-zA-Z]', ' ', content)
36
  stemmed_content = stemmed_content.lower()
37
  stemmed_content = stemmed_content.split()
38
  stemmed_content = [porter_stemmer.stem(word) for word in stemmed_content if not word in stopwords.words('english')]
 
41
 
42
  combined_features = combined_features.apply(stemming)
43
 
44
+ # TF-IDF and similarity
45
  vectorizer = TfidfVectorizer()
 
46
  vectorizer.fit(combined_features)
 
47
  combined_features = vectorizer.transform(combined_features)
 
48
  similarity = cosine_similarity(combined_features)
49
 
50
+ # Streamlit app
51
+ st.set_page_config(
52
+ page_title="Course Recommendation App",
53
+ page_icon="✅",
54
+ layout="wide",
55
+ )
56
 
57
  st.title('Learning Pathway Index Course Recommendation')
 
58
  user_input = st.text_input('Enter What You Want to Learn : ')
59
 
60
  if user_input:
 
67
  similarity_score = list(enumerate(similarity[index_of_the_course]))
68
  sorted_similar_course = sorted(similarity_score, key=lambda x: x[1], reverse=True)
69
 
70
+ st.subheader('Courses suggested for you:')
71
+ with st.beta_container():
72
+ col1, col2 = st.beta_columns(2)
73
+ for i, course in enumerate(sorted_similar_course[:30], start=1):
74
+ index = course[0]
75
+ title_from_index = lpi_df.loc[index, 'Module']
76
+ if i % 2 == 0:
77
+ with col2:
78
+ st.write(f"{i}. {title_from_index}")
79
+ else:
80
+ with col1:
81
+ st.write(f"{i}. {title_from_index}")
82
+
83
+ if len(sorted_similar_course) == 0:
84
+ st.warning('No close matches found.')
85
  else:
86
+ st.warning('No close matches found.')