import difflib
import functools
import re

import nltk
import numpy as np
import pandas as pd
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Fetch the NLTK stop-word corpus that `stemming` filters against.
# quiet=True suppresses the downloader's progress output on every script run.
nltk.download('stopwords', quiet=True)

# Load the course catalogue and give the verbose CSV headers short,
# attribute-friendly names.
lpi_df = pd.read_csv('Learning Pathway Index.csv')
lpi_df.rename(columns={
    "Course / Learning material": "Course_Learning_Material",
    "Course Level": "Course_Level",
    "Type (Free or Paid)": "Type",
    "Module / Sub-module \nDifficulty level": "Difficulty_Level",
    "Keywords / Tags / Skills / Interests / Categories": "Keywords"
}, inplace=True)

# Build one text blob per course from its descriptive columns. Missing cells
# are replaced with '' first: joining with `+` would turn the whole row into
# NaN as soon as a single column is empty, and the text cleaning step cannot
# process NaN.
feature_columns = [
    'Course_Learning_Material',
    'Source',
    'Course_Level',
    'Type',
    'Module',
    'Difficulty_Level',
    'Keywords',
]
lpi_df['combined_features'] = (
    lpi_df[feature_columns].fillna('').astype(str).agg(' '.join, axis=1)
)

combined_features = lpi_df['combined_features']
porter_stemmer = PorterStemmer()

@functools.lru_cache(maxsize=1)
def _english_stop_words():
    """Return the NLTK English stop words as a frozenset, loaded only once."""
    return frozenset(stopwords.words('english'))


def stemming(content):
    """Normalize free text into a space-separated string of stemmed tokens.

    Every non-letter character is replaced with a space, the text is
    lower-cased and split on whitespace, English stop words are dropped and
    each remaining word is reduced to its Porter stem.

    Args:
        content: Text to normalize. Non-string values (for example the NaN
            pandas uses for a missing cell, or None) are treated as empty
            text instead of raising.

    Returns:
        The stemmed tokens joined by single spaces, or '' when no token
        survives the cleaning.
    """
    if not isinstance(content, str):
        return ''

    words = re.sub('[^a-zA-Z]', ' ', content).lower().split()
    if not words:
        return ''

    # Use the cached set so the stop-word corpus is not re-read and scanned
    # linearly for every single token.
    stop_words = _english_stop_words()
    return ' '.join(
        porter_stemmer.stem(word) for word in words if word not in stop_words
    )

# Clean and stem the text blob of every course.
combined_features = combined_features.map(stemming)

# Learn the TF-IDF vocabulary and vectorize the stemmed text in one step.
vectorizer = TfidfVectorizer()
combined_features = vectorizer.fit_transform(combined_features)

# Pairwise cosine similarity between all course vectors; row i holds the
# similarity of course i to every course (including itself).
similarity = cosine_similarity(combined_features)

# --- Streamlit UI: ask for a topic and list the most similar courses. ---
st.title('Learning Pathway Index Course Recommendation')
user_input = st.text_input('Enter What You Want to Learn : ')

if user_input:
    module_titles = lpi_df['Module']
    # Missing module names are NaN floats, which difflib cannot compare with
    # the query string, so only real titles are offered as candidates.
    list_of_all_titles = module_titles.dropna().tolist()
    find_close_match = difflib.get_close_matches(user_input, list_of_all_titles)

    if find_close_match:
        close_match = find_close_match[0]
        # `similarity` is addressed by row position, so look up the position
        # of the first course in the matched module rather than its index
        # label (the two only coincide for a default RangeIndex).
        index_of_the_course = int((module_titles == close_match).to_numpy().argmax())
        similarity_score = enumerate(similarity[index_of_the_course])
        # Highest similarity first; sorted() is stable, so ties keep file order.
        sorted_similar_course = sorted(
            similarity_score, key=lambda pair: pair[1], reverse=True
        )

        st.subheader('Courses suggested for you:')
        for i, (index, _score) in enumerate(sorted_similar_course[:30], start=1):
            title_from_index = module_titles.iloc[index]
            st.write(f"{i}. {title_from_index}")
    else:
        st.write('No close matches found.')