import streamlit as st
import difflib
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download NLTK stopwords if not already done
nltk.download('stopwords')

# Read the data
lpi_df = pd.read_csv('Learning Pathway Index.csv')

# Rename columns
lpi_df.rename(columns={
    "Course / Learning material": "Course_Learning_Material",
    "Course Level": "Course_Level",
    "Type (Free or Paid)": "Type",
    "Module / Sub-module \nDifficulty level": "Difficulty_Level",
    "Keywords / Tags / Skills / Interests / Categories": "Keywords"
}, inplace=True)

# Combine the descriptive columns into a single text field per course
lpi_df['combined_features'] = (
    lpi_df['Course_Learning_Material'] + ' ' + lpi_df['Source'] + ' '
    + lpi_df['Course_Level'] + ' ' + lpi_df['Type'] + ' '
    + lpi_df['Module'] + ' ' + lpi_df['Difficulty_Level'] + ' '
    + lpi_df['Keywords']
)

# Text preprocessing: strip non-letters, lowercase, drop stopwords, stem
combined_features = lpi_df['combined_features']
porter_stemmer = PorterStemmer()
english_stopwords = set(stopwords.words('english'))  # build the set once, not per word


def stemming(content):
    stemmed_content = re.sub('[^a-zA-Z]', ' ', content)
    stemmed_content = stemmed_content.lower().split()
    stemmed_content = [
        porter_stemmer.stem(word)
        for word in stemmed_content
        if word not in english_stopwords
    ]
    return ' '.join(stemmed_content)


combined_features = combined_features.apply(stemming)

# TF-IDF vectorisation and pairwise cosine similarity between all courses
vectorizer = TfidfVectorizer()
combined_features = vectorizer.fit_transform(combined_features)
similarity = cosine_similarity(combined_features)

# Streamlit app
st.set_page_config(
    page_title="Course Recommendation App",
    page_icon="✅",
    layout="wide",
)

st.title('Learning Pathway Index Course Recommendation')

user_input = st.text_input('Enter What You Want to Learn : ')

if user_input:
    list_of_all_titles = lpi_df['Module'].tolist()
    find_close_match = difflib.get_close_matches(user_input, list_of_all_titles)
    if find_close_match:
        close_match = find_close_match[0]
        index_of_the_course = lpi_df[lpi_df.Module == close_match].index.values[0]
        similarity_score = list(enumerate(similarity[index_of_the_course]))
        # Sort by similarity; the first entry is the matched course itself (score 1.0)
        sorted_similar_course = sorted(similarity_score, key=lambda x: x[1], reverse=True)

        st.subheader('Courses suggested for you:')

        # st.beta_container / st.beta_columns were removed in Streamlit 1.x;
        # use st.container / st.columns instead.
        with st.container():
            col1, col2 = st.columns(2)
            for i, course in enumerate(sorted_similar_course[:30], start=1):
                index = course[0]
                title_from_index = lpi_df.loc[index, 'Module']
                # Alternate the numbered results between the two columns
                if i % 2 == 0:
                    with col2:
                        st.write(f"{i}. {title_from_index}")
                else:
                    with col1:
                        st.write(f"{i}. {title_from_index}")
    else:
        st.warning('No close matches found.')
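
# Usage (a minimal sketch, assuming this script is saved as app.py and that
# 'Learning Pathway Index.csv' sits in the same directory -- the filename
# app.py is an assumption, not part of the original source):
#
#   streamlit run app.py
#
# Streamlit then serves the recommender locally (by default on
# http://localhost:8501) and reruns this script on every widget interaction,
# so the CSV load, TF-IDF fit, and similarity matrix are recomputed each time
# the user types a new query.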