Spaces:

kmrmanish
/

LPI_Course_Recommendation_System

Sleeping

App Files Files Community

LPI_Course_Recommendation_System / app.py

kmrmanish

Update app.py

5aa7983 over 1 year ago

raw

history blame contribute delete

2.75 kB

	import streamlit as st
	import difflib
	import pandas as pd
	import numpy as np
	import re
	import nltk
	from nltk.corpus import stopwords
	from nltk.stem.porter import PorterStemmer
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity

	# Fetch the NLTK stopword corpus only when it is not installed locally.
	# Streamlit re-executes this script on every interaction, so an
	# unconditional nltk.download() would contact the download server each time.
	try:
	    nltk.data.find('corpora/stopwords')
	except LookupError:
	    nltk.download('stopwords')

	# Read the data
	lpi_df = pd.read_csv('Learning Pathway Index.csv')

	# Rename the verbose CSV headers to identifier-friendly names.
	# Note that the difficulty-level header contains a literal newline.
	lpi_df.rename(columns={
	    "Course / Learning material": "Course_Learning_Material",
	    "Course Level": "Course_Level",
	    "Type (Free or Paid)": "Type",
	    "Module / Sub-module \nDifficulty level": "Difficulty_Level",
	    "Keywords / Tags / Skills / Interests / Categories": "Keywords",
	}, inplace=True)

	# Combine features into one space-separated text per course.
	# Missing cells are treated as empty strings: plain `+` concatenation would
	# propagate NaN and turn the whole row's text into NaN, which the text
	# preprocessing step cannot handle.
	feature_columns = [
	    'Course_Learning_Material',
	    'Source',
	    'Course_Level',
	    'Type',
	    'Module',
	    'Difficulty_Level',
	    'Keywords',
	]
	lpi_df['combined_features'] = (
	    lpi_df[feature_columns].fillna('').astype(str).agg(' '.join, axis=1)
	)

	# Text preprocessing
	combined_features = lpi_df['combined_features']
	porter_stemmer = PorterStemmer()

	# Built once: looking the stopword list up per word is loop-invariant work,
	# and a frozenset gives O(1) membership tests instead of a list scan.
	_ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
	_NON_LETTERS = re.compile('[^a-zA-Z]')


	def stemming(content):
	    """Return *content* reduced to space-separated Porter stems.

	    Every character that is not an ASCII letter is replaced by a space, the
	    text is lower-cased and split on whitespace, English stopwords are
	    dropped, and each remaining word is stemmed with ``porter_stemmer``.

	    Args:
	        content: The raw text to normalize. Non-string values (for example
	            the NaN pandas uses for a missing cell) are treated as empty.

	    Returns:
	        The stems joined by single spaces; ``''`` when nothing remains.
	    """
	    if not isinstance(content, str):
	        # re.sub() raises TypeError on non-strings such as float NaN.
	        return ''
	    words = _NON_LETTERS.sub(' ', content).lower().split()
	    return ' '.join(
	        porter_stemmer.stem(word)
	        for word in words
	        if word not in _ENGLISH_STOP_WORDS
	    )

	# Normalize every course description into space-separated stems.
	stemmed_corpus = combined_features.apply(stemming)

	# Learn the TF-IDF vocabulary and vectorize the corpus in a single pass.
	vectorizer = TfidfVectorizer()
	tfidf_matrix = vectorizer.fit_transform(stemmed_corpus)

	# From here on `combined_features` names the TF-IDF matrix, and
	# `similarity` holds the cosine similarity between every pair of its rows.
	combined_features = tfidf_matrix
	similarity = cosine_similarity(tfidf_matrix)

	# Streamlit app: read a free-text query, find the closest module title, and
	# list the modules of the courses most similar to that match.
	st.title('Learning Pathway Index Course Recommendation')
	user_input = st.text_input('Enter What You Want to Learn : ')

	if user_input:
	    module_titles = lpi_df['Module']

	    # difflib can only compare strings, so rows with a missing module name
	    # are left out of the candidates. Their row positions are remembered so
	    # a match can be mapped back to its row in the similarity matrix.
	    has_title = module_titles.notna().to_numpy()
	    title_positions = np.flatnonzero(has_title)
	    list_of_all_titles = module_titles[has_title].astype(str).tolist()
	    find_close_match = difflib.get_close_matches(user_input, list_of_all_titles)

	    if find_close_match:
	        close_match = find_close_match[0]
	        # First row carrying the matched title, as a positional row number:
	        # `similarity` is indexed by position, not by DataFrame label.
	        index_of_the_course = title_positions[list_of_all_titles.index(close_match)]
	        similarity_score = list(enumerate(similarity[index_of_the_course]))
	        sorted_similar_course = sorted(similarity_score, key=lambda x: x[1], reverse=True)

	        # The similarity row has one entry per course, so the ranking is
	        # never empty once a match exists; no separate empty check is needed.
	        st.subheader('Courses suggested for you:')
	        for i, (index, _score) in enumerate(sorted_similar_course[:30], start=1):
	            title_from_index = module_titles.iloc[index]
	            st.write(f"{i}. {title_from_index}")
	    else:
	        st.write('No close matches found.')