File size: 6,019 Bytes
fb3e3c7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
{
"cells": [
{
"cell_type": "markdown",
"id": "34c6730a-3af1-421e-b380-85b6659dfb1e",
"metadata": {},
"source": [
"# Movie Recommendation Using ML"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "abd4daf2-55b8-405e-a116-29f82aac5667",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Rank Movie_name Year Certificate Runtime_in_min \\\n",
"66 67 Spider-Man: Into the Spider-Verse 2018 PG 117 \n",
"157 158 Spider-Man: No Way Home 2021 PG-13 148 \n",
"\n",
" Genre Metascore Gross_in_$_M Rating_from_10 \n",
"66 Animation, Action, Adventure 87.0 190.24 8.4 \n",
"157 Action, Adventure, Fantasy 71.0 804.75 8.2 \n"
]
}
],
"source": [
"# pandas is imported in this cell so it runs on a fresh kernel (Restart & Run All)\n",
"import pandas as pd\n",
"\n",
"# Load the IMDb dataset from the notebook's working directory\n",
"movies = pd.read_csv('./imdb.csv')\n",
"\n",
"# Boolean mask: title contains 'Spider' (case-insensitive); missing titles count as no match\n",
"contains_spider = movies['Movie_name'].str.contains('Spider', case=False, na=False)\n",
"\n",
"# Filter movies with 'Spider' in the title\n",
"spider_movies = movies[contains_spider]\n",
"\n",
"print(spider_movies)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "ee5586e1-75a9-4331-a2b7-1589bf2443bb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['movie_recommender_model.pkl']"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.metrics.pairwise import cosine_similarity\n",
"import joblib\n",
"\n",
"# Load the data\n",
"movies = pd.read_csv('./imdb.csv')\n",
"\n",
"# Select the required columns; .copy() yields an independent frame so that adding\n",
"# the 'tags' column below cannot raise SettingWithCopyWarning\n",
"movies = movies[['Rank', 'Movie_name', 'Rating_from_10', 'Certificate', 'Genre', 'Year', 'Runtime_in_min']].copy()\n",
"\n",
"# Combine Certificate and Genre to create tags (a missing value in either part gives a missing tag)\n",
"movies['tags'] = movies['Certificate'] + ' ' + movies['Genre']\n",
"\n",
"# Remove 'Certificate' and 'Genre'\n",
"new_data = movies.drop(columns=['Certificate', 'Genre'])\n",
"\n",
"# Drop rows with missing values, then renumber the rows 0..n-1 so that every\n",
"# index label equals the row's position in the similarity matrix built below\n",
"new_cleaned = new_data.dropna().reset_index(drop=True)\n",
"\n",
"# Vectorizing the 'tags' column\n",
"cv = CountVectorizer(max_features=5000, stop_words='english')\n",
"vectorized_data = cv.fit_transform(new_cleaned['tags']).toarray()\n",
"\n",
"# Calculate cosine similarities (row/column i corresponds to row i of new_cleaned)\n",
"similarity = cosine_similarity(vectorized_data)\n",
"\n",
"# Define the Recommender class\n",
"class MovieRecommender:\n",
"    \"\"\"Content-based recommender backed by a precomputed similarity matrix.\n",
"\n",
"    Row ``i`` of ``similarity`` must describe the movie stored at row position\n",
"    ``i`` of ``movie_data`` (position, not index label).\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, similarity, movie_data):\n",
"        \"\"\"Store the similarity matrix and the movie table it was computed from.\n",
"\n",
"        Args:\n",
"            similarity: Square 2-D array-like of pairwise similarity scores.\n",
"            movie_data: DataFrame with a 'Movie_name' column, one row per matrix row.\n",
"        \"\"\"\n",
"        self.similarity = similarity\n",
"        self.movie_data = movie_data\n",
"\n",
"    def recommend(self, movie_title):\n",
"        \"\"\"Return up to five titles most similar to ``movie_title``.\n",
"\n",
"        Args:\n",
"            movie_title: Exact title to look up in the 'Movie_name' column.\n",
"\n",
"        Returns:\n",
"            A list of up to five titles ordered by descending similarity when the\n",
"            title is found. Otherwise a message string: either a list of titles\n",
"            that contain ``movie_title`` (case-insensitive), or a request to\n",
"            recheck the name when nothing matches.\n",
"        \"\"\"\n",
"        titles = self.movie_data['Movie_name']\n",
"        exact_match = (titles == movie_title).to_numpy()\n",
"\n",
"        # Check if the movie title exists in the dataset\n",
"        if not exact_match.any():\n",
"            # regex=False: the query is literal text, so characters such as '(' or '*'\n",
"            # can neither raise a regex error nor match unintended titles\n",
"            similar_movies = self.movie_data[titles.str.contains(movie_title, case=False, na=False, regex=False)]\n",
"            if not similar_movies.empty:\n",
"                suggestions = similar_movies['Movie_name'].tolist()\n",
"                return f\"Movie '{movie_title}' not found. Did you mean one of these?\\n\" + \"\\n\".join(suggestions)\n",
"            return f\"Movie '{movie_title}' not found. Please recheck the movie name.\"\n",
"\n",
"        # Row position (not index label) of the first exact match: the similarity\n",
"        # matrix is positional, and labels stop matching positions once rows are dropped\n",
"        position = int(exact_match.argmax())\n",
"\n",
"        # Rank every movie by its similarity to the queried one, best first\n",
"        ranked = sorted(enumerate(self.similarity[position]), key=lambda pair: pair[1], reverse=True)\n",
"\n",
"        # Exclude the queried movie explicitly; with tied scores it is not guaranteed to sort first\n",
"        top_positions = [row for row, _ in ranked if row != position][:5]\n",
"\n",
"        return [titles.iloc[row] for row in top_positions]\n",
"\n",
"# Bundle the similarity matrix with the cleaned movie table it was computed from\n",
"model = MovieRecommender(similarity=similarity, movie_data=new_cleaned)\n",
"\n",
"# Example query (kept disabled): model.recommend(\"Spider\")\n",
"\n",
"# Persist the recommender to disk; the cell output is the list of written file names\n",
"joblib.dump(value=model, filename='movie_recommender_model.pkl')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b5ee717-664e-45d9-9671-eb52db072336",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 33,
"id": "833b731e-999e-4578-92b8-f51c29ed4f30",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['The Princess Bride', 'Harry Potter and the Prisoner of Azkaban', \"Harry Potter and the Sorcerer's Stone\", 'Harry Potter and the Deathly Hallows: Part 2', 'Harry Potter and the Deathly Hallows: Part 1']\n"
]
}
],
"source": [
"# Reload the recommender written by the training cell and query it for one title\n",
"model = joblib.load(filename='movie_recommender_model.pkl')\n",
"recommendations = model.recommend(movie_title=\"Iron Man\")\n",
"print(recommendations)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b145455-992e-405d-9fdf-cc1cf24a19f4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|