File size: 6,019 Bytes
fb3e3c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "34c6730a-3af1-421e-b380-85b6659dfb1e",
   "metadata": {},
   "source": [
    "# Movie Recommendation Using Machine Learning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "abd4daf2-55b8-405e-a116-29f82aac5667",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    Rank                         Movie_name  Year Certificate  Runtime_in_min  \\\n",
      "66    67  Spider-Man: Into the Spider-Verse  2018          PG             117   \n",
      "157  158            Spider-Man: No Way Home  2021       PG-13             148   \n",
      "\n",
      "                            Genre  Metascore Gross_in_$_M  Rating_from_10  \n",
      "66   Animation, Action, Adventure       87.0       190.24             8.4  \n",
      "157    Action, Adventure, Fantasy       71.0       804.75             8.2  \n"
     ]
    }
   ],
   "source": [
    "movies = pd.read_csv('./imdb.csv')\n",
    "contains_spider = movies['Movie_name'].str.contains('Spider', case=False, na=False)\n",
    "\n",
    "# Filter movies with 'Spider' in the title\n",
    "spider_movies = movies[contains_spider]\n",
    "\n",
    "print(spider_movies)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "ee5586e1-75a9-4331-a2b7-1589bf2443bb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['movie_recommender_model.pkl']"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "import joblib\n",
    "\n",
    "# Load the data\n",
    "movies = pd.read_csv('./imdb.csv')\n",
    "\n",
    "# Select the required columns\n",
    "movies = movies[['Rank', 'Movie_name', 'Rating_from_10', 'Certificate', 'Genre', 'Year', 'Runtime_in_min']]\n",
    "\n",
    "# Combine Certificate and Genre to create tags\n",
    "movies['tags'] = movies['Certificate'] + ' ' + movies['Genre']\n",
    "\n",
    "# Remove 'Certificate' and 'Genre'\n",
    "new_data = movies.drop(columns=['Certificate', 'Genre'])\n",
    "\n",
    "# Drop missing values\n",
    "new_cleaned = new_data.dropna()\n",
    "\n",
    "# Vectorizing the 'tags' column\n",
    "cv = CountVectorizer(max_features=5000, stop_words='english')\n",
    "vectorized_data = cv.fit_transform(new_cleaned['tags']).toarray()\n",
    "\n",
    "# Calculate cosine similarities\n",
    "similarity = cosine_similarity(vectorized_data)\n",
    "\n",
    "# Define the Recommender class\n",
    "class MovieRecommender:\n",
    "    def __init__(self, similarity, movie_data):\n",
    "        self.similarity = similarity\n",
    "        self.movie_data = movie_data\n",
    "\n",
    "    def recommend(self, movie_title):\n",
    "        # Check if the movie title exists in the dataset\n",
    "        if movie_title not in self.movie_data['Movie_name'].values:\n",
    "            # Find similar movie names containing the keyword\n",
    "            similar_movies = self.movie_data[self.movie_data['Movie_name'].str.contains(movie_title, case=False, na=False)]\n",
    "            if not similar_movies.empty:\n",
    "                suggestions = similar_movies['Movie_name'].tolist()\n",
    "                return f\"Movie '{movie_title}' not found. Did you mean one of these?\\n\" + \"\\n\".join(suggestions)\n",
    "            else:\n",
    "                return f\"Movie '{movie_title}' not found. Please recheck the movie name.\"\n",
    "        \n",
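    "        # Find the DataFrame index label of the first row whose 'Movie_name' equals movie_title.\n",
    "        # NOTE(review): this label is used below as a positional row index into self.similarity;\n",
    "        # the two only agree if movie_data has a default 0..n-1 index (no rows dropped) - confirm.\n",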
    "        index = self.movie_data[self.movie_data['Movie_name'] == movie_title].index[0]\n",
    "        \n",
    "        # Calculate similarity scores, sort them, and store the titles of the top 5 movies\n",
    "        distances = sorted(list(enumerate(self.similarity[index])), reverse=True, key=lambda vector: vector[1])\n",
    "        recommendations = [self.movie_data.iloc[i[0]]['Movie_name'] for i in distances[1:6]]  # Skip the movie itself\n",
    "        \n",
    "        return recommendations\n",
    "\n",
    "model = MovieRecommender(similarity, new_cleaned)\n",
    "\n",
    "# recommendations = model.recommend(\"Spider\")\n",
    "# print(recommendations)\n",
    "\n",
    "# Save the model to a file\n",
    "joblib.dump(model, 'movie_recommender_model.pkl')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b5ee717-664e-45d9-9671-eb52db072336",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "833b731e-999e-4578-92b8-f51c29ed4f30",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['The Princess Bride', 'Harry Potter and the Prisoner of Azkaban', \"Harry Potter and the Sorcerer's Stone\", 'Harry Potter and the Deathly Hallows: Part 2', 'Harry Potter and the Deathly Hallows: Part 1']\n"
     ]
    }
   ],
   "source": [
    "model = joblib.load('movie_recommender_model.pkl')\n",
    "recommendations = model.recommend(\"Iron Man\")\n",
    "print(recommendations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b145455-992e-405d-9fdf-cc1cf24a19f4",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}