File size: 6,019 Bytes

fb3e3c7

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "34c6730a-3af1-421e-b380-85b6659dfb1e",
   "metadata": {},
   "source": [
    "# Movie Recommendation prediction using ML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "abd4daf2-55b8-405e-a116-29f82aac5667",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    Rank                         Movie_name  Year Certificate  Runtime_in_min  \\\n",
      "66    67  Spider-Man: Into the Spider-Verse  2018          PG             117   \n",
      "157  158            Spider-Man: No Way Home  2021       PG-13             148   \n",
      "\n",
      "                            Genre  Metascore Gross_in_$_M  Rating_from_10  \n",
      "66   Animation, Action, Adventure       87.0       190.24             8.4  \n",
      "157    Action, Adventure, Fantasy       71.0       804.75             8.2  \n"
     ]
    }
   ],
   "source": [
    "movies = pd.read_csv('./imdb.csv')\n",
    "contains_spider = movies['Movie_name'].str.contains('Spider', case=False, na=False)\n",
    "\n",
    "# Filter movies with 'Spider' in the title\n",
    "spider_movies = movies[contains_spider]\n",
    "\n",
    "print(spider_movies)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "ee5586e1-75a9-4331-a2b7-1589bf2443bb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['movie_recommender_model.pkl']"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "import joblib\n",
    "\n",
    "# Load the data\n",
    "movies = pd.read_csv('./imdb.csv')\n",
    "\n",
    "# Select the required columns\n",
    "movies = movies[['Rank', 'Movie_name', 'Rating_from_10', 'Certificate', 'Genre', 'Year', 'Runtime_in_min']]\n",
    "\n",
    "# Combine Certificate and Genre to create tags\n",
    "movies['tags'] = movies['Certificate'] + ' ' + movies['Genre']\n",
    "\n",
    "# Remove 'Certificate' and 'Genre'\n",
    "new_data = movies.drop(columns=['Certificate', 'Genre'])\n",
    "\n",
    "# Drop missing values\n",
    "new_cleaned = new_data.dropna()\n",
    "\n",
    "# Vectorizing the 'tags' column\n",
    "cv = CountVectorizer(max_features=5000, stop_words='english')\n",
    "vectorized_data = cv.fit_transform(new_cleaned['tags']).toarray()\n",
    "\n",
    "# Calculate cosine similarities\n",
    "similarity = cosine_similarity(vectorized_data)\n",
    "\n",
    "# Define the Recommender class\n",
    "class MovieRecommender:\n",
    "    def __init__(self, similarity, movie_data):\n",
    "        self.similarity = similarity\n",
    "        self.movie_data = movie_data\n",
    "\n",
    "    def recommend(self, movie_title):\n",
    "        # Check if the movie title exists in the dataset\n",
    "        if movie_title not in self.movie_data['Movie_name'].values:\n",
    "            # Find similar movie names containing the keyword\n",
    "            similar_movies = self.movie_data[self.movie_data['Movie_name'].str.contains(movie_title, case=False, na=False)]\n",
    "            if not similar_movies.empty:\n",
    "                suggestions = similar_movies['Movie_name'].tolist()\n",
    "                return f\"Movie '{movie_title}' not found. Did you mean one of these?\\n\" + \"\\n\".join(suggestions)\n",
    "            else:\n",
    "                return f\"Movie '{movie_title}' not found. Please recheck the movie name.\"\n",
    "        \n",
    "        # Find the index of the movie in the DataFrame using 'Movie_name' column\n",
    "        index = self.movie_data[self.movie_data['Movie_name'] == movie_title].index[0]\n",
    "        \n",
    "        # Calculate similarity scores, sort them, and store the titles of the top 5 movies\n",
    "        distances = sorted(list(enumerate(self.similarity[index])), reverse=True, key=lambda vector: vector[1])\n",
    "        recommendations = [self.movie_data.iloc[i[0]]['Movie_name'] for i in distances[1:6]]  # Skip the movie itself\n",
    "        \n",
    "        return recommendations\n",
    "\n",
    "model = MovieRecommender(similarity, new_cleaned)\n",
    "\n",
    "# recommendations = model.recommend(\"Spider\")\n",
    "# print(recommendations)\n",
    "\n",
    "# Save the model to a file\n",
    "joblib.dump(model, 'movie_recommender_model.pkl')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b5ee717-664e-45d9-9671-eb52db072336",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "833b731e-999e-4578-92b8-f51c29ed4f30",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['The Princess Bride', 'Harry Potter and the Prisoner of Azkaban', \"Harry Potter and the Sorcerer's Stone\", 'Harry Potter and the Deathly Hallows: Part 2', 'Harry Potter and the Deathly Hallows: Part 1']\n"
     ]
    }
   ],
   "source": [
    "model = joblib.load('movie_recommender_model.pkl')\n",
    "recommendations = model.recommend(\"Iron Man\")\n",
    "print(recommendations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b145455-992e-405d-9fdf-cc1cf24a19f4",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}