{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f8888b32",
   "metadata": {},
   "source": [
    "\n",
    "# Recommendation Systems Introduction\n",
    "\n",
    "* Recommendation systems are one of the most valuable applications of Machine Learning.\n",
    "* Amazon attributes 20% of its 221.60 billion dollars in yearly revenue to recommendations.\n",
    "* Recommendation systems can use explicit and implicit data.\n",
    "    - A user rating for a product or movie is explicit.\n",
    "    - Buying/watched history for a user is implicit.\n",
    "* Implicit data is already personalised.\n",
    "* Explicit data often needs to be renormalised across users -- for example, users may use the rating scale differently.\n",
    "* One simple approach to the problem is item-based collaborative filtering.\n",
    "* You can also go in the opposite direction and perform user-based collaborative filtering.\n",
    "\n",
    "![alt text](RecMatrix.png \"The Recommender Matrix\")\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "500e990f",
   "metadata": {},
   "source": [
    "# Exploring the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "780b8fd5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Head of ratings csv\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>31</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1260759144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1029</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1260759179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1061</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1260759182</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1129</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1260759185</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>1172</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1260759205</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  movieId  rating   timestamp\n",
       "0       1       31     2.5  1260759144\n",
       "1       1     1029     3.0  1260759179\n",
       "2       1     1061     3.0  1260759182\n",
       "3       1     1129     2.0  1260759185\n",
       "4       1     1172     4.0  1260759205"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First, let's just see an example algorithm run on MovieLens.\n",
    "\n",
    "import os\n",
    "import csv\n",
    "import sys\n",
    "import re\n",
    "from surprise import Dataset\n",
    "from surprise import Reader\n",
    "from collections import defaultdict\n",
    "import numpy as np\n",
    "from surprise import SVD\n",
    "import pandas as pd\n",
    "\n",
    "ratingsFile = 'ratings.csv'\n",
    "moviesFile = 'movies.csv'\n",
    "\n",
    "rdf = pd.read_csv(ratingsFile,header=0)\n",
    "print('Head of ratings csv')\n",
    "rdf.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c763caf8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Head of movies csv\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Toy Story (1995)</td>\n",
       "      <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Jumanji (1995)</td>\n",
       "      <td>Adventure|Children|Fantasy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Grumpier Old Men (1995)</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Waiting to Exhale (1995)</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Father of the Bride Part II (1995)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movieId                               title  \\\n",
       "0        1                    Toy Story (1995)   \n",
       "1        2                      Jumanji (1995)   \n",
       "2        3             Grumpier Old Men (1995)   \n",
       "3        4            Waiting to Exhale (1995)   \n",
       "4        5  Father of the Bride Part II (1995)   \n",
       "\n",
       "                                        genres  \n",
       "0  Adventure|Animation|Children|Comedy|Fantasy  \n",
       "1                   Adventure|Children|Fantasy  \n",
       "2                               Comedy|Romance  \n",
       "3                         Comedy|Drama|Romance  \n",
       "4                                       Comedy  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mdf = pd.read_csv(moviesFile,header=0)\n",
    "print('Head of movies csv')\n",
    "mdf.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a3556fcd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>imdbId</th>\n",
       "      <th>tmdbId</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>114709</td>\n",
       "      <td>862.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>113497</td>\n",
       "      <td>8844.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>113228</td>\n",
       "      <td>15602.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>114885</td>\n",
       "      <td>31357.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>113041</td>\n",
       "      <td>11862.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movieId  imdbId   tmdbId\n",
       "0        1  114709    862.0\n",
       "1        2  113497   8844.0\n",
       "2        3  113228  15602.0\n",
       "3        4  114885  31357.0\n",
       "4        5  113041  11862.0"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dflinks = pd.read_csv(\"links.csv\")\n",
    "dftags = pd.read_csv(\"tags.csv\")\n",
    "# Take a look at the links table (movieId mapped to imdbId / tmdbId)\n",
    "dflinks.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a0b0891a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>tag</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>15</td>\n",
       "      <td>339</td>\n",
       "      <td>sandra 'boring' bullock</td>\n",
       "      <td>1138537770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15</td>\n",
       "      <td>1955</td>\n",
       "      <td>dentist</td>\n",
       "      <td>1193435061</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>15</td>\n",
       "      <td>7478</td>\n",
       "      <td>Cambodia</td>\n",
       "      <td>1170560997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15</td>\n",
       "      <td>32892</td>\n",
       "      <td>Russian</td>\n",
       "      <td>1170626366</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15</td>\n",
       "      <td>34162</td>\n",
       "      <td>forgettable</td>\n",
       "      <td>1141391765</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  movieId                      tag   timestamp\n",
       "0      15      339  sandra 'boring' bullock  1138537770\n",
       "1      15     1955                  dentist  1193435061\n",
       "2      15     7478                 Cambodia  1170560997\n",
       "3      15    32892                  Russian  1170626366\n",
       "4      15    34162              forgettable  1141391765"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dftags.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e6d8c696",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Movie :  Index(['movieId', 'title', 'genres'], dtype='object')\n",
      "\n",
      "Rating :  Index(['userId', 'movieId', 'rating', 'timestamp'], dtype='object')\n",
      "\n",
      "Links :  Index(['movieId', 'imdbId', 'tmdbId'], dtype='object')\n",
      "\n",
      "Tags :  Index(['userId', 'movieId', 'tag', 'timestamp'], dtype='object')\n",
      "\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 9125 entries, 0 to 9124\n",
      "Data columns (total 3 columns):\n",
      " #   Column   Non-Null Count  Dtype \n",
      "---  ------   --------------  ----- \n",
      " 0   movieId  9125 non-null   int64 \n",
      " 1   title    9125 non-null   object\n",
      " 2   genres   9125 non-null   object\n",
      "dtypes: int64(1), object(2)\n",
      "memory usage: 214.0+ KB\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 100004 entries, 0 to 100003\n",
      "Data columns (total 4 columns):\n",
      " #   Column     Non-Null Count   Dtype  \n",
      "---  ------     --------------   -----  \n",
      " 0   userId     100004 non-null  int64  \n",
      " 1   movieId    100004 non-null  int64  \n",
      " 2   rating     100004 non-null  float64\n",
      " 3   timestamp  100004 non-null  int64  \n",
      "dtypes: float64(1), int64(3)\n",
      "memory usage: 3.1 MB\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1296 entries, 0 to 1295\n",
      "Data columns (total 4 columns):\n",
      " #   Column     Non-Null Count  Dtype \n",
      "---  ------     --------------  ----- \n",
      " 0   userId     1296 non-null   int64 \n",
      " 1   movieId    1296 non-null   int64 \n",
      " 2   tag        1296 non-null   object\n",
      " 3   timestamp  1296 non-null   int64 \n",
      "dtypes: int64(3), object(1)\n",
      "memory usage: 40.6+ KB\n"
     ]
    }
   ],
   "source": [
    "# List the column names of each table, then summarise the movies, ratings and tags frames\n",
    "print(\"Movie : \", mdf.columns,end=\"\\n\\n\")\n",
    "print(\"Rating : \", rdf.columns,end=\"\\n\\n\")\n",
    "print(\"Links : \", dflinks.columns,end=\"\\n\\n\")\n",
    "print(\"Tags : \", dftags.columns,end=\"\\n\\n\")\n",
    "\n",
    "mdf.info()\n",
    "rdf.info()\n",
    "dftags.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d37778ea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA9RUlEQVR4nO2dd3gc1bn/P2d3VaxeLVtykeWGbVwxLnRCJ/QUAoSWBLg3kJt+k5vc5HJ/N4QkNxBaIEBCSUIgBMiFEAihGwy4YtyLJMu2bFladWklbT2/P2ZGO2qWbGt2tfL7eZ599uzZMzNn2jvvfM97zlFaawRBEITRhSveFRAEQRCGHzHugiAIoxAx7oIgCKMQMe6CIAijEDHugiAIoxBPvCsAUFBQoEtLS+NdDUEQhIRi3bp19Vrrwv7+GxHGvbS0lLVr18a7GoIgCAmFUmrPQP+JLCMIgjAKEeMuCIIwChHjLgiCMAoR4y4IgjAKEeMuCIIwChHjLgiCMAoR4y4IgjAKEeMuCELc8bb5eXbtPmQI8uFjRHRiEgTh2OYX/9jOX9ZVU5CRzKeOKxq0vNYarcHlUjGoXWIinrsgCHGlrrWLFzccAOBXr+/q4b23dAap8Lb3Webrz2zgcw9/SDAc6ZG/q7YNb5vf2QonCGLcBUGIK09+WEUwEuG2M6exaX8Lb2yrA8DnD/H533zIJfe/T7s/1F2+pSPIq5trWLenid+8U9Gdv6m6hYvuf587X90W830YiYhxFwQhbnQEQvzxo72cN3sc3zh7OqX5adz9+k7CEc23nt3Ajto2fIEwr2yq6V7mn1sPEgxr5k3I5r63drH1QCveNj83/2Et/lCEPQ0dcdyjkYMYd0EQ4sZz66pp6Qxy02lT8LhdfP3s6WyraeX6x1bz2pZa/vPTs5hSkM4L66u7l3l5Yw0T88bwxI1LyElL5lvPbuCrT62jqSPA/Ik5HGjujOMejRzEuAuCEBfCEc1v39vNwkk5nDA5D4BL5pdQVpjO++X1XLGwhC+fMoUrFpbwUWUj+xo7aPIFWFlez6fnFpOXnsydl89l+8E21lQ18b+fnc/p0wuobe3qo8Ufi4hxFwQhLqzY5WVvYwc3nVrWned2Ke68fC5XLZnET6+Yi1KKyxaWAPB/H+/ntS0HCUU0F80bD8DZs4v47nkzuf3i2Vw8v5iS3DFENBxs6YrLPo0kJBRSEIS4sLm6BYAzZvaca2JpWT5Ly/K7f0/MS2NZWR4vfLyfkpwxlOanMac4q/v/W8+c1p0uzhkDwP7mTibmpTlZ/RGPeO6CIMSFCm87JTljSEse3Me8YtEEdtf7eL+8novmFaNU//HtJaZxF91djLsgCHGiwuujrDB9SGUvOH4cqUmGufq0Kcn0R7fn3jS4cdda8+a2WrqC4SHVIdEQ4y4IQszRWlPpbWdqYcaQymemJnHFognMm5DNceMyByyXmuSmICOZAy2DG/ftB9v48pNr+e+/bRlyvRMJ0dwFQYg5ta1+fIEwU8cOzbgD/OTS44loPaAkY1GSM4bqIXju5XVGz9enV+/jvDnjOGPm2CHXJREQz10QhJhjDSkwdYiyDBjjyHjcg5us4pwx7B+C5l5V7wOgrDCd7z+/iZbO4JDrkgiIcRcEIeZYxn3aEGWZw6EkZwwHmjsHHWFyd4OPcVmp3HPlArzt/iHJM+FI4oxaKcZdEISYU1HXTmaKh8LMlGFfd3HOGLqCERp9gUOWq6r3UVqQxrwJOXz1jKm8sH4/Ww60DFj+yQ+qWHLHGwnTADuocVdKTVRKva2U2qqU2qKU+rqZf7tSar9SaoP5udC2zH8opcqVUjuUUuc5uQOCICQeFV4fZWMzBtXPj4SSXCsc8tAdmaoaOphSYMhClt5e397/A6HdH+KeN3bS4Auw25RzRjpD8dxDwLe11rOBZcCtSqnZ5n+/0lovMD+vAJj/fQGYA5wPPKiUcjtQd0EQEpQKb/th
6e2HQ0l3R6aBBxBr6QzS6AtQmm/UwWOOCx+O9D9swZMfVNHUYWjyo8a4a61rtNbrzXQbsA0oOcQilwLPaK39WuvdQDmwZDgqKwhC4tPuD1HT0jXkMMjDJWrcB/bcrcbUUtNzd5vGPRTuq6m3dgV5ZEUly81es5X9jC8/EjkszV0pVQosBFaZWbcppTYqpR5TSuWaeSXAPtti1fTzMFBK3ayUWquUWuv1eg+/5oIgJCS7vYZhdcq456QlkZbsPmRHJsv7tmQZj9vy3Psa9ydWVtHSGeQHF85iXFYqlaPFc7dQSmUAzwPf0Fq3Ag8BU4EFQA1w1+FsWGv9iNZ6sdZ6cWFh4eALCIIwKjiSMMjDQSlFsRkxY7Gzto3OQLQhdHe9D6Vgkjn+jCXLhHoZ95bOII++V8k5s4uYOyGbKQXpVHpHkXFXSiVhGPantNYvAGita7XWYa11BHiUqPSyH5hoW3yCmScIgkCFtx23SzEp37mBvUpsse77Gju48N73eODtXd3/VzX4KM4eQ2qS0RzodhmmsLfn/tb2Wtq6QvzrGVMBIya+0tueEBN5DyVaRgG/A7Zpre+25dsHeLgc2GymXwK+oJRKUUpNAaYDq4evyoIgJDIV3nYm5aWR4nEuzsLuuf/u/d2EIpo3zen7wNDcLUkGBvbcOwNGA6ul408pSKe1KzRomOVIYCjDD5wMXAtsUkptMPN+AFyllFoAaKAKuAVAa71FKfUssBUj0uZWrXViBIYKguA4FXU+xyQZiwm5Y2jwBaht7eLZtfvISPGw/WAb+5s7Kc5OZXe9j0sWFHeXjzao9oyWsaJnrP+tdoLd9T7yM4Y/Rn84GUq0zPtaa6W1nmcPe9RaX6u1nmvmX6K1rrEtc4fWeqrWeqbW+lVnd0EQhEQhHNHsrvc51phqUZyTCsD/vraDjkCYn14xF4B3dtTR1BGktSvUHQYJ0QbV3p570IyeSTJlG8vbT4RGVemhKghCzKhu6iAQjjhu3EtyDD3/+fXVnDKtgIvnjWdC7hje3u7tEykD4BlAc7d+u03jPyF3DElulRCNqmLcBUGIGd2G1WFZxuqlqjXcdFoZSinOnDmWleX17KxtA6Ix7mCTZXoZd+u3pcl73C4m5aWxu37kx7qLcRcEIWbUtfoBGJeV6uh2ijJTcLsUM4syOW16AQBnHldIZzDMn9fsw6VgYm40WmegHqq9NXeAssKMhPDcZTx3QRBiRl2b0WvUiQHD7HjcLr5z7kwWTcrpHr9meVkByR4XG/Y1MykvjWRP1LcdyHO3NHeP3bgXpPPuDi/hiO5h9Eca4rkLghAz6tr8ZKV6uuPLneRfz5jaY6LtMcnu7iEE7JIM2Dz3cF/N3e1SPQY4KytMJxCODGkqv3gixl0QhJjhbfMz1mFJ5lCcOdPoDT+lVweqQ2nuvb3zKQVGY3DlCNfdxbgLghAz6tr8jHVYkjkUZ80qwqVg5risHvlKKdwu1U+0TKSHJAN0T+o90nV30dwFQYgZdW1dLJqUO3hBh5iYl8Y/v3kak/L6Ruu4XWpInnt+ejKZqZ4eQ//qIcztGmvEuAuCEBO01oYsE0fPHWDa2Mx+8z0u1SdaJhTWJPWat1UpRVlBOhXedv6x+SD3v7WLcETzt6+d0qdsPBk5NREEYVTT5g/RFYwwNjN+mvuhcLtUd3SMRX+eOxjhkB9UNPAvf1xHfbuf7QfbeH5ddayqOiTEuAuCEBOsGHenwyCPlCS3a0iaO8C5s4tYNCmHe65cwMrvfYr5E3O4/61yAqH+Z3KKB2LcBUGICVaMe7xlmYEYquYOcMHc8bzw1ZO5bGEJHreLb50zg/3Nnfxl3b4+ZeOFGHdBEGKCt83w3MdmjUzjPpDm3p/n3pvTphewaFIOD7xVjj8Upq61iztf2ca9b+wiGI6PNy8NqoIgxATLuBeOYM29t+cejmg8Q2gkVUrxrXNm8sXfreL6x1bz8d5mguEIEQ3v7qzjvqsWMiHXuclJ+kM8
d0EQYkJdm59kj4us1JHpU3r6iXMPDaC598fJ0/JZVpbH2qomLltQwtvfOYP7r1rIztp2Lrz3PT6saHCi2gMyMo+yIAijjrrWLsZmpoy4eHCLgTz3oY4fo5Ti0esW0xkMd0cETc5PZ96EbK58+CMeXlHB8qn5g6xl+BDjLghCTPC2xz/G/VB4XK4+Y8uEIkPT3C0yU5PITE3qkTc5P53jS7KpbuoYlnoOFZFlBEGICXWt/hEbBgkDRMuEh6a5D0ZRVgq1rV1HvZ7DQYy7IAgxwRhXZmQ2poIx1V6faJlIZFiG9R2XlUpTR5CuYOymkxbjLgiC43QFw7R0Bke0LDNgtMwwGPeibOOhZkUMxQIx7oIgOE59+8junQoDRcsMz4QcReYwxwdjKM2IcRcEwXHqRngHJjA9994NqkPsxDQY1rSCB1vEuAuCMIqwxpUZyZp7kttFqI/mPnwNqkBMG1XFuAuC4Djedsu4j2zPfagDhx0u2WOSSPG4xLgLgjC68LZ2oRTkpSfHuyoD4jmMgcMOF6UU47JTOdgqDaqCIIwi6tr85KenDIvE4RT9ee7DpbkDFGWmiucuCMLoIt5zpw4Fj8t1xAOHDYWi7BFm3JVSE5VSbyultiqltiilvm7m5ymlXldK7TK/c818pZS6TylVrpTaqJRa5PROCIIwsvG2jezeqTCA5z5MmjtAUabRS1VrPXjhYWAoj6QQ8G2t9WxgGXCrUmo28H3gTa31dOBN8zfABcB083Mz8NCw11oQhISirq0rATx31Sda5nAGDhuMcdmpdAUjtHaGhmV9gzGocdda12it15vpNmAbUAJcCjxpFnsSuMxMXwr8Xht8BOQopcYPd8UFQUgMwhFNfXtgRMe4g+m5H+XAYYci1h2ZDktMUkqVAguBVUCR1rrG/OsgUGSmSwD7XFPVZl7vdd2slFqrlFrr9XoPt96CICQIjb4A4Yge0THuYIwt09/AYW7XMGnupnGPle4+5ForpTKA54FvaK1b7f9pQ0Q6LCFJa/2I1nqx1npxYWHh4SwqCEICEZ2BKQE893409yT3MMkyI9FzV0olYRj2p7TWL5jZtZbcYn7Xmfn7gYm2xSeYeYIgHIM0+AzjXpAxso27x+XqM9/pcGrulixVG6MhCIYSLaOA3wHbtNZ32/56CbjeTF8PvGjLv86MmlkGtNjkG0EQjjEafQFgZHdggoEHDhsuzT01yU1OWhK1bbEx7kOZielk4Fpgk1Jqg5n3A+BnwLNKqS8De4DPm/+9AlwIlAMdwI3DWWFBEBKL+nbDuOePcOPu7qW5hyMarRk2zR0MaeZgS2x6qQ5q3LXW7wMDPbrO6qe8Bm49ynoJgjBKaPT5cbsU2WOSBi8cR3p77lZYpGeYNHeAsVmp1MXIc5ceqoIgOEqjL0BuWjKuYZI3nMJt9lC1OhlZhn64ZBmAcVkpMRv2V4y7IAiO0tAeGPGSDESNuOW8WxLNcDWogiHL1Lf7CfVquHUCMe6CIDhKoy8w4htTIWrELTnG6tA0nJ772KxUIjraDuEkYtwFQXCUBl+AvIyRb9wtI27JMUHTyLuHcSTLWMa6i3EXBMFRGtr9CSHLRD33npp70jB67kUxnG5PjLsgCI4RDEdo7QqRnz6yOzCBzXM35RhrPtXh1NyLso3jEIuIGTHugiA4RpPVgSkBZBlLfuntuQ9nKGRBegpulxLPXRCExKbBlxgdmKCv5m41rA5nJyaXS5Gfnkx9u/MdmcS4C4LgGIky9ABE5RdrfJmQA5o7wJhkN11BCYUUBCGBsTzUggSQZazRH7s9dwc0d4BUj5uuYHhY19kfYtwFQXCMqOc+8htULfnFSc0dIDXJhT8knrsgCAlMoy+AS0HOCB9XBmKjuQOkJInnLghCgtOQIOPKQN8eqpYsM9yae4rHRZd47oIgJDKN7Ykx9AD09dzDDowtA8a47n7x3AVBSGQafH7yE6AxFfr2UA05prmLLCMIQoLT4AskRO9UMKbZ
g/489+E1k6kel4RCCoKQ2CTKiJBg89xNrd2Kdx/OUSHB9NxD4rkLgpCghMIRmjuCCWPcPe7+NXcnQiFFlhEEIWFp6ggCJKDm3rOHqhOeuz8U6Z7xySnEuAuCMCQ2Vjezp8E35PKN3ePKJIrmPlC0zDBr7klutIaAw7MxiXEXBGFQguEI1z22mh+9uGXIyzSYQw8kjCxjGvGgw5p7isfYjtONqmLcBUEYlJXl9TR3BFlX1Tjk+T+7R4RMEFkmVpp7SpIbwPFYdzHugiAMyssbawDwBcJsrWkd0jKJNCIkDKy5D//AYeK5C4IwAvCHwry25SCnTCsAYPXuxiEt1+ALoBTkpiWGcR9Ic/c4oLkDjodDinEXBOGQvL+rnrauEF8+ZQoT88awpmpoxr3R5yc3LXnYPV+nGKiHqhPDDwCOh0OKcRcE4ZC8vLGG7DFJnDytgBNL81hb1TSkML6GBBpXBvr2UA051olJZBlBEOJMVzDM61trOW9OEckeF0tK82jwBajwDh4S2ZBAvVMhtmPLgCF3Ocmgxl0p9ZhSqk4ptdmWd7tSar9SaoP5udD2338opcqVUjuUUuc5VXFBEJzn3Z1e2v0hLppXDMCJU/IAhiTNNPoCCTF3qkW35m567I5p7h5Llom/5/4EcH4/+b/SWi8wP68AKKVmA18A5pjLPKiUcg9XZQVBiC2vbKohNy2J5VPzASgrSKcgI5k1Q2hUbfQFEiYMEsDt7t9zH+4mg6gsE2fPXWu9AhhaCwpcCjyjtfZrrXcD5cCSo6ifIAhxZG1VE6dMLyTJbZgKpRQnluaxehDPPRzRNHUEEmJ6PYs+MzGFI3hcCqWGuxPTyG9QvU0ptdGUbXLNvBJgn61MtZnXB6XUzUqptUqptV6v9yiqIQiCE7R0BNnf3Mns8Vk98k8szaO6qZOals4Bl23uCKA1CSXL9NbcwxE97Ho72Dx3h2djOlLj/hAwFVgA1AB3He4KtNaPaK0Xa60XFxYWHmE1BEFwim0Hjc5Ks8Zn9shfYuruh4p337CvGYDSgnRnKucAlrZuDfkbiuhh19thhPdQ1VrXaq3DWusI8ChR6WU/MNFWdIKZJwhCgrHN7Ina23OfNT6LzFQPz6zeN+BQBH83wyeXl+U7Xs/hwu1SKAXhSLRB1YkY/RGjufeHUmq87eflgBVJ8xLwBaVUilJqCjAdWH10VRQEIR5sPdBKfnoyhZk9dXO3S/HDC2fxYWUDv3htR5/luoJh/mkLn0wkPC7VLcsETc19uEl2u1DK+WgZz2AFlFJPA2cABUqpauC/gDOUUgsADVQBtwBorbcopZ4FtgIh4FattfOj0guCMOxsO9jK7OKsfhsUv7BkEltrWnlkRSWzx2dx2cJo09qKXuGTiYTbpXoMP+CE566UItXjdjzOfVDjrrW+qp/s3x2i/B3AHUdTKUEQ4ksoHGFnbTs3nFQ6YJkfXTSbHQfb+N7zGykrTGfehBzA6NFqD59MJDwuV49QSCtKaLgxZmMamQ2qgiCMYirrfQRCkT6NqXaS3C4evGYRBRkp3PKHddS1ddEVDPPGtlrOP368Y4bRSWLhuYM5j+pI1NwFQRjdWI2ps3o1pvYmPyOFR647gaaOAP/6x/W8tuUgHYEwF80bf8jlRiqG5h4d8tcJzR2MCTucDoUcVJYRBOHYY+uBVpLdLqYWZgxadk5xNr/83Hxu+9PHbDnQQkFGMkvNcMlEw+65h8IR8dwFQRhdbK1pZdrYjCFLKxfNK+bWM6fSFYxw/vHj8CSgJAOm526Pc3doP1JiYNzFcxcEoQ/bato4fcbhdS789jkzmZCbxtmzihyqlfO43T01d6dkmVSPC3+8QyEFQTi28Lb5qW/3M7v40Hp7b1wuxVVLJjlUq9jQO1rGSVmmqSPgyLotEvPdSRAEx4g2pg4cKTNa6a25O+a5JznvuYtxFwShBwMNO3As4HEpguFotIyjDarxnqxDEIRjB601H1Y2MD47lZwEmdh6
OPH00twd68TkkWgZQRBiyGMrq3hnh5drlia2dn6kuGOkuadID1VBEGLF+7vq+ekr2zhvThFfPWNavKsTFzwx09zFcxcEIQbsbejg1j+tZ1phBnd9fgEuh4zaSMdt66Hq6PADHhf+UASttSPrBzHugiAA9721i3BE88h1J5CRcuxGSPfw3B3U3Lsn7HBwCAIx7oIgsLO2jYWTcpicnzgzJzmB2zaeu9MDh4GzE3aIcReEYxytNRV17UMaR2a009NzdzbOHcRzFwTBQWpb/fgCYaYWHtteO5jRMtbYMmEnNXfx3AVBcJgKbzuAeO701dw9bqdlGfHcBUFwiErLuI8V4+5294yW8bicm4kJxHMXBMFBKrw+MlI8jO01EfaxiH2CbCfHc08RWUYQBKep8LYztTC934mwjzU8ds3dySF/Lc9dGlQFQXAKiZSJ0ltzdzuuuYvnLgiCA/j8IQ60dInebmJo7raBw0RzFwQhEdld7wOQMEgTw3M3hgVwshOTpblLnLsgCI4gYZA9sXqoWtKMkwOHAfjFcxcEwQkq6tpxuxST8tPiXZURgaW5W9KMc5q7JcuI5y4IggNUeH1MykvrlgmOdazx3C3j7pzmLg2qgiA4iBUGKRhYnnvYDId0SnP3uBQuhaNT7Q1q3JVSjyml6pRSm215eUqp15VSu8zvXDNfKaXuU0qVK6U2KqUWOVZzQRCOinBEU1nvE73dhjVBdtDsperU8ANKKXPCjvjKMk8A5/fK+z7wptZ6OvCm+RvgAmC6+bkZeGh4qikIwpGysryeO1/ZRijc05Dsb+okEIpQJp57N1YDqhXF4pTnDs7PxjSocddarwAae2VfCjxppp8ELrPl/14bfATkKKXGD1NdBUE4DELhCL98bQdf/N0qHl5Rydaa1h7/S6RMX6wGVCuKxaloGTBmY4q3594fRVrrGjN9ECgy0yXAPlu5ajOvD0qpm5VSa5VSa71e7xFWQxCE/thxsI2rH13FA2+Xc/Ys4/bcckCM+2BYDaiW0XVq4DAwPHd/PDX3wdDGJICHPRGg1voRrfVirfXiwsLCo62GIAjAlgMt/Osf13HePSvYVtPKPVcu4JFrTyAz1cPm/S09ym6taaUgI4Xc9OQ41Xbk4e6WZUzP3SHNHYyp9pz03I90ssRapdR4rXWNKbvUmfn7gYm2chPMPEEQHGbHwTYueWAlaclu/u2s6Xzp5FJy0gzDPXt8Vh/Pff2eJhZNyolDTUculjG3jK6zmrtrRHruLwHXm+nrgRdt+deZUTPLgBabfCMIgoOs3dNIOKJ56bZT+NY5M7oNO8DxJdlsq2ntblStb/dT1dDBCZNz41XdEUkfz91RzT3ODapKqaeBD4GZSqlqpdSXgZ8B5yildgFnm78BXgEqgXLgUeCrjtRaEIQ+lNe1k5bsZnJe396mc4qz8IciVJpjyXy8txlAjHsvekfLOKu5O9ugOqgso7W+aoC/zuqnrAZuPdpKCYJw+JSbQ/e6+vE2jy/JBmDz/hZmFGWybk8TSW7VnS8YuF09J652avgBMAYPkx6qgiAMSnldO9MHGLq3rCCdFI+rW3dfv6eJOcXZ3d3gBYNuzz0WoZBJrvj2UBUEYeTT1hWk5hDjsnvcLo4bn8Xm/S0EwxE+qW5m0SSRZHpjae5dMevENPLi3AVBGEFUeA0tfSDPHeD44iy21rSy5UAr/lBE9PZ+6Ou5OxznLrKMIAiHYldtGwDTDmHc5xRn09YV4v8+NqKTF03OiUXVEgp37wZVR+PcXTKHqiAIh6a8rp1kt4tJ/UTKWBxfkgXA8+uqKc5OZXz2mFhVL2GwjHk0WsbZUMhAKEIkcth9QIeEGHdBGAWU17UzpSAdj3vgW3pGUSZul6LNH2KRSDL90h0tY8olTmvu4NxUe2LcBWEUsKuunWlFhx4jJjXJ3a3Ji97eP0lWg2pMNHdnJ8kW4y4ICU5XMMy+po5DNqZazCk24tolUqZ/Yqq5m7NfORUOeaRj
ywiCMEKo8Laj9aEbUy3Om1NEZX07s4uzYlCzxCOmmrvD86iKcReEBKe8zhi6d/rYzEHLnjtnHOfOGed0lRKWaA/V2GnuIssIgtAv5XXtuBSUFgwcKSMMDY+r56iQsdDcpUFVEIR+Ka9rpzQ/vVvDFY6c3qNCOuq5e8RzFwThEOyqax9w2AHh8Ij2UDW86SSHJ+sAMe6CIPRDMByhqt43pEgZYXB6R8s4PVkHONegKsZdEBKYHQfbCEX0kCJlhMHxuHrGnjs9tgzg2GxMYtwFIYF5bl01yW4Xp8+QeYiHA7c7dp57ikc6MQmC0A+dgTDPr6/mgrnjyM9IiXd1RgWeWE6z1625iywjCIKNv208QFtXiKuXTIp3VUYN9lBIl6LfWa2GC4lzFwShX55atZdpYzNYMiUv3lUZNdg1dyf1doBUj8S5C4LQi837W/hkXzPXLJ2EUs55l8cads3dSb0djNmxPC7lmOcuww8IQgLyp9V7SU1yccXCCfGuyqjCrrE7qbdbfOPs6Sx0aBA3Me6CkCA0+gKsLK9n1e4GXlhfzcXzislOS4p3tUYVdm/dyREhLW771HTH1i3GXRASgCZfgLPueoemjiDpyW6Wl+Xzb2c5ZxiOVdw2icvtsObuNGLcBSEBeH59NU0dQR6/4UROnV5wyBmXhCPH5VK4FER0bGQZJ5ErRBDihM8foq0rOGg5rTVPrdrL4sm5nHncWDHsDmNFyTjdoOo0cpUIQhzoDIS5+IH3uezXKwkMEgr3YUUDu+t9XL1U4tljgWXUnRw0LBaIcRcEB3h3p5dvP/sJoXD/hvsXr22n0uujwuvjyQ+qDrmup1btJSctiQvnjnegpkJvLDnmmPbclVJVSqlNSqkNSqm1Zl6eUup1pdQu81smaxQSnkhEs35vE1rrIZX/zTsVPL++mqdX7+3z30eVDTy+sorrlk/mU8eN5b43d+Ft8/e7Hm+bn9e2HOSziyZ092gUnMWKdXe6E5PTDEftz9RaL9BaLzZ/fx94U2s9HXjT/C0ICc0v/7mDKx78gD9+tGfQsnVtXaza3UCSW3HX6ztp7gh0/9fuD/Hd5z5hcn4a37/gOP7z07PoDIb55Ws7+l3Xs2v3EYporhJJJmaI5z4wlwJPmukngcsc2IYgxIy/fXKAB9+pINnj4oG3ywftUfjqpoNENNz1+QW0dga5541dALR2BfnGMx9T3dTJLz83n7RkD2WFGdx4cinPrtvHpuqWPut6du0+lpflM7VQhvSNFZbHHos4dyc5WuOugX8qpdYppW4284q01jVm+iBQdJTbEI5RuoJh/vbJgSFLIU6w5UAL333uExZPzuXR6xZT2+rvV2qx8/eNNcwoyuCS+cVcs3Qyf/hoDy+sr+ai+97n7R1efnzRbE4sjY4H87WzppMzJomHV1T0WE8gFGFPQwfLyvId2TehfyyP/VgPhTxFa70IuAC4VSl1mv1PbdyV/d6ZSqmblVJrlVJrvV7vUVZDGI38ec0+vvb0x6zf2xSX7Te0+7n59+vITUvmoS+ewOkzCllWlseD71TQGejfez/Y0sWaPY1cNK8YgG+dM4OMFA/fMhtX/3zzMm48eUqPZbJSk5g1Poualq4e+Zack5eR7MDeCQPhEc0dtNb7ze864K/AEqBWKTUewPyuG2DZR7TWi7XWiwsLZaIBoS8fVNQDsGp347Cvu60ryNl3v8tnHvqAd3d6+7wdBMMRvvrUeurb/Tx87QkUZhrjpX/z7Bl42/w8tap/7f2VTTVoDZ+eZ0S25KYn8/PPzOWqJZN45eunsri0/xEc89KTafQFeuQ1mL/z08W4xxL3sa65K6XSlVKZVho4F9gMvARcbxa7HnjxaCspHHtEIrrbqK85AuP+7k4vb22vHfD/n7y8jUpvOweaO7n+sdVc9uAHvLW9ttvI/8/LW1m1u5GffWYu8ybkdC+3tCyfU6YV8NA7FbT7Q33W+/LGA8wan9VDIz//+PHcecVcctIGNtL5
6ck0tPeMmLGMfe4hlhOGH0uOOZY19yLgfaXUJ8Bq4O9a638APwPOUUrtAs42fwvCYbHtYCvNHUFy05JYu6eJcGTouvtz66q54fHVfO1PH9PS2bcH6Nvb6/jz2n3ccvpU3v3umdx5xVwa2v186Ym1XPzA+/zk5a38/sM93HTqFC7vZ9TF75w3k6aOAN9+dgMRW732N3eyfm8zF807/Hj0/IwUWrtCPTo0dXvuIsvEFGtMmWNWc9daV2qt55ufOVrrO8z8Bq31WVrr6Vrrs7XWw/9OLYx6Pqo0LpuvnFpGW1eIHQfbhrTcn9fs5bvPfcLckmx8gXCfxs+WjiDff2EjM4sy+cbZ00n2uLhqySTe/s4Z/OIz82jtDPHb93dz6vQCvnf+cf1uY8HEHH746dm8tqWW+94yImHq2/1877mNAHz6CDob5ZnSS5MtbLLJNO55IsvElGgo5DGsuQuCU3xY0cDk/DQuXWA0TK6pivoIn+xr5s5Xt/XRyV/csJ/vPb+J06YX8uwtyzl5Wj5PrKzq9oa11vzoxc00tAe46/PzSfFEOwUluV18/sSJvPXt03nyS0v4zRdPOOQYLl86uZTPLJrAPW/s4u7Xd3LBve+xuqqRO6+YS2lB+mHvr6WrN7RHjXuDL4BSIsvEGomWEQSHCEc0q3Y3sLwsnwm5aRRnp7LaZtx/8vetPPxuZbd3D4ZG/7+v7WD+hGwevvYEUpPcfOXUMg62dvHyxgMAPL6yipc+OcDXz5rO8SXZ/W7b43Zx+oxC0lMOPWCqUoo7Lj+e+ROyue/NXWSlenjx1pO56gjnM7W8c3ujaqPPT86YpIRv2Es0uj33BNfcZchfYcSx9UArbV0hlk814rtPnJLHBxUNaK3ZsK+ZNVVGaOSfVu/tLrNil5fqpk6+f8Fx3d30z5hRyPSxGTz63m6KslK545VtnDu7iFvPnDYs9UxNcvPb60/k5Y0HuPLEiaQlH/ntZOnqDb5oo2qjL0CuSDIxp3vgsAR/qIrnLowI7A2mH1YaIZDLzc47J5bm4W3zs6ehg9++t5usVA9XLp7IPzbXUG9GmDy1ai8FGcmcO3tc93qUUtx0ahnbalr58pNrmFqYzt1XLhjWGe0LM1O48eQpR2XYAfLSjVBLuyzT6AtIGGQcsKJkRHMXhKNk64FWjv+v17j9pS34Q2E+rGigrDCdsVmpACyZYsSGv7C+mlc313D10sncdNoUgmHNX9ZWU9PSyZvbavnc4okke3pe0pcuLKYwM4Vkt4tHrl1MxiByS7zIGZOES/WWZQLSmBoHPKMkWmZkXunCMcXDKyoIRSI88UEVa6oaqar3cdnCku7/pxVmkJOWxIPvVOBSihtOKmVcdipLp+Txp9V76AyE0MBVJ/bVu1M8bv70laW4XeqIGjpjhculyEtP7g5/BMO4nzC5/05PgnOMFs1dPHfhqOgMhAfsij8U9jd38vLGGq5fXspvr1vM/uZOfIFwt5YOhuFbPDmPUERzyfxixmUbHv01yyazr7GT37xbyWnTC5mUn9bvNqYXZVKWAANvGb1UDZkpEtE0dQTJS5cJsGPNaImWEc9dOGK6gmEueeB9XErx4m0nH9F444+/vxuAG0+ZQknOGF75t1P52ycHOGd2z/HmTpqaz5vba/nKqWXdeefNKTJ6dvoCXDMKhsS1D0HQ2hUkHNHdWrwQO2RsGeGY539f28GuunZ21LZx9+s7D3v51q4gz6zZx6fnjqckZwwAxTljuOX0qT1i0AG+uGwyr379VGYXZ3XnpXjcfOmUKRw3LpNPHTf26HZmBJCfntLdoCrjysQP9ygZ8lc8d4FwRLO7vp1pYzOHvMzq3Y08tnI3X1w2iXAEHn2vknNnFw04MFZ/PLN6L+3+EDfZvPGBSPa4OG5cVp/8W8+cNmyhjfHGrrk3Su/UuDFaJusQ4y7wi39s5+EVlfzxy0s5ZXrBoOV9/hDf+csnTMxN4z8umIUG3tvl5Tt/+YRn/2U5Ww60
smZ3Y/e4Li6l+NziCT0G4OoMhHl8ZRVLp+Qxd0L/HYqONfIzkmnpDBIMR7o9eDHusUc0dyEhaPQFuPWp9VyxqITPnjABpXpesJXedh5baeje//23Lbz69VP77XYfCEVYU9XIqsoG3txex76mDp65aVl3T85ffHYeVz+6iiV3vAkYN0ZOmtEY6POHeX59NY/fcCJLy/LpCIT40hNrqG3t4pefm+/k7icU+bbxZawxZsS4x57uUSETXHMX4z7K+dsnB/iwsoEPKxv4oKKB/7ns+B6x3j/5+zZSPG5uv+Q4fvjXzTy1ai/Xn1TaYx27atu47U8fs6O2DZeC40uyufPyuSy1zRB00tQC7rxiLgeaO1k6JZ9Fk3O6O/bUtXZx1aMfccPja3jg6oU8/G4la/c08qsrF3DytMHfFI4VrMbTRl9AZJk44h4lQ/6KcR/lvLzxANPHZnDx/GLueWMnG/Y184MLZ3H2rLG8s9PLW9vr+MGFx3H1kkm8uukgd7++k0vmF5ObnozWRiehH7+0mfRkD/dftZAzZhaSmdp/eN5A46qMzUrlmZuXc/WjH/HlJ9fidinu/cJCLp5f7OSuJxx5tsHDGtoDpCe7jygCSTg6RHMXRjwHW7pYU9XEt86Zwb+dNZ0lU/L49+c2ctPv1zJ7fBbt/hBTCtK54aQpKKX40UWzufC+9/j35zdSlJXCR5WNlNe1c9LUfO65ckF3j9EjoTAzhadvXsaPX9zMpQtKOG/OuMEXOsaIji8ToNHnl+n14sRoGc9djPsoockX4Pn11Zwzu4jJ+UZPzL9vMuYpt6Z8W1aWz5vfPp0XNxzg12+Xs7exg8duWNzdZX/muEyuXTaZJz6oIj3ZzeLSPK5fPpmrl04eFi+mICOFB6854ajXM1qxNPfGdj+NHUHyZKjfuBAdW0aMuxBHGtr9PPrebv7wYRW+QJjn1+/nxVtPJtnj4u/9TPmW5Hbx2RMmcNmCYvY1dTKlV5f8H356FlctmcTUwvRDjmcuDD85ackoZWnufgozpANTPIhOs5fY139i1/4Yp6UjyAX3vsfDKyo4a1YRP75oNttqWnngrV2DTvnmcbv6GHYwjP/McZkJf2EnIm6XIjfNiHVvbA9I79Q44ZFQSCHe3PvmLrztfp77l+XdA0xt3t/Cr9+pYE9jB8ARzecpxI+89GSjQdUXkLlT44SluYssIxw1XcEwOw62YY1onpeWzMS8Md0x6VprDrR0kTMmqTuuvLyujd9/WMUXTpzUY+TA/7p4Dh9UNPDihgPMLcnu1t+FxCAvPZnq5g78oYhMrxcnomPLiHEXjpIf/HUTL6zf3yNvXFYqS8vycCvFqt2N7G/upDAzhXuvXMDyqfn8v5e3MSbZzXfOndFjuey0JH7+2Xlc/9hqLpFQw4QjPz2ZTdUt3Wkh9rhHieYuxj3OHGju5KUNB7hsQTGXLjDGMK9u7mSV2elIa82SKXnceHIpT6/eyzW/W8WFx49nxU4vP7poNvn9NLqdPqOQ175xGmWF4rUnGvkZyXQGjSGUpQNTfBDNXRgWnvigiojWfPvcmUzMi45Hfu2yyX3KXr10Ej9+cQvPratmamE61y3vW8Zi5rihDwImjBzsjagS5x4f3NKJSTha2rqCPL1qLxfOHd/DsA9EWrKHX35uPpfML2ZiXhpJCf7aKPTFLsWILBMfRovnLtbhKFhZXs9F97/Hz17dToM5UXNLR5B73tjJhfe+x+tba3uUD0c0LR3B7t9/XrOPtiEOeWvntBmF/YYxComPXYrJFeMeFyRaZhSjtWZPQwcb9jUzb0J2nynaQuEI9765iwfeLqcwI4WHV1Tw5AdVnD27iHe219HmD1GUlcJNv1/LjSeX8u/nHcdrWw5y/1u7qPD6uHDuOL56xjQeX1nFkil5zJ+YE58dFUYclree5FZkjtDJvEc7lsee6G/GcvXY2NvQwW9WVPDWtjoOtnYBkJrk4vaL53DliRNRSrFuTyM/fWU76/Y0
8bkTJvDfl87hQHMXD75dzsubajh71lhuO3M6U8emc+cr23l8ZRVPr95LVzDCzKJMbjiplOfWVfPKpoMA3H7JnHjusjDCsHT2vPTkPsMzC7FBNPdBUEqdD9wLuIHfaq1/5tS2jpbd9T5+/XY5f/14P26X4pzZRSwry2dOcRZ3/3Mn339hEyt2eWnpDLKyvIH89GR+deV8Ll84AYBpYzO4+8oF3H3lgh7rvf2SOZw0NZ9n1uzj84sncu7sIlwuxTfPnsFjK3dzsKWLs0bB9HDC8JFvNqhK79T4IXHuh0Ap5QZ+DZwDVANrlFIvaa23OrE9C601VQ0drKpsoKali0WTc1k8Obe7409vyuva+fXb5by4YT/JHhc3nFTKLaeV9Rj98PdfWsJD71Zw1z93kJeewg8vnMU1yyZ1j1U+GOfOGce5vUZAzE5L4pvnzBhgCeFYJtec4CQvvf9hlQXn8YjmfkiWAOVa60oApdQzwKXAsBr3d3d6+cnL0VU2dwbxtvl7lHG7FJPz03D3esXVQIW3nVSPm6+cWsZNp5ZRmNnXW3K5FLeeOY1LFxRTkJEi42sLjuJxu8hJSxLPPY64ZSamQ1IC7LP9rgaWDvdGMlI8TC+KNnamJrk5YXIuS6fkMy47lY/3NrGqspHK+vZ+l7/g+HHccFJpvx2BejMhd/BQRUEYDr53/nE9RvIUYsvyqfnccnoZx41P7L4iSms9eKnDXalSnwXO11p/xfx9LbBUa32brczNwM0AkyZNOmHPnj3DXg9BEITRjFJqndZ6cX//OfXesR+YaPs9wczrRmv9iNZ6sdZ6cWFhoUPVEARBODZxyrivAaYrpaYopZKBLwAvObQtQRAEoReOaO5a65BS6jbgNYxQyMe01luc2JYgCILQF8fi3LXWrwCvOLV+QRAEYWASO9ZHEARB6Bcx7oIgCKMQMe6CIAijEDHugiAIoxBHOjEddiWU8gJH2oupAKgfgel4bz+R6xjv7SdyHeO9fanj0aUPl8la6/47CmmtE/oDrB2J6XhvP5HrGO/tJ3Id4719qePRpYfzI7KMIAjCKESMuyAIwihkNBj3R0ZoOt7bT+Q6xnv7iVzHeG9f6nh06WFjRDSoCoIgCMPLaPDcBUEQhF6IcRcEQRiFODZw2NGglHoMuAioA1ab6VYgC8gD/EAHkGumNZDeazVdQCp90UB/kyM6nT8YR7rc0RIh+pAPMfA1Ya+fldYYx3nMAOsbbuzbHcqxGso5Gig9lPUcCYdzfCzN1Ok6HU4dhrJde5kwxsiwvfP7W0/Elmf/70juQc3Ax9n6X/Wqj3X9D1SvQ9Wh977Z98E650Ezz2OmNZAENGDcR2Mx4t1zgQqMY6fN9PFm+kta6w8H2K8ejFTP/Qng/F7pYuB/MOZnTQLWmelkjIO3DONgdAE1GAfiE4yDaB2oCMaUfy+Z/9cCd5rpFRgnN2L+/rOZBvi5uXwY+KGZFwROsOV/zbaNPxI9wV8080PA78y0HzjLTAPsxTjBmOWsse/DwHu2bfgwHnIhIIAxlaGVPmjb5jlmfhi4w1bmU2Ya4EOiF/k2ojfDejMdADrNdKXt2HWZyx80j98YWzkN7LQd/+3AW7Z6XW5uX9vqpYF/t9XXOr6dvY7vHowLP2j+rrYdrxVAk1nHdmCxWa7dtt4IxhDU1jl93CwTAR4w87qAq23b+Lkt/Xfbsr+2HaO/mulOjJnFrBuy9zn9q5luAlaaZVrN8l6Ma6Ie2GjmdQCbzWWs6zpk5p9gy7euqRA9r6lv2NIv2tJeoMW2fKe5bAjYYtYrgnF+g2Y5bdbP2rf7iJ6XL9iOyy1EDeEdRA3792zL/qftmCwmem72mMcjguHUWZ16HgIOmOn3zGOqgYuJXovW9QPwoHmMXMBJRK+TX5n7Csa58Zr1vAv4yEzvs+3LPnOfg+Z3CPgAKDfLWPuPeawazfxaM62A58z1KOCfZtly4KcY14AfeNSscwuGvao1y52ptV6AcT38
Q2t9HDAf414dEiPSuGutV2AcIHs6BeMmbMS4YCbZ0p0YB0VhHGCP+Uk3v983V+0CMoATzd/J1nYwZotyEX1C25/WE81vFzDXLO/BMB5WfraZdmNcVJZB6yD6tL7PTCdhXHTWk91P9M3DDbxsq+94jIvYZS4XMNNuYIf57Tb3y6LZlv+4rfxeol7ULlvdK23L2m9oi0Kz3F/M5a39GWMra5UvttVlPMbFbO3nbtvyO2zpPHP9Gphj2/dG27JJQJq5jAfjhraOVzGQaSvXQNQDc5mfiJlvX58H49jnmXkBjIeeB+PGbSb6JjPLtuzHRM/jD810AHid6DXThfGmGTHrWGbuXxYwE8PIW+flWYxr8REMp8W6pq3ZzKz71I1xPTXatnOXbT2VRM/velt6k20dSRj3krW+VvPbZR4H67rPpKd3az38XcCTtvQu27q32dJBW3o20XNwqpkXIXrPaYxzm25uayyG8cP8Pd62fethUGoeMzDe0O3nNclMT8I43lY6yVyPl+j18i7GfQHGdWSRjGFsLVuizLJjiRp36+GSgnEvhIkeX22WTcN4MJQQvYeDGA6JMpfrMsuXYCgVxo4rlQ2chvEAR2sd0Fo3M1Sc6Bk1HB+Mk7fZlu4ALjPTlldZStQTtvKDGDdxq3nQ/BgXoLZ9LA8hAmww1xHEuDmtMh8T9TbeIOoBvmnmhc1tWt7nKtuy/2LL327L/y1Rz2ixbf32T8S2rPX5my293/wOmvtr5bfY0l395Id7HQefLX0S0TeWp4jeQM22dAew1VZHqw6W92uvS7WtzHO2tH1/t9nyG2zpVluZLQMsG8Hw7HofO+vTPkA5+7rr+ykTxriBrTLNtvRWW9p+7KptafvbkL2+nUSvtw7z2/IANcbbaQTDw91rW24V0dd3a31BjOuu9zWlMTx3bduWld/U69hFev2219VP333ofZ0Gbcs22PLtx+U7tuXesaU32sp81ZbfZavXfqJvk4228pts6R8NcF4/ZUufbqvr/9nWX0v0Httlq4O9/mF67qfGeAhbeW1E7zN7vT4kei+8YduvjUTvqdsx3mwsT9+PYScexnhT6cB4OG/FcIiewLBHvwXSh2xD423ED8O478R4tdlkXlABM229Vlr51ivqJ0SNvWWY220nMkDUO7PkmC7b8nYD22bLtwxH0LwwrJtjp23d1ba0Pb+J6OucdUFa2/2j7bd1AVn19xO96TqIGuy9tvq0m/9pjNd5K38/USPlte3TNtt2/EQvYC9Rg23JOVYd7cZY98r325azbo7exnWrre7Wq65laK38Vts6ttrq2Ilxs1jHxZJ+Gm3r6ep1vq3lLNnILrv5iN7M1nHTGPJA2Ja2buBa23rsD69W2/JhehrhenoaUut8WDKEZQQ+tpX32o5dC8a1Zx1rq/619LyOrW202bZTYcsP0NNA2429dXybbdtpsy1r7Zv9ePltafuD2U/Ph7z9Wgj0yut93UXo6bhY+fa62h0Y+8N5ri2/1pY+1Vbmr7b8/9erjHWOrfNnHdNHbOUsidT6HbCVt+Qc+/6HMZxF6x7ZYKabMGSZaox79HbgD+a+P4hh3N8w7d455rpuMX/fC/zPaBx+IKC1PhdDa2vAOFgXY1xkNbZ8H8YNvwnDgIXNspswDmY9xgkImen7MV6PLANxwCxbR9QAhc18+029GSgyt7cR49WtxSxrvUZqjFfrA2baZaa3Y7xKWg+ccowLzjJE1xC9KD7G0PT85n+fYLwy+oFb6XkzWa/bXqIX+WvmvnQQvYHBeAW3N55ar7WWlGVJGklmOtm2bI5tmRTb/9Zy1r4qonKTBp4x69WFYagtL/UvtnzrdbyN6A3QjnHxWzeXJeeA8dprtRMEzeNleYdgyD/N5v4/TdSYbcQwzAGzjGXImsx6dGC8xfht+2zJABDVW3dinPdO89tuUJ8k2l6xmuhb5X7gl0TPW6u5P0/b9st6GKXb9qUZqDLX0WSu9w9mOmhbXyeGsxA0
92Mn0TeGNoz2A+shaB3fVPN7Nca5azHLpJrr9BCVNd4wy3SYywTMY/Yts35+jLfNKrP8A2a6y9y25Yy8bW7Hh/EwsjrzKPN/zO1a96bP3He7jBkyj6XlmFmSGhj3lCWdLDS/IxgKgKW//9xcr4UltwWJXosRs8xkos6XIvqwyabnA8uq7xzzGPnM/bHedD6P8Va631y/JWXeiHHvn6SU+iOGHWohKiE9ByxiiCSScbf0Q4VhVH9D1LhYhm08UbliKdHGsAyMRqUsjIPagHECNwPTiWqkAYybeCxRXdWN0biaZW73STO/DMMoa7P8LzAurADG61unue1vYpz8CMbE4VnAOKJvGda+/cb8DmA0TFk66tsYJz8J40QvIKoVt2AYnBaMtwjrjePb5jbAMJDFtvVZx+Rycx37MBqVLW/lM2Z+K4bx7TLX0YzxsHwN40J92Vy2A6NBqtJc7gWMizlkHgfLgLdhGHGrLtbDUWE8fIvN/fo70RvM+rYM9ylEDaT14A4QbShNMbc/laiWW4Jx/l0YUQiWhryRnm0y1gPKehNzmXVX5u8XiHruD9nW32yu3zonlracAhyHcQO7iL5JzcRokL2SqBE5iWhjsiXd3ILRgGc1lmOus9HcpxRgLYahysC4rscQ1fgvt+3TdnNbLvM8LDTr24phvFPN/1Zg6OMejAee5dgEiMoKGphhbiPZdjw95r6WmOXXE70G15j51nG02ig00XYvP3C2eTw+wrg+IhjX2EZzPXXmsbCiTYJmugrDOXCZ5a37artZ3nJUQmb6MXNdlldvPQCeJPr2sx+jPQOiD45cog+QNqIj2VpvedZbZ4t5fGrN7aRg3B+WLao36/MPc/nXzX38K4ateVdr/UVzGy6MawwM2W0rQ2RE9lBVSj0NnIExFKZlrDPpP+TISiuEeGA/B4fKE4yHg/V2NRTH6kiO42g89hH6P14Ber5NWQzlGPRXxoruGer6BqrX4WB5/NbbihvjIb3FXPdbwHKM/awEbtRaNw1lxSPSuAuCIAhHRyLJMoIgCMIQEeMuCIIwChHjLgiCMAoR4y4IgjAKEeMuCIIwChHjLgiCMAoR4y4IgjAK+f8xg7aERb3SxgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Max No.of Movies Relesed = 275\n",
      "Year = 1996\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "count    105.000000\n",
       "mean      86.866667\n",
       "std       92.458522\n",
       "min        1.000000\n",
       "25%       20.000000\n",
       "50%       45.000000\n",
       "75%      147.000000\n",
       "max      275.000000\n",
       "Name: title, dtype: float64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#visualization libraries\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "%matplotlib inline\n",
    "#ignore warnings\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Extract the release year from the title. The leading `.*` is greedy, so the\n",
    "# capture group holds the text after the last '(' up to the final ')'.\n",
    "# Titles that contain no parentheses yield NaN.\n",
    "# A raw string keeps `\\(` / `\\)` as regex escapes instead of invalid string escapes.\n",
    "mdf['Year'] = mdf['title'].str.extract(r'.*\\((.*)\\).*', expand=False)\n",
    "\n",
    "# Number of movies per extracted year (computed once, reused below).\n",
    "a = mdf.groupby('Year').title.count()\n",
    "plt.plot(a)\n",
    "plt.show()\n",
    "\n",
    "# Report the highest yearly count and every year that reaches it (ties included).\n",
    "most_movies = a.max()\n",
    "print('Max No.of Movies Relesed =', most_movies)\n",
    "for year in a[a == most_movies].index:\n",
    "    print('Year =', year)\n",
    "a.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b1ed3a23",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "      <th>Year</th>\n",
       "      <th>Adventure</th>\n",
       "      <th>Animation</th>\n",
       "      <th>Children</th>\n",
       "      <th>Comedy</th>\n",
       "      <th>Fantasy</th>\n",
       "      <th>Romance</th>\n",
       "      <th>...</th>\n",
       "      <th>Horror</th>\n",
       "      <th>Mystery</th>\n",
       "      <th>Sci-Fi</th>\n",
       "      <th>Documentary</th>\n",
       "      <th>IMAX</th>\n",
       "      <th>War</th>\n",
       "      <th>Musical</th>\n",
       "      <th>Western</th>\n",
       "      <th>Film-Noir</th>\n",
       "      <th>(no genres listed)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Toy Story (1995)</td>\n",
       "      <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
       "      <td>1995</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Jumanji (1995)</td>\n",
       "      <td>Adventure|Children|Fantasy</td>\n",
       "      <td>1995</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Grumpier Old Men (1995)</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "      <td>1995</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Waiting to Exhale (1995)</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "      <td>1995</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Father of the Bride Part II (1995)</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>1995</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9120</th>\n",
       "      <td>162672</td>\n",
       "      <td>Mohenjo Daro (2016)</td>\n",
       "      <td>Adventure|Drama|Romance</td>\n",
       "      <td>2016</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9121</th>\n",
       "      <td>163056</td>\n",
       "      <td>Shin Godzilla (2016)</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>2016</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9122</th>\n",
       "      <td>163949</td>\n",
       "      <td>The Beatles: Eight Days a Week - The Touring Y...</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>2016</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9123</th>\n",
       "      <td>164977</td>\n",
       "      <td>The Gay Desperado (1936)</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>1936</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9124</th>\n",
       "      <td>164979</td>\n",
       "      <td>Women of '69, Unboxed</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9125 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      movieId                                              title  \\\n",
       "0           1                                   Toy Story (1995)   \n",
       "1           2                                     Jumanji (1995)   \n",
       "2           3                            Grumpier Old Men (1995)   \n",
       "3           4                           Waiting to Exhale (1995)   \n",
       "4           5                 Father of the Bride Part II (1995)   \n",
       "...       ...                                                ...   \n",
       "9120   162672                                Mohenjo Daro (2016)   \n",
       "9121   163056                               Shin Godzilla (2016)   \n",
       "9122   163949  The Beatles: Eight Days a Week - The Touring Y...   \n",
       "9123   164977                           The Gay Desperado (1936)   \n",
       "9124   164979                              Women of '69, Unboxed   \n",
       "\n",
       "                                           genres  Year  Adventure  Animation  \\\n",
       "0     Adventure|Animation|Children|Comedy|Fantasy  1995          1          1   \n",
       "1                      Adventure|Children|Fantasy  1995          1          0   \n",
       "2                                  Comedy|Romance  1995          0          0   \n",
       "3                            Comedy|Drama|Romance  1995          0          0   \n",
       "4                                          Comedy  1995          0          0   \n",
       "...                                           ...   ...        ...        ...   \n",
       "9120                      Adventure|Drama|Romance  2016          1          0   \n",
       "9121              Action|Adventure|Fantasy|Sci-Fi  2016          1          0   \n",
       "9122                                  Documentary  2016          0          0   \n",
       "9123                                       Comedy  1936          0          0   \n",
       "9124                                  Documentary   NaN          0          0   \n",
       "\n",
       "      Children  Comedy  Fantasy  Romance  ...  Horror  Mystery  Sci-Fi  \\\n",
       "0            1       1        1        0  ...       0        0       0   \n",
       "1            1       0        1        0  ...       0        0       0   \n",
       "2            0       1        0        1  ...       0        0       0   \n",
       "3            0       1        0        1  ...       0        0       0   \n",
       "4            0       1        0        0  ...       0        0       0   \n",
       "...        ...     ...      ...      ...  ...     ...      ...     ...   \n",
       "9120         0       0        0        1  ...       0        0       0   \n",
       "9121         0       0        1        0  ...       0        0       1   \n",
       "9122         0       0        0        0  ...       0        0       0   \n",
       "9123         0       1        0        0  ...       0        0       0   \n",
       "9124         0       0        0        0  ...       0        0       0   \n",
       "\n",
       "      Documentary  IMAX  War  Musical  Western  Film-Noir  (no genres listed)  \n",
       "0               0     0    0        0        0          0                   0  \n",
       "1               0     0    0        0        0          0                   0  \n",
       "2               0     0    0        0        0          0                   0  \n",
       "3               0     0    0        0        0          0                   0  \n",
       "4               0     0    0        0        0          0                   0  \n",
       "...           ...   ...  ...      ...      ...        ...                 ...  \n",
       "9120            0     0    0        0        0          0                   0  \n",
       "9121            0     0    0        0        0          0                   0  \n",
       "9122            1     0    0        0        0          0                   0  \n",
       "9123            0     0    0        0        0          0                   0  \n",
       "9124            1     0    0        0        0          0                   0  \n",
       "\n",
       "[9125 rows x 24 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Separate the pipe-delimited genres column and one-hot encode it.\n",
    "# Collect the distinct genre names in first-seen order.\n",
    "genres = list(dict.fromkeys(\n",
    "    name for entry in mdf['genres'] for name in entry.split('|')\n",
    "))\n",
    "\n",
    "# Build all 0/1 indicator columns in one vectorised pass and attach them as\n",
    "# whole columns. Writing single cells through `mdf[col][row] = 1` is chained\n",
    "# assignment: it raises SettingWithCopyWarning and has no effect on `mdf`\n",
    "# when pandas Copy-on-Write is enabled.\n",
    "genre_flags = mdf['genres'].str.get_dummies(sep='|')\n",
    "for name in genres:\n",
    "    mdf[name] = genre_flags[name]\n",
    "mdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "7974aaba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The genre information now lives in the one-hot columns, so remove the raw\n",
    "# pipe-delimited string column from the frame (in place).\n",
    "mdf.drop('genres', axis=1, inplace=True)\n",
    "# Sort the rows by index label, also in place.\n",
    "mdf.sort_index(inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "51295269",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Animation    \t\t\t\t447\n",
      "Children    \t\t\t\t583\n",
      "Comedy    \t\t\t\t3315\n",
      "Fantasy    \t\t\t\t654\n",
      "Romance    \t\t\t\t1545\n",
      "Drama    \t\t\t\t4365\n",
      "Action    \t\t\t\t1545\n",
      "Crime    \t\t\t\t1100\n",
      "Thriller    \t\t\t\t1729\n",
      "Horror    \t\t\t\t877\n",
      "Mystery    \t\t\t\t543\n",
      "Sci-Fi    \t\t\t\t792\n",
      "Documentary    \t\t\t\t495\n",
      "IMAX    \t\t\t\t153\n",
      "War    \t\t\t\t367\n",
      "Musical    \t\t\t\t394\n",
      "Western    \t\t\t\t168\n",
      "Film-Noir    \t\t\t\t133\n",
      "(no genres listed)    \t\t\t\t18\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAD4CAYAAAApWAtMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAciklEQVR4nO3de7weVX3v8c+PBMJNCJB9EJLUTWushloiRi4iPQgIAXokakAolaC0FMUKXqqhPecgCD2gpalIrSLhgEgJSMpF4AiRq3JPIARCiAQIJDHAhlxIQq47v/PH7/ewJ5u9s5+dta/h+3698npm5pk1s2bNWvNba2aeHXN3RERESmzV2xkQEZH+T8FERESKKZiIiEgxBRMRESmmYCIiIsUG9nYGNmXIkCHe2NjY29kQEelXpk+f/rq7N/TkPvt0MGlsbGTatGm9nQ0RkX7FzF7q6X3qNpeIiBRTMBERkWIKJiIiUkzBREREiimYiIhIMQUTEREppmAiIiLFFExERKSYgomIiBTr07+Al97VOOG2TqeZd+Ex3ZATEenrNDIREZFiCiYiIlJMwURERIopmIiISDEFExERKaZgIiIixRRMRESkmIKJiIgUUzAREZFiCiYiIlJMwURERIopmIiISDEFExERKVZ3MDGzAWb2hJndmvN7mdkjZjbXzK4zs21y+aCcn5vfN1a2cXYun2NmR3b50YiISK/ozMjkTGB2Zf4iYKK7vx9YApyay08FluTyibkeZjYSOAHYGxgD/NjMBpRlX0RE+oK6gomZDQOOAS7PeQMOBW7IVa4Cxub0sTlPfn9Yrn8sMNnd17j7i8BcYL8uOAYREell9Y5M/g34NrAh53cDlrr7+pxfAAzN6aHAfID8flmu//byNtK8zcxOM7NpZjatqamp/iMREZFe02EwMbO/BF5z9+k9kB/c/TJ3H+3uoxsaGnpilyIiUqie/7b3IODTZnY0sC2wE/BDYLCZDczRxzBgYa6/EBgOLDCzgcDOwBuV5TXVNCIi0o91ODJx97PdfZi7NxIP0O9295OAe4Bxudp44OacviXnye/vdnfP5Sfk2157ASOAR7vsSEREpNfUMzJpz3eAyWZ2PvAEMCmXTwKuNrO5wGIiAOHus8zseuAZYD1whrs3F+xfRET6iE4FE3e/F7g3p1+gjbex3H01cFw76S8ALuhsJkVEpG/TL+BFRKSYgomIiBRTMBERkWIKJiIiUkzBREREiimYiIhIMQUTEREppmAiIiLFFExERKSYgomIiBRTMBERkWIKJiIiUkzBREREiimYiIhIMQUTEREppmAiIiLFFExERKSYgomIiBRTMBERkWIKJiIiUkzBREREiimYiIhIMQUTEREppmAiIiLFFExERKSYgomIiBRTMBERkWIDezsDW7LGCbd1Os28C4/phpyIiHQvjUxERKSYgomIiBRTMBERkWIKJiIiUkzBREREiimYiIhIMQUTEREppmAiIiLFFExERKSYgomIiBTrMJiY2bZm9qiZPWlms8zs3Fy+l5k9YmZzzew6M9smlw/K+bn5fWNlW2fn8jlmdmS3HZWIiPSoekYma4BD3X0fYBQwxswOAC4CJrr7+4ElwKm5/qnAklw+MdfDzEYCJwB7A2OAH5vZgC48FhER6SUdBhMPK3J26/znwKHADbn8KmBsTh+b8+T3h5mZ5fLJ7r7G3V8E5gL7dcVBiIhI76rrmYmZDTCzGcBrwFTgeWCpu6/PVRYAQ3N6KDAfIL9fBuxWXd5Gmuq+TjOzaWY2rampqdMHJCIiPa+uYOLuze4+ChhGjCY+2F0ZcvfL3H20u49uaGjort2IiEgX6tTbXO6+FLgHOBAYbGa1/w9lGLAwpxcCwwHy+52BN6rL20gjIiL9WD1vczWY2eCc3g74FDCbCCrjcrXxwM05fUvOk9/f7e6ey0/It732AkYAj3bRcYiISC+q539a3AO4Kt+82gq43t1v
NbNngMlmdj7wBDAp158EXG1mc4HFxBtcuPssM7seeAZYD5zh7s1dezgiItIbOgwm7j4T+Egby1+gjbex3H01cFw727oAuKDz2RQRkb5Mv4AXEZFiCiYiIlJMwURERIopmIiISDEFExERKaZgIiIixRRMRESkmIKJiIgUUzAREZFiCiYiIlJMwURERIopmIiISDEFExERKaZgIiIixRRMRESkmIKJiIgUUzAREZFiCiYiIlJMwURERIopmIiISDEFExERKaZgIiIixRRMRESkmIKJiIgUUzAREZFiCiYiIlJMwURERIopmIiISDEFExERKaZgIiIixRRMRESkmIKJiIgUUzAREZFiCiYiIlJMwURERIopmIiISDEFExERKaZgIiIixRRMRESkWIfBxMyGm9k9ZvaMmc0yszNz+a5mNtXMnsvPXXK5mdklZjbXzGaa2b6VbY3P9Z8zs/Hdd1giItKT6hmZrAe+6e4jgQOAM8xsJDABuMvdRwB35TzAUcCI/Hca8B8QwQc4B9gf2A84pxaARESkf+swmLj7Ind/PKeXA7OBocCxwFW52lXA2Jw+Fvi5h4eBwWa2B3AkMNXdF7v7EmAqMKYrD0ZERHpHp56ZmFkj8BHgEWB3d1+UX70C7J7TQ4H5lWQLcll7y1vv4zQzm2Zm05qamjqTPRER6SV1BxMz2xGYApzl7m9Wv3N3B7wrMuTul7n7aHcf3dDQ0BWbFBGRblZXMDGzrYlAco27/1cufjVvX5Gfr+XyhcDwSvJhuay95SIi0s/V8zaXAZOA2e7+r5WvbgFqb2SNB26uLD853+o6AFiWt8PuAI4ws13ywfsRuUxERPq5gXWscxDwBeApM5uRy/4RuBC43sxOBV4Cjs/vbgeOBuYCbwFfBHD3xWb2PeCxXO88d1/cFQchIiK9q8Ng4u6/A6ydrw9rY30HzmhnW1cAV3QmgyIi0vfpF/AiIlJMwURERIopmIiISDEFExERKaZgIiIixRRMRESkmIKJiIgUq+dHiyL9VuOE2zqdZt6Fx3RDTkS2bBqZiIhIMQUTEREppmAiIiLFFExERKSYgomIiBRTMBERkWIKJiIiUkzBREREiulHi1uwvvCDvb6QBxHpfhqZiIhIMQUTEREppmAiIiLFFExERKSYgomIiBRTMBERkWIKJiIiUkzBREREiimYiIhIMQUTEREppmAiIiLFFExERKSYgomIiBRTMBERkWIKJiIiUkzBREREiimYiIhIMQUTEREppmAiIiLFFExERKTYwN7OgMimNE64rdNp5l14TDfkREQ2pcORiZldYWavmdnTlWW7mtlUM3suP3fJ5WZml5jZXDObaWb7VtKMz/WfM7Px3XM4IiLSG+q5zXUlMKbVsgnAXe4+Argr5wGOAkbkv9OA/4AIPsA5wP7AfsA5tQAkIiL9X4fBxN3vBxa3WnwscFVOXwWMrSz/uYeHgcFmtgdwJDDV3Re7+xJgKu8MUCIi0k9t7gP43d19UU6/Auye00OB+ZX1FuSy9pa/g5mdZmbTzGxaU1PTZmZPRER6UvHbXO7ugHdBXmrbu8zdR7v76IaGhq7arIiIdKPNDSav5u0r8vO1XL4QGF5Zb1gua2+5iIhsATY3mNwC1N7IGg/cXFl+cr7VdQCwLG+H3QEcYWa75IP3I3KZiIhsATr8nYmZXQscAgwxswXEW1kXAteb2anAS8DxufrtwNHAXOAt4IsA7r7YzL4HPJbrnefurR/qi4hIP9VhMHH3E9v56rA21nXgjHa2cwVwRadyJyIi/YJ+AS+yCfoFvkh99Le5RESkmIKJiIgUUzAREZFiemYi0sfpuY30BxqZiIhIMQUTEREppmAiIiLFFExERKSYgomIiBRTMBERkWJ6NbgP0yuhItJfaGQiIiLFFExERKSYbnNtgm4ziYjURyMTEREpppGJSDfS6FbeLTQyERGRYhqZiGzhNDqSnqCRiYiIFFMwERGRYlv0ba7ODu81tBcR
2TwamYiISLEtemQiIv2fXiDoHxRMRKRbKRi8O+g2l4iIFFMwERGRYgomIiJSTMFERESKKZiIiEgxBRMRESmmYCIiIsUUTEREpJiCiYiIFNMv4EVki6df4Xc/BRMR2SRdiFUG9dBtLhERKaZgIiIixRRMRESkWI8HEzMbY2ZzzGyumU3o6f2LiEjX69FgYmYDgH8HjgJGAiea2ciezIOIiHS9nn6baz9grru/AGBmk4FjgWd6OB8iIj3m3fA2mLl7z+3MbBwwxt3/Jue/AOzv7l+trHMacFrO/ikwpxuyMgR4vR+n7wt56O/p+0Ie3u3p+0Ie+nv69rzP3Ru6Ybvt6nO/M3H3y4DLunMfZjbN3Uf31/R9IQ/9PX1fyMO7PX1fyEN/T9+X9PQD+IXA8Mr8sFwmIiL9WE8Hk8eAEWa2l5ltA5wA3NLDeRARkS7Wo7e53H29mX0VuAMYAFzh7rN6Mg+p9DZab6fvC3no7+n7Qh7e7en7Qh76e/o+o0cfwIuIyJZJv4AXEZFiCiYiIlKszwYTMxtrZm5mH6xj3cs7+iW9mb3XzCab2fNmNt3Mbjez08zs1so6jWb2V5VtHmdml5jZPDMb2Ub6H5nZtzpxTM1mNqPyr7HONGvNbLaZ/crMvmJme9a7zzryMsvMnjSzb5rZZtWHes+VmZ1lZttX5m83s8HtnJsPtJH+wfzcrVKGr5jZwpxeamZ1/QDWzE43s5PNbIWZXZm/gcLMnjWz6zaRzs3sF5X5gWbWVK1Hde6/Of+s0EvV8jezf8pzMjOPaf920o82s0va+e7e3HatjMbV00Yy7Yr8bMxjPb/y3RAzW5fn6ujK8hkWP0Cubuf5VuU038weqcy7mU2rzNdVjmY20czOqszfkcd2ebbRi83s22Z2n8Vf3KimfUf7M7MXzeyBnH66o/LpIG9uZhdX5r9lZt/tIM3pZnZyJ/dzipldWk96MzvEzD7eme1nunl5vrcxs/vNrOPn6+7eJ/8B1wG/Bc7tgm0Z8BBwemXZPsD/Am6tLDukOl9ZPo94E611+iuAb7Wx/sB28rFiM/K+Ivc/BLgKeAEY3QVlsqIy/d+A37RV1u0dy+acq9px1HluDq4nD8B3a+cAaASeriO/AyvTK4ArgXE5/yxwXQfnYwawXc7/j5y/td7yqmznFODSSvlPyrIYlOsMAfbcjPNx7+bWkVq9yLJ8AXii8t2X81h/A1yayz4EPEW84r9Dq3VXAIOBg4C3gIcq3zcDv6+U41HVcmzveIFxwPU5vRUwvdV2HwJ+AJy5qTpfWXYl8Rc46qo7mzoHwGrgxVodB74FfLeddQdszj7yu1Nq5V9HXt9uH52sB/Mqx3EOcFKHaTanwnX3P2DHrJwfAObkskOykdxANPhraHmB4O3GkxX4B8CsrPT7ZSVdBXw612kkLn6/B5YD9+Q2XweW5fpzgb8DbgVeAR4FbsrpVcDjwLVZ6EuycbwFvAz8OCv568AaolGekeusAH6a23sh067MtN/J/I3K9ZYAnvsblttsJv4qwJwsozdzvWeIN0PuBSZmPtYAzwHz83Mx0JD7WJ+V/1niLw78MfBG7vdXWQ7LgAfzfPwWWJp5eRH4eJbjoszTGmAtcATwQObpeWAm8C+ZZkMe68OVCjsWuB/4BvB0/juLON+P5vlZRpzP9cB2+d19wM25n7uBk/K8rQYm5/r3ADcSHYE3iaA3Dfgm2cjYRDABvpD7XgW8Cnww11+bx7MSWJDbfAl4JPPzSeDhPPYVwPuB43L9pjymNXmOmjLff5/LX878vpHl8wTRiVhcSbeUqIdPV+r8X+f06Px+dB7jVXnuVgP/AHyfuPj/Gtg603w0y3N6lvEexLldSdSrp4m28gTRAVteyfcNwI+A1zK/DxJ/uWJPok7Py+0sBv6Q52kQUa9fJepPbfvfIdrbvVl+84FvZh6fzjz9SZ6PJ4k6fR9wJ/A74MA89hezTN6ipQ7flftcn/m5DfirXLYhz9sL
QEOezyWZ9nVa2tkqoq5PAc7Ncnwzz8nk3OdlRF24IctkJtFhasz0r2WaM4i2Mp+oQ7/M8/RvWSavZNp/2VQwYePO1NfyfM3M/DTmdhbmuTo4j28KUcceAw7KtLtlOc4CLs9jqAWTfYDb+2swOQmYlNMPEpX9kDy5w4geyUPAJ9oIJg4cldM3ZgGdBVwNzMjl2wPb5jaXExVzqyzIByrbrAWTN7Li/CQ/j8oTtii/ezD/rSAazRFEpd2baDiLiYZYu+i+lXm7D5heOWFrs+L931x3VFZAJ3p6t2Xa0bn/hUTjOi8r4dWZv4uIhvuN/PzTzMcy4B9zf6uyUm1HNNTdiIboua0FwCXA/yReIZ+S5TiECLQ7A4cRDfHGLL91xAXpy0RAuTn39b7cxjzgs8CUSjA5m+gYPAXsQASuWcDf5LGuB0bl+uuAv87ztpS46H0vj+tcovE0A9fk+i8B5+X0Q8CSSh37LhFMmvP8vEhLMLouj/MN4G9z/V8Cs2m5SL1J1KMZxIX5VeJ193OyzP87UQ9mZ3k+RdSRHxO99bdHJpU8rSYuLk1EZ2E+8LHM46tEELwJOJy4eN6R52EdsHurYDKHuJCsAHYnLhyr2bhtjAW2znw1VPJwBS3B5CaiM3ByltMpbDwymUP80dYxRCfkcFrOb1Oev+nA6URge4gYqTRnOU0h2uJioi7UgsljVHrUtASTz2XZ/xHRPs8i6sBM4FQiEDjw0Ux3EdGuPpjLax2ARVm+64gL8JRc9p/ABUQd/xDRXlfQ0ql8I/f5KyLwDyICZy1IHU20iQ8RwXs+8Ilcvwn4NvAl4HaiE3UBUQ+/Q9TBSVmmtY7y4E4Ekz/QMqod3Pr7nP9PWq6bfwTMzulLgP+d08dkWdWCyQCgqaPrdp/7cyrpROCHOT05528FHnX3BRD3aYnK9btWadcSjRuiAa8hTu7ruT5EA7qUqNQAI9x9g5nNJQq4Le8lKsON7v7/zOwyooEPJC7ynyIuoCOBP8s01+TnIKLSriIq6Zvu/hkzawJ2NbNVlXztBewPvOzuMyx+3OlEg55ONPZRmZ/7c/rTRMBYnMd7CxGcjiUq/0J3X2NmzxK97X8GtgE+QvQAhwMjKuU3g7hIPZDHZUQj+QBRkYcSQahWfh8mRmrrgfcQF5SfEBc/iPL/JdHozuedz+r2zHJdCWBm/wX8ORHkh7j7jMp2GomLwWPuvsjMmvMY78x1FhFBCSJA/q2ZfZoYHawysx3dfUVl36uyvG519xuyjAAOAHYBzjCzM7K89spj3EAEsM8TF4V9gUXu3mxmvwT+yd3vy+cIFwNfybI8Pj83updfMZC4cO1C9CB3Js7hWuIi/HHg3939N2b2M6JnfiIR3Jpbbesk4C+Bde7+qpmtJMq92jYaiXrzZ8BUM4Oog8Mq25lIXGhW5rG/zcxGE+1qOXFhH0N0bmrndxFRHxqJevYyEVA+RdTpx4AjiTZRq/s17T2/eCq3/yPinEwg6uPJRJ3ZkOtNyuPZKfO3Mvc5kKi7tVHge3P9KUS9PZxoYzsA1xPn6jXiAv8y0dYa8zi3J0aj5H4H0DIaG0fUrVdy/QOJc3sdcdGfSFwTPkCcu2VEUL2G6BBMyudHnXkWNxO4xsxuIjoBbTkcGJllA7CTme0I/AXR0cPdbzOzJbUVsl6vNbP3uPvy9nbe5x7Am9muwKHA5WY2j4juxxMXtDWVVZtp+0eX6zzDKXGC1xA93Y9W1v86EQhOJS7I21TWN95pHVEB21PLV+1CMwBY7u6j3H0U0Wi+Uj3M/NyOqLg7uft2RE+61hjW5ueqXOentJTBGKKyX030dL9K9IB/RpzTNUTv4sZc77F8gLYKWGxmX8/1/tzd9yFGTXsRZbou97uSljI+Kfd9QB7Pq0TF3z6Xb030tjcQF0JoabgQPcd7aLnNsW2lLJ5l4z+xU/UWG59zKtusLvfK/LrKOkaMcEcRPdajWgWSTbHc1scy/T7ExaBmGhHgr835
2oV2IdBsZocSt1jvA3D304me52CiU7Dxzsz+OCfHAlOJi9HnaQnYq1oluZO4iH6RqL/3mtnlbFy2sHE5rW/VNgbmcc6q1NVV7n5EJc1KWkYWq1tt+0Sixz+buBitAn4ObGvxMsceWR5G9M4fJi6anyXq30FEp2M7oi5vU9n2Kja+Pm0L4O6/J3rb2+Y2x+Z2dyKCyTPAhsrxfIkIxN8jzuftRKAaStu2ynWvJW4LLSTaQTMt52AgUedrfkLcpqsF9OeIa8sObNwOIMqTLJOpmXaiu48kAtabRL25gegM/Jr6HUP8Fx/70tLm2zq+A2rl4+5D62wTg3jn+X/HhvuaccDV7v4+d2909+FEIzy4YJt3E4VRK9yd8/MTRKWq9RTX8s7GCFGIb2UexprZETk/nOjZ1C6o+xIV7g/ANmZ2YKY34plEa3OJB3HrzOyTxO0giN7O4JweQDS29cQ90Z2JyjaQlgv/GOJWxbjK/oYTI4zZmWbH/O5m4r63u/sKizewDiBuiV3aRh7J9C8TvfxaPt9DNP4NxAW3kZZnGE8S96NrhhCNcjkwvtW2f0uU2ZfMbHsz24H4MztdMWqeQ5RVzTveENuEh4mLw9dy/otE0K2pvXDwVDWRuy8jgu31xGjsJOA+M/sT4vh/Soxwt8r595hZA3FRmUHcVvxtpjuSrEtE/ZpKjJQGEReZWk/+AeAfPP4a9+c6cYwQZdRQqauY2d6t1rmYuJVa6+isI87/8cSodCrx3O1YYoRCfi4kzv3TxO2dAcTFdG/iItucx/1/iGBUtYhoT5jZvrVjtXiT8X5iJP088BF3X0zUlw8Tt7W2MrPP53YGE3V/59znQuI2oNFSfgOJAPcWEaQPB3D3pXmstSA3qpK/Z4lRx8fc/SdsHAjXEef/1MqyB2kZMZ9EnOODgF3zuHYg2vl2wM7ufjvR6d2HOmTwHu7u9xC3zGptfjlxrmruJJ7P1dLVjul+ss2a2VG0dAoxs92A1919HZvQF4PJiUSPumpKLt8s2Rv7DDDAzJ4nIn7tT93vSEtv4Q1gg5k9ycZDfYge0AjiNtaviSBSe07yDeKkX0Jc9JuJ3txFua3RRK+ptb8Dts7bXDcSlRziFt/2ZjaLqOhL8zieIEZZu+V+z8n1v577eCzntwJ+Qdz7Phi4JBsGRE95IGBmtpp4yL2e6Gmf204RXkP0mM4jzsUa4rbRgVlmd5rZTOIZwRyiR/oH4P15/M8SF4zBxG223Vtt/0jidsmS3N5aNr5wb66bgD0zbx+jExdad28iLgbn5Pm5mJZRCMTzlzZfzSWC+mCizowiyu0HxDmaTNSbQUQP+3giUL8KnEncAvpnIuieQlwgf0F0fL5M3AKdkdtrJp6vnQv8MF+1bX27q6PjXJv5rdXV7YjbadV1ZhG3Z2oWZD52JerX94nz+6/E7bkBxAXtytze/cRzo+8T53U1ccH9GfHc5zO01N2au4lbwLOIkffvc/mHiQfE7yPegjs/l68k3taaTYxErszzdi1xYbyYKPPTiRcklhPlO4C4ZXUY0an5GnFb6gSL18xnEgFxPBEwaqOTx4lz9ni+Utz6df2LM13N3xPXmvuINvDl3P/niOvHQ8RIf3vg1qyzv8vv6jEA+IWZPUWUca3N/wr4TL4KfXAe32iLV8+fyfKAqEN/keX9WaJO1nySuJW/SfpzKv1M9kAeB45z9+c2I/1oYlhdMtKTTeiJMrb4vc5TwL45GuoXeiLfOZL5urt/oQu29fYzNov/ZnwPdz+zdLv9ST7DnJC3GNvVF0cm0g6LH53NBe7azEAygRhZnN3VeZPQE2VsZocTty9/1M8CSY/k290fB+5p/aPFzXRM9uqfJkZh53eUYEuSLwDd1FEgAY1MRESkC2hkIiIixRRMRESkmIKJiIgUUzAREZFiCiYiIlLs/wNzbHMdv9dkUQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Count the movies tagged with each genre and plot the totals.\n",
    "# Iterate over the `genres` list instead of a positional column slice: after\n",
    "# the 'genres' column is dropped the one-hot columns start at position 3, so\n",
    "# `mdf.columns[4:23]` skipped 'Adventure'.\n",
    "x = {}\n",
    "for genre in genres:\n",
    "    # The flags are 0/1, so the column sum is the number of tagged movies\n",
    "    # (and, unlike value_counts()[1], it is 0 rather than a KeyError when\n",
    "    # no movie carries the genre).\n",
    "    x[genre] = mdf[genre].sum()\n",
    "    print(\"{}    \\t\\t\\t\\t{}\".format(genre, x[genre]))\n",
    "\n",
    "plt.bar(x=list(x.keys()), height=list(x.values()))\n",
    "# Rotate the tick labels so the genre names stay legible.\n",
    "plt.xticks(rotation=90)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "589b0b8d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>rating</th>\n",
       "      <th>userId</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>321</th>\n",
       "      <td>356</td>\n",
       "      <td>Forrest Gump (1994)</td>\n",
       "      <td>4.054252</td>\n",
       "      <td>341.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>266</th>\n",
       "      <td>296</td>\n",
       "      <td>Pulp Fiction (1994)</td>\n",
       "      <td>4.256173</td>\n",
       "      <td>324.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>284</th>\n",
       "      <td>318</td>\n",
       "      <td>Shawshank Redemption, The (1994)</td>\n",
       "      <td>4.487138</td>\n",
       "      <td>311.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>525</th>\n",
       "      <td>593</td>\n",
       "      <td>Silence of the Lambs, The (1991)</td>\n",
       "      <td>4.138158</td>\n",
       "      <td>304.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>232</th>\n",
       "      <td>260</td>\n",
       "      <td>Star Wars: Episode IV - A New Hope (1977)</td>\n",
       "      <td>4.221649</td>\n",
       "      <td>291.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9112</th>\n",
       "      <td>161336</td>\n",
       "      <td>Author: The JT LeRoy Story (2016)</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9113</th>\n",
       "      <td>161582</td>\n",
       "      <td>Hell or High Water (2016)</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9121</th>\n",
       "      <td>163056</td>\n",
       "      <td>Shin Godzilla (2016)</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9123</th>\n",
       "      <td>164977</td>\n",
       "      <td>The Gay Desperado (1936)</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9124</th>\n",
       "      <td>164979</td>\n",
       "      <td>Women of '69, Unboxed</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9125 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      movieId                                      title    rating  userId\n",
       "321       356                        Forrest Gump (1994)  4.054252   341.0\n",
       "266       296                        Pulp Fiction (1994)  4.256173   324.0\n",
       "284       318           Shawshank Redemption, The (1994)  4.487138   311.0\n",
       "525       593           Silence of the Lambs, The (1991)  4.138158   304.0\n",
       "232       260  Star Wars: Episode IV - A New Hope (1977)  4.221649   291.0\n",
       "...       ...                                        ...       ...     ...\n",
       "9112   161336          Author: The JT LeRoy Story (2016)         0     NaN\n",
       "9113   161582                  Hell or High Water (2016)         0     NaN\n",
       "9121   163056                       Shin Godzilla (2016)         0     NaN\n",
       "9123   164977                   The Gay Desperado (1936)         0     NaN\n",
       "9124   164979                      Women of '69, Unboxed         0     NaN\n",
       "\n",
       "[9125 rows x 4 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Add a Column `rating` in movie DF and assign them with the Mean Movie Rating for that Movie.\n",
    "x=rdf.groupby('movieId').rating.mean()\n",
    "mdf = pd.merge(mdf,x,how='outer',on='movieId')\n",
    "mdf['rating'].fillna('0',inplace=True)\n",
    "# Now Lets group all the ratings with respect to movieId and count the no of Users\n",
    "x = rdf.groupby('movieId',as_index=False).userId.count()\n",
    "x.sort_values('userId',ascending=False,inplace=True)\n",
    "y = pd.merge(mdf,x,how='outer',on='movieId')\n",
    "\n",
    "y.drop(columns=[i for i in mdf.columns[2:23]],inplace=True)\n",
    "\n",
    "y.sort_values(['userId','rating'],ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d1356824",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>671.000000</td>\n",
       "      <td>671.000000</td>\n",
       "      <td>671.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>336.000000</td>\n",
       "      <td>149.037258</td>\n",
       "      <td>3.657587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>193.845299</td>\n",
       "      <td>231.226948</td>\n",
       "      <td>0.471339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>1.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>168.500000</td>\n",
       "      <td>37.000000</td>\n",
       "      <td>3.396193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>336.000000</td>\n",
       "      <td>71.000000</td>\n",
       "      <td>3.675000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>503.500000</td>\n",
       "      <td>161.000000</td>\n",
       "      <td>3.984026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>671.000000</td>\n",
       "      <td>2391.000000</td>\n",
       "      <td>4.948718</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           userId      movieId      rating\n",
       "count  671.000000   671.000000  671.000000\n",
       "mean   336.000000   149.037258    3.657587\n",
       "std    193.845299   231.226948    0.471339\n",
       "min      1.000000    20.000000    1.333333\n",
       "25%    168.500000    37.000000    3.396193\n",
       "50%    336.000000    71.000000    3.675000\n",
       "75%    503.500000   161.000000    3.984026\n",
       "max    671.000000  2391.000000    4.948718"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Find the user with highest no.of. movie ratings and that users mean rating. \n",
    "x = rdf.groupby('userId',as_index=False).movieId.count()\n",
    "y = rdf.groupby('userId',as_index=False).rating.mean()\n",
    "x = pd.merge(x,y,how='outer',on='userId')\n",
    "x.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "fa8ebfee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>546</th>\n",
       "      <td>547</td>\n",
       "      <td>2391</td>\n",
       "      <td>3.366792</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>563</th>\n",
       "      <td>564</td>\n",
       "      <td>1868</td>\n",
       "      <td>3.552463</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>623</th>\n",
       "      <td>624</td>\n",
       "      <td>1735</td>\n",
       "      <td>2.894236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>1700</td>\n",
       "      <td>2.621765</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>73</td>\n",
       "      <td>1610</td>\n",
       "      <td>3.374224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>295</th>\n",
       "      <td>296</td>\n",
       "      <td>20</td>\n",
       "      <td>3.975000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>288</th>\n",
       "      <td>289</td>\n",
       "      <td>20</td>\n",
       "      <td>3.675000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>248</th>\n",
       "      <td>249</td>\n",
       "      <td>20</td>\n",
       "      <td>3.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220</th>\n",
       "      <td>221</td>\n",
       "      <td>20</td>\n",
       "      <td>2.775000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>20</td>\n",
       "      <td>2.550000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>671 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     userId  movieId    rating\n",
       "546     547     2391  3.366792\n",
       "563     564     1868  3.552463\n",
       "623     624     1735  2.894236\n",
       "14       15     1700  2.621765\n",
       "72       73     1610  3.374224\n",
       "..      ...      ...       ...\n",
       "295     296       20  3.975000\n",
       "288     289       20  3.675000\n",
       "248     249       20  3.600000\n",
       "220     221       20  2.775000\n",
       "0         1       20  2.550000\n",
       "\n",
       "[671 rows x 3 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# From below we found that userId - 546 has given the Highest no.of Ratings with \n",
    "# 2391 and an Average of 3.37 Stars followed by userId-564 with 1868 Ratings and \n",
    "# an Average of 3.55 Stars and so on …\n",
    "x.sort_values('movieId',ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9ff9e1b4",
   "metadata": {},
   "source": [
    "# Using the Surprise recommender library api"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "11b04054",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load and Procress all of the data using Surprise library tools.\n",
    "\n",
    "reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)\n",
    "ratingsDataset = Dataset.load_from_file(ratingsFile, reader=reader)\n",
    "\n",
    "movieID_to_name = {}\n",
    "name_to_movieID = {}\n",
    "\n",
    "ratings = defaultdict(int)\n",
    "rankings = defaultdict(int)\n",
    "genres = defaultdict(list)\n",
    "\n",
    "genreIDs = {}\n",
    "maxGenreID = 0\n",
    "rank = 1\n",
    "\n",
    "with open(moviesFile, newline='') as csvfile:\n",
    "    movieReader = csv.reader(csvfile)\n",
    "    next(movieReader)  #Skip header line\n",
    "    for row in movieReader:\n",
    "        movieID = int(row[0])\n",
    "        movieName = row[1]\n",
    "        movieID_to_name[movieID] = movieName\n",
    "        name_to_movieID[movieName] = movieID\n",
    "\n",
    "with open(ratingsFile, newline='') as csvfile:\n",
    "    ratingReader = csv.reader(csvfile)\n",
    "    next(ratingReader)\n",
    "    for row in ratingReader:\n",
    "        movieID = int(row[1])\n",
    "        ratings[movieID] += 1\n",
    "    \n",
    "with open(moviesFile, newline='') as csvfile:\n",
    "    movieReader = csv.reader(csvfile)\n",
    "    next(movieReader)  #Skip header line\n",
    "    for row in movieReader:\n",
    "        movieID = int(row[0])\n",
    "        genreList = row[2].split('|')\n",
    "        genreIDList = []\n",
    "        for genre in genreList:\n",
    "            if genre in genreIDs:\n",
    "                genreID = genreIDs[genre]\n",
    "            else:\n",
    "                genreID = maxGenreID\n",
    "                genreIDs[genre] = genreID\n",
    "                maxGenreID += 1\n",
    "            genreIDList.append(genreID)\n",
    "        genres[movieID] = genreIDList\n",
    "\n",
    "# Build rankings dictionary.\n",
    "for movieID, ratingCount in sorted(ratings.items(), key=lambda x: x[1], reverse=True):\n",
    "    rankings[movieID] = rank\n",
    "    rank += 1\n",
    "\n",
    "# Convert integer-encoded genre lists to bitfields that we can treat as vectors\n",
    "for (movieID, genreIDList) in genres.items():\n",
    "    bitfield = [0] * maxGenreID\n",
    "    for genreID in genreIDList:\n",
    "         bitfield[genreID] = 1\n",
    "    genres[movieID] = bitfield\n",
    "            \n",
    "def getUserRatings(user):\n",
    "    userRatings = []\n",
    "    hitUser = False\n",
    "    with open(ratingsFile, newline='') as csvfile:\n",
    "        ratingReader = csv.reader(csvfile)\n",
    "        next(ratingReader)\n",
    "        for row in ratingReader:\n",
    "            userID = int(row[0])\n",
    "            if (user == userID):\n",
    "                movieID = int(row[1])\n",
    "                rating = float(row[2])\n",
    "                userRatings.append((movieID, rating))\n",
    "                hitUser = True\n",
    "            if (hitUser and (user != userID)):\n",
    "                break\n",
    "    return userRatings\n",
    "        \n",
    "def BuildAntiTestSetForUser(testSubject, trainset):\n",
    "    fill = trainset.global_mean\n",
    "    anti_testset = []\n",
    "    u = trainset.to_inner_uid(str(testSubject))\n",
    "    user_items = set([j for (j, _) in trainset.ur[u]])\n",
    "    anti_testset += [(trainset.to_raw_uid(u), trainset.to_raw_iid(i), fill) for\n",
    "                             i in trainset.all_items() if\n",
    "                             i not in user_items]\n",
    "    return anti_testset\n",
    "\n",
    "def getMovieName(movieID):\n",
    "    if movieID in movieID_to_name:\n",
    "        return movieID_to_name[movieID]\n",
    "    else:\n",
    "        return \"\"\n",
    "        \n",
    "def getMovieID(movieName):\n",
    "    if movieName in name_to_movieID:\n",
    "        return name_to_movieID[movieName]\n",
    "    else:\n",
    "        return 0  \n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "806336c7",
   "metadata": {},
   "source": [
    "# Simple Collaborative Filtering\n",
    "![alt text](collabf.png \"Simple Collaborative filtering\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "d75b4a7b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "James Dean Story, The (1957) 10.0\n",
      "Get Real (1998) 9.987241120712646\n",
      "Kiss of Death (1995) 9.966881877751941\n",
      "Set It Off (1996) 9.963732215657119\n",
      "How Green Was My Valley (1941) 9.943984081065269\n",
      "Amos & Andrew (1993) 9.93973694500253\n",
      "My Crazy Life (Mi vida loca) (1993) 9.938290487546041\n",
      "Grace of My Heart (1996) 9.926255896645218\n",
      "Fanny and Alexander (Fanny och Alexander) (1982) 9.925699671455906\n",
      "Wild Reeds (Les roseaux sauvages) (1994) 9.916226404418774\n",
      "Edge of Seventeen (1998) 9.913028764691676\n"
     ]
    }
   ],
   "source": [
    "# Simple Item Based Collaborative Filtering -- the algorithm that Amazon made famous in 2003.\n",
    "\n",
    "from surprise import KNNBasic\n",
    "import heapq\n",
    "from collections import defaultdict\n",
    "from operator import itemgetter\n",
    "\n",
    "testSubject = '85'\n",
    "k = 10\n",
    "trainSet = ratingsDataset.build_full_trainset()\n",
    "sim_options = {'name': 'cosine','user_based': False}\n",
    "model = KNNBasic(sim_options=sim_options)\n",
    "model.fit(trainSet)\n",
    "simsMatrix = model.compute_similarities()\n",
    "testUserInnerID = trainSet.to_inner_uid(testSubject)\n",
    "# Get the top K items we rated\n",
    "testUserRatings = trainSet.ur[testUserInnerID]\n",
    "kNeighbors = heapq.nlargest(k, testUserRatings, key=lambda t: t[1])\n",
    "# Get similar items to stuff we liked (weighted by rating)\n",
    "candidates = defaultdict(float)\n",
    "\n",
    "for itemID, rating in kNeighbors:\n",
    "    similarityRow = simsMatrix[itemID]\n",
    "    for innerID, score in enumerate(similarityRow):\n",
    "        candidates[innerID] += score * (rating / 5.0)\n",
    "\n",
    "#  Build a dictionary of stuff the user has already seen\n",
    "watched = {}\n",
    "\n",
    "for itemID, rating in trainSet.ur[testUserInnerID]:\n",
    "    watched[itemID] = 1\n",
    "\n",
    "# Get top-rated items from similar users:\n",
    "pos = 0\n",
    "\n",
    "for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):\n",
    "    if not itemID in watched:\n",
    "        movieID = trainSet.to_raw_iid(itemID)\n",
    "        print(getMovieName(int(movieID)), ratingSum)\n",
    "        pos += 1\n",
    "        if (pos > 10):\n",
    "            break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "16f69db3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Inception (2010) 3.3\n",
      "Star Wars: Episode V - The Empire Strikes Back (1980) 2.4\n",
      "Bourne Identity, The (1988) 2.0\n",
      "Crouching Tiger, Hidden Dragon (Wo hu cang long) (2000) 2.0\n",
      "Dark Knight, The (2008) 2.0\n",
      "Good, the Bad and the Ugly, The (Buono, il brutto, il cattivo, Il) (1966) 1.9\n",
      "Departed, The (2006) 1.9\n",
      "Dark Knight Rises, The (2012) 1.9\n",
      "Back to the Future (1985) 1.9\n",
      "Gravity (2013) 1.8\n",
      "Fight Club (1999) 1.8\n"
     ]
    }
   ],
   "source": [
    "# Simple User-based collaborative filtering\n",
    "from surprise import KNNBasic\n",
    "import heapq\n",
    "from collections import defaultdict\n",
    "from operator import itemgetter\n",
    "\n",
    "testSubject = '85'\n",
    "k = 10\n",
    "trainSet = ratingsDataset.build_full_trainset()\n",
    "sim_options = {'name': 'cosine','user_based': True}\n",
    "model = KNNBasic(sim_options=sim_options)\n",
    "model.fit(trainSet)\n",
    "simsMatrix = model.compute_similarities()\n",
    "# Get top N similar users to our test subject\n",
    "# (Alternate approach would be to select users up to some similarity threshold - try it!)\n",
    "testUserInnerID = trainSet.to_inner_uid(testSubject)\n",
    "similarityRow = simsMatrix[testUserInnerID]\n",
    "similarUsers = []\n",
    "for innerID, score in enumerate(similarityRow):\n",
    "    if (innerID != testUserInnerID):\n",
    "        similarUsers.append( (innerID, score) )\n",
    "\n",
    "kNeighbors = heapq.nlargest(k, similarUsers, key=lambda t: t[1])\n",
    "# Get the stuff they rated, and add up ratings for each item, weighted by user similarity\n",
    "candidates = defaultdict(float)\n",
    "for similarUser in kNeighbors:\n",
    "    innerID = similarUser[0]\n",
    "    userSimilarityScore = similarUser[1]\n",
    "    theirRatings = trainSet.ur[innerID]\n",
    "    for rating in theirRatings:\n",
    "        candidates[rating[0]] += (rating[1] / 5.0) * userSimilarityScore\n",
    "\n",
    "# Build a dictionary of stuff the user has already seen\n",
    "watched = {}\n",
    "for itemID, rating in trainSet.ur[testUserInnerID]:\n",
    "    watched[itemID] = 1\n",
    "\n",
    "# Get top-rated items from similar users:\n",
    "pos = 0\n",
    "for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):\n",
    "    if not itemID in watched:\n",
    "        movieID = trainSet.to_raw_iid(itemID)\n",
    "        print(getMovieName(int(movieID)), ratingSum)\n",
    "        pos += 1\n",
    "        if (pos > 10):\n",
    "            break\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "12638d74",
   "metadata": {},
   "source": [
    "# Evaluation of Recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "807914e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Helper class to evalute Recommendation Systems Quality by multiple poplular measures.\n",
    "# As you can see there is no shortage on ways that quality is measured in this field.\n",
    "import itertools\n",
    "from surprise import accuracy\n",
    "from collections import defaultdict\n",
    "\n",
    "class RecommenderMetrics:\n",
    "\n",
    "    # Mean Average Error\n",
    "    def MAE(predictions):\n",
    "        return accuracy.mae(predictions, verbose=False)\n",
    "    \n",
    "    # Root Mean Squared error\n",
    "    def RMSE(predictions):\n",
    "        return accuracy.rmse(predictions, verbose=False)\n",
    "\n",
    "    # Not a metric. It s the top-N predictions for an algorithm.\n",
    "    def GetTopN(predictions, n=10, minimumRating=4.0):\n",
    "        topN = defaultdict(list)\n",
    "        for userID, movieID, actualRating, estimatedRating, _ in predictions:\n",
    "            if (estimatedRating >= minimumRating):\n",
    "                topN[int(userID)].append((int(movieID), estimatedRating))\n",
    "        for userID, ratings in topN.items():\n",
    "            ratings.sort(key=lambda x: x[1], reverse=True)\n",
    "            topN[int(userID)] = ratings[:n]\n",
    "        return topN\n",
    "\n",
    "    # How good is this top-10 list?\n",
    "    # - Find all items in this user’s history in the training data.\n",
    "    # - Intentionally remove one of these items ( Leave-One-Out cross-validation).\n",
    "    # - Use all other items to feed the recommender and ask for top 10 recommendations.\n",
    "    # - If the removed item appear in the top 10 recommendations, it is a hit. If not, it’s not a hit.\n",
    "    def HitRate(topNPredicted, leftOutPredictions):\n",
    "        hits = 0\n",
    "        total = 0\n",
    "        # For each left-out rating\n",
    "        for leftOut in leftOutPredictions:\n",
    "            userID = leftOut[0]\n",
    "            leftOutMovieID = leftOut[1]\n",
    "            # Is it in the predicted top 10 for this user?\n",
    "            hit = False\n",
    "            for movieID, predictedRating in topNPredicted[int(userID)]:\n",
    "                if (int(leftOutMovieID) == int(movieID)):\n",
    "                    hit = True\n",
    "                    break\n",
    "            if (hit) :\n",
    "                hits += 1\n",
    "            total += 1\n",
    "        # Compute overall precision\n",
    "        return hits/total\n",
    "    \n",
    "    # Normally you set the rating cutoff. When you do the quantifies how good\n",
    "    # the results are based on rating preference.\n",
    "    def CumulativeHitRate(topNPredicted, leftOutPredictions, ratingCutoff=0):\n",
    "        hits = 0\n",
    "        total = 0\n",
    "        # For each left-out rating\n",
    "        for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:\n",
    "            # Only look at ability to recommend things the users actually liked...\n",
    "            if (actualRating >= ratingCutoff):\n",
    "                # Is it in the predicted top 10 for this user?\n",
    "                hit = False\n",
    "                for movieID, predictedRating in topNPredicted[int(userID)]:\n",
    "                    if (int(leftOutMovieID) == movieID):\n",
    "                        hit = True\n",
    "                        break\n",
    "                if (hit) :\n",
    "                    hits += 1\n",
    "                total += 1\n",
    "        # Compute overall precision\n",
    "        return hits/total\n",
    "    \n",
    "    # Determine the hit rat by each rating grade.\n",
    "    def RatingHitRate(topNPredicted, leftOutPredictions):\n",
    "        hits = defaultdict(float)\n",
    "        total = defaultdict(float)\n",
    "        # For each left-out rating\n",
    "        for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:\n",
    "            # Is it in the predicted top N for this user?\n",
    "            hit = False\n",
    "            for movieID, predictedRating in topNPredicted[int(userID)]:\n",
    "                if (int(leftOutMovieID) == movieID):\n",
    "                    hit = True\n",
    "                    break\n",
    "            if (hit) :\n",
    "                hits[actualRating] += 1\n",
    "            total[actualRating] += 1\n",
    "        # Compute overall precision\n",
    "        for rating in sorted(hits.keys()):\n",
    "            print (rating, hits[rating] / total[rating])\n",
    "    \n",
    "    # Some argue that this is one of the best ways to evaluate a recommender system.\n",
    "    # It is similar to reciprical rank -- the average of the first relevant hit for each prediction.\n",
    "    def AverageReciprocalHitRank(topNPredicted, leftOutPredictions):\n",
    "        summation = 0\n",
    "        total = 0\n",
    "        # For each left-out rating\n",
    "        for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:\n",
    "            # Is it in the predicted top N for this user?\n",
    "            hitRank = 0\n",
    "            rank = 0\n",
    "            for movieID, predictedRating in topNPredicted[int(userID)]:\n",
    "                rank = rank + 1\n",
    "                if (int(leftOutMovieID) == movieID):\n",
    "                    hitRank = rank\n",
    "                    break\n",
    "            if (hitRank > 0) :\n",
    "                summation += 1.0 / hitRank\n",
    "            total += 1\n",
    "        return summation / total\n",
    "\n",
    "    # What percentage of users have at least one \"good\" recommendation?\n",
    "    def UserCoverage(topNPredicted, numUsers, ratingThreshold=0):\n",
    "        hits = 0\n",
    "        for userID in topNPredicted.keys():\n",
    "            hit = False\n",
    "            for movieID, predictedRating in topNPredicted[userID]:\n",
    "                if (predictedRating >= ratingThreshold):\n",
    "                    hit = True\n",
    "                    break\n",
    "            if (hit):\n",
    "                hits += 1\n",
    "        return hits / numUsers\n",
    "\n",
    "    # A somewhat infamous issue in recommendations is that over time, they do a poor\n",
    "    # job of recommending \"different\" things than what you have already selected.\n",
    "    def Diversity(topNPredicted, simsAlgo):\n",
    "        n = 0\n",
    "        total = 0\n",
    "        simsMatrix = simsAlgo.compute_similarities()\n",
    "        for userID in topNPredicted.keys():\n",
    "            pairs = itertools.combinations(topNPredicted[userID], 2)\n",
    "            for pair in pairs:\n",
    "                movie1 = pair[0][0]\n",
    "                movie2 = pair[1][0]\n",
    "                innerID1 = simsAlgo.trainset.to_inner_iid(str(movie1))\n",
    "                innerID2 = simsAlgo.trainset.to_inner_iid(str(movie2))\n",
    "                similarity = simsMatrix[innerID1][innerID2]\n",
    "                total += similarity\n",
    "                n += 1\n",
    "        S = total / n\n",
    "        return (1-S)\n",
    "\n",
    "    # Closely related to the diversity problem -- does the algorithm recommend really\n",
    "    # \"differet\" things or not? For example if you watch a bunch of horror movies, will\n",
    "    # you only see other horror movies in the list?\n",
    "    def Novelty(topNPredicted, rankings):\n",
    "        n = 0\n",
    "        total = 0\n",
    "        for userID in topNPredicted.keys():\n",
    "            for rating in topNPredicted[userID]:\n",
    "                movieID = rating[0]\n",
    "                rank = rankings[movieID]\n",
    "                total += rank\n",
    "                n += 1\n",
    "        return total / n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "2047b257",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Helper Class for Evaluation runs\n",
    "from surprise.model_selection import train_test_split\n",
    "from surprise.model_selection import LeaveOneOut\n",
    "from surprise import KNNBaseline\n",
    "\n",
    "class EvaluationData:\n",
    "    \"\"\"Prepares, once, every split of a Surprise dataset that the evaluation code reads.\n",
    "\n",
    "    Built in __init__ and exposed through the Get* accessors:\n",
    "      * the full training set and its anti-test set,\n",
    "      * a 75/25 train/test split (random_state=1),\n",
    "      * a single leave-one-out split (random_state=1) plus the anti-test set of its training part,\n",
    "      * a KNNBaseline model with item-based cosine similarities, fitted on the full training set,\n",
    "      * the popularity rankings handed in by the caller (stored untouched).\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, data, popularityRankings):\n",
    "        self.rankings = popularityRankings\n",
    "\n",
    "        # Whole-dataset training set and its complement, for overall properties\n",
    "        self.fullTrainSet = data.build_full_trainset()\n",
    "        self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()\n",
    "\n",
    "        # 75/25 train/test split, used to measure accuracy\n",
    "        self.trainSet, self.testSet = train_test_split(data, test_size=.25, random_state=1)\n",
    "\n",
    "        # Leave-one-out split for top-N evaluation; n_splits=1 means exactly one (train, test) pair\n",
    "        looSplitter = LeaveOneOut(n_splits=1, random_state=1)\n",
    "        self.LOOCVTrain, self.LOOCVTest = next(looSplitter.split(data))\n",
    "        # Anti-test set of the leave-one-out training part, used to build predictions\n",
    "        self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()\n",
    "\n",
    "        # Item-item cosine similarities, kept so diversity can be measured later\n",
    "        self.simsAlgo = KNNBaseline(sim_options={'name': 'cosine', 'user_based': False})\n",
    "        self.simsAlgo.fit(self.fullTrainSet)\n",
    "\n",
    "    def GetFullTrainSet(self):\n",
    "        \"\"\"Return the training set built from all of the data.\"\"\"\n",
    "        return self.fullTrainSet\n",
    "\n",
    "    def GetFullAntiTestSet(self):\n",
    "        \"\"\"Return the anti-test set of the full training set.\"\"\"\n",
    "        return self.fullAntiTestSet\n",
    "\n",
    "    def GetAntiTestSetForUser(self, testSubject):\n",
    "        \"\"\"Return the anti-test set of the full training set restricted to one user.\n",
    "\n",
    "        testSubject is converted with str() before the raw-id lookup. The result is a\n",
    "        list of (raw user id, raw item id, global mean rating) tuples, one for every\n",
    "        item of the full training set that the user has no rating for.\n",
    "        \"\"\"\n",
    "        fullSet = self.fullTrainSet\n",
    "        globalMean = fullSet.global_mean\n",
    "        innerUid = fullSet.to_inner_uid(str(testSubject))\n",
    "        alreadyRated = {innerIid for (innerIid, _) in fullSet.ur[innerUid]}\n",
    "        return [(fullSet.to_raw_uid(innerUid), fullSet.to_raw_iid(innerIid), globalMean)\n",
    "                for innerIid in fullSet.all_items()\n",
    "                if innerIid not in alreadyRated]\n",
    "\n",
    "    def GetTrainSet(self):\n",
    "        \"\"\"Return the training part of the 75/25 split.\"\"\"\n",
    "        return self.trainSet\n",
    "\n",
    "    def GetTestSet(self):\n",
    "        \"\"\"Return the test part of the 75/25 split.\"\"\"\n",
    "        return self.testSet\n",
    "\n",
    "    def GetLOOCVTrainSet(self):\n",
    "        \"\"\"Return the training part of the leave-one-out split.\"\"\"\n",
    "        return self.LOOCVTrain\n",
    "\n",
    "    def GetLOOCVTestSet(self):\n",
    "        \"\"\"Return the left-out test part of the leave-one-out split.\"\"\"\n",
    "        return self.LOOCVTest\n",
    "\n",
    "    def GetLOOCVAntiTestSet(self):\n",
    "        \"\"\"Return the anti-test set of the leave-one-out training part.\"\"\"\n",
    "        return self.LOOCVAntiTestSet\n",
    "\n",
    "    def GetSimilarities(self):\n",
    "        \"\"\"Return the fitted KNNBaseline model that holds the item similarities.\"\"\"\n",
    "        return self.simsAlgo\n",
    "\n",
    "    def GetPopularityRankings(self):\n",
    "        \"\"\"Return the popularity rankings passed to the constructor.\"\"\"\n",
    "        return self.rankings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "1b30c736",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Finally, a helper class that combines the two above into a single comprehensive tool to measure quality and\n",
    "# compare algorithm performance.\n",
    "class EvaluationAlgorithm:\n",
    "    \"\"\"Couples a recommender algorithm with a display name and scores it on an EvaluationData.\"\"\"\n",
    "\n",
    "    def __init__(self, algorithm, name):\n",
    "        self.algorithm = algorithm\n",
    "        self.name = name\n",
    "\n",
    "    def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):\n",
    "        \"\"\"Fit the wrapped algorithm and return a dict of its metrics.\n",
    "\n",
    "        evaluationData: supplies the train/test splits through its Get* accessors.\n",
    "        doTopN: when true, the top-N metrics (HR, cHR, ARHR, Coverage, Diversity,\n",
    "            Novelty) are computed in addition to RMSE and MAE.\n",
    "        n: length of the top-N lists requested from RecommenderMetrics.GetTopN.\n",
    "        verbose: when true, progress messages are printed.\n",
    "        \"\"\"\n",
    "        def report(message):\n",
    "            # Progress output is optional\n",
    "            if verbose:\n",
    "                print(message)\n",
    "\n",
    "        algo = self.algorithm\n",
    "        metrics = {}\n",
    "\n",
    "        # Accuracy on the train/test split\n",
    "        report(\"Evaluating accuracy...\")\n",
    "        algo.fit(evaluationData.GetTrainSet())\n",
    "        predictions = algo.test(evaluationData.GetTestSet())\n",
    "        metrics[\"RMSE\"] = RecommenderMetrics.RMSE(predictions)\n",
    "        metrics[\"MAE\"] = RecommenderMetrics.MAE(predictions)\n",
    "\n",
    "        if doTopN:\n",
    "            # Top-N quality with leave-one-out testing\n",
    "            report(\"Evaluating top-N with leave-one-out...\")\n",
    "            algo.fit(evaluationData.GetLOOCVTrainSet())\n",
    "            heldOutPredictions = algo.test(evaluationData.GetLOOCVTestSet())\n",
    "            # Predict every rating missing from the leave-one-out training set, keep the best n per user\n",
    "            looPredictions = algo.test(evaluationData.GetLOOCVAntiTestSet())\n",
    "            looTopN = RecommenderMetrics.GetTopN(looPredictions, n)\n",
    "            report(\"Computing hit-rate and rank metrics...\")\n",
    "            # How often a left-out movie shows up in the user's top-N\n",
    "            metrics[\"HR\"] = RecommenderMetrics.HitRate(looTopN, heldOutPredictions)\n",
    "            # Same idea, computed by CumulativeHitRate\n",
    "            metrics[\"cHR\"] = RecommenderMetrics.CumulativeHitRate(looTopN, heldOutPredictions)\n",
    "            # Average reciprocal hit rank\n",
    "            metrics[\"ARHR\"] = RecommenderMetrics.AverageReciprocalHitRank(looTopN, heldOutPredictions)\n",
    "\n",
    "            # Properties of the recommendations when trained on the full data set\n",
    "            report(\"Computing recommendations with full data set...\")\n",
    "            fullTrainSet = evaluationData.GetFullTrainSet()\n",
    "            algo.fit(fullTrainSet)\n",
    "            fullPredictions = algo.test(evaluationData.GetFullAntiTestSet())\n",
    "            fullTopN = RecommenderMetrics.GetTopN(fullPredictions, n)\n",
    "            report(\"Analyzing coverage, diversity, and novelty...\")\n",
    "            # User coverage with a rating threshold of 4.0\n",
    "            metrics[\"Coverage\"] = RecommenderMetrics.UserCoverage(\n",
    "                fullTopN, evaluationData.GetFullTrainSet().n_users, ratingThreshold=4.0)\n",
    "            # Diversity, measured with the similarities held by evaluationData\n",
    "            metrics[\"Diversity\"] = RecommenderMetrics.Diversity(\n",
    "                fullTopN, evaluationData.GetSimilarities())\n",
    "            # Novelty, measured against the popularity rankings\n",
    "            metrics[\"Novelty\"] = RecommenderMetrics.Novelty(\n",
    "                fullTopN, evaluationData.GetPopularityRankings())\n",
    "\n",
    "        report(\"Analysis complete.\")\n",
    "        return metrics\n",
    "\n",
    "    def GetName(self):\n",
    "        \"\"\"Return the display name given at construction.\"\"\"\n",
    "        return self.name\n",
    "\n",
    "    def GetAlgorithm(self):\n",
    "        \"\"\"Return the wrapped algorithm object.\"\"\"\n",
    "        return self.algorithm\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "10ab14a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Evaluator:\n",
    "    \"\"\"Runs a set of named algorithms against one EvaluationData and prints the comparison.\n",
    "\n",
    "    Algorithms are registered with AddAlgorithm(), scored with Evaluate() and\n",
    "    spot-checked for a single user with SampleTopNRecs().\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, dataset, rankings):\n",
    "        \"\"\"Build the shared EvaluationData from dataset and rankings.\"\"\"\n",
    "        self.dataset = EvaluationData(dataset, rankings)\n",
    "        # Kept per instance: a class-level list would be shared by every Evaluator, so\n",
    "        # re-running the cell that creates one would pile up duplicate algorithms.\n",
    "        self.algorithms = []\n",
    "\n",
    "    def AddAlgorithm(self, algorithm, name):\n",
    "        \"\"\"Register algorithm under the display name used in the printed reports.\"\"\"\n",
    "        self.algorithms.append(EvaluationAlgorithm(algorithm, name))\n",
    "\n",
    "    def Evaluate(self, doTopN):\n",
    "        \"\"\"Evaluate every registered algorithm, then print a results table and a legend.\n",
    "\n",
    "        doTopN: when true, the table also carries the top-N columns (HR, cHR, ARHR,\n",
    "            Coverage, Diversity, Novelty); otherwise only RMSE and MAE are shown.\n",
    "        Nothing is returned; the results are only printed.\n",
    "        \"\"\"\n",
    "        results = {}\n",
    "        for algorithm in self.algorithms:\n",
    "            print(\"Evaluating \", algorithm.GetName(), \"...\")\n",
    "            results[algorithm.GetName()] = algorithm.Evaluate(self.dataset, doTopN)\n",
    "        # Print results\n",
    "        print(\"\\n\")\n",
    "        metricNames = [\"RMSE\", \"MAE\"]\n",
    "        if (doTopN):\n",
    "            metricNames += [\"HR\", \"cHR\", \"ARHR\", \"Coverage\", \"Diversity\", \"Novelty\"]\n",
    "        # Every column is left-aligned in a 10-character field; metric values show 4 decimals\n",
    "        print(\" \".join(\"{:<10}\".format(title) for title in [\"Algorithm\"] + metricNames))\n",
    "        for (name, metrics) in results.items():\n",
    "            cells = [\"{:<10}\".format(name)]\n",
    "            cells += [\"{:<10.4f}\".format(metrics[metricName]) for metricName in metricNames]\n",
    "            print(\" \".join(cells))\n",
    "        print(\"\\nLegend:\\n\")\n",
    "        print(\"RMSE:      Root Mean Squared Error. Lower values mean better accuracy.\")\n",
    "        print(\"MAE:       Mean Absolute Error. Lower values mean better accuracy.\")\n",
    "        if (doTopN):\n",
    "            print(\"HR:        Hit Rate; how often we are able to recommend a left-out rating. Higher is better.\")\n",
    "            print(\"cHR:       Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.\")\n",
    "            print(\"ARHR:      Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better.\" )\n",
    "            print(\"Coverage:  Ratio of users for whom recommendations above a certain threshold exist. Higher is better.\")\n",
    "            print(\"Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations\")\n",
    "            print(\"           for a given user. Higher means more diverse.\")\n",
    "            print(\"Novelty:   Average popularity rank of recommended items. Higher means more novel.\")\n",
    "\n",
    "    def SampleTopNRecs(self, testSubject=85, k=10):\n",
    "        \"\"\"Print the k best-scored unrated movies for testSubject under every registered algorithm.\n",
    "\n",
    "        Each algorithm is refitted on the full training set and then asked to predict\n",
    "        every item the user has no rating for.\n",
    "\n",
    "        testSubject: raw id of the user to build recommendations for.\n",
    "        k: number of recommendations printed per algorithm.\n",
    "        \"\"\"\n",
    "        # Neither depends on the algorithm, so both are fetched once\n",
    "        trainSet = self.dataset.GetFullTrainSet()\n",
    "        testSet = self.dataset.GetAntiTestSetForUser(testSubject)\n",
    "        for algo in self.algorithms:\n",
    "            print(\"\\nUsing recommendation algorithm\", algo.GetName())\n",
    "            print(\"\\nBuilding recommendation model...\")\n",
    "            algo.GetAlgorithm().fit(trainSet)\n",
    "            print(\"Computing recommendations...\")\n",
    "            predictions = algo.GetAlgorithm().test(testSet)\n",
    "            print (\"\\nRecommend:\")\n",
    "            # Each prediction unpacks to (user, item, actual rating, estimated rating, details)\n",
    "            recommendations = [(int(movieID), estimatedRating)\n",
    "                               for _, movieID, _, estimatedRating, _ in predictions]\n",
    "            recommendations.sort(key=lambda x: x[1], reverse=True)\n",
    "            # Honour k instead of a hard-coded 10\n",
    "            for movieID, estimatedRating in recommendations[:k]:\n",
    "                print(getMovieName(movieID), estimatedRating)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "02da19d4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Estimating biases using als...\n",
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "HR 0.05514157973174367\n"
     ]
    }
   ],
   "source": [
    "#Evaluating UserCF\n",
    "from surprise import KNNBasic\n",
    "import heapq\n",
    "from collections import defaultdict\n",
    "from operator import itemgetter\n",
    "from surprise.model_selection import LeaveOneOut\n",
    "\n",
    "k = 10\n",
    "evalData = EvaluationData(ratingsDataset, rankings)\n",
    "# Train on the leave-one-out train set\n",
    "trainSet = evalData.GetLOOCVTrainSet()\n",
    "sim_options = {'name': 'cosine','user_based': True}\n",
    "model = KNNBasic(sim_options=sim_options)\n",
    "model.fit(trainSet)\n",
    "# fit() has already computed the user-user similarity matrix and stored it on the model;\n",
    "# reuse it instead of paying for a second compute_similarities() pass.\n",
    "simsMatrix = model.sim\n",
    "leftOutTestSet = evalData.GetLOOCVTestSet()\n",
    "# Maps int(raw user id) -> list of (int(movieID), 0.0) pairs; the score slot is a placeholder\n",
    "topN = defaultdict(list)\n",
    "for uiid in range(trainSet.n_users):\n",
    "    # The k most similar users, leaving out the user themself\n",
    "    similarUsers = [(innerID, score)\n",
    "                    for innerID, score in enumerate(simsMatrix[uiid])\n",
    "                    if innerID != uiid]\n",
    "    kNeighbors = heapq.nlargest(k, similarUsers, key=itemgetter(1))\n",
    "    # Add up the neighbours' ratings per item, scaled to 0..1 and weighted by user similarity\n",
    "    candidates = defaultdict(float)\n",
    "    for neighborID, userSimilarityScore in kNeighbors:\n",
    "        for itemID, rating in trainSet.ur[neighborID]:\n",
    "            candidates[itemID] += (rating / 5.0) * userSimilarityScore\n",
    "    # Items the user has already rated must not be recommended again\n",
    "    watched = {itemID for itemID, _ in trainSet.ur[uiid]}\n",
    "    # Walk the candidates from highest to lowest score\n",
    "    pos = 0\n",
    "    for itemID, _ in sorted(candidates.items(), key=itemgetter(1), reverse=True):\n",
    "        if itemID not in watched:\n",
    "            movieID = trainSet.to_raw_iid(itemID)\n",
    "            topN[int(trainSet.to_raw_uid(uiid))].append( (int(movieID), 0.0) )\n",
    "            pos += 1\n",
    "            # NOTE: the check runs after the increment, so up to 41 items are kept per user\n",
    "            if (pos > 40):\n",
    "                break\n",
    "\n",
    "# Measure\n",
    "print(\"HR\", RecommenderMetrics.HitRate(topN, leftOutTestSet))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ac270abc",
   "metadata": {},
   "source": [
    "# A more advanced model -- Singular Value Decomposition\n",
    "* As discussed above, the data is really a large matrix of Users and Items\n",
    "* But the data is very sparse. Many users only rate a few items. \n",
    "* This biases the dataset somewhat, and SVD is a matrix method that can be used to alleviate this bias.\n",
    "* Simon Funk was the first to use this method in the now-famous Netflix Prize challenge. \n",
    "* See his blog if you are interested in how it really works -- https://sifter.org/~simon/journal/20061211.html\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "93ca6ba4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading movie ratings...\n",
      "\n",
      "User  85  loved these movies:\n",
      "Jumanji (1995)\n",
      "GoldenEye (1995)\n",
      "Braveheart (1995)\n",
      "Jerky Boys, The (1995)\n",
      "Léon: The Professional (a.k.a. The Professional) (Léon) (1994)\n",
      "Pulp Fiction (1994)\n",
      "Stargate (1994)\n",
      "Shawshank Redemption, The (1994)\n",
      "Star Trek: Generations (1994)\n",
      "Clear and Present Danger (1994)\n",
      "Speed (1994)\n",
      "True Lies (1994)\n",
      "Fugitive, The (1993)\n",
      "Jurassic Park (1993)\n",
      "Terminator 2: Judgment Day (1991)\n",
      "Mission: Impossible (1996)\n",
      "Rock, The (1996)\n",
      "\n",
      "...and didn't like these movies:\n",
      "Grumpier Old Men (1995)\n",
      "Mortal Kombat (1995)\n",
      "Postman, The (Postino, Il) (1994)\n",
      "Casper (1995)\n",
      "Lord of Illusions (1995)\n",
      "Mighty Morphin Power Rangers: The Movie (1995)\n",
      "Prophecy, The (1995)\n",
      "Dolores Claiborne (1995)\n",
      "Heavenly Creatures (1994)\n",
      "Little Women (1994)\n",
      "Miracle on 34th Street (1994)\n",
      "Nell (1994)\n",
      "Poison Ivy II (1996)\n",
      "Tank Girl (1995)\n",
      "While You Were Sleeping (1995)\n",
      "Wes Craven's New Nightmare (Nightmare on Elm Street Part 7: Freddy's Finale, A) (1994)\n",
      "Naked Gun 33 1/3: The Final Insult (1994)\n",
      "Richie Rich (1994)\n",
      "Beverly Hills Cop III (1994)\n",
      "Philadelphia (1993)\n",
      "Schindler's List (1993)\n",
      "Super Mario Bros. (1993)\n",
      "Nightmare Before Christmas, The (1993)\n",
      "Snow White and the Seven Dwarfs (1937)\n",
      "Operation Dumbo Drop (1995)\n",
      "Oliver & Company (1988)\n",
      "\n",
      "Building recommendation model...\n",
      "Computing recommendations...\n",
      "\n",
      "We recommend:\n",
      "Rear Window (1954)\n",
      "Hamlet (1996)\n",
      "In the Heat of the Night (1967)\n",
      "Harry Potter and the Half-Blood Prince (2009)\n",
      "Body Heat (1981)\n",
      "Best Years of Our Lives, The (1946)\n",
      "L.A. Confidential (1997)\n",
      "Casablanca (1942)\n",
      "Sunset Blvd. (a.k.a. Sunset Boulevard) (1950)\n",
      "African Queen, The (1951)\n"
     ]
    }
   ],
   "source": [
    "# The user whose tastes and recommendations are shown below\n",
    "testSubject = 85\n",
    "print(\"Loading movie ratings...\")\n",
    "userRatings = getUserRatings(testSubject)\n",
    "\n",
    "# Single pass over the ratings: above 4.0 counts as loved, below 3.0 as hated,\n",
    "# anything in between is left out of both lists\n",
    "loved, hated = [], []\n",
    "for entry in userRatings:\n",
    "    stars = float(entry[1])\n",
    "    if stars > 4.0:\n",
    "        loved.append(entry)\n",
    "    if stars < 3.0:\n",
    "        hated.append(entry)\n",
    "\n",
    "print(\"\\nUser \", testSubject, \" loved these movies:\")\n",
    "for entry in loved:\n",
    "    print(getMovieName(entry[0]))\n",
    "\n",
    "print(\"\\n...and didn't like these movies:\")\n",
    "for entry in hated:\n",
    "    print(getMovieName(entry[0]))\n",
    "\n",
    "print(\"\\nBuilding recommendation model...\")\n",
    "trainSet = ratingsDataset.build_full_trainset()\n",
    "\n",
    "algo = SVD()\n",
    "algo.fit(trainSet)\n",
    "\n",
    "print(\"Computing recommendations...\")\n",
    "testSet = BuildAntiTestSetForUser(testSubject, trainSet)\n",
    "predictions = algo.test(testSet)\n",
    "\n",
    "print (\"\\nWe recommend:\")\n",
    "# Keep (int movie id, estimated rating) per prediction, best estimate first\n",
    "recommendations = [(int(movieID), estimatedRating)\n",
    "                   for _, movieID, _, estimatedRating, _ in predictions]\n",
    "recommendations.sort(key=lambda pair: pair[1], reverse=True)\n",
    "\n",
    "for recommendedID, _ in recommendations[:10]:\n",
    "    print(getMovieName(recommendedID))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ac127296",
   "metadata": {},
   "source": [
    "# The three algorithm shoot-out on the MovieLens dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "ce0a93e6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Estimating biases using als...\n",
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Evaluating  User KNN ...\n",
      "Evaluating accuracy...\n",
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Analysis complete.\n",
      "Evaluating  Item KNN ...\n",
      "Evaluating accuracy...\n",
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Analysis complete.\n",
      "Evaluating  Random ...\n",
      "Evaluating accuracy...\n",
      "Analysis complete.\n",
      "Evaluating  SVD ...\n",
      "Evaluating accuracy...\n",
      "Analysis complete.\n",
      "\n",
      "\n",
      "Algorithm  RMSE       MAE       \n",
      "User KNN   0.9961     0.7711    \n",
      "Item KNN   0.9995     0.7798    \n",
      "Random     1.4385     1.1478    \n",
      "SVD        0.9043     0.6987    \n",
      "\n",
      "Legend:\n",
      "\n",
      "RMSE:      Root Mean Squared Error. Lower values mean better accuracy.\n",
      "MAE:       Mean Absolute Error. Lower values mean better accuracy.\n",
      "\n",
      "Using recommendation algorithm User KNN\n",
      "\n",
      "Building recommendation model...\n",
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Computing recommendations...\n",
      "\n",
      "Recommend:\n",
      "One Magic Christmas (1985) 5\n",
      "Step Into Liquid (2002) 5\n",
      "Art of War, The (2000) 5\n",
      "Taste of Cherry (Ta'm e guilass) (1997) 5\n",
      "King Is Alive, The (2000) 5\n",
      "Innocence (2000) 5\n",
      "Maelström (2000) 5\n",
      "Faust (1926) 5\n",
      "Seconds (1966) 5\n",
      "Amazing Grace (2006) 5\n",
      "\n",
      "Using recommendation algorithm Item KNN\n",
      "\n",
      "Building recommendation model...\n",
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Computing recommendations...\n",
      "\n",
      "Recommend:\n",
      "Life in a Day (2011) 5\n",
      "Under Suspicion (2000) 5\n",
      "Asterix and the Gauls (Astérix le Gaulois) (1967) 5\n",
      "Find Me Guilty (2006) 5\n",
      "Elementary Particles, The (Elementarteilchen) (2006) 5\n",
      "Asterix and the Vikings (Astérix et les Vikings) (2006) 5\n",
      "From the Sky Down (2011) 5\n",
      "Vive L'Amour (Ai qing wan sui) (1994) 5\n",
      "Vagabond (Sans toit ni loi) (1985) 5\n",
      "Ariel (1988) 5\n",
      "\n",
      "Using recommendation algorithm Random\n",
      "\n",
      "Building recommendation model...\n",
      "Computing recommendations...\n",
      "\n",
      "Recommend:\n",
      "Dumbo (1941) 5\n",
      "Englishman Who Went Up a Hill But Came Down a Mountain, The (1995) 5\n",
      "Sleepless in Seattle (1993) 5\n",
      "Pet Sematary (1989) 5\n",
      "Eternal Sunshine of the Spotless Mind (2004) 5\n",
      "Birdcage, The (1996) 5\n",
      "Some Like It Hot (1959) 5\n",
      "Cinderella (1950) 5\n",
      "Fish Called Wanda, A (1988) 5\n",
      "Goodfellas (1990) 5\n",
      "\n",
      "Using recommendation algorithm SVD\n",
      "\n",
      "Building recommendation model...\n",
      "Computing recommendations...\n",
      "\n",
      "Recommend:\n",
      "Matrix, The (1999) 4.407616528756567\n",
      "Ran (1985) 4.317388798918363\n",
      "Sunset Blvd. (a.k.a. Sunset Boulevard) (1950) 4.308875555039512\n",
      "3:10 to Yuma (2007) 4.299052769082302\n",
      "Fight Club (1999) 4.295575347437914\n",
      "Godfather, The (1972) 4.273186543912806\n",
      "Raging Bull (1980) 4.266575754730711\n",
      "Lock, Stock & Two Smoking Barrels (1998) 4.256520579061788\n",
      "Paris, Texas (1984) 4.250402302580284\n",
      "Taxi Driver (1976) 4.233609033622251\n"
     ]
    }
   ],
   "source": [
    "# Now a full comparison of simple algorithms.\n",
    "from surprise import KNNBasic\n",
    "from surprise import NormalPredictor\n",
    "import random\n",
    "import numpy as np\n",
    "\n",
    "# Seed both random number generators before anything stochastic runs\n",
    "np.random.seed(0)\n",
    "random.seed(0)\n",
    "\n",
    "# The Evaluator builds the evaluation splits itself from the dataset and rankings\n",
    "evaluator = Evaluator(ratingsDataset, rankings)\n",
    "\n",
    "# Contenders: user-based KNN, item-based KNN, a NormalPredictor baseline and SVD\n",
    "UserKNN = KNNBasic(sim_options={'name': 'cosine', 'user_based': True})\n",
    "ItemKNN = KNNBasic(sim_options={'name': 'cosine', 'user_based': False})\n",
    "Random = NormalPredictor()\n",
    "SVDalgo = SVD()\n",
    "\n",
    "# Registration order is the order in which results are reported\n",
    "contenders = (\n",
    "    (UserKNN, 'User KNN'),\n",
    "    (ItemKNN, 'Item KNN'),\n",
    "    (Random, 'Random'),\n",
    "    (SVDalgo, 'SVD'),\n",
    ")\n",
    "for contender, label in contenders:\n",
    "    evaluator.AddAlgorithm(contender, label)\n",
    "\n",
    "# Accuracy metrics only (no top-N), then sample recommendations from each algorithm\n",
    "evaluator.Evaluate(False)\n",
    "evaluator.SampleTopNRecs()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "40987dca",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}