{ "cells": [
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
 "import os\n",
 "\n",
 "import numpy as np\n",
 "import pandas as pd"
] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/vincent/Project/jupyter/DataScience/Week9/Lecture/ml-100k\n" ] } ], "source": [
 "# Enter the dataset directory so the cells that read u.data and u.item can use bare file names.\n",
 "# The guard makes this cell safe to re-run: a bare `cd ml-100k/` would fail on the second\n",
 "# execution because the kernel is already inside that directory.\n",
 "DATA_DIR = 'ml-100k'\n",
 "if os.path.basename(os.getcwd()) != DATA_DIR:\n",
 "    os.chdir(DATA_DIR)\n",
 "print(os.getcwd())"
] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_iditem_idratingtimestamp
01962423881250949
11863023891717742
2223771878887116
3244512880606923
41663461886397596
\n", "
" ], "text/plain": [ " user_id item_id rating timestamp\n", "0 196 242 3 881250949\n", "1 186 302 3 891717742\n", "2 22 377 1 878887116\n", "3 244 51 2 880606923\n", "4 166 346 1 886397596" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Load the raw ratings file: tab-separated columns, no header row.\n",
 "names = ['user_id', 'item_id', 'rating', 'timestamp']\n",
 "df = pd.read_csv('u.data', sep='\\t', names=names)\n",
 "df.head()"
] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "943 users\n", "1682 items\n" ] } ], "source": [
 "n_users = df['user_id'].nunique()\n",
 "n_items = df['item_id'].nunique()\n",
 "# The rating matrices are sized by these counts but indexed by id - 1, which is only\n",
 "# valid when the ids are exactly 1..n with no gaps. Fail loudly here otherwise.\n",
 "assert df['user_id'].min() == 1 and df['user_id'].max() == n_users\n",
 "assert df['item_id'].min() == 1 and df['item_id'].max() == n_items\n",
 "print(f'{n_users} users')\n",
 "print(f'{n_items} items')"
] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[1. 1. 1. ... 0. 0. 0.]\n", " [1. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " ...\n", " [1. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 1. 0. ... 0. 0. 0.]]\n" ] } ], "source": [
 "# Binary interaction matrix: ratingsNum[u, i] is 1 when user u+1 rated item i+1, else 0.\n",
 "# Filled with one vectorized assignment instead of a Python loop over every rating row.\n",
 "user_idx = df['user_id'].to_numpy() - 1\n",
 "item_idx = df['item_id'].to_numpy() - 1\n",
 "ratingsNum = np.zeros((n_users, n_items))\n",
 "ratingsNum[user_idx, item_idx] = 1\n",
 "print(ratingsNum)"
] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [
 "# Column sums of the binary matrix: how many users rated each item.\n",
 "itemRateNumCurrent = ratingsNum.sum(axis=0)"
] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "import matplotlib.pyplot as plt\n",
 "\n",
 "# Popularity curve: per-item rating counts ordered from most to least rated.\n",
 "# np.sort returns a sorted copy, so itemRateNumCurrent itself is left untouched.\n",
 "popularity_desc = np.sort(itemRateNumCurrent)[::-1]\n",
 "plt.plot(popularity_desc)\n",
 "plt.xlabel('sorted items')\n",
 "plt.ylabel('popularity')\n",
 "plt.show()"
] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[5. 3. 4. ... 0. 0. 0.]\n", " [4. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " ...\n", " [5. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 5. 0. ... 0. 0. 0.]]\n" ] } ], "source": [
 "# User x item matrix of rating values; 0 marks a (user, item) pair with no rating.\n",
 "ratings = np.zeros((n_users, n_items))\n",
 "for rec in df.itertuples(index=False):\n",
 "    ratings[rec.user_id - 1, rec.item_id - 1] = rec.rating\n",
 "print(ratings)"
] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[3.87831858 3.20610687 3.03333333 ... 2. 3. 3. ]\n", "[452. 131. 90. ... 1. 1. 1.]\n" ] } ], "source": [
 "itemRateNum = ratingsNum.sum(axis=0)\n",
 "itemRateSum = ratings.sum(axis=0)\n",
 "# Guarded division: an item with no ratings gets an average of 0 instead of NaN.\n",
 "# argsort places NaN last, so a NaN average would be ranked as the best item.\n",
 "itemRateAvg = np.divide(\n",
 "    itemRateSum,\n",
 "    itemRateNum,\n",
 "    out=np.zeros_like(itemRateSum),\n",
 "    where=itemRateNum > 0,\n",
 ")\n",
 "print(itemRateAvg)\n",
 "print(itemRateNum)"
] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movie idmovie titlerelease datevideo release dateIMDb URLunknownActionAdventureAnimationChildren's...FantasyFilm-NoirHorrorMusicalMysteryRomanceSci-FiThrillerWarWestern
01Toy Story (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Toy%20Story%2...00011...0000000000
12GoldenEye (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?GoldenEye%20(...01100...0000000100
23Four Rooms (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Four%20Rooms%...00000...0000000100
34Get Shorty (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Get%20Shorty%...01000...0000000000
45Copycat (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Copycat%20(1995)00000...0000000100
\n", "

5 rows × 24 columns

\n", "
" ], "text/plain": [ " movie id movie title release date video release date \\\n", "0 1 Toy Story (1995) 01-Jan-1995 NaN \n", "1 2 GoldenEye (1995) 01-Jan-1995 NaN \n", "2 3 Four Rooms (1995) 01-Jan-1995 NaN \n", "3 4 Get Shorty (1995) 01-Jan-1995 NaN \n", "4 5 Copycat (1995) 01-Jan-1995 NaN \n", "\n", " IMDb URL unknown Action \\\n", "0 http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 \n", "1 http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 \n", "2 http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 \n", "3 http://us.imdb.com/M/title-exact?Get%20Shorty%... 0 1 \n", "4 http://us.imdb.com/M/title-exact?Copycat%20(1995) 0 0 \n", "\n", " Adventure Animation Children's ... Fantasy Film-Noir Horror Musical \\\n", "0 0 1 1 ... 0 0 0 0 \n", "1 1 0 0 ... 0 0 0 0 \n", "2 0 0 0 ... 0 0 0 0 \n", "3 0 0 0 ... 0 0 0 0 \n", "4 0 0 0 ... 0 0 0 0 \n", "\n", " Mystery Romance Sci-Fi Thriller War Western \n", "0 0 0 0 0 0 0 \n", "1 0 0 0 1 0 0 \n", "2 0 0 0 1 0 0 \n", "3 0 0 0 0 0 0 \n", "4 0 0 0 1 0 0 \n", "\n", "[5 rows x 24 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Column layout of u.item: five descriptive fields followed by the genre indicator columns.\n",
 "meta_cols = ['movie id', 'movie title', 'release date', 'video release date', 'IMDb URL']\n",
 "genre_cols = [\n",
 "    'unknown', 'Action', 'Adventure', 'Animation', \"Children's\", 'Comedy', 'Crime',\n",
 "    'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery',\n",
 "    'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',\n",
 "]\n",
 "i_cols = meta_cols + genre_cols\n",
 "items = pd.read_csv('u.item', sep='|', names=i_cols, encoding='latin-1')\n",
 "\n",
 "items.head()"
] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "movie ID\t movie title\n", "1535 Aiqing wansui (1994)\n", "1652 Entertaining Angels: The Dorothy Day Story (1996)\n", "1200 Marlene Dietrich: Shadow and Light (1996) \n", "1598 Someone Else's America (1995)\n", "1121 They Made Me a Criminal (1939)\n", "Name: movie title, dtype: object\n" ] } ], "source": [
 "# MovieAvg: recommend the top_n movies with the highest average rating among the\n",
 "# movies the active user has not rated yet.\n",
 "# Increase top_n to recommend more movies.\n",
 "\n",
 "top_n = 5\n",
 "activeUser = 0\n",
 "mask_activeUser = ratings[activeUser, :] > 0\n",
 "itemRateAvgCurrent = itemRateAvg.copy()\n",
 "# Stored ratings are positive (0 means not rated), so a score of 0 ranks the\n",
 "# movies this user already rated below every rated movie.\n",
 "itemRateAvgCurrent[mask_activeUser] = 0\n",
 "itemSortInd = itemRateAvgCurrent.argsort()\n",
 "# Reverse the ascending order and slice. Unlike explicit index arithmetic, this cannot\n",
 "# wrap around to negative indices when top_n exceeds the number of items.\n",
 "top_items = itemSortInd[::-1][:top_n]\n",
 "# NOTE: the first printed column is the zero-based row index of items (movie id - 1),\n",
 "# not the movie id itself.\n",
 "print('movie ID' + '\\t movie title')\n",
 "print(items['movie title'].iloc[top_items])"
] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "movie ID\t movie title\n", "49 Star Wars (1977)\n", "99 Fargo (1996)\n", "285 English Patient, The (1996)\n", "0 Toy Story (1995)\n", "120 Independence Day (ID4) (1996)\n", "173 Raiders of the Lost Ark (1981)\n", "126 Godfather, The (1972)\n", "55 Pulp Fiction (1994)\n", "6 Twelve Monkeys (1995)\n", "97 Silence of the Lambs, The (1991)\n", "Name: movie title, dtype: object\n" ] } ], "source": [
 "# TopPop: recommend the top_n most-rated movies among the movies the active user\n",
 "# has not rated yet.\n",
 "\n",
 "top_n = 10\n",
 "activeUser = 2\n",
 "mask_activeUser = ratings[activeUser, :] > 0\n",
 "itemRateNumCurrent = itemRateNum.copy()\n",
 "# A count of 0 ranks the movies this user already rated at the bottom.\n",
 "itemRateNumCurrent[mask_activeUser] = 0\n",
 "itemSortInd = itemRateNumCurrent.argsort()\n",
 "# Reverse the ascending order and slice. Unlike explicit index arithmetic, this cannot\n",
 "# wrap around to negative indices when top_n exceeds the number of items.\n",
 "top_items = itemSortInd[::-1][:top_n]\n",
 "# NOTE: the first printed column is the zero-based row index of items (movie id - 1),\n",
 "# not the movie id itself.\n",
 "print('movie ID' + '\\t movie title')\n",
 "print(items['movie title'].iloc[top_items])"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }
], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode":
{ "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.6" } }, "nbformat": 4, "nbformat_minor": 4 }