{
"cells": [
{
"cell_type": "code",
"execution_count": 46,
"source": [
"# This Notebook is created with VS Code on Windows\r\n",
"# Create python virtual environment\r\n",
"!python -m venv .venv\r\n",
"# If you want to use it on macOS/Linux\r\n",
"# You may need to run sudo apt-get install python3-venv first\r\n",
"#python3 -m venv .venv\r\n",
"\r\n",
"# Install Python Packages\r\n",
"!pip install --user --upgrade pip\r\n",
"!pip install --upgrade setuptools\r\n",
"!pip install --user seaborn\r\n",
"!pip install --user numpy\r\n",
"!pip install --user pandas\r\n",
"!pip install --user matplotlib\r\n",
"!pip install --user plotly\r\n",
"!pip install --user nbformat\r\n",
"!pip install --user surprise\r\n"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: pip in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (21.2.4)\n",
"Requirement already satisfied: seaborn in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (0.11.2)\n",
"Requirement already satisfied: matplotlib>=2.2 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from seaborn) (3.3.4)\n",
"Requirement already satisfied: numpy>=1.15 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from seaborn) (1.19.5)\n",
"Requirement already satisfied: scipy>=1.0 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from seaborn) (1.6.1)\n",
"Requirement already satisfied: pandas>=0.23 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from seaborn) (1.2.2)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.1.0)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
"Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n",
"Requirement already satisfied: six in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from cycler>=0.10->matplotlib>=2.2->seaborn) (1.15.0)\n",
"Requirement already satisfied: pytz>=2017.3 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from pandas>=0.23->seaborn) (2021.1)\n",
"Requirement already satisfied: numpy in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (1.19.5)\n",
"Requirement already satisfied: pandas in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (1.2.2)\n",
"Requirement already satisfied: pytz>=2017.3 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from pandas) (2021.1)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from pandas) (2.8.1)\n",
"Requirement already satisfied: numpy>=1.16.5 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from pandas) (1.19.5)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
"Requirement already satisfied: matplotlib in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (3.3.4)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (0.10.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (1.3.1)\n",
"Requirement already satisfied: numpy>=1.15 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (1.19.5)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (2.4.7)\n",
"Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from matplotlib) (2.8.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (8.1.0)\n",
"Requirement already satisfied: six in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from cycler>=0.10->matplotlib) (1.15.0)\n",
"Requirement already satisfied: plotly in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (5.3.0)\n",
"Requirement already satisfied: six in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from plotly) (1.15.0)\n",
"Requirement already satisfied: tenacity>=6.2.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from plotly) (8.0.1)\n",
"Requirement already satisfied: nbformat in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (5.1.3)\n",
"Requirement already satisfied: traitlets>=4.1 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from nbformat) (5.0.5)\n",
"Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from nbformat) (3.2.0)\n",
"Requirement already satisfied: ipython-genutils in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from nbformat) (0.2.0)\n",
"Requirement already satisfied: jupyter-core in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from nbformat) (4.7.1)\n",
"Requirement already satisfied: six>=1.11.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat) (1.15.0)\n",
"Requirement already satisfied: setuptools in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat) (49.2.1)\n",
"Requirement already satisfied: pyrsistent>=0.14.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat) (0.18.0)\n",
"Requirement already satisfied: attrs>=17.4.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat) (21.2.0)\n",
"Requirement already satisfied: pywin32>=1.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from jupyter-core->nbformat) (300)\n",
"Collecting surprise\n",
" Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)\n",
"Collecting scikit-surprise\n",
" Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)\n",
"Requirement already satisfied: joblib>=0.11 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from scikit-surprise->surprise) (1.0.1)\n",
"Requirement already satisfied: numpy>=1.11.2 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from scikit-surprise->surprise) (1.19.5)\n",
"Requirement already satisfied: scipy>=1.0.0 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from scikit-surprise->surprise) (1.6.1)\n",
"Requirement already satisfied: six>=1.10.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from scikit-surprise->surprise) (1.15.0)\n",
"Building wheels for collected packages: scikit-surprise\n",
" Building wheel for scikit-surprise (setup.py): started\n",
" Building wheel for scikit-surprise (setup.py): finished with status 'error'\n",
" Running setup.py clean for scikit-surprise\n",
"Failed to build scikit-surprise\n",
"Installing collected packages: scikit-surprise, surprise\n",
" Running setup.py install for scikit-surprise: started\n",
" Running setup.py install for scikit-surprise: finished with status 'error'\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
" ERROR: Command errored out with exit status 1:\n",
" command: 'c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\python.exe' -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '\"'\"'C:\\\\Users\\\\Oli\\\\AppData\\\\Local\\\\Temp\\\\pip-install-jibnz200\\\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\\setup.py'\"'\"'; __file__='\"'\"'C:\\\\Users\\\\Oli\\\\AppData\\\\Local\\\\Temp\\\\pip-install-jibnz200\\\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\\setup.py'\"'\"';f = getattr(tokenize, '\"'\"'open'\"'\"', open)(__file__) if os.path.exists(__file__) else io.StringIO('\"'\"'from setuptools import setup; setup()'\"'\"');code = f.read().replace('\"'\"'\\r\\n'\"'\"', '\"'\"'\\n'\"'\"');f.close();exec(compile(code, __file__, '\"'\"'exec'\"'\"'))' bdist_wheel -d 'C:\\Users\\Oli\\AppData\\Local\\Temp\\pip-wheel-dz4_1ymq'\n",
" cwd: C:\\Users\\Oli\\AppData\\Local\\Temp\\pip-install-jibnz200\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\n",
" Complete output (49 lines):\n",
" running bdist_wheel\n",
" running build\n",
" running build_py\n",
" creating build\n",
" creating build\\lib.win-amd64-3.8\n",
" creating build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\accuracy.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\builtin_datasets.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\dataset.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\dump.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\reader.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\trainset.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\utils.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\__init__.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\__main__.py -> build\\lib.win-amd64-3.8\\surprise\n",
" creating build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
" copying surprise\\model_selection\\search.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
" copying surprise\\model_selection\\split.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
" copying surprise\\model_selection\\validation.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
" copying surprise\\model_selection\\__init__.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
" creating build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\algo_base.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\baseline_only.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\knns.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\predictions.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\random_pred.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\__init__.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" running egg_info\n",
" writing scikit_surprise.egg-info\\PKG-INFO\n",
" writing dependency_links to scikit_surprise.egg-info\\dependency_links.txt\n",
" writing entry points to scikit_surprise.egg-info\\entry_points.txt\n",
" writing requirements to scikit_surprise.egg-info\\requires.txt\n",
" writing top-level names to scikit_surprise.egg-info\\top_level.txt\n",
" reading manifest file 'scikit_surprise.egg-info\\SOURCES.txt'\n",
" reading manifest template 'MANIFEST.in'\n",
" writing manifest file 'scikit_surprise.egg-info\\SOURCES.txt'\n",
" copying surprise\\similarities.c -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\similarities.pyx -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\prediction_algorithms\\co_clustering.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\matrix_factorization.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\optimize_baselines.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\slope_one.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\co_clustering.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\matrix_factorization.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\optimize_baselines.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\slope_one.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" running build_ext\n",
" building 'surprise.similarities' extension\n",
" error: Microsoft Visual C++ 14.0 is required. Get it with \"Build Tools for Visual Studio\": https://visualstudio.microsoft.com/downloads/\n",
" ----------------------------------------\n",
" ERROR: Failed building wheel for scikit-surprise\n",
" ERROR: Command errored out with exit status 1:\n",
" command: 'c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\python.exe' -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '\"'\"'C:\\\\Users\\\\Oli\\\\AppData\\\\Local\\\\Temp\\\\pip-install-jibnz200\\\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\\setup.py'\"'\"'; __file__='\"'\"'C:\\\\Users\\\\Oli\\\\AppData\\\\Local\\\\Temp\\\\pip-install-jibnz200\\\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\\setup.py'\"'\"';f = getattr(tokenize, '\"'\"'open'\"'\"', open)(__file__) if os.path.exists(__file__) else io.StringIO('\"'\"'from setuptools import setup; setup()'\"'\"');code = f.read().replace('\"'\"'\\r\\n'\"'\"', '\"'\"'\\n'\"'\"');f.close();exec(compile(code, __file__, '\"'\"'exec'\"'\"'))' install --record 'C:\\Users\\Oli\\AppData\\Local\\Temp\\pip-record-ivcio2vz\\install-record.txt' --single-version-externally-managed --user --prefix= --compile --install-headers 'C:\\Users\\Oli\\AppData\\Roaming\\Python\\Python38\\Include\\scikit-surprise'\n",
" cwd: C:\\Users\\Oli\\AppData\\Local\\Temp\\pip-install-jibnz200\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\n",
" Complete output (49 lines):\n",
" running install\n",
" running build\n",
" running build_py\n",
" creating build\n",
" creating build\\lib.win-amd64-3.8\n",
" creating build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\accuracy.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\builtin_datasets.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\dataset.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\dump.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\reader.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\trainset.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\utils.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\__init__.py -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\__main__.py -> build\\lib.win-amd64-3.8\\surprise\n",
" creating build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
" copying surprise\\model_selection\\search.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
" copying surprise\\model_selection\\split.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
" copying surprise\\model_selection\\validation.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
" copying surprise\\model_selection\\__init__.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
" creating build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\algo_base.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\baseline_only.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\knns.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\predictions.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\random_pred.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\__init__.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" running egg_info\n",
" writing scikit_surprise.egg-info\\PKG-INFO\n",
" writing dependency_links to scikit_surprise.egg-info\\dependency_links.txt\n",
" writing entry points to scikit_surprise.egg-info\\entry_points.txt\n",
" writing requirements to scikit_surprise.egg-info\\requires.txt\n",
" writing top-level names to scikit_surprise.egg-info\\top_level.txt\n",
" reading manifest file 'scikit_surprise.egg-info\\SOURCES.txt'\n",
" reading manifest template 'MANIFEST.in'\n",
" writing manifest file 'scikit_surprise.egg-info\\SOURCES.txt'\n",
" copying surprise\\similarities.c -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\similarities.pyx -> build\\lib.win-amd64-3.8\\surprise\n",
" copying surprise\\prediction_algorithms\\co_clustering.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\matrix_factorization.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\optimize_baselines.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\slope_one.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\co_clustering.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\matrix_factorization.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\optimize_baselines.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" copying surprise\\prediction_algorithms\\slope_one.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
" running build_ext\n",
" building 'surprise.similarities' extension\n",
" error: Microsoft Visual C++ 14.0 is required. Get it with \"Build Tools for Visual Studio\": https://visualstudio.microsoft.com/downloads/\n",
" ----------------------------------------\n",
"ERROR: Command errored out with exit status 1: 'c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\python.exe' -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '\"'\"'C:\\\\Users\\\\Oli\\\\AppData\\\\Local\\\\Temp\\\\pip-install-jibnz200\\\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\\setup.py'\"'\"'; __file__='\"'\"'C:\\\\Users\\\\Oli\\\\AppData\\\\Local\\\\Temp\\\\pip-install-jibnz200\\\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\\setup.py'\"'\"';f = getattr(tokenize, '\"'\"'open'\"'\"', open)(__file__) if os.path.exists(__file__) else io.StringIO('\"'\"'from setuptools import setup; setup()'\"'\"');code = f.read().replace('\"'\"'\\r\\n'\"'\"', '\"'\"'\\n'\"'\"');f.close();exec(compile(code, __file__, '\"'\"'exec'\"'\"'))' install --record 'C:\\Users\\Oli\\AppData\\Local\\Temp\\pip-record-ivcio2vz\\install-record.txt' --single-version-externally-managed --user --prefix= --compile --install-headers 'C:\\Users\\Oli\\AppData\\Roaming\\Python\\Python38\\Include\\scikit-surprise' Check the logs for full command output.\n"
]
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 3,
"source": [
"import numpy as np # maths\r\n",
"import pandas as pd # data processing\r\n",
"import matplotlib.pyplot as plt\r\n",
"import seaborn as sns\r\n",
"import os\r\n",
"import re\r\n",
"\r\n",
"from plotly.offline import init_notebook_mode, iplot\r\n",
"import plotly.graph_objs as go\r\n",
"import plotly.offline as py\r\n",
"py.init_notebook_mode(connected=True)\r\n",
"\r\n",
"import warnings\r\n",
"warnings.filterwarnings('ignore')\r\n",
"\r\n",
"plt.style.use('fivethirtyeight')\r\n",
"plt.rcParams['figure.figsize'] = [18, 8]"
],
"outputs": [
{
"output_type": "display_data",
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {}
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 4,
"source": [
"# Import Tables\r\n",
"reviews = pd.read_csv('./ml-1m/ratings.dat', names=['userId', 'movieId', 'rating', 'timestamp'], delimiter='::', engine='python')\r\n",
"movies = pd.read_csv('./ml-1m/movies.dat', names=['movieId', 'title', 'genres'], delimiter='::', engine='python')\r\n",
"users = pd.read_csv('./ml-1m/users.dat', names=['userId', 'gender', 'age', 'occupation', 'zip'], delimiter='::', engine='python')\r\n",
"\r\n",
"# Print Table shape\r\n",
"print('Reviews shape:', reviews.shape)\r\n",
"print('Users shape:', users.shape)\r\n",
"print('Movies shape:', movies.shape)"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Reviews shape: (1000209, 4)\n",
"Users shape: (6040, 5)\n",
"Movies shape: (3883, 3)\n"
]
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 5,
"source": [
"# Drop unused Attributes\r\n",
"reviews.drop(['timestamp'], axis=1, inplace=True) # Time\r\n",
"users.drop(['zip'], axis=1, inplace=True) # Zip-Code\r\n",
"\r\n",
"# Extract the movie year from title to extra attrbute\r\n",
"movies['release_year'] = movies['title'].str.extract(r'(?:\\((\\d{4})\\))?\\s*$', expand=False)"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 6,
"source": [
"# Print movie table\r\n",
"movies.head()"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" movieId title genres \\\n",
"0 1 Toy Story (1995) Animation|Children's|Comedy \n",
"1 2 Jumanji (1995) Adventure|Children's|Fantasy \n",
"2 3 Grumpier Old Men (1995) Comedy|Romance \n",
"3 4 Waiting to Exhale (1995) Comedy|Drama \n",
"4 5 Father of the Bride Part II (1995) Comedy \n",
"\n",
" release_year \n",
"0 1995 \n",
"1 1995 \n",
"2 1995 \n",
"3 1995 \n",
"4 1995 "
],
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" movieId | \n",
" title | \n",
" genres | \n",
" release_year | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Toy Story (1995) | \n",
" Animation|Children's|Comedy | \n",
" 1995 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Jumanji (1995) | \n",
" Adventure|Children's|Fantasy | \n",
" 1995 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" Grumpier Old Men (1995) | \n",
" Comedy|Romance | \n",
" 1995 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" Waiting to Exhale (1995) | \n",
" Comedy|Drama | \n",
" 1995 | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" Father of the Bride Part II (1995) | \n",
" Comedy | \n",
" 1995 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"metadata": {},
"execution_count": 6
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 7,
"source": [
"# Changed feature values based on README_users.txt\r\n",
"\r\n",
"ages_map = {1: 'Under 18',\r\n",
" 18: '18 - 24',\r\n",
" 25: '25 - 34',\r\n",
" 35: '35 - 44',\r\n",
" 45: '45 - 49',\r\n",
" 50: '50 - 55',\r\n",
" 56: '56+'}\r\n",
"\r\n",
"occupations_map = {0: 'Not specified',\r\n",
" 1: 'Academic / Educator',\r\n",
" 2: 'Artist',\r\n",
" 3: 'Clerical / Admin',\r\n",
" 4: 'College / Grad Student',\r\n",
" 5: 'Customer Service',\r\n",
" 6: 'Doctor / Health Care',\r\n",
" 7: 'Executive / Managerial',\r\n",
" 8: 'Farmer',\r\n",
" 9: 'Homemaker',\r\n",
" 10: 'K-12 student',\r\n",
" 11: 'Lawyer',\r\n",
" 12: 'Programmer',\r\n",
" 13: 'Retired',\r\n",
" 14: 'Sales / Marketing',\r\n",
" 15: 'Scientist',\r\n",
" 16: 'Self-Employed',\r\n",
" 17: 'Technician / Engineer',\r\n",
" 18: 'Tradesman / Craftsman',\r\n",
" 19: 'Unemployed',\r\n",
" 20: 'Writer'}\r\n",
"\r\n",
"gender_map = {'M': 'Male', 'F': 'Female'}\r\n",
"\r\n",
"users['age'] = users['age'].map(ages_map)\r\n",
"users['occupation'] = users['occupation'].map(occupations_map)\r\n",
"users['gender'] = users['gender'].map(gender_map)"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 8,
"source": [
"# Plot age kategories\r\n",
"\r\n",
"age_reindex = ['Under 18', '18 - 24', '25 - 34', '35 - 44', '45 - 49', '50 - 55', '56+']\r\n",
"age_counts = users['age'].value_counts().reindex(age_reindex)\r\n",
"sns.barplot(x=age_counts.values,\r\n",
" y=age_counts.index,\r\n",
" palette='magma').set_title(\r\n",
" 'Users age', fontsize=12)\r\n",
"\r\n",
"plt.show()"
],
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
""
],
"image/svg+xml": "\r\n\r\n\r\n\r\n",
"image/png": ""
},
"metadata": {}
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 9,
"source": [
"# Plot gender of users\r\n",
"gender_counts = users['gender'].value_counts()\r\n",
"colors1 = ['lightblue', 'pink']\r\n",
"pie = go.Pie(labels=gender_counts.index,\r\n",
" values=gender_counts.values,\r\n",
" marker=dict(colors=colors1),\r\n",
" hole=0.5)\r\n",
"layout = go.Layout(title='Gender Users', font=dict(size=12), legend=dict(orientation='h'))\r\n",
"\r\n",
"fig = go.Figure(data=[pie], layout=layout)\r\n",
"py.iplot(fig)"
],
"outputs": [
{
"output_type": "display_data",
"data": {
"text/html": [
""
],
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"hole": 0.5,
"labels": [
"Male",
"Female"
],
"marker": {
"colors": [
"lightblue",
"pink"
]
},
"type": "pie",
"values": [
4331,
1709
]
}
],
"layout": {
"font": {
"size": 12
},
"legend": {
"orientation": "h"
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Gender Users"
}
}
}
},
"metadata": {}
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 10,
"source": [
"# Attach movie and user attributes to every review row.\r\n",
"# Both lookups are left joins validated as many-to-one, so a duplicated movieId or\r\n",
"# userId in a lookup table raises a MergeError instead of silently adding rows.\r\n",
"final_df = (\r\n",
"    reviews\r\n",
"    .merge(movies, on='movieId', how='left', validate='many_to_one')\r\n",
"    .merge(users, on='userId', how='left', validate='many_to_one')\r\n",
")\r\n",
"print('final_df shape:', final_df.shape)\r\n",
"final_df.head()"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"final_df shape: (1000209, 9)\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
" userId movieId rating title \\\n",
"0 1 1193 5 One Flew Over the Cuckoo's Nest (1975) \n",
"1 1 661 3 James and the Giant Peach (1996) \n",
"2 1 914 3 My Fair Lady (1964) \n",
"3 1 3408 4 Erin Brockovich (2000) \n",
"4 1 2355 5 Bug's Life, A (1998) \n",
"\n",
" genres release_year gender age occupation \n",
"0 Drama 1975 Female Under 18 K-12 student \n",
"1 Animation|Children's|Musical 1996 Female Under 18 K-12 student \n",
"2 Musical|Romance 1964 Female Under 18 K-12 student \n",
"3 Drama 2000 Female Under 18 K-12 student \n",
"4 Animation|Children's|Comedy 1998 Female Under 18 K-12 student "
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" userId | \n",
" movieId | \n",
" rating | \n",
" title | \n",
" genres | \n",
" release_year | \n",
" gender | \n",
" age | \n",
" occupation | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 1193 | \n",
" 5 | \n",
" One Flew Over the Cuckoo's Nest (1975) | \n",
" Drama | \n",
" 1975 | \n",
" Female | \n",
" Under 18 | \n",
" K-12 student | \n",
"
\n",
" \n",
" | 1 | \n",
" 1 | \n",
" 661 | \n",
" 3 | \n",
" James and the Giant Peach (1996) | \n",
" Animation|Children's|Musical | \n",
" 1996 | \n",
" Female | \n",
" Under 18 | \n",
" K-12 student | \n",
"
\n",
" \n",
" | 2 | \n",
" 1 | \n",
" 914 | \n",
" 3 | \n",
" My Fair Lady (1964) | \n",
" Musical|Romance | \n",
" 1964 | \n",
" Female | \n",
" Under 18 | \n",
" K-12 student | \n",
"
\n",
" \n",
" | 3 | \n",
" 1 | \n",
" 3408 | \n",
" 4 | \n",
" Erin Brockovich (2000) | \n",
" Drama | \n",
" 2000 | \n",
" Female | \n",
" Under 18 | \n",
" K-12 student | \n",
"
\n",
" \n",
" | 4 | \n",
" 1 | \n",
" 2355 | \n",
" 5 | \n",
" Bug's Life, A (1998) | \n",
" Animation|Children's|Comedy | \n",
" 1998 | \n",
" Female | \n",
" Under 18 | \n",
" K-12 student | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"metadata": {},
"execution_count": 10
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 39,
"source": [
"# Ten most frequently reviewed titles among rows whose age bracket is '18 - 24'\r\n",
"is_18_to_24 = final_df['age'] == '18 - 24'\r\n",
"final_df.loc[is_18_to_24, 'title'].value_counts().head(10).to_frame()"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" title\n",
"American Beauty (1999) 715\n",
"Star Wars: Episode VI - Return of the Jedi (1983) 586\n",
"Star Wars: Episode V - The Empire Strikes Back ... 579\n",
"Matrix, The (1999) 567\n",
"Star Wars: Episode IV - A New Hope (1977) 562\n",
"Braveheart (1995) 544\n",
"Saving Private Ryan (1998) 543\n",
"Jurassic Park (1993) 541\n",
"Terminator 2: Judgment Day (1991) 529\n",
"Sixth Sense, The (1999) 514"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
"
\n",
" \n",
" \n",
" \n",
" | American Beauty (1999) | \n",
" 715 | \n",
"
\n",
" \n",
" | Star Wars: Episode VI - Return of the Jedi (1983) | \n",
" 586 | \n",
"
\n",
" \n",
" | Star Wars: Episode V - The Empire Strikes Back (1980) | \n",
" 579 | \n",
"
\n",
" \n",
" | Matrix, The (1999) | \n",
" 567 | \n",
"
\n",
" \n",
" | Star Wars: Episode IV - A New Hope (1977) | \n",
" 562 | \n",
"
\n",
" \n",
" | Braveheart (1995) | \n",
" 544 | \n",
"
\n",
" \n",
" | Saving Private Ryan (1998) | \n",
" 543 | \n",
"
\n",
" \n",
" | Jurassic Park (1993) | \n",
" 541 | \n",
"
\n",
" \n",
" | Terminator 2: Judgment Day (1991) | \n",
" 529 | \n",
"
\n",
" \n",
" | Sixth Sense, The (1999) | \n",
" 514 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"metadata": {},
"execution_count": 39
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 44,
"source": [
"# Count the distinct movies and users that appear in the merged ratings table\r\n",
"n_users = final_df['userId'].nunique()\r\n",
"n_movies = final_df['movieId'].nunique()\r\n",
"\r\n",
"print('Number of movies:', n_movies)\r\n",
"print('Number of users:', n_users)"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Number of movies: 3706\n",
"Number of users: 6040\n"
]
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 45,
"source": [
"# Fit an SVD recommender with Surprise, a Python recommendation framework\r\n",
"# (the pip package is named scikit-surprise; it is imported as `surprise`)\r\n",
"\r\n",
"from surprise import Reader, Dataset, SVD, SVDpp\r\n",
"from surprise import accuracy\r\n",
"\r\n",
"# Fixed seed so the randomly initialised factors, and therefore the fitted model, are reproducible\r\n",
"RANDOM_STATE = 42\r\n",
"\r\n",
"# Ratings use a 1-5 scale; load_from_df expects the columns in (user, item, rating) order\r\n",
"reader = Reader(rating_scale=(1, 5))\r\n",
"dataset = Dataset.load_from_df(final_df[['userId', 'movieId', 'rating']], reader=reader)\r\n",
"\r\n",
"svd = SVD(n_factors=50, random_state=RANDOM_STATE)\r\n",
"svd_plusplus = SVDpp(n_factors=50, random_state=RANDOM_STATE)\r\n",
"\r\n",
"# Train SVD on every available rating (no hold-out split)\r\n",
"trainset = dataset.build_full_trainset()\r\n",
"svd.fit(trainset)\r\n",
"# Train SVD++ as well - ATTENTION: this takes a LONG TIME\r\n",
"# svd_plusplus.fit(trainset)\r\n"
],
"outputs": [
{
"output_type": "error",
"ename": "ModuleNotFoundError",
"evalue": "No module named 'surprise'",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# implement SVD with Python SurPRISE, a Python Recommendation Framework\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0msurprise\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mReader\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDataset\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSVD\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSVDpp\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msurprise\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0maccuracy\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'surprise'"
]
}
],
"metadata": {}
}
],
"metadata": {
"orig_nbformat": 4,
"language_info": {
"name": "python",
"version": "3.8.8",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3.8.8 64-bit"
},
"interpreter": {
"hash": "53e4db133e7a886bd36ef8c79c0b5519f0af174d53fdba9ad5d5d94e6d9f4b55"
}
},
"nbformat": 4,
"nbformat_minor": 2
}