diff --git a/test.ipynb b/test.ipynb
index 138c081..a919fa7 100644
--- a/test.ipynb
+++ b/test.ipynb
@@ -2,84 +2,203 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 46,
"source": [
"# This Notebook is created with VS Code on Windows\r\n",
"# Create python virtual environment\r\n",
- "#!python -m venv .venv\r\n",
+ "!python -m venv .venv\r\n",
"# If you want to use it on macOS/Linux\r\n",
"# You may need to run sudo apt-get install python3-venv first\r\n",
"#python3 -m venv .venv\r\n",
"\r\n",
"# Install Python Packages\r\n",
"!pip install --user --upgrade pip\r\n",
+ "!pip install --upgrade setuptools\r\n",
"!pip install --user seaborn\r\n",
"!pip install --user numpy\r\n",
"!pip install --user pandas\r\n",
"!pip install --user matplotlib\r\n",
"!pip install --user plotly\r\n",
- "!pip install --user nbformat\r\n"
+ "!pip install --user nbformat\r\n",
+ "!pip install --user surprise\r\n"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- "Requirement already satisfied: pip in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (21.2.4)\n",
+ "Requirement already satisfied: pip in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (21.2.4)\n",
"Requirement already satisfied: seaborn in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (0.11.2)\n",
"Requirement already satisfied: matplotlib>=2.2 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from seaborn) (3.3.4)\n",
- "Requirement already satisfied: numpy>=1.15 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from seaborn) (1.20.1)\n",
+ "Requirement already satisfied: numpy>=1.15 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from seaborn) (1.19.5)\n",
"Requirement already satisfied: scipy>=1.0 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from seaborn) (1.6.1)\n",
"Requirement already satisfied: pandas>=0.23 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from seaborn) (1.2.2)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n",
- "Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.1.0)\n",
- "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from matplotlib>=2.2->seaborn) (2.4.6)\n",
- "Requirement already satisfied: six in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from cycler>=0.10->matplotlib>=2.2->seaborn) (1.15.0)\n",
+ "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
+ "Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n",
+ "Requirement already satisfied: six in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from cycler>=0.10->matplotlib>=2.2->seaborn) (1.15.0)\n",
"Requirement already satisfied: pytz>=2017.3 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from pandas>=0.23->seaborn) (2021.1)\n",
- "Requirement already satisfied: numpy in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (1.20.1)\n",
+ "Requirement already satisfied: numpy in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (1.19.5)\n",
"Requirement already satisfied: pandas in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (1.2.2)\n",
- "Requirement already satisfied: numpy>=1.16.5 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from pandas) (1.20.1)\n",
"Requirement already satisfied: pytz>=2017.3 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from pandas) (2021.1)\n",
- "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from pandas) (2.8.1)\n",
- "Requirement already satisfied: six>=1.5 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
+ "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from pandas) (2.8.1)\n",
+ "Requirement already satisfied: numpy>=1.16.5 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from pandas) (1.19.5)\n",
+ "Requirement already satisfied: six>=1.5 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
"Requirement already satisfied: matplotlib in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (3.3.4)\n",
- "Requirement already satisfied: numpy>=1.15 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (1.20.1)\n",
- "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from matplotlib) (2.4.6)\n",
- "Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (2.8.1)\n",
- "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (1.3.1)\n",
- "Requirement already satisfied: pillow>=6.2.0 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (8.1.0)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (0.10.0)\n",
- "Requirement already satisfied: six in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from cycler>=0.10->matplotlib) (1.15.0)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (1.3.1)\n",
+ "Requirement already satisfied: numpy>=1.15 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (1.19.5)\n",
+ "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (2.4.7)\n",
+ "Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from matplotlib) (2.8.1)\n",
+ "Requirement already satisfied: pillow>=6.2.0 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from matplotlib) (8.1.0)\n",
+ "Requirement already satisfied: six in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from cycler>=0.10->matplotlib) (1.15.0)\n",
"Requirement already satisfied: plotly in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (5.3.0)\n",
- "Requirement already satisfied: six in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from plotly) (1.15.0)\n",
+ "Requirement already satisfied: six in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from plotly) (1.15.0)\n",
"Requirement already satisfied: tenacity>=6.2.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from plotly) (8.0.1)\n",
- "Collecting nbformat\n",
- " Downloading nbformat-5.1.3-py3-none-any.whl (178 kB)\n",
- "Requirement already satisfied: ipython-genutils in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from nbformat) (0.2.0)\n",
+ "Requirement already satisfied: nbformat in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (5.1.3)\n",
"Requirement already satisfied: traitlets>=4.1 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from nbformat) (5.0.5)\n",
- "Collecting jsonschema!=2.5.0,>=2.4\n",
- " Downloading jsonschema-3.2.0-py2.py3-none-any.whl (56 kB)\n",
+ "Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from nbformat) (3.2.0)\n",
+ "Requirement already satisfied: ipython-genutils in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from nbformat) (0.2.0)\n",
"Requirement already satisfied: jupyter-core in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from nbformat) (4.7.1)\n",
- "Requirement already satisfied: six>=1.11.0 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat) (1.15.0)\n",
- "Requirement already satisfied: attrs>=17.4.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat) (19.3.0)\n",
+ "Requirement already satisfied: six>=1.11.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat) (1.15.0)\n",
"Requirement already satisfied: setuptools in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat) (49.2.1)\n",
- "Collecting pyrsistent>=0.14.0\n",
- " Downloading pyrsistent-0.18.0-cp38-cp38-win_amd64.whl (62 kB)\n",
+ "Requirement already satisfied: pyrsistent>=0.14.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat) (0.18.0)\n",
+ "Requirement already satisfied: attrs>=17.4.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat) (21.2.0)\n",
"Requirement already satisfied: pywin32>=1.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from jupyter-core->nbformat) (300)\n",
- "Installing collected packages: pyrsistent, jsonschema, nbformat\n",
- "Successfully installed jsonschema-3.2.0 nbformat-5.1.3 pyrsistent-0.18.0\n"
+ "Collecting surprise\n",
+ " Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)\n",
+ "Collecting scikit-surprise\n",
+ " Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)\n",
+ "Requirement already satisfied: joblib>=0.11 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from scikit-surprise->surprise) (1.0.1)\n",
+ "Requirement already satisfied: numpy>=1.11.2 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from scikit-surprise->surprise) (1.19.5)\n",
+ "Requirement already satisfied: scipy>=1.0.0 in c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\lib\\site-packages (from scikit-surprise->surprise) (1.6.1)\n",
+ "Requirement already satisfied: six>=1.10.0 in c:\\users\\oli\\appdata\\roaming\\python\\python38\\site-packages (from scikit-surprise->surprise) (1.15.0)\n",
+ "Building wheels for collected packages: scikit-surprise\n",
+ " Building wheel for scikit-surprise (setup.py): started\n",
+ " Building wheel for scikit-surprise (setup.py): finished with status 'error'\n",
+ " Running setup.py clean for scikit-surprise\n",
+ "Failed to build scikit-surprise\n",
+ "Installing collected packages: scikit-surprise, surprise\n",
+ " Running setup.py install for scikit-surprise: started\n",
+ " Running setup.py install for scikit-surprise: finished with status 'error'\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
- " WARNING: The script jsonschema.exe is installed in 'C:\\Users\\Oli\\AppData\\Roaming\\Python\\Python38\\Scripts' which is not on PATH.\n",
- " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
- " WARNING: The script jupyter-trust.exe is installed in 'C:\\Users\\Oli\\AppData\\Roaming\\Python\\Python38\\Scripts' which is not on PATH.\n",
- " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n"
+ " ERROR: Command errored out with exit status 1:\n",
+ " command: 'c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\python.exe' -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '\"'\"'C:\\\\Users\\\\Oli\\\\AppData\\\\Local\\\\Temp\\\\pip-install-jibnz200\\\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\\setup.py'\"'\"'; __file__='\"'\"'C:\\\\Users\\\\Oli\\\\AppData\\\\Local\\\\Temp\\\\pip-install-jibnz200\\\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\\setup.py'\"'\"';f = getattr(tokenize, '\"'\"'open'\"'\"', open)(__file__) if os.path.exists(__file__) else io.StringIO('\"'\"'from setuptools import setup; setup()'\"'\"');code = f.read().replace('\"'\"'\\r\\n'\"'\"', '\"'\"'\\n'\"'\"');f.close();exec(compile(code, __file__, '\"'\"'exec'\"'\"'))' bdist_wheel -d 'C:\\Users\\Oli\\AppData\\Local\\Temp\\pip-wheel-dz4_1ymq'\n",
+ " cwd: C:\\Users\\Oli\\AppData\\Local\\Temp\\pip-install-jibnz200\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\n",
+ " Complete output (49 lines):\n",
+ " running bdist_wheel\n",
+ " running build\n",
+ " running build_py\n",
+ " creating build\n",
+ " creating build\\lib.win-amd64-3.8\n",
+ " creating build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\accuracy.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\builtin_datasets.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\dataset.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\dump.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\reader.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\trainset.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\utils.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\__init__.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\__main__.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " creating build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
+ " copying surprise\\model_selection\\search.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
+ " copying surprise\\model_selection\\split.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
+ " copying surprise\\model_selection\\validation.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
+ " copying surprise\\model_selection\\__init__.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
+ " creating build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\algo_base.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\baseline_only.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\knns.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\predictions.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\random_pred.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\__init__.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " running egg_info\n",
+ " writing scikit_surprise.egg-info\\PKG-INFO\n",
+ " writing dependency_links to scikit_surprise.egg-info\\dependency_links.txt\n",
+ " writing entry points to scikit_surprise.egg-info\\entry_points.txt\n",
+ " writing requirements to scikit_surprise.egg-info\\requires.txt\n",
+ " writing top-level names to scikit_surprise.egg-info\\top_level.txt\n",
+ " reading manifest file 'scikit_surprise.egg-info\\SOURCES.txt'\n",
+ " reading manifest template 'MANIFEST.in'\n",
+ " writing manifest file 'scikit_surprise.egg-info\\SOURCES.txt'\n",
+ " copying surprise\\similarities.c -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\similarities.pyx -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\prediction_algorithms\\co_clustering.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\matrix_factorization.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\optimize_baselines.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\slope_one.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\co_clustering.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\matrix_factorization.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\optimize_baselines.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\slope_one.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " running build_ext\n",
+ " building 'surprise.similarities' extension\n",
+ " error: Microsoft Visual C++ 14.0 is required. Get it with \"Build Tools for Visual Studio\": https://visualstudio.microsoft.com/downloads/\n",
+ " ----------------------------------------\n",
+ " ERROR: Failed building wheel for scikit-surprise\n",
+ " ERROR: Command errored out with exit status 1:\n",
+ " command: 'c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\python.exe' -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '\"'\"'C:\\\\Users\\\\Oli\\\\AppData\\\\Local\\\\Temp\\\\pip-install-jibnz200\\\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\\setup.py'\"'\"'; __file__='\"'\"'C:\\\\Users\\\\Oli\\\\AppData\\\\Local\\\\Temp\\\\pip-install-jibnz200\\\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\\setup.py'\"'\"';f = getattr(tokenize, '\"'\"'open'\"'\"', open)(__file__) if os.path.exists(__file__) else io.StringIO('\"'\"'from setuptools import setup; setup()'\"'\"');code = f.read().replace('\"'\"'\\r\\n'\"'\"', '\"'\"'\\n'\"'\"');f.close();exec(compile(code, __file__, '\"'\"'exec'\"'\"'))' install --record 'C:\\Users\\Oli\\AppData\\Local\\Temp\\pip-record-ivcio2vz\\install-record.txt' --single-version-externally-managed --user --prefix= --compile --install-headers 'C:\\Users\\Oli\\AppData\\Roaming\\Python\\Python38\\Include\\scikit-surprise'\n",
+ " cwd: C:\\Users\\Oli\\AppData\\Local\\Temp\\pip-install-jibnz200\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\n",
+ " Complete output (49 lines):\n",
+ " running install\n",
+ " running build\n",
+ " running build_py\n",
+ " creating build\n",
+ " creating build\\lib.win-amd64-3.8\n",
+ " creating build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\accuracy.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\builtin_datasets.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\dataset.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\dump.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\reader.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\trainset.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\utils.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\__init__.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\__main__.py -> build\\lib.win-amd64-3.8\\surprise\n",
+ " creating build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
+ " copying surprise\\model_selection\\search.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
+ " copying surprise\\model_selection\\split.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
+ " copying surprise\\model_selection\\validation.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
+ " copying surprise\\model_selection\\__init__.py -> build\\lib.win-amd64-3.8\\surprise\\model_selection\n",
+ " creating build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\algo_base.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\baseline_only.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\knns.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\predictions.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\random_pred.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\__init__.py -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " running egg_info\n",
+ " writing scikit_surprise.egg-info\\PKG-INFO\n",
+ " writing dependency_links to scikit_surprise.egg-info\\dependency_links.txt\n",
+ " writing entry points to scikit_surprise.egg-info\\entry_points.txt\n",
+ " writing requirements to scikit_surprise.egg-info\\requires.txt\n",
+ " writing top-level names to scikit_surprise.egg-info\\top_level.txt\n",
+ " reading manifest file 'scikit_surprise.egg-info\\SOURCES.txt'\n",
+ " reading manifest template 'MANIFEST.in'\n",
+ " writing manifest file 'scikit_surprise.egg-info\\SOURCES.txt'\n",
+ " copying surprise\\similarities.c -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\similarities.pyx -> build\\lib.win-amd64-3.8\\surprise\n",
+ " copying surprise\\prediction_algorithms\\co_clustering.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\matrix_factorization.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\optimize_baselines.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\slope_one.c -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\co_clustering.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\matrix_factorization.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\optimize_baselines.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " copying surprise\\prediction_algorithms\\slope_one.pyx -> build\\lib.win-amd64-3.8\\surprise\\prediction_algorithms\n",
+ " running build_ext\n",
+ " building 'surprise.similarities' extension\n",
+ " error: Microsoft Visual C++ 14.0 is required. Get it with \"Build Tools for Visual Studio\": https://visualstudio.microsoft.com/downloads/\n",
+ " ----------------------------------------\n",
+ "ERROR: Command errored out with exit status 1: 'c:\\users\\oli\\appdata\\local\\programs\\python\\python38\\python.exe' -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '\"'\"'C:\\\\Users\\\\Oli\\\\AppData\\\\Local\\\\Temp\\\\pip-install-jibnz200\\\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\\setup.py'\"'\"'; __file__='\"'\"'C:\\\\Users\\\\Oli\\\\AppData\\\\Local\\\\Temp\\\\pip-install-jibnz200\\\\scikit-surprise_07dc192d3cf347769fae216d5b05a944\\\\setup.py'\"'\"';f = getattr(tokenize, '\"'\"'open'\"'\"', open)(__file__) if os.path.exists(__file__) else io.StringIO('\"'\"'from setuptools import setup; setup()'\"'\"');code = f.read().replace('\"'\"'\\r\\n'\"'\"', '\"'\"'\\n'\"'\"');f.close();exec(compile(code, __file__, '\"'\"'exec'\"'\"'))' install --record 'C:\\Users\\Oli\\AppData\\Local\\Temp\\pip-record-ivcio2vz\\install-record.txt' --single-version-externally-managed --user --prefix= --compile --install-headers 'C:\\Users\\Oli\\AppData\\Roaming\\Python\\Python38\\Include\\scikit-surprise' Check the logs for full command output.\n"
]
}
],
@@ -87,10 +206,10 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 3,
"source": [
- "import numpy as np # linear algebra\r\n",
- "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\r\n",
+ "import numpy as np # maths\r\n",
+ "import pandas as pd # data processing\r\n",
"import matplotlib.pyplot as plt\r\n",
"import seaborn as sns\r\n",
"import os\r\n",
@@ -137,12 +256,12 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 4,
"source": [
"# Import Tables\r\n",
- "reviews = pd.read_csv('./ml-latest-small/ratings.csv', names=['userId', 'movieId', 'rating', 'timestamp'], delimiter=',', engine='python')\r\n",
- "movies = pd.read_csv('./ml-latest-small/movies.csv', names=['movieId', 'title', 'genres'], delimiter=',', engine='python')\r\n",
- "users = pd.read_csv('./ml-latest-small/users.csv', names=['userId', 'gender', 'age', 'occupation', 'zip'], delimiter='::', engine='python')\r\n",
+ "reviews = pd.read_csv('./ml-1m/ratings.dat', names=['userId', 'movieId', 'rating', 'timestamp'], delimiter='::', engine='python')\r\n",
+ "movies = pd.read_csv('./ml-1m/movies.dat', names=['movieId', 'title', 'genres'], delimiter='::', engine='python')\r\n",
+ "users = pd.read_csv('./ml-1m/users.dat', names=['userId', 'gender', 'age', 'occupation', 'zip'], delimiter='::', engine='python')\r\n",
"\r\n",
"# Print Table shape\r\n",
"print('Reviews shape:', reviews.shape)\r\n",
@@ -154,9 +273,9 @@
"output_type": "stream",
"name": "stdout",
"text": [
- "Reviews shape: (100836, 4)\n",
- "Users shape: (610, 5)\n",
- "Movies shape: (9742, 3)\n"
+ "Reviews shape: (1000209, 4)\n",
+ "Users shape: (6040, 5)\n",
+ "Movies shape: (3883, 3)\n"
]
}
],
@@ -164,7 +283,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 5,
"source": [
"# Drop unused Attributes\r\n",
"reviews.drop(['timestamp'], axis=1, inplace=True) # Time\r\n",
@@ -178,9 +297,9 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 6,
"source": [
- "# print movie table\r\n",
+ "# Print movie table\r\n",
"movies.head()"
],
"outputs": [
@@ -188,19 +307,19 @@
"output_type": "execute_result",
"data": {
"text/plain": [
- " movieId title \\\n",
- "0 1 Toy Story (1995) \n",
- "1 2 Jumanji (1995) \n",
- "2 3 Grumpier Old Men (1995) \n",
- "3 4 Waiting to Exhale (1995) \n",
- "4 5 Father of the Bride Part II (1995) \n",
+ " movieId title genres \\\n",
+ "0 1 Toy Story (1995) Animation|Children's|Comedy \n",
+ "1 2 Jumanji (1995) Adventure|Children's|Fantasy \n",
+ "2 3 Grumpier Old Men (1995) Comedy|Romance \n",
+ "3 4 Waiting to Exhale (1995) Comedy|Drama \n",
+ "4 5 Father of the Bride Part II (1995) Comedy \n",
"\n",
- " genres release_year \n",
- "0 Adventure|Animation|Children|Comedy|Fantasy 1995 \n",
- "1 Adventure|Children|Fantasy 1995 \n",
- "2 Comedy|Romance 1995 \n",
- "3 Comedy|Drama|Romance 1995 \n",
- "4 Comedy 1995 "
+ " release_year \n",
+ "0 1995 \n",
+ "1 1995 \n",
+ "2 1995 \n",
+ "3 1995 \n",
+ "4 1995 "
],
"text/html": [
"
\n",
@@ -232,14 +351,14 @@
"
0 | \n",
"
1 | \n",
"
Toy Story (1995) | \n",
- "
Adventure|Animation|Children|Comedy|Fantasy | \n",
+ "
Animation|Children's|Comedy | \n",
"
1995 | \n",
" \n",
"
\n",
" | 1 | \n",
" 2 | \n",
" Jumanji (1995) | \n",
- " Adventure|Children|Fantasy | \n",
+ " Adventure|Children's|Fantasy | \n",
" 1995 | \n",
"
\n",
"
\n",
@@ -253,7 +372,7 @@
" | 3 | \n",
" 4 | \n",
" Waiting to Exhale (1995) | \n",
- " Comedy|Drama|Romance | \n",
+ " Comedy|Drama | \n",
" 1995 | \n",
"
\n",
"
\n",
@@ -269,16 +388,16 @@
]
},
"metadata": {},
- "execution_count": 26
+ "execution_count": 6
}
],
"metadata": {}
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 7,
"source": [
- "# changed feature values based on README_users.txt\r\n",
+ "# Map coded feature values to readable labels based on README_users.txt\r\n",
"\r\n",
"ages_map = {1: 'Under 18',\r\n",
" 18: '18 - 24',\r\n",
@@ -321,16 +440,16 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 8,
"source": [
+ "# Plot age categories\r\n",
+ "\r\n",
"age_reindex = ['Under 18', '18 - 24', '25 - 34', '35 - 44', '45 - 49', '50 - 55', '56+']\r\n",
- "\r\n",
"age_counts = users['age'].value_counts().reindex(age_reindex)\r\n",
- "\r\n",
"sns.barplot(x=age_counts.values,\r\n",
" y=age_counts.index,\r\n",
" palette='magma').set_title(\r\n",
- " 'Users age', fontsize=24)\r\n",
+ " 'Users age', fontsize=12)\r\n",
"\r\n",
"plt.show()"
],
@@ -341,13 +460,1214 @@
"text/plain": [
""
],
- "image/svg+xml": "\r\n\r\n\r\n\r\n",
- "image/png": ""
+ "image/svg+xml": "\r\n\r\n\r\n\r\n",
+ "image/png": ""
},
"metadata": {}
}
],
"metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "source": [
+ "# Plot gender of users\r\n",
+ "gender_counts = users['gender'].value_counts()\r\n",
+ "colors1 = ['lightblue', 'pink']\r\n",
+ "pie = go.Pie(labels=gender_counts.index,\r\n",
+ " values=gender_counts.values,\r\n",
+ " marker=dict(colors=colors1),\r\n",
+ " hole=0.5)\r\n",
+ "layout = go.Layout(title='Gender Users', font=dict(size=12), legend=dict(orientation='h'))\r\n",
+ "\r\n",
+ "fig = go.Figure(data=[pie], layout=layout)\r\n",
+ "py.iplot(fig)"
+ ],
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "linkText": "Export to plot.ly",
+ "plotlyServerURL": "https://plot.ly",
+ "showLink": false
+ },
+ "data": [
+ {
+ "hole": 0.5,
+ "labels": [
+ "Male",
+ "Female"
+ ],
+ "marker": {
+ "colors": [
+ "lightblue",
+ "pink"
+ ]
+ },
+ "type": "pie",
+ "values": [
+ 4331,
+ 1709
+ ]
+ }
+ ],
+ "layout": {
+ "font": {
+ "size": 12
+ },
+ "legend": {
+ "orientation": "h"
+ },
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "Gender Users"
+ }
+ }
+ }
+ },
+ "metadata": {}
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "source": [
+ "# Merge the reviews, movies and users datasets\r\n",
+ "final_df = reviews.merge(movies, on='movieId', how='left').merge(users, on='userId', how='left')\r\n",
+ "print('final_df shape:', final_df.shape)\r\n",
+ "final_df.head()"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "final_df shape: (1000209, 9)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " userId movieId rating title \\\n",
+ "0 1 1193 5 One Flew Over the Cuckoo's Nest (1975) \n",
+ "1 1 661 3 James and the Giant Peach (1996) \n",
+ "2 1 914 3 My Fair Lady (1964) \n",
+ "3 1 3408 4 Erin Brockovich (2000) \n",
+ "4 1 2355 5 Bug's Life, A (1998) \n",
+ "\n",
+ " genres release_year gender age occupation \n",
+ "0 Drama 1975 Female Under 18 K-12 student \n",
+ "1 Animation|Children's|Musical 1996 Female Under 18 K-12 student \n",
+ "2 Musical|Romance 1964 Female Under 18 K-12 student \n",
+ "3 Drama 2000 Female Under 18 K-12 student \n",
+ "4 Animation|Children's|Comedy 1998 Female Under 18 K-12 student "
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " userId | \n",
+ " movieId | \n",
+ " rating | \n",
+ " title | \n",
+ " genres | \n",
+ " release_year | \n",
+ " gender | \n",
+ " age | \n",
+ " occupation | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 1193 | \n",
+ " 5 | \n",
+ " One Flew Over the Cuckoo's Nest (1975) | \n",
+ " Drama | \n",
+ " 1975 | \n",
+ " Female | \n",
+ " Under 18 | \n",
+ " K-12 student | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 661 | \n",
+ " 3 | \n",
+ " James and the Giant Peach (1996) | \n",
+ " Animation|Children's|Musical | \n",
+ " 1996 | \n",
+ " Female | \n",
+ " Under 18 | \n",
+ " K-12 student | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " 914 | \n",
+ " 3 | \n",
+ " My Fair Lady (1964) | \n",
+ " Musical|Romance | \n",
+ " 1964 | \n",
+ " Female | \n",
+ " Under 18 | \n",
+ " K-12 student | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1 | \n",
+ " 3408 | \n",
+ " 4 | \n",
+ " Erin Brockovich (2000) | \n",
+ " Drama | \n",
+ " 2000 | \n",
+ " Female | \n",
+ " Under 18 | \n",
+ " K-12 student | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1 | \n",
+ " 2355 | \n",
+ " 5 | \n",
+ " Bug's Life, A (1998) | \n",
+ " Animation|Children's|Comedy | \n",
+ " 1998 | \n",
+ " Female | \n",
+ " Under 18 | \n",
+ " K-12 student | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 10
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "source": [
+ "final_df[final_df['age'] == '18 - 24']['title'].value_counts()[:10].to_frame()"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " title\n",
+ "American Beauty (1999) 715\n",
+ "Star Wars: Episode VI - Return of the Jedi (1983) 586\n",
+ "Star Wars: Episode V - The Empire Strikes Back ... 579\n",
+ "Matrix, The (1999) 567\n",
+ "Star Wars: Episode IV - A New Hope (1977) 562\n",
+ "Braveheart (1995) 544\n",
+ "Saving Private Ryan (1998) 543\n",
+ "Jurassic Park (1993) 541\n",
+ "Terminator 2: Judgment Day (1991) 529\n",
+ "Sixth Sense, The (1999) 514"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " title | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | American Beauty (1999) | \n",
+ " 715 | \n",
+ "
\n",
+ " \n",
+ " | Star Wars: Episode VI - Return of the Jedi (1983) | \n",
+ " 586 | \n",
+ "
\n",
+ " \n",
+ " | Star Wars: Episode V - The Empire Strikes Back (1980) | \n",
+ " 579 | \n",
+ "
\n",
+ " \n",
+ " | Matrix, The (1999) | \n",
+ " 567 | \n",
+ "
\n",
+ " \n",
+ " | Star Wars: Episode IV - A New Hope (1977) | \n",
+ " 562 | \n",
+ "
\n",
+ " \n",
+ " | Braveheart (1995) | \n",
+ " 544 | \n",
+ "
\n",
+ " \n",
+ " | Saving Private Ryan (1998) | \n",
+ " 543 | \n",
+ "
\n",
+ " \n",
+ " | Jurassic Park (1993) | \n",
+ " 541 | \n",
+ "
\n",
+ " \n",
+ " | Terminator 2: Judgment Day (1991) | \n",
+ " 529 | \n",
+ "
\n",
+ " \n",
+ " | Sixth Sense, The (1999) | \n",
+ " 514 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 39
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "source": [
+ "# Print the number of unique movies and users\r\n",
+ "n_movies = final_df['movieId'].nunique()\r\n",
+ "n_users = final_df['userId'].nunique()\r\n",
+ "\r\n",
+ "print('Number of movies:', n_movies)\r\n",
+ "print('Number of users:', n_users) "
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Number of movies: 3706\n",
+ "Number of users: 6040\n"
+ ]
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "source": [
+ "# implement SVD with Python SurPRISE, a Python Recommendation Framework\r\n",
+ "\r\n",
+ "from surprise import Reader, Dataset, SVD, SVDpp\r\n",
+ "from surprise import accuracy\r\n",
+ "\r\n",
+ "reader = Reader(rating_scale=(1, 5))\r\n",
+ "dataset = Dataset.load_from_df(final_df[['userId', 'movieId', 'rating']], reader=reader)\r\n",
+ "\r\n",
+ "svd = SVD(n_factors=50)\r\n",
+ "svd_plusplus = SVDpp(n_factors=50)\r\n",
+ "\r\n",
+ "# train with SVD\r\n",
+ "trainset = dataset.build_full_trainset()\r\n",
+ "svd.fit(trainset)\r\n",
+ "# train with SVD++, ATTENTION: this takes a LONG TIME\r\n",
+ "# svd_plusplus.fit(trainset)\r\n"
+ ],
+ "outputs": [
+ {
+ "output_type": "error",
+ "ename": "ModuleNotFoundError",
+ "evalue": "No module named 'surprise'",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# implement SVD with Python SurPRISE, a Python Recommendation Framework\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0msurprise\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mReader\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDataset\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSVD\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSVDpp\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msurprise\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0maccuracy\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'surprise'"
+ ]
+ }
+ ],
+ "metadata": {}
}
],
"metadata": {