-
Notifications
You must be signed in to change notification settings - Fork 0
/
rec_hybrid
1 lines (1 loc) · 50.3 KB
/
rec_hybrid
1
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"rec_hybrid","provenance":[],"authorship_tag":"ABX9TyMzPmwK/UOqsbhTDXpXBpoE"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":1,"metadata":{"id":"lWqS5bu4sbrK","executionInfo":{"status":"ok","timestamp":1648339610852,"user_tz":-540,"elapsed":3692,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}}},"outputs":[],"source":["import numpy as np\n","import pandas as pd\n","from sklearn.utils import shuffle\n","import tensorflow as tf"]},{"cell_type":"code","source":["r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']\n","ratings = pd.read_csv('u.data', names=r_cols, sep='\\t',encoding='latin-1')\n","ratings = ratings[['user_id', 'movie_id', 'rating']].astype(int)"],"metadata":{"id":"iCV3bSDMsluy","executionInfo":{"status":"ok","timestamp":1648339616196,"user_tz":-540,"elapsed":232,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}}},"execution_count":2,"outputs":[]},{"cell_type":"code","source":["\n","TRAIN_SIZE = 0.75\n","ratings = shuffle(ratings, random_state=1)\n","cutoff = int(TRAIN_SIZE * len(ratings))\n","ratings_train = ratings.iloc[:cutoff]\n","ratings_test = ratings.iloc[cutoff:]"],"metadata":{"id":"_G05xUBvsn5R","executionInfo":{"status":"ok","timestamp":1648339620867,"user_tz":-540,"elapsed":245,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["def RMSE2(y_true, y_pred):\n"," return np.sqrt(np.mean((np.array(y_true) - np.array(y_pred))**2))"],"metadata":{"id":"FOnt5mLDspCZ","executionInfo":{"status":"ok","timestamp":1648339625230,"user_tz":-540,"elapsed":222,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}}},"execution_count":4,"outputs":[]},{"cell_type":"code","source":["rating_matrix = ratings_train.pivot(index='user_id', columns='movie_id', values='rating')"],"metadata":{"id":"rLCu28KXsqEf","executionInfo":{"status":"ok","timestamp":1648339629503,"user_tz":-540,"elapsed":331,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}}},"execution_count":5,"outputs":[]},{"cell_type":"code","source":["from sklearn.metrics.pairwise import cosine_similarity"],"metadata":{"id":"obxaYrmdsrIw","executionInfo":{"status":"ok","timestamp":1648339642248,"user_tz":-540,"elapsed":234,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}}},"execution_count":6,"outputs":[]},{"cell_type":"code","source":["matrix_dummy = rating_matrix.copy().fillna(0)\n","user_similarity = cosine_similarity(matrix_dummy, matrix_dummy)\n","user_similarity = pd.DataFrame(user_similarity, index=rating_matrix.index, columns=rating_matrix.index)"],"metadata":{"id":"jBjTuEoisuRi","executionInfo":{"status":"ok","timestamp":1648339649242,"user_tz":-540,"elapsed":610,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}}},"execution_count":7,"outputs":[]},{"cell_type":"code","source":["user_similarity"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":519},"id":"mfv_mcDYsv49","executionInfo":{"status":"ok","timestamp":1648339652968,"user_tz":-540,"elapsed":252,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}},"outputId":"555d1837-fed4-49e3-ed50-068de4b4329e"},"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/plain":["user_id 1 2 3 4 5 6 7 \\\n","user_id \n","1 1.000000 0.118894 0.027479 0.043384 0.260520 0.336632 0.307169 \n","2 0.118894 1.000000 0.087667 0.167153 0.060599 0.184610 0.056137 \n","3 0.027479 0.087667 1.000000 0.199823 0.028460 0.079827 0.055555 \n","4 0.043384 0.167153 0.199823 1.000000 0.030303 0.030273 0.072929 \n","5 0.260520 0.060599 0.028460 0.030303 1.000000 0.169481 0.262558 \n","... ... ... ... ... ... ... ... \n","939 0.090740 0.216103 0.019598 0.000000 0.087379 0.088731 0.069541 \n","940 0.229875 0.185626 0.163782 0.164329 0.207298 0.259508 0.222177 \n","941 0.082766 0.083104 0.062447 0.066491 0.155782 0.076413 0.045202 \n","942 0.094420 0.116565 0.131386 0.079274 0.120876 0.284027 0.220873 \n","943 0.261059 0.038939 0.020482 0.081783 0.232376 0.169697 0.276898 \n","\n","user_id 8 9 10 ... 934 935 936 \\\n","user_id ... \n","1 0.255234 0.048358 0.311033 ... 0.294259 0.087302 0.204429 \n","2 0.096995 0.138731 0.122185 ... 0.085297 0.189758 0.256179 \n","3 0.080656 0.000000 0.035705 ... 0.041629 0.040328 0.109325 \n","4 0.185741 0.080936 0.084981 ... 0.045556 0.000000 0.128589 \n","5 0.210069 0.072622 0.138234 ... 0.260848 0.042810 0.075570 \n","... ... ... ... ... ... ... ... \n","939 0.095983 0.055567 0.062182 ... 0.056636 0.197151 0.169130 \n","940 0.245705 0.051795 0.249833 ... 0.277813 0.094757 0.167268 \n","941 0.137625 0.088527 0.059929 ... 0.026934 0.117419 0.179141 \n","942 0.116436 0.046357 0.183558 ... 0.152376 0.048037 0.083757 \n","943 0.201518 0.094368 0.162465 ... 0.198771 0.110724 0.077695 \n","\n","user_id 937 938 939 940 941 942 943 \n","user_id \n","1 0.111134 0.132648 0.090740 0.229875 0.082766 0.094420 0.261059 \n","2 0.272712 0.178380 0.216103 0.185626 0.083104 0.116565 0.038939 \n","3 0.059769 0.134040 0.019598 0.163782 0.062447 0.131386 0.020482 \n","4 0.069175 0.123098 0.000000 0.164329 0.066491 0.079274 0.081783 \n","5 0.051034 0.114721 0.087379 0.207298 0.155782 0.120876 0.232376 \n","... ... ... ... ... ... ... ... \n","939 0.229859 0.308653 1.000000 0.063317 0.125536 0.021343 0.088901 \n","940 0.108380 0.142710 0.063317 1.000000 0.056857 0.230674 0.196262 \n","941 0.202774 0.251722 0.125536 0.056857 1.000000 0.068007 0.029818 \n","942 0.067921 0.098533 0.021343 0.230674 0.068007 1.000000 0.097588 \n","943 0.026057 0.117605 0.088901 0.196262 0.029818 0.097588 1.000000 \n","\n","[943 rows x 943 columns]"],"text/html":["\n"," <div id=\"df-ebd13ded-a484-40e5-8392-a69691e1b8e0\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th>user_id</th>\n"," <th>1</th>\n"," <th>2</th>\n"," <th>3</th>\n"," <th>4</th>\n"," <th>5</th>\n"," <th>6</th>\n"," <th>7</th>\n"," <th>8</th>\n"," <th>9</th>\n"," <th>10</th>\n"," <th>...</th>\n"," <th>934</th>\n"," <th>935</th>\n"," <th>936</th>\n"," <th>937</th>\n"," <th>938</th>\n"," <th>939</th>\n"," <th>940</th>\n"," <th>941</th>\n"," <th>942</th>\n"," <th>943</th>\n"," </tr>\n"," <tr>\n"," <th>user_id</th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>1</th>\n"," <td>1.000000</td>\n"," <td>0.118894</td>\n"," <td>0.027479</td>\n"," <td>0.043384</td>\n"," <td>0.260520</td>\n"," <td>0.336632</td>\n"," <td>0.307169</td>\n"," <td>0.255234</td>\n"," <td>0.048358</td>\n"," <td>0.311033</td>\n"," <td>...</td>\n"," <td>0.294259</td>\n"," <td>0.087302</td>\n"," <td>0.204429</td>\n"," <td>0.111134</td>\n"," <td>0.132648</td>\n"," <td>0.090740</td>\n"," <td>0.229875</td>\n"," <td>0.082766</td>\n"," <td>0.094420</td>\n"," <td>0.261059</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>0.118894</td>\n"," <td>1.000000</td>\n"," <td>0.087667</td>\n"," <td>0.167153</td>\n"," <td>0.060599</td>\n"," <td>0.184610</td>\n"," <td>0.056137</td>\n"," <td>0.096995</td>\n"," <td>0.138731</td>\n"," <td>0.122185</td>\n"," <td>...</td>\n"," <td>0.085297</td>\n"," <td>0.189758</td>\n"," <td>0.256179</td>\n"," <td>0.272712</td>\n"," <td>0.178380</td>\n"," <td>0.216103</td>\n"," <td>0.185626</td>\n"," <td>0.083104</td>\n"," <td>0.116565</td>\n"," <td>0.038939</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>0.027479</td>\n"," <td>0.087667</td>\n"," <td>1.000000</td>\n"," <td>0.199823</td>\n"," <td>0.028460</td>\n"," <td>0.079827</td>\n"," <td>0.055555</td>\n"," <td>0.080656</td>\n"," <td>0.000000</td>\n"," <td>0.035705</td>\n"," <td>...</td>\n"," <td>0.041629</td>\n"," <td>0.040328</td>\n"," <td>0.109325</td>\n"," <td>0.059769</td>\n"," <td>0.134040</td>\n"," <td>0.019598</td>\n"," <td>0.163782</td>\n"," <td>0.062447</td>\n"," <td>0.131386</td>\n"," <td>0.020482</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>0.043384</td>\n"," <td>0.167153</td>\n"," <td>0.199823</td>\n"," <td>1.000000</td>\n"," <td>0.030303</td>\n"," <td>0.030273</td>\n"," <td>0.072929</td>\n"," <td>0.185741</td>\n"," <td>0.080936</td>\n"," <td>0.084981</td>\n"," <td>...</td>\n"," <td>0.045556</td>\n"," <td>0.000000</td>\n"," <td>0.128589</td>\n"," <td>0.069175</td>\n"," <td>0.123098</td>\n"," <td>0.000000</td>\n"," <td>0.164329</td>\n"," <td>0.066491</td>\n"," <td>0.079274</td>\n"," <td>0.081783</td>\n"," </tr>\n"," <tr>\n"," <th>5</th>\n"," <td>0.260520</td>\n"," <td>0.060599</td>\n"," <td>0.028460</td>\n"," <td>0.030303</td>\n"," <td>1.000000</td>\n"," <td>0.169481</td>\n"," <td>0.262558</td>\n"," <td>0.210069</td>\n"," <td>0.072622</td>\n"," <td>0.138234</td>\n"," <td>...</td>\n"," <td>0.260848</td>\n"," <td>0.042810</td>\n"," <td>0.075570</td>\n"," <td>0.051034</td>\n"," <td>0.114721</td>\n"," <td>0.087379</td>\n"," <td>0.207298</td>\n"," <td>0.155782</td>\n"," <td>0.120876</td>\n"," <td>0.232376</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>939</th>\n"," <td>0.090740</td>\n"," <td>0.216103</td>\n"," <td>0.019598</td>\n"," <td>0.000000</td>\n"," <td>0.087379</td>\n"," <td>0.088731</td>\n"," <td>0.069541</td>\n"," <td>0.095983</td>\n"," <td>0.055567</td>\n"," <td>0.062182</td>\n"," <td>...</td>\n"," <td>0.056636</td>\n"," <td>0.197151</td>\n"," <td>0.169130</td>\n"," <td>0.229859</td>\n"," <td>0.308653</td>\n"," <td>1.000000</td>\n"," <td>0.063317</td>\n"," <td>0.125536</td>\n"," <td>0.021343</td>\n"," <td>0.088901</td>\n"," </tr>\n"," <tr>\n"," <th>940</th>\n"," <td>0.229875</td>\n"," <td>0.185626</td>\n"," <td>0.163782</td>\n"," <td>0.164329</td>\n"," <td>0.207298</td>\n"," <td>0.259508</td>\n"," <td>0.222177</td>\n"," <td>0.245705</td>\n"," <td>0.051795</td>\n"," <td>0.249833</td>\n"," <td>...</td>\n"," <td>0.277813</td>\n"," <td>0.094757</td>\n"," <td>0.167268</td>\n"," <td>0.108380</td>\n"," <td>0.142710</td>\n"," <td>0.063317</td>\n"," <td>1.000000</td>\n"," <td>0.056857</td>\n"," <td>0.230674</td>\n"," <td>0.196262</td>\n"," </tr>\n"," <tr>\n"," <th>941</th>\n"," <td>0.082766</td>\n"," <td>0.083104</td>\n"," <td>0.062447</td>\n"," <td>0.066491</td>\n"," <td>0.155782</td>\n"," <td>0.076413</td>\n"," <td>0.045202</td>\n"," <td>0.137625</td>\n"," <td>0.088527</td>\n"," <td>0.059929</td>\n"," <td>...</td>\n"," <td>0.026934</td>\n"," <td>0.117419</td>\n"," <td>0.179141</td>\n"," <td>0.202774</td>\n"," <td>0.251722</td>\n"," <td>0.125536</td>\n"," <td>0.056857</td>\n"," <td>1.000000</td>\n"," <td>0.068007</td>\n"," <td>0.029818</td>\n"," </tr>\n"," <tr>\n"," <th>942</th>\n"," <td>0.094420</td>\n"," <td>0.116565</td>\n"," <td>0.131386</td>\n"," <td>0.079274</td>\n"," <td>0.120876</td>\n"," <td>0.284027</td>\n"," <td>0.220873</td>\n"," <td>0.116436</td>\n"," <td>0.046357</td>\n"," <td>0.183558</td>\n"," <td>...</td>\n"," <td>0.152376</td>\n"," <td>0.048037</td>\n"," <td>0.083757</td>\n"," <td>0.067921</td>\n"," <td>0.098533</td>\n"," <td>0.021343</td>\n"," <td>0.230674</td>\n"," <td>0.068007</td>\n"," <td>1.000000</td>\n"," <td>0.097588</td>\n"," </tr>\n"," <tr>\n"," <th>943</th>\n"," <td>0.261059</td>\n"," <td>0.038939</td>\n"," <td>0.020482</td>\n"," <td>0.081783</td>\n"," <td>0.232376</td>\n"," <td>0.169697</td>\n"," <td>0.276898</td>\n"," <td>0.201518</td>\n"," <td>0.094368</td>\n"," <td>0.162465</td>\n"," <td>...</td>\n"," <td>0.198771</td>\n"," <td>0.110724</td>\n"," <td>0.077695</td>\n"," <td>0.026057</td>\n"," <td>0.117605</td>\n"," <td>0.088901</td>\n"," <td>0.196262</td>\n"," <td>0.029818</td>\n"," <td>0.097588</td>\n"," <td>1.000000</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>943 rows × 943 columns</p>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ebd13ded-a484-40e5-8392-a69691e1b8e0')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-ebd13ded-a484-40e5-8392-a69691e1b8e0 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-ebd13ded-a484-40e5-8392-a69691e1b8e0');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":8}]},{"cell_type":"code","source":["matrix_dummy"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":455},"id":"SqDTSYGwsw4e","executionInfo":{"status":"ok","timestamp":1648339661497,"user_tz":-540,"elapsed":227,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}},"outputId":"b54cf242-8b7f-4778-de43-ed818bf1bdce"},"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["movie_id 1 2 3 4 5 6 7 8 9 10 ... \\\n","user_id ... \n","1 0.0 3.0 0.0 3.0 0.0 0.0 4.0 0.0 0.0 3.0 ... \n","2 4.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 ... \n","3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... \n","4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... \n","5 4.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... \n","... ... ... ... ... ... ... ... ... ... ... ... \n","939 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 5.0 0.0 ... \n","940 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0 3.0 0.0 ... \n","941 0.0 0.0 0.0 0.0 0.0 0.0 4.0 0.0 0.0 0.0 ... \n","942 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... \n","943 0.0 5.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... \n","\n","movie_id 1670 1671 1672 1673 1675 1676 1678 1679 1680 1681 \n","user_id \n","1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n","2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n","3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n","4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n","5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n","... ... ... ... ... ... ... ... ... ... ... \n","939 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n","940 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n","941 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n","942 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n","943 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n","\n","[943 rows x 1641 columns]"],"text/html":["\n"," <div id=\"df-b4ab984c-91a9-44a8-96fb-974aa3ea6691\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th>movie_id</th>\n"," <th>1</th>\n"," <th>2</th>\n"," <th>3</th>\n"," <th>4</th>\n"," <th>5</th>\n"," <th>6</th>\n"," <th>7</th>\n"," <th>8</th>\n"," <th>9</th>\n"," <th>10</th>\n"," <th>...</th>\n"," <th>1670</th>\n"," <th>1671</th>\n"," <th>1672</th>\n"," <th>1673</th>\n"," <th>1675</th>\n"," <th>1676</th>\n"," <th>1678</th>\n"," <th>1679</th>\n"," <th>1680</th>\n"," <th>1681</th>\n"," </tr>\n"," <tr>\n"," <th>user_id</th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," <th></th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>1</th>\n"," <td>0.0</td>\n"," <td>3.0</td>\n"," <td>0.0</td>\n"," <td>3.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>4.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>3.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>4.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>5</th>\n"," <td>4.0</td>\n"," <td>3.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>939</th>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>5.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>940</th>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>3.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>941</th>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>4.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>942</th>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>943</th>\n"," <td>0.0</td>\n"," <td>5.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>943 rows × 1641 columns</p>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b4ab984c-91a9-44a8-96fb-974aa3ea6691')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-b4ab984c-91a9-44a8-96fb-974aa3ea6691 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-b4ab984c-91a9-44a8-96fb-974aa3ea6691');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":9}]},{"cell_type":"code","source":["rating_matrix.mean(axis=1)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"_0tbQ3-ns5ou","executionInfo":{"status":"ok","timestamp":1648339693206,"user_tz":-540,"elapsed":244,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}},"outputId":"a6515e10-1829-4430-d157-add752f36038"},"execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/plain":["user_id\n","1 3.600000\n","2 3.785714\n","3 2.825000\n","4 4.166667\n","5 2.899225\n"," ... \n","939 4.470588\n","940 3.481928\n","941 4.200000\n","942 4.303030\n","943 3.414634\n","Length: 943, dtype: float64"]},"metadata":{},"execution_count":11}]},{"cell_type":"code","source":["# train 데이터의 user의 rating 평균과 영화의 평점편차 계산 \n","rating_mean = rating_matrix.mean(axis=1)\n","rating_bias = (rating_matrix.T - rating_mean).T"],"metadata":{"id":"9nUE714pszBr","executionInfo":{"status":"ok","timestamp":1648339674697,"user_tz":-540,"elapsed":224,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}}},"execution_count":10,"outputs":[]},{"cell_type":"code","source":["def CF_knn_bias(user_id, movie_id, neighbor_size=0):\n"," if movie_id in rating_bias:\n"," sim_scores = user_similarity[user_id]\n"," movie_ratings = rating_bias[movie_id]\n"," none_rating_idx = movie_ratings[movie_ratings.isnull()].index\n"," movie_ratings = movie_ratings.drop(none_rating_idx)\n"," sim_scores = sim_scores.drop(none_rating_idx)\n"," if neighbor_size == 0:\n"," prediction = np.dot(sim_scores, movie_ratings) / sim_scores.sum()\n"," prediction = prediction + rating_mean[user_id]\n"," else:\n"," if len(sim_scores) > 1:\n"," neighbor_size = min(neighbor_size, len(sim_scores))\n"," sim_scores = np.array(sim_scores)\n"," movie_ratings = np.array(movie_ratings)\n"," user_idx = np.argsort(sim_scores)\n"," sim_scores = sim_scores[user_idx][-neighbor_size:]\n"," movie_ratings = movie_ratings[user_idx][-neighbor_size:]\n"," prediction = np.dot(sim_scores, movie_ratings) / sim_scores.sum()\n"," prediction = prediction + rating_mean[user_id]\n"," else:\n"," prediction = rating_mean[user_id]\n"," else:\n"," prediction = rating_mean[user_id]\n"," return prediction"],"metadata":{"id":"KJHDnRUWs2Me","executionInfo":{"status":"ok","timestamp":1648339715127,"user_tz":-540,"elapsed":224,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}}},"execution_count":12,"outputs":[]},{"cell_type":"code","source":["class NEW_MF():\n"," def __init__(self, ratings, K, alpha, beta, iterations, verbose=True):\n"," self.R = np.array(ratings)\n"," item_id_index = []\n"," index_item_id = []\n"," for i, one_id in enumerate(ratings):\n"," item_id_index.append([one_id, i])\n"," index_item_id.append([i, one_id])\n"," self.item_id_index = dict(item_id_index)\n"," self.index_item_id = dict(index_item_id) \n"," user_id_index = []\n"," index_user_id = []\n"," for i, one_id in enumerate(ratings.T):\n"," user_id_index.append([one_id, i])\n"," index_user_id.append([i, one_id])\n"," self.user_id_index = dict(user_id_index)\n"," self.index_user_id = dict(index_user_id)\n"," self.num_users, self.num_items = np.shape(self.R)\n"," self.K = K\n"," self.alpha = alpha\n"," self.beta = beta\n"," self.iterations = iterations\n"," self.verbose = verbose\n","\n"," def rmse(self):\n"," xs, ys = self.R.nonzero()\n"," self.predictions = []\n"," self.errors = []\n"," for x, y in zip(xs, ys):\n"," prediction = self.get_prediction(x, y)\n"," self.predictions.append(prediction)\n"," self.errors.append(self.R[x, y] - prediction)\n"," self.predictions = np.array(self.predictions)\n"," self.errors = np.array(self.errors)\n"," return np.sqrt(np.mean(self.errors**2))\n","\n"," # Ratings for user i and item j\n"," def get_prediction(self, i, j):\n"," prediction = self.b + self.b_u[i] + self.b_d[j] + self.P[i, :].dot(self.Q[j, :].T)\n"," return prediction\n","\n"," # Stochastic gradient descent to get optimized P and Q matrix\n"," def sgd(self):\n"," for i, j, r in self.samples:\n"," prediction = self.get_prediction(i, j)\n"," e = (r - prediction)\n","\n"," self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])\n"," self.b_d[j] += self.alpha * (e - self.beta * self.b_d[j])\n","\n"," self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i,:])\n"," self.Q[j, :] += self.alpha * (e * self.P[i, :] - self.beta * self.Q[j,:])\n","\n"," def set_test(self, ratings_test):\n"," test_set = []\n"," for i in range(len(ratings_test)):\n"," x = self.user_id_index[ratings_test.iloc[i, 0]] # [i, 0] is user_id\n"," y = self.item_id_index[ratings_test.iloc[i, 1]] # [i, 1] is movie_id\n"," z = ratings_test.iloc[i, 2] # [i, 2] is rating\n"," test_set.append([x, y, z]) # [user_idx, movie_idx, rating]\n"," self.R[x, y] = 0 # set rating 0 cuz this x-y is used as test purpose\n","\n"," self.test_set = test_set\n"," return test_set\n","\n"," def test_rmse(self):\n"," error = 0\n"," for one_set in self.test_set: # [user_idx, movie_idx, rating]\n"," predicted = self.get_prediction(one_set[0], one_set[1])\n"," error += pow(one_set[2] - predicted, 2)\n"," return np.sqrt(error/len(self.test_set))\n","\n"," def test(self):\n"," # Initializing user-feature and item-feature matrix\n"," self.P = np.random.normal(scale=1./self.K, size=(self.num_users, self.K))\n"," self.Q = np.random.normal(scale=1./self.K, size=(self.num_items, self.K))\n","\n"," # Initializing the bias terms\n"," self.b_u = np.zeros(self.num_users)\n"," self.b_d = np.zeros(self.num_items)\n"," self.b = np.mean(self.R[self.R.nonzero()])\n","\n"," # List of training samples\n"," rows, columns = self.R.nonzero()\n"," self.samples = [(i, j, self.R[i,j]) for i, j in zip(rows, columns)]\n","\n"," # Stochastic gradient descent for given number of iterations\n"," training_process = []\n"," for i in range(self.iterations):\n"," np.random.shuffle(self.samples)\n"," self.sgd()\n"," rmse1 = self.rmse()\n"," rmse2 = self.test_rmse()\n"," training_process.append((i+1, rmse1, rmse2))\n"," if self.verbose:\n"," if (i+1) % 10 == 0:\n"," print(\"Iteration: %d ; Train RMSE = %.4f ; Test RMSE = %.4f\" % (i+1, rmse1, rmse2))\n"," return training_process\n","\n"," # Ratings for given user_id and item_id\n"," def get_one_prediction(self, user_id, item_id):\n"," prediction = self.get_prediction(self.user_id_index[user_id], self.item_id_index[item_id])\n"," return prediction\n","\n"," # Full user-movie rating matrix\n"," def full_prediction(self):\n"," return self.b + self.b_u[:,np.newaxis] + self.b_d[np.newaxis,:] + self.P.dot(self.Q.T)"],"metadata":{"id":"Eq1D2-VftAEJ","executionInfo":{"status":"ok","timestamp":1648339834422,"user_tz":-540,"elapsed":602,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}}},"execution_count":13,"outputs":[]},{"cell_type":"code","source":["R_temp = ratings.pivot(index='user_id', columns='movie_id', values='rating').fillna(0)\n","mf = NEW_MF(R_temp, K=200, alpha=0.001, beta=0.02, iterations=250, verbose=True)\n","test_set = mf.set_test(ratings_test)\n","result = mf.test()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0pVMDbW5tdGG","executionInfo":{"status":"ok","timestamp":1648340438639,"user_tz":-540,"elapsed":570530,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}},"outputId":"ac0b3732-ef40-41aa-c34b-1bc9019d0c85"},"execution_count":14,"outputs":[{"output_type":"stream","name":"stdout","text":["Iteration: 10 ; Train RMSE = 0.9664 ; Test RMSE = 0.9833\n","Iteration: 20 ; Train RMSE = 0.9420 ; Test RMSE = 0.9644\n","Iteration: 30 ; Train RMSE = 0.9313 ; Test RMSE = 0.9566\n","Iteration: 40 ; Train RMSE = 0.9253 ; Test RMSE = 0.9524\n","Iteration: 50 ; Train RMSE = 0.9214 ; Test RMSE = 0.9497\n","Iteration: 60 ; Train RMSE = 0.9186 ; Test RMSE = 0.9480\n","Iteration: 70 ; Train RMSE = 0.9166 ; Test RMSE = 0.9468\n","Iteration: 80 ; Train RMSE = 0.9148 ; Test RMSE = 0.9459\n","Iteration: 90 ; Train RMSE = 0.9131 ; Test RMSE = 0.9451\n","Iteration: 100 ; Train RMSE = 0.9113 ; Test RMSE = 0.9444\n","Iteration: 110 ; Train RMSE = 0.9091 ; Test RMSE = 0.9436\n","Iteration: 120 ; Train RMSE = 0.9061 ; Test RMSE = 0.9425\n","Iteration: 130 ; Train RMSE = 0.9018 ; Test RMSE = 0.9409\n","Iteration: 140 ; Train RMSE = 0.8958 ; Test RMSE = 0.9387\n","Iteration: 150 ; Train RMSE = 0.8875 ; Test RMSE = 0.9356\n","Iteration: 160 ; Train RMSE = 0.8770 ; Test RMSE = 0.9319\n","Iteration: 170 ; Train RMSE = 0.8646 ; Test RMSE = 0.9280\n","Iteration: 180 ; Train RMSE = 0.8507 ; Test RMSE = 0.9242\n","Iteration: 190 ; Train RMSE = 0.8354 ; Test RMSE = 0.9208\n","Iteration: 200 ; Train RMSE = 0.8185 ; Test RMSE = 0.9177\n","Iteration: 210 ; Train RMSE = 0.8001 ; Test RMSE = 0.9149\n","Iteration: 220 ; Train RMSE = 0.7801 ; Test RMSE = 0.9127\n","Iteration: 230 ; Train RMSE = 0.7587 ; Test RMSE = 0.9109\n","Iteration: 240 ; Train RMSE = 0.7362 ; Test RMSE = 0.9098\n","Iteration: 250 ; Train RMSE = 0.7129 ; Test RMSE = 0.9093\n"]}]},{"cell_type":"code","source":[""],"metadata":{"id":"InPgFvTAtleA"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## hybrid one"],"metadata":{"id":"huVW2Ah9tz1F"}},{"cell_type":"code","source":["def recommender0(recomm_list, mf):\n"," recommendations = np.array([mf.get_one_prediction(user, movie) for (user, movie) in recomm_list])\n"," return recommendations\n","\n","def recommender1(recomm_list, neighbor_size=0):\n"," recommendations = np.array([CF_knn_bias(user, movie, neighbor_size) for (user, movie) in recomm_list])\n"," return recommendations"],"metadata":{"id":"ZskGGiTQt0sw","executionInfo":{"status":"ok","timestamp":1648340438639,"user_tz":-540,"elapsed":14,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}}},"execution_count":15,"outputs":[]},{"cell_type":"code","source":["recomm_list = np.array(ratings_test.iloc[:, [0, 1]])\n","predictions0 = recommender0(recomm_list, mf)\n","RMSE2(ratings_test.iloc[:, 2], predictions0)\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"DiI__UQjt1CM","executionInfo":{"status":"ok","timestamp":1648340939069,"user_tz":-540,"elapsed":298,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}},"outputId":"5aa4d129-a775-40f8-ba88-ddd7f646b672"},"execution_count":19,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.9093459604476004"]},"metadata":{},"execution_count":19}]},{"cell_type":"code","source":["predictions1 = recommender1(recomm_list, 37)\n","RMSE2(ratings_test.iloc[:, 2], predictions1)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cMwSdzokxptD","executionInfo":{"status":"ok","timestamp":1648340971698,"user_tz":-540,"elapsed":30928,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}},"outputId":"1fbd5c30-2591-47b8-f0c3-0ebbee09204c"},"execution_count":20,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.9467199341641682"]},"metadata":{},"execution_count":20}]},{"cell_type":"code","source":["weight = [0.8, 0.2]\n","predictions = predictions0 * weight[0] + predictions1 * weight[1]\n","RMSE2(ratings_test.iloc[:, 2], predictions)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"s8yY9Wz6t2s7","executionInfo":{"status":"ok","timestamp":1648340468842,"user_tz":-540,"elapsed":10,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}},"outputId":"8c6112c4-ad80-4919-f7e5-0f82352a3c97"},"execution_count":17,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.9090282144455832"]},"metadata":{},"execution_count":17}]},{"cell_type":"code","source":["for i in np.arange(0, 1, 0.01):\n"," weight = [i, 1.0 - i]\n"," predictions = predictions0 * weight[0] + predictions1 * weight[1]\n"," print(\"Weights - %.2f : %.2f ; RMSE = %.7f\" % (weight[0], \n"," weight[1], RMSE2(ratings_test.iloc[:, 2], predictions)))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tpqHiDhDt37Y","executionInfo":{"status":"ok","timestamp":1648340468842,"user_tz":-540,"elapsed":7,"user":{"displayName":"Roy Son","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhVMzhGmOLO-LtCUnDj98IHVt9qwUVSM38Ow5VCJA=s64","userId":"14004762039756736463"}},"outputId":"be7765e7-c58e-458a-9e9c-8cba1080c0bc"},"execution_count":18,"outputs":[{"output_type":"stream","name":"stdout","text":["Weights - 0.00 : 1.00 ; RMSE = 0.9467199\n","Weights - 0.01 : 0.99 ; RMSE = 0.9458809\n","Weights - 0.02 : 0.98 ; RMSE = 0.9450508\n","Weights - 0.03 : 0.97 ; RMSE = 0.9442294\n","Weights - 0.04 : 0.96 ; RMSE = 0.9434169\n","Weights - 0.05 : 0.95 ; RMSE = 0.9426133\n","Weights - 0.06 : 0.94 ; RMSE = 0.9418186\n","Weights - 0.07 : 0.93 ; RMSE = 0.9410329\n","Weights - 0.08 : 0.92 ; RMSE = 0.9402561\n","Weights - 0.09 : 0.91 ; RMSE = 0.9394882\n","Weights - 0.10 : 0.90 ; RMSE = 0.9387294\n","Weights - 0.11 : 0.89 ; RMSE = 0.9379795\n","Weights - 0.12 : 0.88 ; RMSE = 0.9372387\n","Weights - 0.13 : 0.87 ; RMSE = 0.9365070\n","Weights - 0.14 : 0.86 ; RMSE = 0.9357843\n","Weights - 0.15 : 0.85 ; RMSE = 0.9350708\n","Weights - 0.16 : 0.84 ; RMSE = 0.9343663\n","Weights - 0.17 : 0.83 ; RMSE = 0.9336710\n","Weights - 0.18 : 0.82 ; RMSE = 0.9329849\n","Weights - 0.19 : 0.81 ; RMSE = 0.9323079\n","Weights - 0.20 : 0.80 ; RMSE = 0.9316402\n","Weights - 0.21 : 0.79 ; RMSE = 0.9309817\n","Weights - 0.22 : 0.78 ; RMSE = 0.9303324\n","Weights - 0.23 : 0.77 ; RMSE = 0.9296924\n","Weights - 0.24 : 0.76 ; RMSE = 0.9290616\n","Weights - 0.25 : 0.75 ; RMSE = 0.9284402\n","Weights - 0.26 : 0.74 ; RMSE = 0.9278280\n","Weights - 0.27 : 0.73 ; RMSE = 0.9272253\n","Weights - 0.28 : 0.72 ; RMSE = 0.9266318\n","Weights - 0.29 : 0.71 ; RMSE = 0.9260478\n","Weights - 0.30 : 0.70 ; RMSE = 0.9254731\n","Weights - 0.31 : 0.69 ; RMSE = 0.9249079\n","Weights - 0.32 : 0.68 ; RMSE = 0.9243520\n","Weights - 0.33 : 0.67 ; RMSE = 0.9238056\n","Weights - 0.34 : 0.66 ; RMSE = 0.9232687\n","Weights - 0.35 : 0.65 ; RMSE = 0.9227413\n","Weights - 0.36 : 0.64 ; RMSE = 0.9222233\n","Weights - 0.37 : 0.63 ; RMSE = 0.9217148\n","Weights - 0.38 : 0.62 ; RMSE = 0.9212159\n","Weights - 0.39 : 0.61 ; RMSE = 0.9207265\n","Weights - 0.40 : 0.60 ; RMSE = 0.9202467\n","Weights - 0.41 : 0.59 ; RMSE = 0.9197764\n","Weights - 0.42 : 0.58 ; RMSE = 0.9193157\n","Weights - 0.43 : 0.57 ; RMSE = 0.9188647\n","Weights - 0.44 : 0.56 ; RMSE = 0.9184232\n","Weights - 0.45 : 0.55 ; RMSE = 0.9179914\n","Weights - 0.46 : 0.54 ; RMSE = 0.9175692\n","Weights - 0.47 : 0.53 ; RMSE = 0.9171566\n","Weights - 0.48 : 0.52 ; RMSE = 0.9167537\n","Weights - 0.49 : 0.51 ; RMSE = 0.9163605\n","Weights - 0.50 : 0.50 ; RMSE = 0.9159770\n","Weights - 0.51 : 0.49 ; RMSE = 0.9156032\n","Weights - 0.52 : 0.48 ; RMSE = 0.9152391\n","Weights - 0.53 : 0.47 ; RMSE = 0.9148848\n","Weights - 0.54 : 0.46 ; RMSE = 0.9145401\n","Weights - 0.55 : 0.45 ; RMSE = 0.9142053\n","Weights - 0.56 : 0.44 ; RMSE = 0.9138802\n","Weights - 0.57 : 0.43 ; RMSE = 0.9135648\n","Weights - 0.58 : 0.42 ; RMSE = 0.9132593\n","Weights - 0.59 : 0.41 ; RMSE = 0.9129635\n","Weights - 0.60 : 0.40 ; RMSE = 0.9126775\n","Weights - 0.61 : 0.39 ; RMSE = 0.9124014\n","Weights - 0.62 : 0.38 ; RMSE = 0.9121350\n","Weights - 0.63 : 0.37 ; RMSE = 0.9118785\n","Weights - 0.64 : 0.36 ; RMSE = 0.9116318\n","Weights - 0.65 : 0.35 ; RMSE = 0.9113950\n","Weights - 0.66 : 0.34 ; RMSE = 0.9111680\n","Weights - 0.67 : 0.33 ; RMSE = 0.9109509\n","Weights - 0.68 : 0.32 ; RMSE = 0.9107436\n","Weights - 0.69 : 0.31 ; RMSE = 0.9105463\n","Weights - 0.70 : 0.30 ; RMSE = 0.9103588\n","Weights - 0.71 : 0.29 ; RMSE = 0.9101811\n","Weights - 0.72 : 0.28 ; RMSE = 0.9100134\n","Weights - 0.73 : 0.27 ; RMSE = 0.9098556\n","Weights - 0.74 : 0.26 ; RMSE = 0.9097076\n","Weights - 0.75 : 0.25 ; RMSE = 0.9095696\n","Weights - 0.76 : 0.24 ; RMSE = 0.9094415\n","Weights - 0.77 : 0.23 ; RMSE = 0.9093233\n","Weights - 0.78 : 0.22 ; RMSE = 0.9092150\n","Weights - 0.79 : 0.21 ; RMSE = 0.9091166\n","Weights - 0.80 : 0.20 ; RMSE = 0.9090282\n","Weights - 0.81 : 0.19 ; RMSE = 0.9089497\n","Weights - 0.82 : 0.18 ; RMSE = 0.9088811\n","Weights - 0.83 : 0.17 ; RMSE = 0.9088225\n","Weights - 0.84 : 0.16 ; RMSE = 0.9087738\n","Weights - 0.85 : 0.15 ; RMSE = 0.9087350\n","Weights - 0.86 : 0.14 ; RMSE = 0.9087062\n","Weights - 0.87 : 0.13 ; RMSE = 0.9086873\n","Weights - 0.88 : 0.12 ; RMSE = 0.9086783\n","Weights - 0.89 : 0.11 ; RMSE = 0.9086793\n","Weights - 0.90 : 0.10 ; RMSE = 0.9086902\n","Weights - 0.91 : 0.09 ; RMSE = 0.9087111\n","Weights - 0.92 : 0.08 ; RMSE = 0.9087419\n","Weights - 0.93 : 0.07 ; RMSE = 0.9087827\n","Weights - 0.94 : 0.06 ; RMSE = 0.9088333\n","Weights - 0.95 : 0.05 ; RMSE = 0.9088940\n","Weights - 0.96 : 0.04 ; RMSE = 0.9089645\n","Weights - 0.97 : 0.03 ; RMSE = 0.9090450\n","Weights - 0.98 : 0.02 ; RMSE = 0.9091354\n","Weights - 0.99 : 0.01 ; RMSE = 0.9092357\n"]}]},{"cell_type":"code","source":[""],"metadata":{"id":"740Eb3Nnt6KK"},"execution_count":null,"outputs":[]}]}