kopia lustrzana https://github.com/animator/learn-python
Merge 6980eb7f9f
into 406004d9c9
commit
4546ddaa72
|
@ -0,0 +1 @@
|
|||
Datasets used to build this Movie Recommendation System: https://drive.google.com/drive/folders/1cHQFeg3lRb_5c8qIy2xiL-HlWqlKNsuf?usp=drive_link
|
|
@ -0,0 +1,952 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "6beaa6b9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"movies = pd.read_csv(\"movies.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "ac5fe1eb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>movieId</th>\n",
|
||||
" <th>title</th>\n",
|
||||
" <th>genres</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Toy Story (1995)</td>\n",
|
||||
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>Jumanji (1995)</td>\n",
|
||||
" <td>Adventure|Children|Fantasy</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>Grumpier Old Men (1995)</td>\n",
|
||||
" <td>Comedy|Romance</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>Waiting to Exhale (1995)</td>\n",
|
||||
" <td>Comedy|Drama|Romance</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>Father of the Bride Part II (1995)</td>\n",
|
||||
" <td>Comedy</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" movieId title \\\n",
|
||||
"0 1 Toy Story (1995) \n",
|
||||
"1 2 Jumanji (1995) \n",
|
||||
"2 3 Grumpier Old Men (1995) \n",
|
||||
"3 4 Waiting to Exhale (1995) \n",
|
||||
"4 5 Father of the Bride Part II (1995) \n",
|
||||
"\n",
|
||||
" genres \n",
|
||||
"0 Adventure|Animation|Children|Comedy|Fantasy \n",
|
||||
"1 Adventure|Children|Fantasy \n",
|
||||
"2 Comedy|Romance \n",
|
||||
"3 Comedy|Drama|Romance \n",
|
||||
"4 Comedy "
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"movies.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "b6b1ec00",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"def clean_title(title):\n",
|
||||
" title = re.sub(\"[^a-zA-Z0-9 ]\", \"\", title)\n",
|
||||
" return title"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "0fd4cf17",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"movies[\"clean_title\"] = movies[\"title\"].apply(clean_title)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "e69c38af",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>movieId</th>\n",
|
||||
" <th>title</th>\n",
|
||||
" <th>genres</th>\n",
|
||||
" <th>clean_title</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Toy Story (1995)</td>\n",
|
||||
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||||
" <td>Toy Story 1995</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>Jumanji (1995)</td>\n",
|
||||
" <td>Adventure|Children|Fantasy</td>\n",
|
||||
" <td>Jumanji 1995</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>Grumpier Old Men (1995)</td>\n",
|
||||
" <td>Comedy|Romance</td>\n",
|
||||
" <td>Grumpier Old Men 1995</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>Waiting to Exhale (1995)</td>\n",
|
||||
" <td>Comedy|Drama|Romance</td>\n",
|
||||
" <td>Waiting to Exhale 1995</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>Father of the Bride Part II (1995)</td>\n",
|
||||
" <td>Comedy</td>\n",
|
||||
" <td>Father of the Bride Part II 1995</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>62418</th>\n",
|
||||
" <td>209157</td>\n",
|
||||
" <td>We (2018)</td>\n",
|
||||
" <td>Drama</td>\n",
|
||||
" <td>We 2018</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>62419</th>\n",
|
||||
" <td>209159</td>\n",
|
||||
" <td>Window of the Soul (2001)</td>\n",
|
||||
" <td>Documentary</td>\n",
|
||||
" <td>Window of the Soul 2001</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>62420</th>\n",
|
||||
" <td>209163</td>\n",
|
||||
" <td>Bad Poems (2018)</td>\n",
|
||||
" <td>Comedy|Drama</td>\n",
|
||||
" <td>Bad Poems 2018</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>62421</th>\n",
|
||||
" <td>209169</td>\n",
|
||||
" <td>A Girl Thing (2001)</td>\n",
|
||||
" <td>(no genres listed)</td>\n",
|
||||
" <td>A Girl Thing 2001</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>62422</th>\n",
|
||||
" <td>209171</td>\n",
|
||||
" <td>Women of Devil's Island (1962)</td>\n",
|
||||
" <td>Action|Adventure|Drama</td>\n",
|
||||
" <td>Women of Devils Island 1962</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>62423 rows × 4 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" movieId title \\\n",
|
||||
"0 1 Toy Story (1995) \n",
|
||||
"1 2 Jumanji (1995) \n",
|
||||
"2 3 Grumpier Old Men (1995) \n",
|
||||
"3 4 Waiting to Exhale (1995) \n",
|
||||
"4 5 Father of the Bride Part II (1995) \n",
|
||||
"... ... ... \n",
|
||||
"62418 209157 We (2018) \n",
|
||||
"62419 209159 Window of the Soul (2001) \n",
|
||||
"62420 209163 Bad Poems (2018) \n",
|
||||
"62421 209169 A Girl Thing (2001) \n",
|
||||
"62422 209171 Women of Devil's Island (1962) \n",
|
||||
"\n",
|
||||
" genres \\\n",
|
||||
"0 Adventure|Animation|Children|Comedy|Fantasy \n",
|
||||
"1 Adventure|Children|Fantasy \n",
|
||||
"2 Comedy|Romance \n",
|
||||
"3 Comedy|Drama|Romance \n",
|
||||
"4 Comedy \n",
|
||||
"... ... \n",
|
||||
"62418 Drama \n",
|
||||
"62419 Documentary \n",
|
||||
"62420 Comedy|Drama \n",
|
||||
"62421 (no genres listed) \n",
|
||||
"62422 Action|Adventure|Drama \n",
|
||||
"\n",
|
||||
" clean_title \n",
|
||||
"0 Toy Story 1995 \n",
|
||||
"1 Jumanji 1995 \n",
|
||||
"2 Grumpier Old Men 1995 \n",
|
||||
"3 Waiting to Exhale 1995 \n",
|
||||
"4 Father of the Bride Part II 1995 \n",
|
||||
"... ... \n",
|
||||
"62418 We 2018 \n",
|
||||
"62419 Window of the Soul 2001 \n",
|
||||
"62420 Bad Poems 2018 \n",
|
||||
"62421 A Girl Thing 2001 \n",
|
||||
"62422 Women of Devils Island 1962 \n",
|
||||
"\n",
|
||||
"[62423 rows x 4 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"movies"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0b2a4752",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
||||
"vectorizer = TfidfVectorizer(ngram_range=(1,2))\n",
|
||||
"\n",
|
||||
"tfidf = vectorizer.fit_transform(movies[\"clean_title\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "948d38ed",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.metrics.pairwise import cosine_similarity\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"def search(title):\n",
|
||||
" title = clean_title(title)\n",
|
||||
" query_vec = vectorizer.transform([title])\n",
|
||||
" similarity = cosine_similarity(query_vec, tfidf).flatten()\n",
|
||||
" indices = np.argpartition(similarity, -5)[-5:]\n",
|
||||
" results = movies.iloc[indices].iloc[::-1]\n",
|
||||
" \n",
|
||||
" return results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "20445f70",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "2ae4cdc198c74ae18922a2bc9fdcd1b8",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Text(value='Toy Story', description='Movie Title:')"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "da25b587a1c84ea487fab47ba876eeb5",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Output()"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import ipywidgets as widgets\n",
|
||||
"from IPython.display import display\n",
|
||||
"\n",
|
||||
"movie_input = widgets.Text(\n",
|
||||
" value='Enter Here: ',\n",
|
||||
" description='Movie Title:',\n",
|
||||
" disabled=False\n",
|
||||
")\n",
|
||||
"movie_list = widgets.Output()\n",
|
||||
"\n",
|
||||
"def on_type(data):\n",
|
||||
" with movie_list:\n",
|
||||
" movie_list.clear_output()\n",
|
||||
" title = data[\"new\"]\n",
|
||||
" if len(title) > 5:\n",
|
||||
" display(search(title))\n",
|
||||
"\n",
|
||||
"movie_input.observe(on_type, names='value')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"display(movie_input, movie_list)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "2901e869",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"movie_id = 89745\n",
|
||||
"\n",
|
||||
"#def find_similar_movies(movie_id):\n",
|
||||
"movie = movies[movies[\"movieId\"] == movie_id]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "7b8d1efa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ratings = pd.read_csv(\"ratings.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "50526290",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"userId int64\n",
|
||||
"movieId int64\n",
|
||||
"rating float64\n",
|
||||
"timestamp int64\n",
|
||||
"dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ratings.dtypes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "b27d0ba0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"similar_users = ratings[(ratings[\"movieId\"] == movie_id) & (ratings[\"rating\"] > 4)][\"userId\"].unique()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "7c13c44b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"similar_user_recs = ratings[(ratings[\"userId\"].isin(similar_users)) & (ratings[\"rating\"] > 4)][\"movieId\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "165057e3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"similar_user_recs = similar_user_recs.value_counts() / len(similar_users)\n",
|
||||
"\n",
|
||||
"similar_user_recs = similar_user_recs[similar_user_recs > .10]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "b603f47e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"all_users = ratings[(ratings[\"movieId\"].isin(similar_user_recs.index)) & (ratings[\"rating\"] > 4)]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "1765ebef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"all_user_recs = all_users[\"movieId\"].value_counts() / len(all_users[\"userId\"].unique())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "65b35c3d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)\n",
|
||||
"rec_percentages.columns = [\"similar\", \"all\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "3dcca5cd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>similar</th>\n",
|
||||
" <th>all</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>89745</th>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" <td>0.040459</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>58559</th>\n",
|
||||
" <td>0.573393</td>\n",
|
||||
" <td>0.148256</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>59315</th>\n",
|
||||
" <td>0.530649</td>\n",
|
||||
" <td>0.054931</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>79132</th>\n",
|
||||
" <td>0.519715</td>\n",
|
||||
" <td>0.132987</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2571</th>\n",
|
||||
" <td>0.496687</td>\n",
|
||||
" <td>0.247010</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>47610</th>\n",
|
||||
" <td>0.103545</td>\n",
|
||||
" <td>0.022770</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>780</th>\n",
|
||||
" <td>0.103380</td>\n",
|
||||
" <td>0.054723</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>88744</th>\n",
|
||||
" <td>0.103048</td>\n",
|
||||
" <td>0.010383</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1258</th>\n",
|
||||
" <td>0.101226</td>\n",
|
||||
" <td>0.083887</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1193</th>\n",
|
||||
" <td>0.100895</td>\n",
|
||||
" <td>0.120244</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>193 rows × 2 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" similar all\n",
|
||||
"89745 1.000000 0.040459\n",
|
||||
"58559 0.573393 0.148256\n",
|
||||
"59315 0.530649 0.054931\n",
|
||||
"79132 0.519715 0.132987\n",
|
||||
"2571 0.496687 0.247010\n",
|
||||
"... ... ...\n",
|
||||
"47610 0.103545 0.022770\n",
|
||||
"780 0.103380 0.054723\n",
|
||||
"88744 0.103048 0.010383\n",
|
||||
"1258 0.101226 0.083887\n",
|
||||
"1193 0.100895 0.120244\n",
|
||||
"\n",
|
||||
"[193 rows x 2 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"rec_percentages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "b84c2747",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rec_percentages[\"score\"] = rec_percentages[\"similar\"] / rec_percentages[\"all\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "f2debdc7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rec_percentages = rec_percentages.sort_values(\"score\", ascending=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "ac76b826",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>similar</th>\n",
|
||||
" <th>all</th>\n",
|
||||
" <th>score</th>\n",
|
||||
" <th>movieId</th>\n",
|
||||
" <th>title</th>\n",
|
||||
" <th>genres</th>\n",
|
||||
" <th>clean_title</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>17067</th>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" <td>0.040459</td>\n",
|
||||
" <td>24.716368</td>\n",
|
||||
" <td>89745</td>\n",
|
||||
" <td>Avengers, The (2012)</td>\n",
|
||||
" <td>Action|Adventure|Sci-Fi|IMAX</td>\n",
|
||||
" <td>Avengers The 2012</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>20513</th>\n",
|
||||
" <td>0.103711</td>\n",
|
||||
" <td>0.005289</td>\n",
|
||||
" <td>19.610199</td>\n",
|
||||
" <td>106072</td>\n",
|
||||
" <td>Thor: The Dark World (2013)</td>\n",
|
||||
" <td>Action|Adventure|Fantasy|IMAX</td>\n",
|
||||
" <td>Thor The Dark World 2013</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>25058</th>\n",
|
||||
" <td>0.241054</td>\n",
|
||||
" <td>0.012367</td>\n",
|
||||
" <td>19.491770</td>\n",
|
||||
" <td>122892</td>\n",
|
||||
" <td>Avengers: Age of Ultron (2015)</td>\n",
|
||||
" <td>Action|Adventure|Sci-Fi</td>\n",
|
||||
" <td>Avengers Age of Ultron 2015</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>19678</th>\n",
|
||||
" <td>0.216534</td>\n",
|
||||
" <td>0.012119</td>\n",
|
||||
" <td>17.867419</td>\n",
|
||||
" <td>102125</td>\n",
|
||||
" <td>Iron Man 3 (2013)</td>\n",
|
||||
" <td>Action|Sci-Fi|Thriller|IMAX</td>\n",
|
||||
" <td>Iron Man 3 2013</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>16725</th>\n",
|
||||
" <td>0.215043</td>\n",
|
||||
" <td>0.012052</td>\n",
|
||||
" <td>17.843074</td>\n",
|
||||
" <td>88140</td>\n",
|
||||
" <td>Captain America: The First Avenger (2011)</td>\n",
|
||||
" <td>Action|Adventure|Sci-Fi|Thriller|War</td>\n",
|
||||
" <td>Captain America The First Avenger 2011</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>16312</th>\n",
|
||||
" <td>0.175447</td>\n",
|
||||
" <td>0.010142</td>\n",
|
||||
" <td>17.299824</td>\n",
|
||||
" <td>86332</td>\n",
|
||||
" <td>Thor (2011)</td>\n",
|
||||
" <td>Action|Adventure|Drama|Fantasy|IMAX</td>\n",
|
||||
" <td>Thor 2011</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>21348</th>\n",
|
||||
" <td>0.287608</td>\n",
|
||||
" <td>0.016737</td>\n",
|
||||
" <td>17.183667</td>\n",
|
||||
" <td>110102</td>\n",
|
||||
" <td>Captain America: The Winter Soldier (2014)</td>\n",
|
||||
" <td>Action|Adventure|Sci-Fi|IMAX</td>\n",
|
||||
" <td>Captain America The Winter Soldier 2014</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>25071</th>\n",
|
||||
" <td>0.214049</td>\n",
|
||||
" <td>0.012856</td>\n",
|
||||
" <td>16.649399</td>\n",
|
||||
" <td>122920</td>\n",
|
||||
" <td>Captain America: Civil War (2016)</td>\n",
|
||||
" <td>Action|Sci-Fi|Thriller</td>\n",
|
||||
" <td>Captain America Civil War 2016</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>25061</th>\n",
|
||||
" <td>0.136017</td>\n",
|
||||
" <td>0.008573</td>\n",
|
||||
" <td>15.865628</td>\n",
|
||||
" <td>122900</td>\n",
|
||||
" <td>Ant-Man (2015)</td>\n",
|
||||
" <td>Action|Adventure|Sci-Fi</td>\n",
|
||||
" <td>AntMan 2015</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>14628</th>\n",
|
||||
" <td>0.242876</td>\n",
|
||||
" <td>0.015517</td>\n",
|
||||
" <td>15.651921</td>\n",
|
||||
" <td>77561</td>\n",
|
||||
" <td>Iron Man 2 (2010)</td>\n",
|
||||
" <td>Action|Adventure|Sci-Fi|Thriller|IMAX</td>\n",
|
||||
" <td>Iron Man 2 2010</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" similar all score movieId \\\n",
|
||||
"17067 1.000000 0.040459 24.716368 89745 \n",
|
||||
"20513 0.103711 0.005289 19.610199 106072 \n",
|
||||
"25058 0.241054 0.012367 19.491770 122892 \n",
|
||||
"19678 0.216534 0.012119 17.867419 102125 \n",
|
||||
"16725 0.215043 0.012052 17.843074 88140 \n",
|
||||
"16312 0.175447 0.010142 17.299824 86332 \n",
|
||||
"21348 0.287608 0.016737 17.183667 110102 \n",
|
||||
"25071 0.214049 0.012856 16.649399 122920 \n",
|
||||
"25061 0.136017 0.008573 15.865628 122900 \n",
|
||||
"14628 0.242876 0.015517 15.651921 77561 \n",
|
||||
"\n",
|
||||
" title \\\n",
|
||||
"17067 Avengers, The (2012) \n",
|
||||
"20513 Thor: The Dark World (2013) \n",
|
||||
"25058 Avengers: Age of Ultron (2015) \n",
|
||||
"19678 Iron Man 3 (2013) \n",
|
||||
"16725 Captain America: The First Avenger (2011) \n",
|
||||
"16312 Thor (2011) \n",
|
||||
"21348 Captain America: The Winter Soldier (2014) \n",
|
||||
"25071 Captain America: Civil War (2016) \n",
|
||||
"25061 Ant-Man (2015) \n",
|
||||
"14628 Iron Man 2 (2010) \n",
|
||||
"\n",
|
||||
" genres \\\n",
|
||||
"17067 Action|Adventure|Sci-Fi|IMAX \n",
|
||||
"20513 Action|Adventure|Fantasy|IMAX \n",
|
||||
"25058 Action|Adventure|Sci-Fi \n",
|
||||
"19678 Action|Sci-Fi|Thriller|IMAX \n",
|
||||
"16725 Action|Adventure|Sci-Fi|Thriller|War \n",
|
||||
"16312 Action|Adventure|Drama|Fantasy|IMAX \n",
|
||||
"21348 Action|Adventure|Sci-Fi|IMAX \n",
|
||||
"25071 Action|Sci-Fi|Thriller \n",
|
||||
"25061 Action|Adventure|Sci-Fi \n",
|
||||
"14628 Action|Adventure|Sci-Fi|Thriller|IMAX \n",
|
||||
"\n",
|
||||
" clean_title \n",
|
||||
"17067 Avengers The 2012 \n",
|
||||
"20513 Thor The Dark World 2013 \n",
|
||||
"25058 Avengers Age of Ultron 2015 \n",
|
||||
"19678 Iron Man 3 2013 \n",
|
||||
"16725 Captain America The First Avenger 2011 \n",
|
||||
"16312 Thor 2011 \n",
|
||||
"21348 Captain America The Winter Soldier 2014 \n",
|
||||
"25071 Captain America Civil War 2016 \n",
|
||||
"25061 AntMan 2015 \n",
|
||||
"14628 Iron Man 2 2010 "
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"rec_percentages.head(10).merge(movies, left_index=True, right_on=\"movieId\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "479b52da",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def find_similar_movies(movie_id):\n",
|
||||
" similar_users = ratings[(ratings[\"movieId\"] == movie_id) & (ratings[\"rating\"] > 4)][\"userId\"].unique()\n",
|
||||
" similar_user_recs = ratings[(ratings[\"userId\"].isin(similar_users)) & (ratings[\"rating\"] > 4)][\"movieId\"]\n",
|
||||
" similar_user_recs = similar_user_recs.value_counts() / len(similar_users)\n",
|
||||
"\n",
|
||||
" similar_user_recs = similar_user_recs[similar_user_recs > .10]\n",
|
||||
" all_users = ratings[(ratings[\"movieId\"].isin(similar_user_recs.index)) & (ratings[\"rating\"] > 4)]\n",
|
||||
" all_user_recs = all_users[\"movieId\"].value_counts() / len(all_users[\"userId\"].unique())\n",
|
||||
" rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)\n",
|
||||
" rec_percentages.columns = [\"similar\", \"all\"]\n",
|
||||
" \n",
|
||||
" rec_percentages[\"score\"] = rec_percentages[\"similar\"] / rec_percentages[\"all\"]\n",
|
||||
" rec_percentages = rec_percentages.sort_values(\"score\", ascending=False)\n",
|
||||
" return rec_percentages.head(10).merge(movies, left_index=True, right_on=\"movieId\")[[\"score\", \"title\", \"genres\"]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "c54c6d1c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "86f82703a3894fa099d6908444af30d6",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Text(value='Enter Here: ', description='Movie Title:')"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "402eff788a954f6e9a9f667d48798db0",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Output()"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import ipywidgets as widgets\n",
|
||||
"from IPython.display import display\n",
|
||||
"\n",
|
||||
"movie_name_input = widgets.Text(\n",
|
||||
" value='Enter Here: ',\n",
|
||||
" description='Movie Title:',\n",
|
||||
" disabled=False\n",
|
||||
")\n",
|
||||
"recommendation_list = widgets.Output()\n",
|
||||
"\n",
|
||||
"def on_type(data):\n",
|
||||
" with recommendation_list:\n",
|
||||
" recommendation_list.clear_output()\n",
|
||||
" title = data[\"new\"]\n",
|
||||
" if len(title) > 5:\n",
|
||||
" results = search(title)\n",
|
||||
" movie_id = results.iloc[0][\"movieId\"]\n",
|
||||
" display(find_similar_movies(movie_id))\n",
|
||||
"\n",
|
||||
"movie_name_input.observe(on_type, names='value')\n",
|
||||
"\n",
|
||||
"display(movie_name_input, recommendation_list)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7f3e01c4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
@ -1,3 +1,147 @@
|
|||
# List of sections
|
||||
# Movie Recommendation System Using ML in Python
|
||||
|
||||
- [Section title](filename.md)
|
||||
This guide gives a structured breakdown of the concepts involved in writing a movie recommendation system. It assumes a typical example in which the system consists of data processing, model training, and prediction steps. The concepts are divided into the following sections:
|
||||
|
||||
## 1. Data Collection and Preprocessing
|
||||
### Importing Libraries
|
||||
- **Purpose:** Use libraries to handle data manipulation, model building, and evaluation.
|
||||
- **Example:**
|
||||
```python
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import mean_squared_error
|
||||
```
|
||||
|
||||
### Loading Data
|
||||
- **Purpose:** Load datasets containing movie ratings, movie details, and user information.
|
||||
- **Example:**
|
||||
```python
|
||||
ratings = pd.read_csv('ratings.csv')
|
||||
movies = pd.read_csv('movies.csv')
|
||||
```
|
||||
|
||||
### Data Cleaning
|
||||
- **Purpose:** Handle missing values, duplicates, and inconsistent data.
|
||||
- **Example:**
|
||||
```python
|
||||
ratings.dropna(inplace=True)
|
||||
```
|
||||
|
||||
### Data Merging
|
||||
- **Purpose:** Combine multiple datasets for comprehensive analysis.
|
||||
- **Example:**
|
||||
```python
|
||||
data = pd.merge(ratings, movies, on='movieId')
|
||||
```
|
||||
|
||||
### Exploratory Data Analysis (EDA)
|
||||
- **Purpose:** Gain insights into data through visualizations and statistical analysis.
|
||||
- **Example:**
|
||||
```python
|
||||
import matplotlib.pyplot as plt
|
||||
data['rating'].hist()
|
||||
```
|
||||
|
||||
## 2. Feature Engineering
|
||||
### Encoding Categorical Variables
|
||||
- **Purpose:** Convert non-numeric data into a numeric format suitable for model input.
|
||||
- **Example:**
|
||||
```python
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
le = LabelEncoder()
|
||||
data['movieId'] = le.fit_transform(data['movieId'])
|
||||
data['userId'] = le.fit_transform(data['userId'])
|
||||
```
|
||||
|
||||
### Creating User-Item Matrix
|
||||
- **Purpose:** Create a matrix where rows represent users and columns represent movies, with ratings as values.
|
||||
- **Example:**
|
||||
```python
|
||||
user_item_matrix = data.pivot(index='userId', columns='movieId', values='rating')
|
||||
```
|
||||
|
||||
## 3. Model Building
|
||||
### Choosing a Model
|
||||
- **Common Models:** Collaborative Filtering (User-Based, Item-Based), Matrix Factorization (SVD).
|
||||
- **Example:**
|
||||
```python
|
||||
from sklearn.decomposition import TruncatedSVD
|
||||
```
|
||||
|
||||
### Model Training
|
||||
- **Purpose:** Train the chosen model on the dataset.
|
||||
- **Example:**
|
||||
```python
|
||||
svd = TruncatedSVD(n_components=50)
|
||||
matrix = user_item_matrix.fillna(0)
|
||||
svd.fit(matrix)
|
||||
```
|
||||
|
||||
## 4. Making Predictions
|
||||
### Generating Recommendations
|
||||
- **Purpose:** Use the trained model to predict ratings and recommend movies.
|
||||
- **Example:**
|
||||
```python
|
||||
user_ratings = svd.transform(matrix)
|
||||
predicted_ratings = svd.inverse_transform(user_ratings)
|
||||
```
|
||||
|
||||
### Selecting Top Recommendations
|
||||
- **Purpose:** Identify and rank the top movies for each user.
|
||||
- **Example:**
|
||||
```python
|
||||
def recommend_movies(user_id, num_recommendations):
    """Return the positions of the highest predicted ratings for one user.

    Args:
        user_id: 1-based user ID; row ``user_id - 1`` of the module-level
            ``predicted_ratings`` matrix is read.
        num_recommendations: Maximum number of positions to return.

    Returns:
        Column positions within ``predicted_ratings``, ordered from the
        highest predicted rating to the lowest.

    Raises:
        IndexError: If ``user_id`` does not map to a row of
            ``predicted_ratings``.
    """
    user_index = user_id - 1  # assuming user_id starts from 1
    # Reject IDs below 1 explicitly: a negative index would otherwise wrap
    # around and silently return another user's recommendations.
    if not 0 <= user_index < len(predicted_ratings):
        raise IndexError(f"user_id {user_id} is out of range")
    sorted_indices = np.argsort(predicted_ratings[user_index])[::-1]
    top_movies = sorted_indices[:num_recommendations]
    return top_movies
|
||||
```
|
||||
|
||||
## 5. Model Evaluation
|
||||
### Splitting Data
|
||||
- **Purpose:** Split the data into training and testing sets to evaluate model performance.
|
||||
- **Example:**
|
||||
```python
|
||||
train_data, test_data = train_test_split(data, test_size=0.2)
|
||||
```
|
||||
|
||||
### Evaluation Metrics
|
||||
- **Common Metrics:** RMSE (Root Mean Squared Error), MAE (Mean Absolute Error).
|
||||
- **Example:**
|
||||
```python
|
||||
def calculate_rmse(true_ratings, predicted_ratings):
    """Return the root mean squared error between true and predicted ratings."""
    # RMSE is the square root of the mean squared error.
    mse = mean_squared_error(true_ratings, predicted_ratings)
    return np.sqrt(mse)
|
||||
```
|
||||
|
||||
## 6. Deployment
|
||||
### Saving the Model
|
||||
- **Purpose:** Save the trained model for future use.
|
||||
- **Example:**
|
||||
```python
|
||||
import joblib
|
||||
joblib.dump(svd, 'movie_recommendation_model.pkl')
|
||||
```
|
||||
|
||||
### Loading the Model
|
||||
- **Purpose:** Load the saved model to make predictions.
|
||||
- **Example:**
|
||||
```python
|
||||
model = joblib.load('movie_recommendation_model.pkl')
|
||||
```
|
||||
|
||||
### Creating an Interface
|
||||
- **Purpose:** Build a user interface to interact with the recommendation system (e.g., web app).
|
||||
- **Example:** Using Flask for a web application.
|
||||
```python
|
||||
from flask import Flask, request, render_template
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route('/recommend', methods=['POST'])
def recommend():
    """Render the top-10 recommendations for the user ID posted in the form.

    Form fields arrive as strings, while ``recommend_movies`` performs
    integer arithmetic on the ID, so the value is converted before use.
    A non-numeric ``user_id`` produces a 400 response instead of an
    unhandled exception.
    """
    try:
        user_id = int(request.form['user_id'])
    except ValueError:
        return "user_id must be an integer", 400
    recommendations = recommend_movies(user_id, 10)
    return render_template('recommendations.html', movies=recommendations)
|
||||
```
|
||||
|
||||
This breakdown provides a comprehensive guide to the various concepts and steps involved in building a movie recommendation system, from data preprocessing to deployment.
|
||||
|
|
Ładowanie…
Reference in New Issue