Merge 6980eb7f9f into 406004d9c9

2024-05-17 17:45:22 +00:00 · 2024-05-17 17:45:22 +00:00 · 4546ddaa72
commit 4546ddaa72
--- a/contrib/machine-learning/DataSets.txt
+++ b/contrib/machine-learning/DataSets.txt
@ -0,0 +1 @@
+Datasets used to build this movie recommendation system: https://drive.google.com/drive/folders/1cHQFeg3lRb_5c8qIy2xiL-HlWqlKNsuf?usp=drive_link
--- a/contrib/machine-learning/Movie
+++ b/contrib/machine-learning/Movie
@ -0,0 +1,952 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "6beaa6b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "movies = pd.read_csv(\"movies.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ac5fe1eb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>movieId</th>\n",
+       "      <th>title</th>\n",
+       "      <th>genres</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Toy Story (1995)</td>\n",
+       "      <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>Jumanji (1995)</td>\n",
+       "      <td>Adventure|Children|Fantasy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>Grumpier Old Men (1995)</td>\n",
+       "      <td>Comedy|Romance</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>Waiting to Exhale (1995)</td>\n",
+       "      <td>Comedy|Drama|Romance</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>Father of the Bride Part II (1995)</td>\n",
+       "      <td>Comedy</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   movieId                               title  \\\n",
+       "0        1                    Toy Story (1995)   \n",
+       "1        2                      Jumanji (1995)   \n",
+       "2        3             Grumpier Old Men (1995)   \n",
+       "3        4            Waiting to Exhale (1995)   \n",
+       "4        5  Father of the Bride Part II (1995)   \n",
+       "\n",
+       "                                        genres  \n",
+       "0  Adventure|Animation|Children|Comedy|Fantasy  \n",
+       "1                   Adventure|Children|Fantasy  \n",
+       "2                               Comedy|Romance  \n",
+       "3                         Comedy|Drama|Romance  \n",
+       "4                                       Comedy  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "movies.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "b6b1ec00",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "\n",
+    "def clean_title(title):\n",
+    "    title = re.sub(\"[^a-zA-Z0-9 ]\", \"\", title)\n",
+    "    return title"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "0fd4cf17",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "movies[\"clean_title\"] = movies[\"title\"].apply(clean_title)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e69c38af",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>movieId</th>\n",
+       "      <th>title</th>\n",
+       "      <th>genres</th>\n",
+       "      <th>clean_title</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Toy Story (1995)</td>\n",
+       "      <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
+       "      <td>Toy Story 1995</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>Jumanji (1995)</td>\n",
+       "      <td>Adventure|Children|Fantasy</td>\n",
+       "      <td>Jumanji 1995</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>Grumpier Old Men (1995)</td>\n",
+       "      <td>Comedy|Romance</td>\n",
+       "      <td>Grumpier Old Men 1995</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>Waiting to Exhale (1995)</td>\n",
+       "      <td>Comedy|Drama|Romance</td>\n",
+       "      <td>Waiting to Exhale 1995</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>Father of the Bride Part II (1995)</td>\n",
+       "      <td>Comedy</td>\n",
+       "      <td>Father of the Bride Part II 1995</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>62418</th>\n",
+       "      <td>209157</td>\n",
+       "      <td>We (2018)</td>\n",
+       "      <td>Drama</td>\n",
+       "      <td>We 2018</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>62419</th>\n",
+       "      <td>209159</td>\n",
+       "      <td>Window of the Soul (2001)</td>\n",
+       "      <td>Documentary</td>\n",
+       "      <td>Window of the Soul 2001</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>62420</th>\n",
+       "      <td>209163</td>\n",
+       "      <td>Bad Poems (2018)</td>\n",
+       "      <td>Comedy|Drama</td>\n",
+       "      <td>Bad Poems 2018</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>62421</th>\n",
+       "      <td>209169</td>\n",
+       "      <td>A Girl Thing (2001)</td>\n",
+       "      <td>(no genres listed)</td>\n",
+       "      <td>A Girl Thing 2001</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>62422</th>\n",
+       "      <td>209171</td>\n",
+       "      <td>Women of Devil's Island (1962)</td>\n",
+       "      <td>Action|Adventure|Drama</td>\n",
+       "      <td>Women of Devils Island 1962</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>62423 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       movieId                               title  \\\n",
+       "0            1                    Toy Story (1995)   \n",
+       "1            2                      Jumanji (1995)   \n",
+       "2            3             Grumpier Old Men (1995)   \n",
+       "3            4            Waiting to Exhale (1995)   \n",
+       "4            5  Father of the Bride Part II (1995)   \n",
+       "...        ...                                 ...   \n",
+       "62418   209157                           We (2018)   \n",
+       "62419   209159           Window of the Soul (2001)   \n",
+       "62420   209163                    Bad Poems (2018)   \n",
+       "62421   209169                 A Girl Thing (2001)   \n",
+       "62422   209171      Women of Devil's Island (1962)   \n",
+       "\n",
+       "                                            genres  \\\n",
+       "0      Adventure|Animation|Children|Comedy|Fantasy   \n",
+       "1                       Adventure|Children|Fantasy   \n",
+       "2                                   Comedy|Romance   \n",
+       "3                             Comedy|Drama|Romance   \n",
+       "4                                           Comedy   \n",
+       "...                                            ...   \n",
+       "62418                                        Drama   \n",
+       "62419                                  Documentary   \n",
+       "62420                                 Comedy|Drama   \n",
+       "62421                           (no genres listed)   \n",
+       "62422                       Action|Adventure|Drama   \n",
+       "\n",
+       "                            clean_title  \n",
+       "0                        Toy Story 1995  \n",
+       "1                          Jumanji 1995  \n",
+       "2                 Grumpier Old Men 1995  \n",
+       "3                Waiting to Exhale 1995  \n",
+       "4      Father of the Bride Part II 1995  \n",
+       "...                                 ...  \n",
+       "62418                           We 2018  \n",
+       "62419           Window of the Soul 2001  \n",
+       "62420                    Bad Poems 2018  \n",
+       "62421                 A Girl Thing 2001  \n",
+       "62422       Women of Devils Island 1962  \n",
+       "\n",
+       "[62423 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "movies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "0b2a4752",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "vectorizer = TfidfVectorizer(ngram_range=(1,2))\n",
+    "\n",
+    "tfidf = vectorizer.fit_transform(movies[\"clean_title\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "948d38ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics.pairwise import cosine_similarity\n",
+    "import numpy as np\n",
+    "\n",
+    "def search(title):\n",
+    "    title = clean_title(title)\n",
+    "    query_vec = vectorizer.transform([title])\n",
+    "    similarity = cosine_similarity(query_vec, tfidf).flatten()\n",
+    "    indices = np.argpartition(similarity, -5)[-5:]\n",
+    "    results = movies.iloc[indices].iloc[::-1]\n",
+    "    \n",
+    "    return results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "20445f70",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2ae4cdc198c74ae18922a2bc9fdcd1b8",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Text(value='Toy Story', description='Movie Title:')"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "da25b587a1c84ea487fab47ba876eeb5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import ipywidgets as widgets\n",
+    "from IPython.display import display\n",
+    "\n",
+    "movie_input = widgets.Text(\n",
+    "    value='Enter Here: ',\n",
+    "    description='Movie Title:',\n",
+    "    disabled=False\n",
+    ")\n",
+    "movie_list = widgets.Output()\n",
+    "\n",
+    "def on_type(data):\n",
+    "    with movie_list:\n",
+    "        movie_list.clear_output()\n",
+    "        title = data[\"new\"]\n",
+    "        if len(title) > 5:\n",
+    "            display(search(title))\n",
+    "\n",
+    "movie_input.observe(on_type, names='value')\n",
+    "\n",
+    "\n",
+    "display(movie_input, movie_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "2901e869",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "movie_id = 89745\n",
+    "\n",
+    "#def find_similar_movies(movie_id):\n",
+    "movie = movies[movies[\"movieId\"] == movie_id]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "7b8d1efa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ratings = pd.read_csv(\"ratings.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "50526290",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "userId         int64\n",
+       "movieId        int64\n",
+       "rating       float64\n",
+       "timestamp      int64\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ratings.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "b27d0ba0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "similar_users = ratings[(ratings[\"movieId\"] == movie_id) & (ratings[\"rating\"] > 4)][\"userId\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "7c13c44b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "similar_user_recs = ratings[(ratings[\"userId\"].isin(similar_users)) & (ratings[\"rating\"] > 4)][\"movieId\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "165057e3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "similar_user_recs = similar_user_recs.value_counts() / len(similar_users)\n",
+    "\n",
+    "similar_user_recs = similar_user_recs[similar_user_recs > .10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "b603f47e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_users = ratings[(ratings[\"movieId\"].isin(similar_user_recs.index)) & (ratings[\"rating\"] > 4)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "1765ebef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_user_recs = all_users[\"movieId\"].value_counts() / len(all_users[\"userId\"].unique())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "65b35c3d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)\n",
+    "rec_percentages.columns = [\"similar\", \"all\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "3dcca5cd",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>similar</th>\n",
+       "      <th>all</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>89745</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.040459</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>58559</th>\n",
+       "      <td>0.573393</td>\n",
+       "      <td>0.148256</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>59315</th>\n",
+       "      <td>0.530649</td>\n",
+       "      <td>0.054931</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>79132</th>\n",
+       "      <td>0.519715</td>\n",
+       "      <td>0.132987</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2571</th>\n",
+       "      <td>0.496687</td>\n",
+       "      <td>0.247010</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>47610</th>\n",
+       "      <td>0.103545</td>\n",
+       "      <td>0.022770</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>780</th>\n",
+       "      <td>0.103380</td>\n",
+       "      <td>0.054723</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>88744</th>\n",
+       "      <td>0.103048</td>\n",
+       "      <td>0.010383</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1258</th>\n",
+       "      <td>0.101226</td>\n",
+       "      <td>0.083887</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1193</th>\n",
+       "      <td>0.100895</td>\n",
+       "      <td>0.120244</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>193 rows × 2 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        similar       all\n",
+       "89745  1.000000  0.040459\n",
+       "58559  0.573393  0.148256\n",
+       "59315  0.530649  0.054931\n",
+       "79132  0.519715  0.132987\n",
+       "2571   0.496687  0.247010\n",
+       "...         ...       ...\n",
+       "47610  0.103545  0.022770\n",
+       "780    0.103380  0.054723\n",
+       "88744  0.103048  0.010383\n",
+       "1258   0.101226  0.083887\n",
+       "1193   0.100895  0.120244\n",
+       "\n",
+       "[193 rows x 2 columns]"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rec_percentages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "b84c2747",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rec_percentages[\"score\"] = rec_percentages[\"similar\"] / rec_percentages[\"all\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "f2debdc7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rec_percentages = rec_percentages.sort_values(\"score\", ascending=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "ac76b826",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>similar</th>\n",
+       "      <th>all</th>\n",
+       "      <th>score</th>\n",
+       "      <th>movieId</th>\n",
+       "      <th>title</th>\n",
+       "      <th>genres</th>\n",
+       "      <th>clean_title</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>17067</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.040459</td>\n",
+       "      <td>24.716368</td>\n",
+       "      <td>89745</td>\n",
+       "      <td>Avengers, The (2012)</td>\n",
+       "      <td>Action|Adventure|Sci-Fi|IMAX</td>\n",
+       "      <td>Avengers The 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20513</th>\n",
+       "      <td>0.103711</td>\n",
+       "      <td>0.005289</td>\n",
+       "      <td>19.610199</td>\n",
+       "      <td>106072</td>\n",
+       "      <td>Thor: The Dark World (2013)</td>\n",
+       "      <td>Action|Adventure|Fantasy|IMAX</td>\n",
+       "      <td>Thor The Dark World 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25058</th>\n",
+       "      <td>0.241054</td>\n",
+       "      <td>0.012367</td>\n",
+       "      <td>19.491770</td>\n",
+       "      <td>122892</td>\n",
+       "      <td>Avengers: Age of Ultron (2015)</td>\n",
+       "      <td>Action|Adventure|Sci-Fi</td>\n",
+       "      <td>Avengers Age of Ultron 2015</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19678</th>\n",
+       "      <td>0.216534</td>\n",
+       "      <td>0.012119</td>\n",
+       "      <td>17.867419</td>\n",
+       "      <td>102125</td>\n",
+       "      <td>Iron Man 3 (2013)</td>\n",
+       "      <td>Action|Sci-Fi|Thriller|IMAX</td>\n",
+       "      <td>Iron Man 3 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16725</th>\n",
+       "      <td>0.215043</td>\n",
+       "      <td>0.012052</td>\n",
+       "      <td>17.843074</td>\n",
+       "      <td>88140</td>\n",
+       "      <td>Captain America: The First Avenger (2011)</td>\n",
+       "      <td>Action|Adventure|Sci-Fi|Thriller|War</td>\n",
+       "      <td>Captain America The First Avenger 2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16312</th>\n",
+       "      <td>0.175447</td>\n",
+       "      <td>0.010142</td>\n",
+       "      <td>17.299824</td>\n",
+       "      <td>86332</td>\n",
+       "      <td>Thor (2011)</td>\n",
+       "      <td>Action|Adventure|Drama|Fantasy|IMAX</td>\n",
+       "      <td>Thor 2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21348</th>\n",
+       "      <td>0.287608</td>\n",
+       "      <td>0.016737</td>\n",
+       "      <td>17.183667</td>\n",
+       "      <td>110102</td>\n",
+       "      <td>Captain America: The Winter Soldier (2014)</td>\n",
+       "      <td>Action|Adventure|Sci-Fi|IMAX</td>\n",
+       "      <td>Captain America The Winter Soldier 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25071</th>\n",
+       "      <td>0.214049</td>\n",
+       "      <td>0.012856</td>\n",
+       "      <td>16.649399</td>\n",
+       "      <td>122920</td>\n",
+       "      <td>Captain America: Civil War (2016)</td>\n",
+       "      <td>Action|Sci-Fi|Thriller</td>\n",
+       "      <td>Captain America Civil War 2016</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25061</th>\n",
+       "      <td>0.136017</td>\n",
+       "      <td>0.008573</td>\n",
+       "      <td>15.865628</td>\n",
+       "      <td>122900</td>\n",
+       "      <td>Ant-Man (2015)</td>\n",
+       "      <td>Action|Adventure|Sci-Fi</td>\n",
+       "      <td>AntMan 2015</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14628</th>\n",
+       "      <td>0.242876</td>\n",
+       "      <td>0.015517</td>\n",
+       "      <td>15.651921</td>\n",
+       "      <td>77561</td>\n",
+       "      <td>Iron Man 2 (2010)</td>\n",
+       "      <td>Action|Adventure|Sci-Fi|Thriller|IMAX</td>\n",
+       "      <td>Iron Man 2 2010</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        similar       all      score  movieId  \\\n",
+       "17067  1.000000  0.040459  24.716368    89745   \n",
+       "20513  0.103711  0.005289  19.610199   106072   \n",
+       "25058  0.241054  0.012367  19.491770   122892   \n",
+       "19678  0.216534  0.012119  17.867419   102125   \n",
+       "16725  0.215043  0.012052  17.843074    88140   \n",
+       "16312  0.175447  0.010142  17.299824    86332   \n",
+       "21348  0.287608  0.016737  17.183667   110102   \n",
+       "25071  0.214049  0.012856  16.649399   122920   \n",
+       "25061  0.136017  0.008573  15.865628   122900   \n",
+       "14628  0.242876  0.015517  15.651921    77561   \n",
+       "\n",
+       "                                            title  \\\n",
+       "17067                        Avengers, The (2012)   \n",
+       "20513                 Thor: The Dark World (2013)   \n",
+       "25058              Avengers: Age of Ultron (2015)   \n",
+       "19678                           Iron Man 3 (2013)   \n",
+       "16725   Captain America: The First Avenger (2011)   \n",
+       "16312                                 Thor (2011)   \n",
+       "21348  Captain America: The Winter Soldier (2014)   \n",
+       "25071           Captain America: Civil War (2016)   \n",
+       "25061                              Ant-Man (2015)   \n",
+       "14628                           Iron Man 2 (2010)   \n",
+       "\n",
+       "                                      genres  \\\n",
+       "17067           Action|Adventure|Sci-Fi|IMAX   \n",
+       "20513          Action|Adventure|Fantasy|IMAX   \n",
+       "25058                Action|Adventure|Sci-Fi   \n",
+       "19678            Action|Sci-Fi|Thriller|IMAX   \n",
+       "16725   Action|Adventure|Sci-Fi|Thriller|War   \n",
+       "16312    Action|Adventure|Drama|Fantasy|IMAX   \n",
+       "21348           Action|Adventure|Sci-Fi|IMAX   \n",
+       "25071                 Action|Sci-Fi|Thriller   \n",
+       "25061                Action|Adventure|Sci-Fi   \n",
+       "14628  Action|Adventure|Sci-Fi|Thriller|IMAX   \n",
+       "\n",
+       "                                   clean_title  \n",
+       "17067                        Avengers The 2012  \n",
+       "20513                 Thor The Dark World 2013  \n",
+       "25058              Avengers Age of Ultron 2015  \n",
+       "19678                          Iron Man 3 2013  \n",
+       "16725   Captain America The First Avenger 2011  \n",
+       "16312                                Thor 2011  \n",
+       "21348  Captain America The Winter Soldier 2014  \n",
+       "25071           Captain America Civil War 2016  \n",
+       "25061                              AntMan 2015  \n",
+       "14628                          Iron Man 2 2010  "
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rec_percentages.head(10).merge(movies, left_index=True, right_on=\"movieId\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "479b52da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def find_similar_movies(movie_id):\n",
+    "    similar_users = ratings[(ratings[\"movieId\"] == movie_id) & (ratings[\"rating\"] > 4)][\"userId\"].unique()\n",
+    "    similar_user_recs = ratings[(ratings[\"userId\"].isin(similar_users)) & (ratings[\"rating\"] > 4)][\"movieId\"]\n",
+    "    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)\n",
+    "\n",
+    "    similar_user_recs = similar_user_recs[similar_user_recs > .10]\n",
+    "    all_users = ratings[(ratings[\"movieId\"].isin(similar_user_recs.index)) & (ratings[\"rating\"] > 4)]\n",
+    "    all_user_recs = all_users[\"movieId\"].value_counts() / len(all_users[\"userId\"].unique())\n",
+    "    rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)\n",
+    "    rec_percentages.columns = [\"similar\", \"all\"]\n",
+    "    \n",
+    "    rec_percentages[\"score\"] = rec_percentages[\"similar\"] / rec_percentages[\"all\"]\n",
+    "    rec_percentages = rec_percentages.sort_values(\"score\", ascending=False)\n",
+    "    return rec_percentages.head(10).merge(movies, left_index=True, right_on=\"movieId\")[[\"score\", \"title\", \"genres\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "c54c6d1c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "86f82703a3894fa099d6908444af30d6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Text(value='Enter Here: ', description='Movie Title:')"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "402eff788a954f6e9a9f667d48798db0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import ipywidgets as widgets\n",
+    "from IPython.display import display\n",
+    "\n",
+    "movie_name_input = widgets.Text(\n",
+    "    value='Enter Here: ',\n",
+    "    description='Movie Title:',\n",
+    "    disabled=False\n",
+    ")\n",
+    "recommendation_list = widgets.Output()\n",
+    "\n",
+    "def on_type(data):\n",
+    "    with recommendation_list:\n",
+    "        recommendation_list.clear_output()\n",
+    "        title = data[\"new\"]\n",
+    "        if len(title) > 5:\n",
+    "            results = search(title)\n",
+    "            movie_id = results.iloc[0][\"movieId\"]\n",
+    "            display(find_similar_movies(movie_id))\n",
+    "\n",
+    "movie_name_input.observe(on_type, names='value')\n",
+    "\n",
+    "display(movie_name_input, recommendation_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7f3e01c4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/contrib/machine-learning/index.md
+++ b/contrib/machine-learning/index.md
@ -1,3 +1,147 @@
-# List of sections
+# Movie Recommendation System Using ML in Python

- [Section title](filename.md)
+This guide gives a structured breakdown of the concepts used in writing a movie recommendation system. It assumes a typical example in which the system involves data processing, model training, and prediction steps, and it divides the concepts according to that basic outline:
+
+## 1. Data Collection and Preprocessing
+### Importing Libraries
+- **Purpose:** Use libraries to handle data manipulation, model building, and evaluation.
+- **Example:**
+  ```python
+  import pandas as pd
+  import numpy as np
+  from sklearn.model_selection import train_test_split
+  from sklearn.metrics import mean_squared_error
+  ```
+
+### Loading Data
+- **Purpose:** Load the datasets containing user ratings (which include the user IDs) and movie details.
+- **Example:**
+  ```python
+  ratings = pd.read_csv('ratings.csv')
+  movies = pd.read_csv('movies.csv')
+  ```
+
+### Data Cleaning
+- **Purpose:** Handle missing values, duplicates, and inconsistent data.
+- **Example:**
+  ```python
+  ratings.dropna(inplace=True)
+  ```
+
+### Data Merging
+- **Purpose:** Combine multiple datasets for comprehensive analysis.
+- **Example:**
+  ```python
+  data = pd.merge(ratings, movies, on='movieId')
+  ```
+
+### Exploratory Data Analysis (EDA)
+- **Purpose:** Gain insights into data through visualizations and statistical analysis.
+- **Example:**
+  ```python
+  import matplotlib.pyplot as plt
+  data['rating'].hist()
+  ```
+
+## 2. Feature Engineering
+### Encoding Categorical Variables
+- **Purpose:** Convert categorical values (here, the ID columns) into consecutive integer codes, a numeric format suitable for model input.
+- **Example:**
+  ```python
+  from sklearn.preprocessing import LabelEncoder
+  le = LabelEncoder()
+  data['movieId'] = le.fit_transform(data['movieId'])
+  data['userId'] = le.fit_transform(data['userId'])
+  ```
+
+### Creating User-Item Matrix
+- **Purpose:** Create a matrix where rows represent users and columns represent movies, with ratings as values.
+- **Example:**
+  ```python
+  user_item_matrix = data.pivot(index='userId', columns='movieId', values='rating')
+  ```
+
+## 3. Model Building
+### Choosing a Model
+- **Common Models:** Collaborative Filtering (User-Based, Item-Based), Matrix Factorization (SVD).
+- **Example:**
+  ```python
+  from sklearn.decomposition import TruncatedSVD
+  ```
+
+### Model Training
+- **Purpose:** Train the chosen model on the dataset.
+- **Example:**
+  ```python
+  svd = TruncatedSVD(n_components=50)
+  matrix = user_item_matrix.fillna(0)
+  svd.fit(matrix)
+  ```
+
+## 4. Making Predictions
+### Generating Recommendations
+- **Purpose:** Use the trained model to predict ratings and recommend movies.
+- **Example:**
+  ```python
+  user_ratings = svd.transform(matrix)
+  predicted_ratings = svd.inverse_transform(user_ratings)
+  ```
+
+### Selecting Top Recommendations
+- **Purpose:** Identify and rank the top movies for each user.
+- **Example:**
+  ```python
+  def recommend_movies(user_id, num_recommendations):
+      user_index = user_id - 1  # assuming user_id starts from 1
+      sorted_indices = np.argsort(predicted_ratings[user_index])[::-1]
+      top_movies = sorted_indices[:num_recommendations]
+      return top_movies
+  ```
+
+## 5. Model Evaluation
+### Splitting Data
+- **Purpose:** Split the data into training and testing sets to evaluate model performance.
+- **Example:**
+  ```python
+  train_data, test_data = train_test_split(data, test_size=0.2)
+  ```
+
+### Evaluation Metrics
+- **Common Metrics:** RMSE (Root Mean Squared Error), MAE (Mean Absolute Error).
+- **Example:**
+  ```python
+  def calculate_rmse(true_ratings, predicted_ratings):
+      return np.sqrt(mean_squared_error(true_ratings, predicted_ratings))
+  ```
+
+## 6. Deployment
+### Saving the Model
+- **Purpose:** Save the trained model for future use.
+- **Example:**
+  ```python
+  import joblib
+  joblib.dump(svd, 'movie_recommendation_model.pkl')
+  ```
+
+### Loading the Model
+- **Purpose:** Load the saved model to make predictions.
+- **Example:**
+  ```python
+  model = joblib.load('movie_recommendation_model.pkl')
+  ```
+
+### Creating an Interface
+- **Purpose:** Build a user interface to interact with the recommendation system (e.g., web app).
+- **Example:** Using Flask for a web application.
+  ```python
+  from flask import Flask, request, render_template
+  app = Flask(__name__)
+
+  @app.route('/recommend', methods=['POST'])
+  def recommend():
+      user_id = request.form['user_id']
+      recommendations = recommend_movies(user_id, 10)
+      return render_template('recommendations.html', movies=recommendations)
+  ```
+
+This breakdown provides a comprehensive guide to the various concepts and steps involved in building a movie recommendation system, from data preprocessing to deployment.
				`@ -0,0 +1 @@`
				`Datasets used to build this Movie Recommendation System: https://drive.google.com/drive/folders/1cHQFeg3lRb_5c8qIy2xiL-HlWqlKNsuf?usp=drive_link`