{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21c3ffc5-f494-45bb-93bc-cfaeb54f303a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import pickle\n",
    "import sqlite3\n",
    "import hdbscan\n",
    "import os\n",
    "import random\n",
    "import pyfpgrowth\n",
    "from tqdm.notebook import tqdm\n",
    "from collections import defaultdict\n",
    "from itertools import combinations\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "import keras\n",
    "import tensorflow as tf\n",
    "from keras.models import Model\n",
    "from keras.layers import Input, Dense\n",
    "from sentence_transformers import SentenceTransformer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6447e366-1d59-4521-b2a5-a146b19c3c28",
   "metadata": {},
   "outputs": [],
   "source": [
    "SEED = 42\n",
    "os.environ['TF_DETERMINISTIC_OPS'] = '1'\n",
    "os.environ['PYTHONHASHSEED'] = str(SEED)\n",
    "random.seed(SEED)\n",
    "np.random.seed(SEED)\n",
    "tf.random.set_seed(SEED)\n",
    "print(f\"Global random seed set to {SEED} for reproducibility.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3405e703-05ba-4450-92db-9de9b7491a14",
   "metadata": {},
   "outputs": [],
   "source": [
    "class ItemKNNRecommender:\n",
    "    def __init__(self):\n",
    "        self.cooccurrence_matrix = None\n",
    "        self.train_user_items = None\n",
    "        self.item_index = None\n",
    "        self.unique_items = None\n",
    "        self.most_popular_items = None\n",
    "\n",
    "    def fit(self, train_df: pd.DataFrame):\n",
    "        print(\"Fitting ItemKNN model\")\n",
    "        self.train_user_items = train_df.groupby('user_id')['song_id'].apply(list).to_dict()\n",
    "        train_item_users = train_df.groupby('song_id')['user_id'].apply(set).to_dict()\n",
    "        \n",
    "        item_popularity = train_df.groupby('song_id')['play_count'].sum()\n",
    "        self.most_popular_items = list(item_popularity.sort_values(ascending=False).index)\n",
    "        \n",
    "        self.unique_items = list(train_item_users.keys())\n",
    "        self.item_index = {item: idx for idx, item in enumerate(self.unique_items)}\n",
    "        \n",
    "        item_user_matrix = np.zeros((len(self.unique_items), len(self.train_user_items)), dtype=np.int8)\n",
    "        user_index = {user: idx for idx, user in enumerate(self.train_user_items.keys())}\n",
    "        \n",
    "        for item, users in train_item_users.items():\n",
    "            i = self.item_index[item]\n",
    "            for user in users:\n",
    "                if user in user_index:\n",
    "                    j = user_index[user]\n",
    "                    item_user_matrix[i, j] = 1\n",
    "                    \n",
    "        print(\"Calculating co-occurrence matrix.\")\n",
    "        self.cooccurrence_matrix = item_user_matrix.dot(item_user_matrix.T)\n",
    "        np.fill_diagonal(self.cooccurrence_matrix, 0)\n",
    "        print(\"ItemKNN model fit complete.\")\n",
    "\n",
    "    def recommend(self, user_id: str, K: int = 30) -> dict:\n",
    "        seen_items = self.train_user_items.get(user_id, [])\n",
    "        if not seen_items:\n",
    "            recs = {}\n",
    "            unseen_popular = [item for item in self.most_popular_items if item not in seen_items]\n",
    "            for i, item in enumerate(unseen_popular[:K]):\n",
    "                recs[item] = len(self.most_popular_items) - i \n",
    "            return recs\n",
    "\n",
    "        scores = np.zeros(len(self.unique_items))\n",
    "        for item in seen_items:\n",
    "            if item in self.item_index:\n",
    "                i = self.item_index[item]\n",
    "                scores += self.cooccurrence_matrix[i]\n",
    "                \n",
    "        top_indices = np.argpartition(-scores, K + len(seen_items))[:K + len(seen_items)]\n",
    "        \n",
    "        recommendations = {}\n",
    "        for idx in top_indices[np.argsort(-scores[top_indices])]:\n",
    "            song_id = self.unique_items[idx]\n",
    "            if song_id not in seen_items:\n",
    "                recommendations[song_id] = scores[idx]\n",
    "                if len(recommendations) == K:\n",
    "                    break\n",
    "        return recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a750bcb-a3ff-49bf-b52f-662d7390a77d",
   "metadata": {},
   "outputs": [],
   "source": [
    "class ArmContentRecommender:\n",
    "    def __init__(self):\n",
    "        self.songs_df = None\n",
    "        self.songs_encoded_df = None\n",
    "        self.song_id_to_index = None\n",
    "        self.song_to_cluster_map = None\n",
    "        self.user_profile = None\n",
    "        self.user_predicted_songs = None\n",
    "        self.user_cluster_membership = None\n",
    "        self.antecedent_map = {}\n",
    "\n",
    "    def fit(self, train_df: pd.DataFrame, songs_df_raw: pd.DataFrame, artist_term_df: pd.DataFrame):\n",
    "        print(\"Fitting ArmContentRecommender model.\")\n",
    "        self._engineer_features(songs_df_raw, artist_term_df)\n",
    "        self._mine_association_rules(train_df)\n",
    "        self.user_profile = train_df.groupby('user_id')['song_id'].apply(list).to_dict()\n",
    "        self._generate_rule_based_candidates()\n",
    "        self._calculate_user_cluster_membership(train_df)\n",
    "        print(\"ArmContentRecommender model fit complete.\")\n",
    "\n",
    "    def recommend(self, user_id: str, top_k: int = 30) -> dict:\n",
    "        weights = {'similarity': 0.5, 'confidence': 4.5, 'membership': 0.5}\n",
    "        user_history = self.user_profile.get(user_id, [])\n",
    "        candidate_songs = self.user_predicted_songs.get(user_id, {})\n",
    "        \n",
    "        if not user_history or not candidate_songs: return {}\n",
    "        \n",
    "        candidate_ids = list(candidate_songs.keys())\n",
    "        valid_cands = [(cid, self.song_id_to_index.get(cid)) for cid in candidate_ids if self.song_id_to_index.get(cid) is not None]\n",
    "        if not valid_cands: return {}\n",
    "\n",
    "        valid_c_ids, valid_c_indices = zip(*valid_cands)\n",
    "        candidate_vectors = self.songs_encoded_df.iloc[list(valid_c_indices)].values\n",
    "        \n",
    "        user_hist_indices = [self.song_id_to_index.get(s) for s in user_history if self.song_id_to_index.get(s) is not None]\n",
    "        if not user_hist_indices: return {}\n",
    "        \n",
    "        user_history_vectors = self.songs_encoded_df.iloc[user_hist_indices].values\n",
    "        \n",
    "        avg_similarities = np.mean(cosine_similarity(candidate_vectors, user_history_vectors), axis=1)\n",
    "        \n",
    "        raw_scores = []\n",
    "        for i, song_id in enumerate(valid_c_ids):\n",
    "            cluster = self.song_to_cluster_map.get(song_id)\n",
    "            membership = self.user_cluster_membership.get((user_id, cluster), 0.0) if cluster is not None else 0.0\n",
    "            raw_scores.append({\n",
    "                'song_id': song_id, 'similarity': avg_similarities[i],\n",
    "                'confidence': candidate_songs[song_id].get('confidence', 0),\n",
    "                'membership': membership\n",
    "            })\n",
    "\n",
    "        if not raw_scores: return {}\n",
    "        \n",
    "        scores_df = pd.DataFrame(raw_scores)\n",
    "        scaler = MinMaxScaler()\n",
    "        for col in ['similarity', 'confidence', 'membership']:\n",
    "            scores_df[col + '_norm'] = scaler.fit_transform(scores_df[[col]])\n",
    "        \n",
    "        scores_df['final_score'] = (scores_df['similarity_norm'] * weights['similarity'] + \n",
    "                                    scores_df['confidence_norm'] * weights['confidence'] + \n",
    "                                    scores_df['membership_norm'] * weights['membership'])\n",
    "        \n",
    "        top_songs = scores_df.sort_values('final_score', ascending=False).head(top_k)\n",
    "        return pd.Series(top_songs.final_score.values, index=top_songs.song_id).to_dict()\n",
    "\n",
    "    def _engineer_features(self, songs_df_raw, artist_term_df):\n",
    "        artist_genre = artist_term_df.groupby('artist_id')['term'].agg(lambda x: x.mode()[0]).reset_index()\n",
    "        artist_genre.columns = ['artist_id', 'genre']\n",
    "        songs_df = songs_df_raw.merge(artist_genre, on='artist_id', how='left')\n",
    "\n",
    "        def categorize_duration(duration):\n",
    "            if duration <= 120: return 'Very Short'\n",
    "            elif duration <= 180: return 'Short'\n",
    "            elif duration <= 300: return 'Medium'\n",
    "            elif duration <= 420: return 'Long'\n",
    "            else: return 'Very Long'\n",
    "        songs_df['duration_category'] = songs_df['duration'].apply(categorize_duration)\n",
    "        \n",
    "        le_genre, le_duration = LabelEncoder(), LabelEncoder()\n",
    "        songs_df['genre_encoded'] = le_genre.fit_transform(songs_df['genre'])\n",
    "        songs_df['duration_encoded'] = le_duration.fit_transform(songs_df['duration_category'])\n",
    "        features_ae = songs_df[['genre_encoded', 'duration_encoded']].values\n",
    "        scaled_features_ae = StandardScaler().fit_transform(features_ae)\n",
    "        \n",
    "        input_dim, encoding_dim = scaled_features_ae.shape[1], 4\n",
    "        input_layer = Input(shape=(input_dim,))\n",
    "        encoded = Dense(8, activation='relu')(input_layer)\n",
    "        encoded = Dense(encoding_dim, activation='relu')(encoded)\n",
    "        decoded = Dense(8, activation='relu')(encoded)\n",
    "        decoded = Dense(input_dim, activation='linear')(decoded)\n",
    "        encoder = Model(input_layer, encoded)\n",
    "        autoencoder = Model(input_layer, decoded)\n",
    "        autoencoder.compile(optimizer='adam', loss='mse')\n",
    "        autoencoder.fit(scaled_features_ae, scaled_features_ae, epochs=30, batch_size=128, shuffle=True, verbose=0)\n",
    "        encoded_features = encoder.predict(scaled_features_ae)\n",
    "        \n",
    "        clusterer = hdbscan.HDBSCAN(min_cluster_size=10, metric='euclidean')\n",
    "        songs_df['cluster'] = clusterer.fit_predict(encoded_features)\n",
    "        \n",
    "        model = SentenceTransformer('all-MiniLM-L6-v2')\n",
    "        def create_song_description(row):\n",
    "            year_str = str(int(row['year'])) if pd.notna(row['year']) and row['year'] > 0 else 'an unknown year'\n",
    "            genre_str = row['genre'].lower() if pd.notna(row['genre']) else 'unclassified'\n",
    "            artist_str = row['artist_name'] if pd.notna(row['artist_name']) else 'an unknown artist'\n",
    "            duration_str = row['duration_category'].lower()\n",
    "            return f\"A {genre_str} song by {artist_str}, released in {year_str}, with a {duration_str} duration.\"\n",
    "\n",
    "        songs_df['description'] = songs_df.apply(create_song_description, axis=1)\n",
    "        unique_descriptions = songs_df['description'].unique().tolist()\n",
    "        unique_embeddings = model.encode(unique_descriptions, show_progress_bar=True)\n",
    "        desc_to_embedding = dict(zip(unique_descriptions, unique_embeddings))\n",
    "        description_embeddings = np.array([desc_to_embedding[desc] for desc in songs_df['description']])\n",
    "\n",
    "        numeric_features = ['artist_hotttnesss', 'duration', 'artist_familiarity', 'year']\n",
    "        songs_df[numeric_features] = songs_df[numeric_features].fillna(0)\n",
    "        numeric_scaled = StandardScaler().fit_transform(songs_df[numeric_features])\n",
    "        \n",
    "        self.songs_df = songs_df\n",
    "        self.songs_encoded_df = pd.DataFrame(np.concatenate([numeric_scaled, description_embeddings], axis=1))\n",
    "        self.song_id_to_index = {song_id: i for i, song_id in enumerate(self.songs_df['song_id'])}\n",
    "        self.song_to_cluster_map = pd.Series(self.songs_df.cluster.values, index=self.songs_df.song_id).to_dict()\n",
    "\n",
    "    def _mine_association_rules(self, train_df):\n",
    "        print(\"Mining association rules.\")\n",
    "        # Intra Cluster ARM\n",
    "        MIN_ABSOLUTE_SUPPORT_CLUSTER = 5\n",
    "        all_cluster_rules = []\n",
    "        for cluster_id in tqdm(sorted(self.songs_df['cluster'].unique()), desc=\"Mining ARM per cluster\"):\n",
    "            if cluster_id == -1: continue\n",
    "            song_ids = self.songs_df.loc[self.songs_df['cluster'] == cluster_id, 'song_id']\n",
    "            transactions = train_df[train_df['song_id'].isin(song_ids)].groupby('user_id')['song_id'].apply(list).tolist()\n",
    "            if len(transactions) < MIN_ABSOLUTE_SUPPORT_CLUSTER: continue\n",
    "            patterns = pyfpgrowth.find_frequent_patterns(transactions, MIN_ABSOLUTE_SUPPORT_CLUSTER)\n",
    "            rules = pyfpgrowth.generate_association_rules(patterns, 0.10)\n",
    "            rules_list = [{'antecedents': frozenset(ant), 'consequents': frozenset(con), 'confidence': conf} \n",
    "                          for ant, (con, conf) in rules.items() if len(ant) <= 2 and len(con) == 1]\n",
    "            if rules_list: all_cluster_rules.append(pd.DataFrame(rules_list))\n",
    "        cluster_rules_df = pd.concat(all_cluster_rules, ignore_index=True) if all_cluster_rules else pd.DataFrame()\n",
    "        # Global ARM\n",
    "        global_transactions = train_df.groupby('user_id')['song_id'].apply(list).tolist()\n",
    "        patterns_global = pyfpgrowth.find_frequent_patterns(global_transactions, 10) \n",
    "        rules_global = pyfpgrowth.generate_association_rules(patterns_global, 0.15) \n",
    "        global_rules_list = [{'antecedents': frozenset(ant), 'consequents': frozenset(con), 'confidence': conf} \n",
    "                              for ant, (con, conf) in rules_global.items() if len(ant) <= 2 and len(con) == 1]\n",
    "        global_rules_df = pd.DataFrame(global_rules_list)\n",
    "        # Concatenate Rules\n",
    "        all_rules = pd.concat([global_rules_df, cluster_rules_df], ignore_index=True)\n",
    "        for _, row in all_rules.sort_values('confidence', ascending=False).drop_duplicates(subset=['antecedents']).iterrows():\n",
    "            self.antecedent_map[frozenset(row['antecedents'])] = {\n",
    "                'consequent': next(iter(row['consequents'])), 'confidence': row['confidence']\n",
    "            }\n",
    "\n",
    "    def _generate_rule_based_candidates(self):\n",
    "        self.user_predicted_songs = {}\n",
    "        for user_id, songs in tqdm(self.user_profile.items(), desc=\"Generating rule candidates\"):\n",
    "            user_set = set(songs)\n",
    "            predicted_for_user = {}\n",
    "            possible_antecedents = list(user_set) + list(combinations(user_set, 2))\n",
    "            for ant_tuple in possible_antecedents:\n",
    "                ant_key = frozenset(ant_tuple) if isinstance(ant_tuple, tuple) else frozenset([ant_tuple])\n",
    "                rule_info = self.antecedent_map.get(ant_key)\n",
    "                if rule_info:\n",
    "                    consequent = rule_info['consequent']\n",
    "                    if consequent not in user_set and (consequent not in predicted_for_user or rule_info['confidence'] > predicted_for_user[consequent]['confidence']):\n",
    "                        predicted_for_user[consequent] = {'confidence': rule_info['confidence']}\n",
    "            self.user_predicted_songs[user_id] = predicted_for_user\n",
    "\n",
    "    def _calculate_user_cluster_membership(self, train_df):\n",
    "        print(\"Calculating user-cluster membership.\")\n",
    "        user_cluster_plays = train_df.merge(self.songs_df[['song_id', 'cluster']], on='song_id')\n",
    "        user_cluster_sum = user_cluster_plays.groupby(['user_id', 'cluster'])['play_count'].sum().reset_index()\n",
    "        user_total_sum = user_cluster_sum.groupby('user_id')['play_count'].sum().rename('total_plays').reset_index()\n",
    "        user_cluster_sum = user_cluster_sum.merge(user_total_sum, on='user_id')\n",
    "        user_cluster_sum['membership_ratio'] = user_cluster_sum['play_count'] / user_cluster_sum['total_plays']\n",
    "        self.user_cluster_membership = user_cluster_sum.set_index(['user_id', 'cluster'])['membership_ratio'].to_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "878d1ab3-8561-4201-aa21-690ec3d5e86d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_data(songs_db_path, artists_db_path, triplets_path):\n",
    "    print(\"--- Loading All Data ---\")\n",
    "    with sqlite3.connect(songs_db_path) as conn:\n",
    "        songs_df = pd.read_sql_query(\"SELECT track_id, song_id, title, duration, artist_id, artist_name, year, artist_familiarity, artist_hotttnesss FROM songs\", conn)\n",
    "    with sqlite3.connect(artists_db_path) as conn:\n",
    "        artist_term_df = pd.read_sql_query(\"SELECT artist_id, term FROM artist_term\", conn)\n",
    "    triplet_df = pd.read_csv(triplets_path, sep='\\t', names=['user_id', 'song_id', 'play_count'])\n",
    "    return songs_df, artist_term_df, triplet_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76043c37-3ad1-4d3d-8ad8-df4f4955f6e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def preprocess_and_split_data(triplet_df, sample_frac=0.3, min_song_plays=70, min_user_plays=20):\n",
    "    print(\"--- Preprocessing and Splitting Data ---\")\n",
    "    sampled_user_ids = triplet_df['user_id'].drop_duplicates().sample(frac=sample_frac, random_state=42)\n",
    "    triplet_df = triplet_df[triplet_df['user_id'].isin(sampled_user_ids)].copy()\n",
    "\n",
    "    song_counts = triplet_df['song_id'].value_counts()\n",
    "    popular_songs = song_counts[song_counts >= min_song_plays].index\n",
    "    triplet_df = triplet_df[triplet_df['song_id'].isin(popular_songs)]\n",
    "    \n",
    "    user_counts = triplet_df['user_id'].value_counts()\n",
    "    active_users = user_counts[user_counts >= min_user_plays].index\n",
    "    triplet_df = triplet_df[triplet_df['user_id'].isin(active_users)]\n",
    "    print(f\"Final shape of triplet_df: {triplet_df.shape}\")\n",
    "\n",
    "    train_data, test_data = [], []\n",
    "    for _, group in tqdm(triplet_df.groupby('user_id'), desc=\"Splitting train/test\"):\n",
    "        if len(group) < 5:\n",
    "            train_data.append(group)\n",
    "            continue\n",
    "        train, test = train_test_split(group, test_size=0.2, random_state=42)\n",
    "        train_data.append(train)\n",
    "        test_data.append(test)\n",
    "    \n",
    "    train_df = pd.concat(train_data)\n",
    "    test_set = pd.concat(test_data).groupby('user_id')['song_id'].agg(list).to_dict()\n",
    "    return train_df, test_set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f88cb6c3-e093-49e1-9d7f-8efeef30df50",
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_hybrid_recommendations(user_id, custom_recs, knn_recs, w_custom=0.8, w_knn=0.2, top_k=30):\n",
    "    final_scores = defaultdict(float)\n",
    "    if custom_recs:\n",
    "        custom_scores = np.array(list(custom_recs.values())).reshape(-1, 1)\n",
    "        custom_scores_norm = MinMaxScaler().fit_transform(custom_scores).flatten()\n",
    "        for i, song_id in enumerate(custom_recs.keys()):\n",
    "            final_scores[song_id] += w_custom * custom_scores_norm[i]\n",
    "    if knn_recs:\n",
    "        knn_scores = np.array(list(knn_recs.values())).reshape(-1, 1)\n",
    "        knn_scores_norm = MinMaxScaler().fit_transform(knn_scores).flatten()\n",
    "        for i, song_id in enumerate(knn_recs.keys()):\n",
    "            final_scores[song_id] += w_knn * knn_scores_norm[i]\n",
    "    if not final_scores: return []\n",
    "    sorted_recs = sorted(final_scores.items(), key=lambda item: item[1], reverse=True)\n",
    "    return [song_id for song_id, score in sorted_recs[:top_k]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d32d038-4751-4f12-b8f3-233f5be489cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluation Functions\n",
    "def precision_at_k(recommended_items, true_items, k):\n",
    "    top_k = set(recommended_items[:k])\n",
    "    tp = len(top_k & true_items)\n",
    "    return tp / k if k > 0 else 0\n",
    "\n",
    "def recall_at_k(recommended_items, true_items, k):\n",
    "    top_k = set(recommended_items[:k])\n",
    "    tp = len(top_k & true_items)\n",
    "    return tp / len(true_items) if len(true_items) > 0 else 0\n",
    "\n",
    "def f1_at_k(recommended_items, true_items, k):\n",
    "    p = precision_at_k(recommended_items, true_items, k)\n",
    "    r = recall_at_k(recommended_items, true_items, k)\n",
    "    return 2 * (p * r) / (p + r) if p + r > 0 else 0.0\n",
    "\n",
    "def average_precision_at_k(recommended_items, true_items, k):\n",
    "    if not true_items: return 0.0\n",
    "    true_items = set(true_items)\n",
    "    recommended_items = recommended_items[:k]\n",
    "    hits, score = 0, 0.0\n",
    "    for i, p in enumerate(recommended_items):\n",
    "        if p in true_items:\n",
    "            hits += 1\n",
    "            score += hits / (i + 1.0)\n",
    "    return score / min(len(true_items), k)\n",
    "\n",
    "def ndcg_at_k(recommended_items, true_items, k):\n",
    "    true_items = set(true_items)\n",
    "    recommended_items = recommended_items[:k]\n",
    "    relevance = [1 if item in true_items else 0 for item in recommended_items]\n",
    "    dcg = sum(relevance[i] / np.log2(i + 2) for i in range(len(relevance)))\n",
    "    idcg = sum(1 / np.log2(i + 2) for i in range(min(len(true_items), k)))\n",
    "    return dcg / idcg if idcg > 0 else 0.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "66bed69a-760f-4b0b-b931-569b25ff724a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate_model(recommendations, test_set, ks=[1, 5, 10, 20, 30]):\n",
    "    results = {k: defaultdict(list) for k in ks}\n",
    "    for user_id, recs in recommendations.items():\n",
    "        true_items_set = set(test_set.get(user_id, []))\n",
    "        if not true_items_set: continue\n",
    "        \n",
    "        for k in ks:\n",
    "            results[k]['precision'].append(precision_at_k(recs, true_items_set, k))\n",
    "            results[k]['recall'].append(recall_at_k(recs, true_items_set, k))\n",
    "            results[k]['f1'].append(f1_at_k(recs, true_items_set, k))\n",
    "            results[k]['map'].append(average_precision_at_k(recs, true_items_set, k))\n",
    "            results[k]['ndcg'].append(ndcg_at_k(recs, true_items_set, k))\n",
    "\n",
    "    print(\"\\n--- Evaluation Results ---\")\n",
    "    summary = {}\n",
    "    for k in ks:\n",
    "        summary[f'Top@{k}'] = {\n",
    "            'Precision': np.mean(results[k]['precision']),\n",
    "            'Recall': np.mean(results[k]['recall']),\n",
    "            'F1-Score': np.mean(results[k]['f1']),\n",
    "            'MAP': np.mean(results[k]['map']),\n",
    "            'NDCG': np.mean(results[k]['ndcg']),\n",
    "        }\n",
    "\n",
    "    for level, values in summary.items():\n",
    "        print(f\"--- {level} ---\")\n",
    "        for metric, score in values.items():\n",
    "            print(f\" {metric}: {score*100:.4f}\")\n",
    "    return summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76b934f0-d844-4526-97da-40bf49057fdf",
   "metadata": {},
   "outputs": [],
   "source": [
    "if __name__ == '__main__':\n",
    "    TRIPLETS_PATH = 'db/train_triplets.txt'\n",
    "    SONGS_DB_PATH = 'db/track_metadata.db'\n",
    "    ARTISTS_DB_PATH = 'db/artist_term.db'\n",
    "\n",
    "    songs_df, artist_term_df, triplet_df = load_data(SONGS_DB_PATH, ARTISTS_DB_PATH, TRIPLETS_PATH)\n",
    "    train_df, test_set = preprocess_and_split_data(triplet_df)\n",
    "    \n",
    "    knn_model = ItemKNNRecommender()\n",
    "    knn_model.fit(train_df)\n",
    "    \n",
    "    arm_content_model = ArmContentRecommender()\n",
    "    arm_content_model.fit(train_df, songs_df, artist_term_df)\n",
    "\n",
    "    print(\"\\n--- Generating Hybrid Recommendations ---\")\n",
    "    hybrid_recommendations = {}\n",
    "    for user_id in tqdm(train_df['user_id'].unique(), desc=\"Generating Final Hybrid Recommendations\"):\n",
    "        custom_recs = arm_content_model.recommend(user_id, top_k=30)\n",
    "        knn_recs = knn_model.recommend(user_id, K=30)\n",
    "        final_recs = generate_hybrid_recommendations(\n",
    "            user_id, custom_recs, knn_recs, \n",
    "            w_custom=0.8, w_knn=0.2, top_k=30\n",
    "        )\n",
    "        hybrid_recommendations[user_id] = final_recs\n",
    "        \n",
    "    evaluate_model(hybrid_recommendations, test_set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d953f446-f081-48af-8c1d-492816967b19",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77f96750-3494-4786-97d7-1d0e10f945ed",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "56449964-ff9e-4d95-8c2a-3812ecd2b124",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
