{"cells":[{"cell_type":"code","execution_count":null,"id":"9xFzY-xh_OHT","metadata":{"colab":{"background_save":true,"base_uri":"https://localhost:8080/"},"id":"9xFzY-xh_OHT","outputId":"677de9a7-0567-4bf2-a8f6-1f28794d3810"},"outputs":[{"name":"stdout","output_type":"stream","text":["Mounted at /content/drive\n","Modules loaded successfully.\n","Preprocessing override applied.\n","Loading dataset: Raw 561D features with global Min-Max scaling...\n","Data loaded: m=30 tasks, d=561 features.\n","Starting experiment: 30 Random Splits, 5-Fold CV for Hyperparameters.\n","--- Split 1/30 ---\n","   [Split 0] ARMUL: 0.0630 | OURS: 0.0132\n","   [Split 1] ARMUL: 0.0484 | OURS: 0.0106\n","   [Split 2] ARMUL: 0.0440 | OURS: 0.0121\n","   [Split 3] ARMUL: 0.0498 | OURS: 0.0109\n","   [Split 4] ARMUL: 0.0513 | OURS: 0.0067\n","--- Split 6/30 ---\n","   [Split 5] ARMUL: 0.0532 | OURS: 0.0145\n","   [Split 6] ARMUL: 0.0513 | OURS: 0.0126\n","   [Split 7] ARMUL: 0.0537 | OURS: 0.0093\n","   [Split 8] ARMUL: 0.0503 | OURS: 0.0101\n","   [Split 9] ARMUL: 0.0523 | OURS: 0.0141\n","--- Split 11/30 ---\n","   [Split 10] ARMUL: 0.0523 | OURS: 0.0117\n","   [Split 11] ARMUL: 0.0562 | OURS: 0.0141\n","   [Split 12] ARMUL: 0.0518 | OURS: 0.0139\n","   [Split 13] ARMUL: 0.0552 | OURS: 0.0144\n","   [Split 14] ARMUL: 0.0581 | OURS: 0.0140\n","--- Split 16/30 ---\n","   [Split 15] ARMUL: 0.0498 | OURS: 0.0162\n","   [Split 16] ARMUL: 0.0498 | OURS: 0.0115\n","   [Split 17] ARMUL: 0.0449 | OURS: 0.0081\n","   [Split 18] ARMUL: 0.0572 | OURS: 0.0129\n","   [Split 19] ARMUL: 0.0479 | OURS: 0.0110\n","--- Split 21/30 ---\n","   [Split 20] ARMUL: 0.0523 | OURS: 0.0126\n","   [Split 21] ARMUL: 0.0513 | OURS: 0.0241\n","   [Split 22] ARMUL: 0.0484 | OURS: 0.0076\n","   [Split 23] ARMUL: 0.0532 | OURS: 0.0155\n","   [Split 24] ARMUL: 0.0562 | OURS: 0.0116\n","--- Split 26/30 ---\n","   [Split 25] ARMUL: 0.0596 | OURS: 0.0108\n","   [Split 26] ARMUL: 0.0523 | OURS: 0.0114\n","   [Split 27] ARMUL: 
0.0596 | OURS: 0.0110\n","   [Split 28] ARMUL: 0.0518 | OURS: 0.0163\n","   [Split 29] ARMUL: 0.0474 | OURS: 0.0117\n","\n","============================================================\n","FINAL RESULTS (Held-out Test Error)\n","Protocol: Global Min-Max [0,1] -> 5-fold CV -> Refit -> Test\n","============================================================\n","Method     | Mean Error | Std Dev   \n","------------------------------------------------------------\n","DP         | 0.0761     | 0.0046\n","ITL        | 0.0467     | 0.0051\n","ARMUL      | 0.0524     | 0.0043\n","OURS       | 0.0125     | 0.0032\n","============================================================\n"]}],"source":["# -*- coding: utf-8 -*-\n","\"\"\"\n","Experiment Script for Multi-Task Learning (MTL) on UCI HAR Dataset.\n","\n","This script implements the proposed method alongside baselines (DP, STL/ITL, ARMUL).\n","It performs data loading, preprocessing (global Min-Max scaling), and evaluation\n","using a Monte Carlo cross-validation framework.\n","\n","Dependencies:\n","    - numpy, pandas, scipy, sklearn\n","    - Local modules: ARMUL, MTL, preprocessing\n","\"\"\"\n","\n","import os\n","import sys\n","import math\n","import numpy as np\n","import pandas as pd\n","from scipy.optimize import minimize\n","from scipy.special import expit\n","from sklearn.preprocessing import MinMaxScaler\n","from google.colab import drive\n","\n","# =========================================================\n","# 1. 
Environment Setup\n",
"# =========================================================\n",
"drive.mount('/content/drive')\n",
"\n",
"WORK_DIR = '/content/drive/MyDrive/Colab Notebooks/MTLR'\n",
"os.chdir(WORK_DIR)\n",
"# Guard the append so re-running this cell does not keep growing sys.path.\n",
"if WORK_DIR not in sys.path:\n",
"    sys.path.append(WORK_DIR)\n",
"\n",
"try:\n",
"    import preprocessing\n",
"    import MTL\n",
"    from ARMUL import ARMUL, Baselines\n",
"    print(\"Modules loaded successfully.\")\n",
"except ImportError:\n",
"    print(\"Error: Required modules (ARMUL.py, MTL.py, preprocessing.py) not found in working directory.\")\n",
"    raise\n",
"\n",
"\n",
"# =========================================================\n",
"# 2. Preprocessing Overrides\n",
"# =========================================================\n",
"# Keep a handle on the library implementation so the override can delegate to it.\n",
"# When this cell is re-run, the module attribute already points at an earlier\n",
"# override; unwrap it through `_original` so the fallback never calls itself.\n",
"_library_MTL_preprocessing = getattr(preprocessing, 'MTL_preprocessing', None)\n",
"_library_MTL_preprocessing = getattr(\n",
"    _library_MTL_preprocessing, '_original', _library_MTL_preprocessing\n",
")\n",
"\n",
"\n",
"def safe_MTL_preprocessing(\n",
"    data, link='linear', intercept=True, n_class=1, standardization=True\n",
"):\n",
"    \"\"\"\n",
"    Overrides the default MTL preprocessing to handle non-standardized inputs safely.\n",
"\n",
"    When standardization=False, features are passed through unscaled and the\n",
"    reported means/stds are set to 0/1 to prevent division-by-zero errors in\n",
"    downstream normalization steps. When standardization=True, the call is\n",
"    delegated to the library implementation captured before patching.\n",
"\n",
"    Args:\n",
"        data: List containing [X (features), y (labels)], one array per task.\n",
"        link: Link function ('linear' or 'logistic').\n",
"        intercept: Whether to prepend a column of ones (bias term) to each X.\n",
"        n_class: Number of classes (for classification).\n",
"        standardization: If True, defer to the library's own preprocessing.\n",
"                         If False, bypass scaling safely.\n",
"\n",
"    Returns:\n",
"        [X, Y, X_means, X_stds, y_mean, y_std, n_list, d_out], where X and Y\n",
"        hold exactly one array per task (Y reshaped to column vectors) and\n",
"        n_list holds the per-task sample counts.\n",
"\n",
"    Raises:\n",
"        NotImplementedError: For multi-class logistic labels without\n",
"            standardization (no label encoding is implemented here), or when\n",
"            standardization=True and no library implementation was captured.\n",
"    \"\"\"\n",
"    if standardization:\n",
"        if _library_MTL_preprocessing is None:\n",
"            raise NotImplementedError(\n",
"                'standardization=True requires preprocessing.MTL_preprocessing, which was not found.'\n",
"            )\n",
"        # NOTE(review): assumes the library function accepts the same keyword\n",
"        # names as this override -- confirm against preprocessing.py.\n",
"        return _library_MTL_preprocessing(\n",
"            data, link=link, intercept=intercept, n_class=n_class, standardization=True\n",
"        )\n",
"\n",
"    if link == 'logistic' and n_class != 2:\n",
"        # The previous code returned an empty label list on this path.\n",
"        raise NotImplementedError(\n",
"            'safe_MTL_preprocessing supports only binary labels when link=\"logistic\" and standardization=False.'\n",
"        )\n",
"\n",
"    m = len(data[0])\n",
"    d = data[0][0].shape[1]\n",
"\n",
"    # Identity standardization parameters, one row per coefficient (incl. bias).\n",
"    n_params = d + 1 if intercept else d\n",
"    X_means = np.zeros((n_params, 1))\n",
"    X_stds = np.ones((n_params, 1))\n",
"    y_mean, y_std = 0, 1\n",
"\n",
"    n_list = np.zeros(m, dtype=int)\n",
"    X = []\n",
"    for j in range(m):\n",
"        X_j = data[0][j]\n",
"        n_list[j] = X_j.shape[0]\n",
"        if intercept:\n",
"            X_j = np.hstack((np.ones((n_list[j], 1)), X_j))\n",
"        X.append(X_j)\n",
"\n",
"    # One label column per task. This used to run inside the task loop and\n",
"    # therefore appended every task's labels m times (m * m entries in total).\n",
"    Y = [y_j.reshape(-1, 1) for y_j in data[1]]\n",
"\n",
"    d_out = 1 if n_class == 2 else n_class\n",
"    return [X, Y, X_means, X_stds, y_mean, y_std, n_list, d_out]\n",
"\n",
"\n",
"# Apply patch to local modules\n",
"safe_MTL_preprocessing._original = _library_MTL_preprocessing\n",
"preprocessing.MTL_preprocessing = safe_MTL_preprocessing\n",
"MTL.MTL_preprocessing = safe_MTL_preprocessing\n",
"print(\"Preprocessing override applied.\")\n",
"\n",
"\n",
"# =========================================================\n",
"# 3. Data Loading and Processing\n",
"# =========================================================\n",
"def load_raw_minmax(path, label_list=(5,)):\n",
"    \"\"\"\n",
"    Loads the UCI HAR dataset and applies global Min-Max scaling.\n",
"\n",
"    Processing steps:\n",
"      1. Load raw train/test files.\n",
"      2. Concatenate splits to form the full dataset.\n",
"      3. Apply global Min-Max scaling to the range [0, 1].\n",
"      4. Partition data by subject ID (Task).\n",
"      5. Binarize labels: 1 if label in label_list, 0 otherwise.\n",
"\n",
"    Note:\n",
"        The scaler is fitted on the concatenated train+test files, so any\n",
"        train/test split drawn later shares the same per-feature min and max.\n",
"\n",
"    Args:\n",
"        path: Root directory containing 'UCI_HAR_Dataset'.\n",
"        label_list: Iterable of activity IDs (as stored in y_*.txt) to treat\n",
"            as the positive class.\n",
"\n",
"    Returns:\n",
"        List: [[X_list, y_list], model_type, n_classes], with one array per\n",
"        subject in ascending subject-ID order.\n",
"\n",
"    Raises:\n",
"        FileNotFoundError: If one of the six dataset files cannot be read.\n",
"    \"\"\"\n",
"    print(\"Loading dataset: Raw 561D features with global Min-Max scaling...\")\n",
"\n",
"    base_path = os.path.join(path, 'UCI_HAR_Dataset')\n",
"    try:\n",
"        X_train = np.loadtxt(os.path.join(base_path, 'train', 'X_train.txt'))\n",
"        y_train = np.loadtxt(os.path.join(base_path, 'train', 'y_train.txt'))\n",
"        sub_train = np.loadtxt(os.path.join(base_path, 'train', 'subject_train.txt'))\n",
"\n",
"        X_test = np.loadtxt(os.path.join(base_path, 'test', 'X_test.txt'))\n",
"        y_test = np.loadtxt(os.path.join(base_path, 'test', 'y_test.txt'))\n",
"        sub_test = np.loadtxt(os.path.join(base_path, 'test', 'subject_test.txt'))\n",
"    except OSError as err:\n",
"        # Chain the cause so the offending file name stays visible in the traceback.\n",
"        raise FileNotFoundError(f\"Dataset not found at: {base_path}\") from err\n",
"\n",
"    X_all = np.concatenate((X_train, X_test))\n",
"    y_all = np.concatenate((y_train, y_test)).astype(int)\n",
"    sub_all = np.concatenate((sub_train, sub_test)).astype(int)\n",
"\n",
"    # Apply Global Min-Max Scaling\n",
"    scaler = MinMaxScaler(feature_range=(0, 1))\n",
"    X_final = scaler.fit_transform(X_all)\n",
"\n",
"    # Binarize Labels (vectorized); list(...) accepts any iterable, sets included.\n",
"    y_binary = np.isin(y_all, list(label_list)).astype(int)\n",
"\n",
"    # Organize by Task (Subject); subject IDs with no rows are skipped.\n",
"    X_raw, y_raw = [], []\n",
"    for j in range(1, int(sub_all.max()) + 1):\n",
"        mask = sub_all == j\n",
"        if mask.any():\n",
"            X_raw.append(X_final[mask])\n",
"            y_raw.append(y_binary[mask])\n",
"\n",
"    n_class = 2\n",
"    return [[X_raw, y_raw], 'logistic', n_class]\n",
"\n",
"\n",
"# =========================================================\n",
"# 4. Proposed Method Optimization (L-BFGS-B)\n",
"# =========================================================\n",
"def nll_and_grad_theta(X, y, theta):\n",
"    \"\"\"\n",
"    Computes the mean logistic negative log-likelihood and its gradient.\n",
"\n",
"    Args:\n",
"        X: Design matrix; rows are samples.\n",
"        y: 0/1 labels, one per row of X (fit_OURS_BFGS passes a flattened vector).\n",
"        theta: Coefficient vector multiplied with X.\n",
"\n",
"    Returns:\n",
"        Tuple (nll, grad): NLL averaged over the rows of X, and its gradient\n",
"        with respect to theta.\n",
"    \"\"\"\n",
"    z = X @ theta\n",
"    # logaddexp(0, z) = log(1 + exp(z)) without overflowing for large z.\n",
"    nll = np.mean(np.logaddexp(0.0, z) - y * z)\n",
"    grad = (X.T @ (expit(z) - y)) / X.shape[0]\n",
"    return nll, grad\n",
"\n",
"\n",
"def reg_sqrt_value_and_grad(diff, Sigma, lam):\n",
"    \"\"\"\n",
"    Computes the regularization term: lambda * sqrt(diff^T * Sigma * diff).\n",
"\n",
"    The quadratic form is floored at 1e-12 before the square root, so the\n",
"    gradient denominator is never below 1e-6. At diff = 0 the product\n",
"    Sigma @ diff is zero, hence the returned gradient is exactly zero there.\n",
"    The gradient formula assumes Sigma is symmetric, which holds for the\n",
"    X^T X / n matrices built in fit_OURS_BFGS.\n",
"\n",
"    Returns:\n",
"        Tuple (val, grad): penalty value and its gradient with respect to diff.\n",
"    \"\"\"\n",
"    M = Sigma @ diff\n",
"    q = float(diff @ M)\n",
"    s = math.sqrt(max(q, 1e-12))\n",
"    # s >= 1e-6 here, so the former `s > 1e-12` guard could never be False.\n",
"    return lam * s, (lam / s) * M\n",
"\n",
"\n",
"def fit_OURS_BFGS(data, q=1.0, maxiter=200):\n",
"    \"\"\"\n",
"    Jointly optimizes task parameters (Theta) and the centroid (Bar) using L-BFGS-B.\n",
"\n",
"    The objective is the sum over tasks of n_j * mean-NLL_j plus the penalty\n",
"    q * sqrt(d * n_j) * sqrt((theta_j - Bar)^T Sigma_j (theta_j - Bar)),\n",
"    with Sigma_j = X_j^T X_j / n_j. All parameters start at zero.\n",
"\n",
"    Args:\n",
"        data: List containing [X_list, y_list]. X must include intercept.\n",
"        q: Regularization hyperparameter.\n",
"        maxiter: Maximum iterations for the optimizer.\n",
"\n",
"    Returns:\n",
"        List of optimized parameter vectors (Theta) for each task. The\n",
"        optimizer's convergence status is not inspected.\n",
"    \"\"\"\n",
"    X_list = data[0]\n",
"    y_flat = [y.ravel() for y in data[1]]\n",
"    m = len(X_list)\n",
"    d = X_list[0].shape[1]\n",
"    n_list = [len(y) for y in data[1]]\n",
"\n",
"    # Precompute Sigmas and Lambda scaling\n",
"    Sigmas = [(X.T @ X) / n for X, n in zip(X_list, n_list)]\n",
"    lam_list = [q * math.sqrt(d) * math.sqrt(n) for n in n_list]\n",
"\n",
"    def objective_function(params):\n",
"        # params = [Theta (m x d, row-major), Bar (d)]\n",
"        Theta = params[:m * d].reshape(m, d)\n",
"        Bar = params[m * d:]\n",
"\n",
"        total_loss = 0.0\n",
"        gTheta, gBar = np.zeros_like(Theta), np.zeros_like(Bar)\n",
"\n",
"        for j in range(m):\n",
"            # 1. Negative Log Likelihood (summed, hence the n_j factor)\n",
"            nll, g = nll_and_grad_theta(X_list[j], y_flat[j], Theta[j])\n",
"            total_loss += n_list[j] * nll\n",
"            gTheta[j] += n_list[j] * g\n",
"\n",
"            # 2. Regularization Penalty; d(diff)/d(Bar) = -I, hence the minus sign\n",
"            val, g_pen = reg_sqrt_value_and_grad(Theta[j] - Bar, Sigmas[j], lam_list[j])\n",
"            total_loss += val\n",
"            gTheta[j] += g_pen\n",
"            gBar -= g_pen\n",
"\n",
"        return total_loss, np.concatenate([gTheta.ravel(), gBar.ravel()])\n",
"\n",
"    # Optimization\n",
"    res = minimize(\n",
"        objective_function,\n",
"        np.zeros(m * d + d),\n",
"        method=\"L-BFGS-B\",\n",
"        jac=True,\n",
"        options={\"maxiter\": maxiter},\n",
"    )\n",
"\n",
"    # Extract task-specific parameters\n",
"    Theta_hat = res.x[:m * d].reshape(m, d)\n",
"    return [Theta_hat[j] for j in range(m)]\n",
"\n",
"\n",
"def evaluate_custom(data_test, thetas):\n",
"    \"\"\"\n",
"    Evaluates the model on test data and returns the average classification error.\n",
"\n",
"    Args:\n",
"        data_test: [X_list, y_list]; X must carry the same columns (incl.\n",
"            intercept) the thetas were fitted on.\n",
"        thetas: One parameter vector per task.\n",
"\n",
"    Returns:\n",
"        Unweighted mean over tasks of the per-task misclassification rate,\n",
"        predicting class 1 when the fitted probability is at least 0.5.\n",
"    \"\"\"\n",
"    errs = []\n",
"    for j in range(len(data_test[0])):\n",
"        z = data_test[0][j] @ thetas[j]\n",
"        yhat = (expit(z) >= 0.5).astype(int)\n",
"        errs.append(np.mean(yhat != data_test[1][j].ravel()))\n",
"    return np.mean(errs)\n",
"\n",
"\n",
"# =========================================================\n",
"# 5. 
Cross-Validation for Hyperparameter Tuning\n",
"# =========================================================\n",
"def run_cv_selection(data_train, model_type, q_list, d_dim, n_fold=5, eta=0.1, t_iter=200, seed=None):\n",
"    \"\"\"\n",
"    Selects the optimal hyperparameter 'q' via k-fold cross-validation and\n",
"    refits the model on the full training set.\n",
"\n",
"    Args:\n",
"        data_train: Training data [X_list, y_list].\n",
"        model_type: 'ARMUL' or 'OURS'.\n",
"        q_list: Iterable of candidate q values; ties go to the earliest one.\n",
"        d_dim: Feature dimensionality (without intercept).\n",
"        n_fold: Number of CV folds.\n",
"        eta: Learning rate (for ARMUL).\n",
"        t_iter: Number of iterations.\n",
"        seed: Seed handed to preprocessing.split_cv. If None (default), one is\n",
"            drawn from NumPy's global RNG, as before.\n",
"\n",
"    Returns:\n",
"        Tuple: (Optimized Model/Parameters, Best q value)\n",
"\n",
"    Raises:\n",
"        ValueError: If model_type is unknown or q_list is empty.\n",
"        RuntimeError: If no candidate produced a comparable (non-NaN) CV error.\n",
"    \"\"\"\n",
"    if model_type not in ('ARMUL', 'OURS'):\n",
"        # Previously an unknown type fell through and returned None implicitly.\n",
"        raise ValueError(f\"Unknown model_type: {model_type!r}; expected 'ARMUL' or 'OURS'.\")\n",
"    q_list = list(q_list)\n",
"    if not q_list:\n",
"        raise ValueError('q_list must contain at least one candidate value.')\n",
"\n",
"    m = len(data_train[0])\n",
"    n_list = np.array([len(y) for y in data_train[1]])\n",
"\n",
"    def with_intercept(X_list):\n",
"        # Prepend a column of ones; the BFGS solver expects an explicit bias column.\n",
"        return [np.hstack([np.ones((x.shape[0], 1)), x]) for x in X_list]\n",
"\n",
"    def fit_armul(train, q):\n",
"        # Per-task lambda scales with sqrt((d + 1) / n_j) of the data being fitted.\n",
"        sizes = np.array([len(y) for y in train[1]])\n",
"        model = ARMUL(link='logistic', n_class=2, penalty='new')\n",
"        model.vanilla(\n",
"            train,\n",
"            lbd=q * np.sqrt((d_dim + 1) / sizes),\n",
"            eta_global=eta,\n",
"            eta_local=eta,\n",
"            T_global=t_iter,\n",
"            standardization=False,\n",
"            intercept=True,\n",
"        )\n",
"        return model\n",
"\n",
"    def fit_ours(train, q):\n",
"        return fit_OURS_BFGS([with_intercept(train[0]), train[1]], q=q, maxiter=t_iter)\n",
"\n",
"    # Generate CV splits; splits[j][k] is used as the validation index set of\n",
"    # fold k for task j.\n",
"    if seed is None:\n",
"        seed = np.random.randint(10000)\n",
"    splits = preprocessing.split_cv(n_list, n_fold, seed=seed)\n",
"\n",
"    best_q = None\n",
"    best_avg_err = float('inf')\n",
"\n",
"    for q in q_list:\n",
"        fold_errors = []\n",
"\n",
"        for k in range(n_fold):\n",
"            # Construct Train/Validation sets for the fold\n",
"            X_tr, y_tr, X_val, y_val = [], [], [], []\n",
"            for j in range(m):\n",
"                idx_val = splits[j][k]\n",
"                idx_tr = np.delete(np.arange(n_list[j]), idx_val)\n",
"                X_tr.append(data_train[0][j][idx_tr])\n",
"                y_tr.append(data_train[1][j][idx_tr])\n",
"                X_val.append(data_train[0][j][idx_val])\n",
"                y_val.append(data_train[1][j][idx_val])\n",
"\n",
"            if model_type == 'ARMUL':\n",
"                res = fit_armul([X_tr, y_tr], q).evaluate([X_val, y_val], model='vanilla')\n",
"                fold_errors.append(res['average error'])\n",
"            else:\n",
"                thetas = fit_ours([X_tr, y_tr], q)\n",
"                fold_errors.append(evaluate_custom([with_intercept(X_val), y_val], thetas))\n",
"\n",
"        avg_err = np.mean(fold_errors)\n",
"        if avg_err < best_avg_err:\n",
"            best_avg_err = avg_err\n",
"            best_q = q\n",
"\n",
"    if best_q is None:\n",
"        # Happens when n_fold == 0 or every candidate's CV error is NaN.\n",
"        raise RuntimeError('Cross-validation produced no comparable error for any q in q_list.')\n",
"\n",
"    # Refit on the complete training set using the best q\n",
"    if model_type == 'ARMUL':\n",
"        return fit_armul(data_train, best_q), best_q\n",
"    return fit_ours(data_train, best_q), best_q\n",
"\n",
"\n",
"# =========================================================\n",
"# 6. 
Main Experiment Loop\n",
"# =========================================================\n",
"def run_experiment():\n",
"    \"\"\"\n",
"    Runs the benchmark: repeated random train/test splits, the DP and ITL\n",
"    baselines, and CV-tuned ARMUL and OURS; prints a summary table.\n",
"\n",
"    Returns:\n",
"        pandas.DataFrame with one row per split: the 0-based split index\n",
"        ('split'), test errors ('DP', 'ITL', 'ARMUL', 'OURS') and the selected\n",
"        hyperparameters ('q_ARMUL', 'q_OURS').\n",
"    \"\"\"\n",
"    PATH_TO_HAR_DATA = os.path.join(WORK_DIR, '')\n",
"\n",
"    # Load and process data\n",
"    dataset_info = load_raw_minmax(path=PATH_TO_HAR_DATA, label_list=[5])\n",
"    data = dataset_info[0]  # [X, y]\n",
"    m = len(data[0])\n",
"    d = data[0][0].shape[1]\n",
"    print(f\"Data loaded: m={m} tasks, d={d} features.\")\n",
"\n",
"    # Configuration\n",
"    K_SPLITS = 30\n",
"    N_FOLD = 5\n",
"    Q_LIST = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]\n",
"    ETA = 0.1\n",
"    T_ITER = 200\n",
"    METHODS = ('DP', 'ITL', 'ARMUL', 'OURS')\n",
"\n",
"    results = []\n",
"\n",
"    # Derived from the constants so the banner cannot drift from the configuration.\n",
"    print(f\"Starting experiment: {K_SPLITS} Random Splits, {N_FOLD}-Fold CV for Hyperparameters.\")\n",
"\n",
"    for k in range(K_SPLITS):\n",
"        if k % 5 == 0:\n",
"            print(f\"--- Split {k + 1}/{K_SPLITS} ---\")\n",
"\n",
"        # Split into Train/Test (80/20) with seed for reproducibility\n",
"        seed = k * 100\n",
"        prop = 0.2 * np.ones(m)\n",
"        data_train, data_test = preprocessing.split(data, prop=prop, seed=seed)\n",
"\n",
"        # 1. Baselines (DP & ITL) - No CV required\n",
"        base = Baselines(link='logistic', n_class=2)\n",
"\n",
"        base.DP_train(data_train, eta=ETA, T=T_ITER, standardization=False, intercept=True)\n",
"        dp_res = base.evaluate(data_test, model='DP')\n",
"\n",
"        base.STL_train(data_train, eta=ETA, T=T_ITER, standardization=False, intercept=True)\n",
"        itl_res = base.evaluate(data_test, model='STL')\n",
"\n",
"        # run_cv_selection draws its fold seed from NumPy's global RNG when no\n",
"        # seed is passed. Re-seeding right before each call makes that draw\n",
"        # reproducible and identical for ARMUL and OURS.\n",
"        # NOTE(review): identical folds additionally assume split_cv is\n",
"        # deterministic for a given seed -- confirm in preprocessing.py.\n",
"\n",
"        # 2. ARMUL - CV for q, then refit\n",
"        np.random.seed(seed)\n",
"        best_armul_model, best_q_armul = run_cv_selection(\n",
"            data_train, 'ARMUL', Q_LIST, d, n_fold=N_FOLD, eta=ETA, t_iter=T_ITER\n",
"        )\n",
"        arm_res = best_armul_model.evaluate(data_test, model='vanilla')\n",
"\n",
"        # 3. Proposed Method (OURS) - CV for q, then refit\n",
"        np.random.seed(seed)\n",
"        best_ours_thetas, best_q_ours = run_cv_selection(\n",
"            data_train, 'OURS', Q_LIST, d, n_fold=N_FOLD, eta=ETA, t_iter=T_ITER\n",
"        )\n",
"\n",
"        # Evaluate Proposed Method (explicit intercept column, as during fitting)\n",
"        X_te_int = [np.hstack([np.ones((x.shape[0], 1)), x]) for x in data_test[0]]\n",
"        ours_err = evaluate_custom([X_te_int, data_test[1]], best_ours_thetas)\n",
"\n",
"        results.append({\n",
"            'split': k,\n",
"            'DP': dp_res['average error'],\n",
"            'ITL': itl_res['average error'],\n",
"            'ARMUL': arm_res['average error'],\n",
"            'q_ARMUL': best_q_armul,\n",
"            'OURS': ours_err,\n",
"            'q_OURS': best_q_ours,\n",
"        })\n",
"\n",
"        # 1-based label, matching the '--- Split k/K ---' section headers above.\n",
"        print(\n",
"            f\"   [Split {k + 1}] ARMUL: {arm_res['average error']:.4f} | \"\n",
"            f\"OURS: {ours_err:.4f}\"\n",
"        )\n",
"\n",
"    # Aggregated Results\n",
"    df = pd.DataFrame(results)\n",
"\n",
"    print(\"\\n\" + \"=\" * 60)\n",
"    print(\"FINAL RESULTS (Held-out Test Error)\")\n",
"    print(f\"Protocol: Global Min-Max [0,1] -> {N_FOLD}-fold CV -> Refit -> Test\")\n",
"    print(\"=\" * 60)\n",
"\n",
"    print(f\"{'Method':<10} | {'Mean Error':<10} | {'Std Dev':<10}\")\n",
"    print(\"-\" * 60)\n",
"    for method in METHODS:\n",
"        # Series.std() is the sample standard deviation (ddof=1) across splits.\n",
"        print(f\"{method:<10} | {df[method].mean():.4f}     | {df[method].std():.4f}\")\n",
"    print(\"=\" * 60)\n",
"\n",
"    return df\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
"    run_experiment()"]}],"metadata":{"colab":{"provenance":[]},"kernelspec":{"display_name":"Python 
3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.11.8"}},"nbformat":4,"nbformat_minor":5}