{
    "0": {
        "0": [
            {
                "code": "import numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    adata_int.obsm[\"X_emb\"] = np.empty((adata_int.n_obs, adata_int.X.shape[1]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round10_scVI_single_cell_Var.py",
                "accuracy": 0.6367871896620461,
                "model_info": "",
                "lineage": "batch_int_tusoml_round10_scVI_single_cell_Var"
            }
        ],
        "1": [
            {
                "code": "import numpy as np\nimport logging\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    adata_int.obsm[\"X_emb\"] = np.empty((adata_int.n_obs, adata_int.X.shape[1]))\n    \n    logging.info(f'Batch integration model initialized with {adata_int.n_obs} observations and {adata_int.X.shape[1]} features.')\n    logging.info(f'Using device: {np.__version__}, numpy version: {np.__version__}')\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round10_scVI_single_cell_VarX.py",
                "accuracy": 0.6367871896620461,
                "model_info": "",
                "lineage": "101"
            }
        ],
        "2": [],
        "3": [],
        "4": [
            {
                "code": "import numpy as np\nimport logging\nimport scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    batch = adata_int.obs[\"batch\"].values\n    adata_int.X = adata_int.X - np.mean(adata_int.X, axis=0)  # Simple batch correction\n    \n    sc.pp.scale(adata_int)\n    sc.tl.pca(adata_int, svd_solver='arpack')\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n    \n    logging.info(f'Batch integration model initialized with {adata_int.n_obs} observations and {adata_int.X.shape[1]} features.')\n    logging.info(f'Using device: {np.__version__}, numpy version: {np.__version__}')\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round10_scVI_single_cell_VarXX.py",
                "accuracy": 0.5540617289317177,
                "model_info": "",
                "lineage": "401"
            }
        ],
        "5": [
            {
                "code": "import numpy as np\nfrom sklearn.metrics import silhouette_score\nimport scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.scale(adata_int)\n    sc.pp.pca(adata_int, n_comps=50)\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    batch_labels = adata.obs[\"batch\"].values\n    unique_batches = np.unique(batch_labels)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    metrics = {}\n    for batch in unique_batches:\n        batch_mask = (batch_labels == batch)\n        if np.sum(batch_mask) > 1:\n            unique_labels = np.unique(batch_labels[batch_mask])\n            if len(unique_labels) > 1:\n                metrics[batch] = silhouette_score(adata_int.obsm[\"X_emb\"][batch_mask], batch_labels[batch_mask])\n\n    weighted_score = np.mean(list(metrics.values())) if metrics else 0\n    adata_int.uns[\"integration_score\"] = weighted_score\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round16_DESC_Deep_Embedding_XX.py",
                "accuracy": 0.5627420290315447,
                "model_info": "",
                "lineage": "501"
            }
        ],
        "8": [],
        "9": [],
        "10": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    silhouette_avg = silhouette_score(X_emb, adata_int.obs[\"batch\"])\n    adata_int.uns[\"silhouette_score\"] = silhouette_avg\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "",
                "lineage": "1001"
            }
        ],
        "11": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import RobustScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = RobustScaler(with_centering=False)\n    X_scaled = scaler.fit_transform(adata_int.X.toarray())\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.6252647787030242,
                "model_info": "",
                "lineage": "1101"
            }
        ],
        "12": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    batch_labels = adata_int.obs[\"batch\"].values\n    unique_batches = np.unique(batch_labels)\n    batch_means = np.array([X_emb[batch_labels == batch].mean(axis=0) for batch in unique_batches])\n    \n    batch_indices = np.array([np.where(unique_batches == batch)[0][0] for batch in batch_labels])\n    loss_function = lambda weights: np.sum((X_emb - batch_means[batch_indices]) ** 2) + np.sum(weights ** 2)\n    initial_weights = np.zeros(X_emb.shape[1])\n    result = minimize(loss_function, initial_weights)\n    \n    adata_int.obsm[\"X_emb\"] = X_emb - result.x\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.753452237767252,
                "model_info": "",
                "lineage": "1201"
            }
        ],
        "13": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import QuantileTransformer\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    quantile_transformer = QuantileTransformer(output_distribution='uniform')\n    X_transformed = quantile_transformer.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_transformed)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7222522182635135,
                "model_info": "",
                "lineage": "1301"
            }
        ],
        "14": [],
        "15": [],
        "16": [],
        "17": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\nimport logging\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n    \n    logging.info(f\"Hyper-parameters: PCA components = 50, Scaling = StandardScaler(with_mean=False)\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "",
                "lineage": "1701"
            }
        ],
        "18": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n    \n    print(f\"Shapes of key tensors: X_scaled: {X_scaled.shape}, X_emb: {X_emb.shape}, adata_int.X: {adata_int.X.shape}\")\n    print(f\"Data types: X_scaled: {X_scaled.dtype}, X_emb: {X_emb.dtype}, adata_int.X: {adata_int.X.dtype}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "Shapes of key tensors: X_scaled: (5000, 29335), X_emb: (5000, 50), adata_int.X: (5000, 29335)\nData types: X_scaled: float32, X_emb: float32, adata_int.X: float32",
                "lineage": "1801"
            }
        ],
        "19": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    combat_data = adata_int.X  # Skip combat if the package is not available\n    \n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(combat_data)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "",
                "lineage": "1901"
            }
        ],
        "20": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import QuantileTransformer\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    transformer = QuantileTransformer(output_distribution='uniform')\n    X_transformed = transformer.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_transformed)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7222522182635135,
                "model_info": "",
                "lineage": "2001"
            }
        ],
        "21": [],
        "22": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_pca = pca.fit_transform(X_scaled)\n\n    tsne = TSNE(n_components=2)\n    X_emb = tsne.fit_transform(X_pca)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.5316093898789501,
                "model_info": "",
                "lineage": "2201"
            }
        ],
        "23": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    batch_stats = adata_int.obs[\"batch\"].value_counts()\n    print(\"Batch statistics before training:\", batch_stats)\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "Batch statistics before training: batch\nD496    1289\nD503    1199\n640C     544\n637C     395\nA36      362\nA29      271\nA31      202\n621B     184\n582C     174\nA35      165\nA37      146\nA52       69\nName: count, dtype: int64",
                "lineage": "2301"
            }
        ],
        "24": [],
        "25": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    if adata_int.shape[0] < 1000:\n        print(\"Warning: Insufficient training data volume. Consider increasing the dataset size.\")\n\n    feature_range = f\"Features range: {np.min(adata_int.X, axis=0)} to {np.max(adata_int.X, axis=0)}\"\n    print(feature_range)\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    explained_variance = f\"Explained variance by PCA: {np.sum(pca.explained_variance_ratio_)}\"\n    print(explained_variance)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "Features range:  to   (0, 0)\t2.9085948\n  (0, 1)\t2.2358274\n  (0, 2)\t1.8608006\n  (0, 3)\t2.8190215\n  (0, 4)\t2.4941232\n  (0, 5)\t1.9837002\n  (0, 6)\t1.558342\n  (0, 8)\t2.065824\n  (0, 9)\t0.33041298\n  (0, 10)\t2.8931623\n  (0, 11)\t2.184526\n  (0, 12)\t0.7517459\n  (0, 15)\t0.1992864\n  (0, 16)\t2.794539\n  (0, 17)\t2.4733143\n  (0, 18)\t3.3509297\n  (0, 19)\t2.0664263\n  (0, 20)\t2.252411\n  (0, 21)\t0.99943197\n  (0, 22)\t2.488029\n  (0, 23)\t1.618783\n  (0, 24)\t2.9909003\n  (0, 25)\t2.1658494\n  (0, 26)\t2.5423577\n  (0, 27)\t2.4660518\n  :\t:\n  (0, 29237)\t1.226436\n  (0, 29238)\t0.30243132\n  (0, 29239)\t1.0471275\n  (0, 29242)\t1.388398\n  (0, 29245)\t1.0311906\n  (0, 29248)\t1.063984\n  (0, 29253)\t0.49048728\n  (0, 29256)\t1.1353872\n  (0, 29269)\t1.3719184\n  (0, 29273)\t0.23927571\n  (0, 29279)\t0.9705305\n  (0, 29280)\t1.0684794\n  (0, 29286)\t1.350562\n  (0, 29288)\t0.7336184\n  (0, 29291)\t0.780065\n  (0, 29300)\t1.2358801\n  (0, 29301)\t0.22017863\n  (0, 29302)\t0.317025\n  (0, 29306)\t0.7490907\n  (0, 29308)\t1.5509802\n  (0, 29309)\t1.0023704\n  (0, 29310)\t0.24659261\n  (0, 29312)\t0.76452494\n  (0, 29318)\t1.6084787\n  (0, 29334)\t0.9438509\nExplained variance by PCA: 0.0741022452712059",
                "lineage": "2501"
            }
        ],
        "26": [],
        "27": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    low_confidence_threshold = 0.5\n    low_confidence_count = np.sum(np.max(X_emb, axis=1) < low_confidence_threshold)\n    low_confidence_percentage = (low_confidence_count / X_emb.shape[0]) * 100\n    print(f\"Low-confidence predictions: {low_confidence_percentage:.2f}%\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "Low-confidence predictions: 0.00%",
                "lineage": "2701"
            }
        ],
        "28": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(adata_int.X)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.744630714794189,
                "model_info": "",
                "lineage": "2801"
            }
        ],
        "29": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import RobustScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = RobustScaler()\n    X_scaled = scaler.fit_transform(adata_int.X.toarray())\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.6252647761329319,
                "model_info": "",
                "lineage": "2901"
            }
        ],
        "30": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.feature_selection import VarianceThreshold\nfrom sklearn.feature_selection import SelectKBest, f_classif\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.model_selection import cross_val_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    selector = SelectKBest(score_func=f_classif, k=1000)\n    X_selected = selector.fit_transform(X_scaled, adata_int.obs[\"batch\"])\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_selected)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7395647240014198,
                "model_info": "",
                "lineage": "3001"
            }
        ],
        "31": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    # Placeholder for combat adjustment\n    # adata_int.X = combat.combat(X_scaled, adata_int.obs[\"batch\"].values)\n    adata_int.X = X_scaled  # Use scaled data directly if combat is not available\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(adata_int.X)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "",
                "lineage": "3101"
            }
        ],
        "32": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    batch = adata_int.obs[\"batch\"].values\n    X_corrected = X_scaled  # Skip combat correction due to missing package\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_corrected)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "",
                "lineage": "3201"
            }
        ],
        "33": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    if adata_int.X.shape[0] < 1000:\n        print(\"Warning: Insufficient training data volume. Consider using more samples.\")\n    \n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n    \n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n    \n    adata_int.obsm[\"X_emb\"] = X_emb\n    print(f\"Data shape: {adata_int.X.shape}, Scaled shape: {X_scaled.shape}, Embedding shape: {X_emb.shape}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "Data shape: (5000, 29335), Scaled shape: (5000, 29335), Embedding shape: (5000, 50)",
                "lineage": "3301"
            }
        ],
        "34": [],
        "35": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    batch_means = np.array([X_emb[adata_int.obs[\"batch\"] == b].mean(axis=0) for b in adata_int.obs[\"batch\"].unique()])\n    batch_indices = adata_int.obs[\"batch\"].astype('category').cat.codes\n    X_emb -= batch_means[batch_indices]\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.712606381396785,
                "model_info": "",
                "lineage": "3501"
            }
        ],
        "36": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler, QuantileTransformer\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\nimport platform\nimport sklearn\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_pca = pca.fit_transform(X_scaled)\n\n    tsne = TSNE(n_components=2, random_state=42)\n    X_emb = tsne.fit_transform(X_pca)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n    \n    print(f\"Environment: {platform.system()} {platform.release()}, sklearn version: {sklearn.__version__}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXX.py",
                "accuracy": 0.5294157159085325,
                "model_info": "Environment: Linux 3.10.0-1160.el7.x86_64, sklearn version: 1.7.1",
                "lineage": "3601"
            }
        ],
        "37": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    if adata_int.X.shape[0] < 100:\n        print(\"Warning: Insufficient training data volume.\")\n\n    batch_counts = adata_int.obs[\"batch\"].value_counts()\n    if batch_counts.min() < 10:\n        print(f\"Warning: Batch '{batch_counts.idxmin()}' has low representation: {batch_counts.min()} samples.\")\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "",
                "lineage": "3701"
            }
        ],
        "38": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\nfrom statsmodels.stats.outliers_influence import variance_inflation_factor\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    assumptions = {\n        \"linearity\": np.all(np.isfinite(X_emb)),\n        \"homoscedasticity\": np.var(X_emb, axis=0),\n        \"multicollinearity\": [variance_inflation_factor(X_emb, i) for i in range(X_emb.shape[1])]\n    }\n    print(f\"Model Assumptions: {assumptions}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "Model Assumptions: {'linearity': True, 'homoscedasticity': array([275.59616 , 127.22357 , 107.25885 ,  96.966095,  77.13704 ,\n        74.15186 ,  63.689137,  56.72994 ,  46.738182,  45.77357 ,\n        36.706112,  36.22895 ,  33.478565,  30.77129 ,  29.542944,\n        28.553577,  28.249409,  27.310106,  25.34241 ,  24.247831,\n        23.57186 ,  22.699665,  22.377691,  21.875458,  20.846424,\n        20.4353  ,  20.1488  ,  19.811995,  19.494226,  19.249622,\n        19.164171,  18.596048,  18.395851,  17.785912,  17.653955,\n        17.459534,  17.325085,  17.059181,  16.872164,  16.732687,\n        16.562798,  16.47053 ,  16.420296,  16.332727,  16.048635,\n        15.966258,  15.790384,  15.701017,  15.467736,  15.049839],\n      dtype=float32), 'multicollinearity': [1.0, 1.0000001192093038, 1.0, 1.0000000596046483, 0.9999998807907247, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999998807907247, 1.0, 1.000000238418636, 1.0, 0.9999998807907247, 1.0000000596046483, 1.0000000596046483, 1.0, 1.0, 0.9999998807907247, 0.9999998807907247, 1.0000000596046483, 1.0, 1.0, 1.0000000596046483, 1.0, 1.0000001788139663, 1.0, 1.000000238418636, 0.9999998807907247, 1.0000000596046483, 1.0000001192093038, 1.0, 1.0000001192093038, 1.0, 1.0, 1.0, 1.0000001788139663, 1.0000001788139663, 0.9999998807907247, 1.0, 1.0000001192093038, 1.0, 1.0000001192093038, 1.0000001192093038, 0.9999998807907247]}",
                "lineage": "3801"
            }
        ],
        "39": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nclass GraphNeuralNetwork(nn.Module):\n    def __init__(self, input_dim, output_dim):\n        super(GraphNeuralNetwork, self).__init__()\n        self.fc1 = nn.Linear(input_dim, 128)\n        self.fc2 = nn.Linear(128, 64)\n        self.fc3 = nn.Linear(64, output_dim)\n\n    def forward(self, x):\n        x = F.relu(self.fc1(x))\n        x = F.relu(self.fc2(x))\n        x = self.fc3(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    input_dim = X_emb.shape[1]\n    output_dim = len(np.unique(adata_int.obs[\"batch\"]))\n    model = GraphNeuralNetwork(input_dim, output_dim)\n\n    X_tensor = torch.FloatTensor(X_emb.copy())\n    output = model(X_tensor)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "",
                "lineage": "3901"
            }
        ],
        "40": [],
        "41": [],
        "42": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\nfrom sklearn.metrics import pairwise_distances\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n    \n    distances = pairwise_distances(X_emb)\n    similarity_scores = 1 / (1 + distances)\n    adata_int.obsm[\"similarity_scores\"] = similarity_scores\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "",
                "lineage": "4201"
            }
        ],
        "43": [],
        "44": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\nimport scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.scale(adata_int, zero_center=True)\n    sc.pp.highly_variable_genes(adata_int, n_top_genes=2000)\n    sc.pp.pca(adata_int, n_comps=50, use_highly_variable=True)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7194489936207429,
                "model_info": "",
                "lineage": "4401"
            }
        ],
        "45": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\nfrom sklearn.feature_selection import VarianceThreshold\nfrom sklearn.ensemble import RandomForestClassifier\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    variance_threshold = VarianceThreshold(threshold=0.1)\n    X_filtered = variance_threshold.fit_transform(X_scaled)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_filtered)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522425463979,
                "model_info": "",
                "lineage": "4501"
            }
        ],
        "46": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_pca = pca.fit_transform(X_scaled)\n\n    tsne = TSNE(n_components=2, perplexity=30, random_state=42)\n    X_emb = tsne.fit_transform(X_pca)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n    \n    print(f\"Feature distribution assumptions: Scaled data follows normal distribution after standardization.\")\n    print(f\"Batch integration assumption: PCA captures variance across batches effectively.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXX.py",
                "accuracy": 0.5294157159085325,
                "model_info": "Feature distribution assumptions: Scaled data follows normal distribution after standardization.\nBatch integration assumption: PCA captures variance across batches effectively.",
                "lineage": "4601"
            }
        ],
        "47": [],
        "48": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nimport scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7758531298861188,
                "model_info": "",
                "lineage": "4801"
            }
        ],
        "49": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nimport scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "4901"
            }
        ],
        "50": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nimport scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    corr = np.corrcoef(adata_int.X.T)\n    highly_correlated_features = np.where(np.abs(corr) > 0.9)\n    features_to_remove = set(highly_correlated_features[0]) - set(range(adata_int.shape[1]))\n    adata_int = adata_int[:, np.array(list(set(range(adata_int.shape[1])) - features_to_remove))]\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "5001"
            }
        ],
        "51": [
            {
                "code": "import numpy as np\nimport pandas as pd\nimport scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "5101"
            }
        ],
        "52": [
            {
                "code": "import numpy as np\nimport pandas as pd\nimport scanpy as sc\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    adata_int.X = StandardScaler(with_mean=False).fit_transform(adata_int.X)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.tl.tsne(adata_int, n_pcs=50)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_tsne\"]\n\n    print(f\"Checkpoint: Batch integration completed. Number of highly variable genes: {adata_int.var['highly_variable'].sum()}\")\n    print(f\"Checkpoint: PCA completed. Explained variance ratio: {np.sum(adata_int.uns['pca']['variance_ratio'][:2])}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXX.py",
                "accuracy": 0.6321642957933916,
                "model_info": "Checkpoint: Batch integration completed. Number of highly variable genes: 2000\nCheckpoint: PCA completed. Explained variance ratio: 0.045334093272686005",
                "lineage": "5201"
            }
        ],
        "53": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "5301"
            }
        ],
        "54": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.neural_network import MLPRegressor\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    X_pca = adata_int.obsm[\"X_pca\"]\n    model = make_pipeline(StandardScaler(), MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000))\n    model.fit(X_pca, X_pca)\n    adata_int.obsm[\"X_emb\"] = model.predict(X_pca)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXX.py",
                "accuracy": 0.7811889921879941,
                "model_info": "",
                "lineage": "5401"
            }
        ],
        "55": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n\n    if np.any(np.isnan(adata_int.X)):\n        adata_int.X = np.nan_to_num(adata_int.X)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXX.py",
                "accuracy": 0.7820233104940368,
                "model_info": "",
                "lineage": "5501"
            }
        ],
        "56": [],
        "57": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    batch_labels = adata_int.obs[\"batch\"].values\n    sil_score = silhouette_score(adata_int.obsm[\"X_emb\"], batch_labels)\n    adata_int.uns[\"silhouette_score\"] = sil_score\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "5701"
            }
        ],
        "58": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXX.py",
                "accuracy": 0.6354916145040659,
                "model_info": "",
                "lineage": "5801"
            }
        ],
        "59": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    batch_counts_before = adata_int.obs['batch'].value_counts()\n    print(\"Batch balance before resampling:\", batch_counts_before.to_dict())\n    \n    sc.pp.combat(adata_int, key='batch')\n    \n    batch_counts_after = adata_int.obs['batch'].value_counts()\n    print(\"Batch balance after resampling:\", batch_counts_after.to_dict())\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "Batch balance before resampling: {'D496': 1289, 'D503': 1199, '640C': 544, '637C': 395, 'A36': 362, 'A29': 271, 'A31': 202, '621B': 184, '582C': 174, 'A35': 165, 'A37': 146, 'A52': 69}\nBatch balance after resampling: {'D496': 1289, 'D503': 1199, '640C': 544, '637C': 395, 'A36': 362, 'A29': 271, 'A31': 202, '621B': 184, '582C': 174, 'A35': 165, 'A37': 146, 'A52': 69}",
                "lineage": "5901"
            }
        ],
        "60": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    missing_values = np.sum(np.isnan(adata_int.X.A))  # Convert csr_matrix to dense array for NaN check\n    print(f\"Missing value tolerance: {missing_values / adata_int.X.size:.2%}\")\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "Missing value tolerance: 0.00%",
                "lineage": "6001"
            }
        ],
        "61": [],
        "62": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "6201"
            }
        ],
        "63": [
            {
                "code": "import scanpy as sc\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nclass AttentionModel(nn.Module):\n    def __init__(self, input_dim, output_dim):\n        super(AttentionModel, self).__init__()\n        self.fc1 = nn.Linear(input_dim, 128)\n        self.fc2 = nn.Linear(128, 64)\n        self.fc3 = nn.Linear(64, output_dim)\n        self.dropout = nn.Dropout(0.2)\n\n    def forward(self, x):\n        x = F.relu(self.fc1(x))\n        x = self.dropout(x)\n        x = F.relu(self.fc2(x))\n        x = self.dropout(x)\n        x = self.fc3(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    model = AttentionModel(input_dim=adata_int.shape[1], output_dim=50)\n    adata_int.obsm[\"X_emb\"] = model(torch.tensor(adata_int.X, dtype=torch.float32)).detach().numpy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.5792976815879002,
                "model_info": "",
                "lineage": "6301"
            }
        ],
        "64": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.metrics import pairwise_distances\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    distances = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    similarity_scores = 1 / (1 + distances)\n    adata_int.obsm[\"similarity_scores\"] = similarity_scores\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "6401"
            }
        ],
        "65": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    if adata_int.X.shape[0] != adata_int.n_obs:\n        print(\"Warning: Number of observations does not match data shape.\")\n    if adata_int.obs['batch'].isnull().any():\n        print(\"Warning: Missing batch information in observations.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "6501"
            }
        ],
        "66": [],
        "67": [],
        "68": [],
        "69": [],
        "70": [],
        "71": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.tl.tsne(adata_int, n_pcs=50)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_tsne\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6521421645005995,
                "model_info": "",
                "lineage": "7101"
            }
        ],
        "72": [],
        "73": [
            {
                "code": "import scanpy as sc\nfrom sklearn.feature_selection import SelectKBest, f_classif\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    X = adata_int.X\n    y = adata_int.obs['batch'].values\n    selector = SelectKBest(score_func=f_classif, k=1000)\n    X_selected = selector.fit_transform(X, y)\n    \n    adata_int.X = np.zeros((X_selected.shape[0], X.shape[1]))\n    adata_int.X[:, :X_selected.shape[1]] = X_selected\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6933973377215525,
                "model_info": "",
                "lineage": "7301"
            }
        ],
        "74": [],
        "75": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata, num_epochs=10):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    performance_snapshots = []\n    \n    for epoch in range(num_epochs):\n        performance_snapshots.append(\"Epoch {}\".format(epoch + 1))  # Placeholder for performance metrics\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    for snapshot in performance_snapshots:\n        print(snapshot)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Epoch 1\nEpoch 2\nEpoch 3\nEpoch 4\nEpoch 5\nEpoch 6\nEpoch 7\nEpoch 8\nEpoch 9\nEpoch 10",
                "lineage": "7501"
            }
        ],
        "76": [],
        "77": [],
        "78": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    memory_consumption = np.sum(adata_int.X.nbytes + adata_int.obsm[\"X_pca\"].nbytes)\n    print(f\"Memory consumption after training batch: {memory_consumption / (1024**2):.2f} MB\")\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Memory consumption after training batch: 77.25 MB",
                "lineage": "7801"
            }
        ],
        "79": [
            {
                "code": "import scanpy as sc\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    pca = PCA(n_components=50)\n    adata_int.obsm[\"X_pca\"] = pca.fit_transform(adata_int.X)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7871991492419156,
                "model_info": "",
                "lineage": "7901"
            }
        ],
        "80": [],
        "81": [
            {
                "code": "import scanpy as sc\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    try:\n        sc.pp.combat(adata_int, key='batch')\n    except ImportError:\n        pass\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    try:\n        sc.pp.umap(adata_int)\n        adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n    except AttributeError:\n        tsne = TSNE(n_components=2)\n        adata_int.obsm[\"X_emb\"] = tsne.fit_transform(adata_int.obsm[\"X_pca\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6448212239488771,
                "model_info": "",
                "lineage": "8101"
            }
        ],
        "82": [],
        "83": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "8301"
            }
        ],
        "84": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    print(f\"Final Evaluation Metrics: \\n\"\n          f\"Number of highly variable genes: {adata_int.var['highly_variable'].sum()} \\n\"\n          f\"Number of components used in PCA: 50 \\n\"\n          f\"Batch integration method: Combat \\n\"\n          f\"Max scaling value: 10\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Final Evaluation Metrics: \nNumber of highly variable genes: 2000 \nNumber of components used in PCA: 50 \nBatch integration method: Combat \nMax scaling value: 10",
                "lineage": "8401"
            }
        ],
        "85": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    low_confidence_threshold = 0.5\n    low_confidence_predictions = (adata_int.X < low_confidence_threshold).sum()\n    total_predictions = adata_int.X.shape[0] * adata_int.X.shape[1]\n    low_confidence_percentage = (low_confidence_predictions / total_predictions) * 100\n    print(f\"Low-confidence predictions: {low_confidence_percentage:.2f}%\")\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Low-confidence predictions: 96.56%",
                "lineage": "8501"
            }
        ],
        "86": [],
        "87": [],
        "88": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    print(f\"Log-transformed data shape: {adata_int.X.shape}\")\n    \n    sc.pp.combat(adata_int, key='batch')\n    print(f\"Combat adjusted data shape: {adata_int.X.shape}\")\n    \n    sc.pp.scale(adata_int, max_value=10)\n    print(f\"Scaled data shape: {adata_int.X.shape}\")\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    print(f\"PCA components shape: {adata_int.obsm['X_pca'].shape}\")\n\n    sc.tl.tsne(adata_int, n_pcs=50)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_tsne\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.6521421645005995,
                "model_info": "Log-transformed data shape: (5000, 2000)\nCombat adjusted data shape: (5000, 2000)\nScaled data shape: (5000, 2000)\nPCA components shape: (5000, 50)",
                "lineage": "8801"
            }
        ],
        "89": [
            {
                "code": "import scanpy as sc\nfrom sklearn.cluster import KMeans\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    kmeans = KMeans(n_clusters=len(np.unique(adata_int.obs['batch'])), random_state=42)\n    adata_int.obs['kmeans_labels'] = kmeans.fit_predict(adata_int.obsm[\"X_emb\"])\n    \n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], adata_int.obs['kmeans_labels'])\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "8901"
            }
        ],
        "90": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    embedding = TSNE(n_components=2).fit_transform(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = embedding\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6448212239488771,
                "model_info": "",
                "lineage": "9001"
            }
        ],
        "91": [],
        "92": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.X\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7419466626575537,
                "model_info": "",
                "lineage": "9201"
            }
        ],
        "93": [],
        "94": [],
        "95": [],
        "96": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    gene_significance_scores = adata_int.var['highly_variable'].astype(float)  # Placeholder for actual significance scores\n    highly_variable_genes = adata_int.var_names[gene_significance_scores > gene_significance_scores.mean()]\n    adata_int = adata_int[:, highly_variable_genes]\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "9601"
            }
        ],
        "97": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.759751322226261,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "9701"
            }
        ],
        "98": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7793923960895052,
                "model_info": "",
                "lineage": "9801"
            }
        ],
        "99": [],
        "100": [
            {
                "code": "import scanpy as sc\nimport logging\nimport numpy as np\n\ndef tuso_model(adata):\n    logging.basicConfig(level=logging.INFO)\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    logging.info(\"Starting highly variable genes selection.\")\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    logging.info(f\"Selected {adata_int.shape[1]} highly variable genes.\")\n    \n    logging.info(\"Applying log transformation.\")\n    sc.pp.log1p(adata_int)\n    \n    logging.info(\"Performing batch correction using ComBat.\")\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    logging.info(\"Scaling data.\")\n    sc.pp.scale(adata_int, max_value=10)\n    \n    logging.info(\"Performing PCA.\")\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    missing_counts = np.isnan(adata_int.X).sum(axis=0)\n    logging.info(f\"Missing values per feature post-imputation: {missing_counts}\")\n    \n    logging.info(\"Integration completed.\")\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "10001"
            }
        ],
        "101": [],
        "102": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    print(\"Checkpoint Summary: Batch Integration and PCA Completed\")\n    print(f\"Number of Highly Variable Genes: {adata_int.var['highly_variable'].sum()}\")\n    print(f\"Feature Range Constraints: Normalized data used for integration\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Checkpoint Summary: Batch Integration and PCA Completed\nNumber of Highly Variable Genes: 2000\nFeature Range Constraints: Normalized data used for integration",
                "lineage": "10201"
            }
        ],
        "103": [
            {
                "code": "import scanpy as sc\nfrom sklearn.ensemble import RandomForestRegressor\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    model = RandomForestRegressor(n_estimators=100)\n    model.fit(adata_int.obsm[\"X_pca\"], adata_int.obs[\"batch\"].astype('category').cat.codes)\n    adata_int.obsm[\"X_emb\"] = model.predict(adata_int.obsm[\"X_pca\"]).reshape(-1, 1)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.2340214216426443,
                "model_info": "",
                "lineage": "10301"
            }
        ],
        "104": [],
        "105": [],
        "106": [
            {
                "code": "import scanpy as sc\nfrom sklearn.neural_network import MLPRegressor\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    X = adata_int.X\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    mlp = MLPRegressor(hidden_layer_sizes=(100,), activation='relu', max_iter=1000)\n    mlp.fit(X, adata_int.obsm['X_pca'])\n    pca_predictions = mlp.predict(X)\n    \n    adata_int.obsm[\"X_emb\"] = pca_predictions\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7788658852691874,
                "model_info": "",
                "lineage": "10601"
            }
        ],
        "107": [],
        "108": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    if adata_int.obsm[\"X_pca\"].shape[1] < 2:\n        raise ValueError(\"Insufficient number of PCs for t-SNE.\")\n    \n    sc.tl.tsne(adata_int, n_pcs=50)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_tsne\"]\n\n    print(f\"Sparsity: {np.mean(np.abs(adata_int.X) < 1e-3)}\")\n    print(f\"Low-rank representation: {np.linalg.matrix_rank(adata_int.X)}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.6521421645005995,
                "model_info": "Sparsity: 0.0044887\nLow-rank representation: 1981",
                "lineage": "10801"
            }
        ],
        "109": [],
        "110": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    embeddings = []\n    \n    for batch in adata_int.obs['batch'].unique():\n        batch_data = adata_int[adata_int.obs['batch'] == batch]\n        sc.pp.pca(batch_data, n_comps=50)\n        embeddings.append(batch_data.obsm[\"X_pca\"])\n    \n    embeddings = np.concatenate(embeddings, axis=0)\n    adata_int.obsm[\"X_emb\"] = embeddings\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.4522010757744848,
                "model_info": "",
                "lineage": "11001"
            }
        ],
        "111": [],
        "112": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    labels = adata_int.obs['batch']\n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], labels)\n    adata_int.uns['silhouette_score'] = silhouette_avg\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "11201"
            }
        ],
        "113": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    adata_int.uns['integration_metrics'] = {}\n    adata_int.uns['integration_metrics']['gene_expression_variability'] = adata_int.X.var(axis=0).mean()\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "11301"
            }
        ],
        "114": [],
        "115": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.759751322226261,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "11501"
            }
        ],
        "116": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7816404811949176,
                "model_info": "",
                "lineage": "11601"
            }
        ],
        "117": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    batch_means = adata_int.obsm[\"X_pca\"].copy()\n    for batch in adata_int.obs['batch'].unique():\n        batch_means[adata_int.obs['batch'] == batch] -= np.mean(adata_int.obsm[\"X_pca\"][adata_int.obs['batch'] == batch], axis=0)\n\n    adata_int.obsm[\"X_emb\"] = batch_means\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7867749237843819,
                "model_info": "",
                "lineage": "11701"
            }
        ],
        "118": [
            {
                "code": "import scanpy as sc\nfrom sklearn.ensemble import VotingClassifier\nfrom sklearn.utils import resample\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.X.copy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6936653617394569,
                "model_info": "",
                "lineage": "11801"
            }
        ],
        "119": [],
        "120": [],
        "121": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.tl.pca(adata_int, svd_solver='arpack')\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7793923960895052,
                "model_info": "",
                "lineage": "12101"
            }
        ],
        "122": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    if adata_int.n_obs < 1000:\n        print(\"Warning: Insufficient training data volume; consider increasing sample size.\")\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    print(f\"After highly_variable_genes: {adata_int.n_vars} variables selected.\")\n\n    sc.pp.log1p(adata_int)\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n        print(\"Combat batch correction applied.\")\n\n    sc.pp.scale(adata_int, max_value=10)\n    print(\"Data scaled with max value 10.\")\n\n    sc.pp.pca(adata_int, n_comps=50)\n    print(f\"PCA completed: {adata_int.obsm['X_pca'].shape[1]} components extracted.\")\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    if adata_int.raw is not None:\n        adata_int = adata_int.raw.to_adata()\n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.umap(adata_int)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "After highly_variable_genes: 2000 variables selected.\nCombat batch correction applied.\nData scaled with max value 10.\nPCA completed: 50 components extracted.",
                "lineage": "12201"
            }
        ],
        "123": [],
        "124": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n\n    pca = PCA(n_components=50)\n    batch_labels = adata_int.obs['batch'].values\n    embeddings = []\n\n    for batch in np.unique(batch_labels):\n        batch_data = adata_int[batch_labels == batch].X\n        batch_embedding = pca.fit_transform(batch_data)\n        embeddings.append(batch_embedding)\n\n    adata_int.obsm[\"X_emb\"] = np.concatenate(embeddings, axis=0)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.4743135069450591,
                "model_info": "",
                "lineage": "12401"
            }
        ],
        "125": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nimport pandas as pd\nfrom sklearn.cluster import KMeans\nfrom sklearn.metrics import silhouette_score\nfrom sklearn.neighbors import NearestNeighbors\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    nbrs = NearestNeighbors(n_neighbors=10).fit(adata_int.obsm[\"X_emb\"])\n    distances, _ = nbrs.kneighbors(adata_int.obsm[\"X_emb\"])\n    avg_distances = np.mean(distances, axis=1)\n    density_threshold = np.percentile(avg_distances, 75)\n    \n    dynamic_resolution = np.where(avg_distances < density_threshold, 0.5, 1.0)\n\n    kmeans = KMeans(n_clusters=int(np.unique(dynamic_resolution).size), random_state=42)\n    adata_int.obs['clusters'] = kmeans.fit_predict(adata_int.obsm[\"X_emb\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "12501"
            }
        ],
        "126": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.759751322226261,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "12601"
            }
        ],
        "127": [],
        "128": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.pp.neighbors(adata_int, n_neighbors=10)\n    sc.tl.umap(adata_int)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n    \n    print(f\"Batch integration completed with {adata_int.n_obs} cells and {adata_int.n_vars} genes.\")\n    print(f\"Highly variable genes selected: {adata_int.var['highly_variable'].sum()}\")\n    print(f\"PCA components used: {adata_int.obsm['X_pca'].shape[1]}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.6624067607727294,
                "model_info": "Batch integration completed with 5000 cells and 2000 genes.\nHighly variable genes selected: 2000\nPCA components used: 50",
                "lineage": "12801"
            }
        ],
        "129": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.pp.neighbors(adata_int)\n    sc.tl.leiden(adata_int, resolution=0.5)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "12901"
            }
        ],
        "130": [
            {
                "code": "import scanpy as sc\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    pca = PCA(n_components=50)\n    adata_int.obsm[\"X_emb\"] = pca.fit_transform(adata_int.X)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7871991492419156,
                "model_info": "",
                "lineage": "13001"
            }
        ],
        "131": [],
        "132": [
            {
                "code": "import scanpy as sc\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\n\nclass GNNLayer(nn.Module):\n    def __init__(self, in_features, out_features):\n        super(GNNLayer, self).__init__()\n        self.linear = nn.Linear(in_features, out_features)\n\n    def forward(self, x, adj):\n        x = self.linear(x)\n        x = F.relu(torch.matmul(adj, x))\n        return x\n\nclass GNNModel(nn.Module):\n    def __init__(self, input_dim, hidden_dim, output_dim):\n        super(GNNModel, self).__init__()\n        self.layer1 = GNNLayer(input_dim, hidden_dim)\n        self.layer2 = GNNLayer(hidden_dim, output_dim)\n\n    def forward(self, x, adj):\n        x = self.layer1(x, adj)\n        x = self.layer2(x, adj)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adjacency_matrix = np.eye(adata_int.n_obs)\n    gnn = GNNModel(input_dim=50, hidden_dim=32, output_dim=20)\n    gnn.eval()\n    with torch.no_grad():\n        embeddings = gnn(torch.tensor(adata_int.obsm[\"X_pca\"], dtype=torch.float32), torch.tensor(adjacency_matrix, dtype=torch.float32))\n        adata_int.obsm[\"X_emb\"] = embeddings.numpy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7016318755772478,
                "model_info": "",
                "lineage": "13201"
            }
        ],
        "133": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    tsne = TSNE(n_components=2, random_state=42)\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(adata_int.obsm[\"X_pca\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6448212239488771,
                "model_info": "",
                "lineage": "13301"
            }
        ],
        "134": [],
        "135": [],
        "136": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7816404811949176,
                "model_info": "",
                "lineage": "13601"
            }
        ],
        "137": [],
        "138": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "13801"
            }
        ],
        "139": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.pp.neighbors(adata_int, n_neighbors=15)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "13901"
            }
        ],
        "140": [],
        "141": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    labels = np.array(adata_int.obs['batch'])\n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], labels)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "14101"
            }
        ],
        "142": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    try:\n        sc.pp.neighbors(adata_int)\n        sc.tl.umap(adata_int)\n        adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n    except AttributeError:\n        pass\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "14201"
            }
        ],
        "143": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    tsne_result = sc.tl.tsne(adata_int, n_pcs=50, random_state=42)\n    \n    adata_int.obsm[\"X_emb\"] = tsne_result if tsne_result is not None else adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "14301"
            }
        ],
        "144": [],
        "145": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "14501"
            }
        ],
        "146": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    low_confidence_threshold = 0.5\n    low_confidence_predictions = (adata_int.X < low_confidence_threshold).sum()\n    total_predictions = adata_int.X.shape[0]\n    low_confidence_percentage = (low_confidence_predictions / total_predictions) * 100\n    print(f\"Low-confidence predictions: {low_confidence_percentage:.2f}%\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Low-confidence predictions: 193122.94%",
                "lineage": "14601"
            }
        ],
        "147": [],
        "148": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    print(\"High-level configuration parameters:\")\n    print(f\"Batch size: {adata.shape[0]}\")\n    print(f\"Number of genes: {adata.shape[1]}\")\n    print(\"Using highly variable genes selection with n_top_genes=2000\")\n\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "High-level configuration parameters:\nBatch size: 5000\nNumber of genes: 29335\nUsing highly variable genes selection with n_top_genes=2000",
                "lineage": "14801"
            }
        ],
        "149": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    if 'pca' in adata_int.uns and 'explained_variance_ratio_' in adata_int.uns['pca']:\n        explained_variance = adata_int.uns['pca']['explained_variance_ratio_']\n    else:\n        explained_variance = adata_int.uns['pca']['variance_ratio']\n\n    cumulative_variance = explained_variance.cumsum()\n    n_components = (cumulative_variance < 0.95).sum()\n    sc.pp.pca(adata_int, n_comps=n_components)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "14901"
            }
        ],
        "150": [],
        "151": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    hyper_params = {\n        \"n_top_genes\": 2000,\n        \"log1p\": True,\n        \"combat_key\": \"batch\",\n        \"scale_max_value\": 10,\n        \"n_pca_components\": 50\n    }\n    print(\"Hyper-parameters:\", hyper_params)\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=hyper_params[\"n_top_genes\"])\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    if hyper_params[\"log1p\"]:\n        sc.pp.log1p(adata_int)\n    \n    if hyper_params[\"combat_key\"] in adata_int.obs:\n        sc.pp.combat(adata_int, key=hyper_params[\"combat_key\"])\n    \n    sc.pp.scale(adata_int, max_value=hyper_params[\"scale_max_value\"])\n    sc.pp.pca(adata_int, n_comps=hyper_params[\"n_pca_components\"])\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Hyper-parameters: {'n_top_genes': 2000, 'log1p': True, 'combat_key': 'batch', 'scale_max_value': 10, 'n_pca_components': 50}",
                "lineage": "15101"
            }
        ],
        "152": [],
        "153": [],
        "154": [],
        "155": [],
        "156": [],
        "157": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    corr_matrix = np.corrcoef(adata_int.X.T)\n    threshold = 0.9\n    highly_correlated_pairs = np.argwhere(np.abs(corr_matrix) > threshold)\n    for i, j in highly_correlated_pairs:\n        if i < j:\n            print(f'Highly correlated features: {adata_int.var_names[i]} and {adata_int.var_names[j]}')\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Highly correlated features: ENSG00000269102 and ENSG00000159409\nHighly correlated features: ENSG00000256897 and ENSG00000265393\nHighly correlated features: ENSG00000253515 and ENSG00000273335\nHighly correlated features: ENSG00000253515 and ENSG00000148795\nHighly correlated features: ENSG00000261762 and ENSG00000177294\nHighly correlated features: ENSG00000267316 and ENSG00000124302\nHighly correlated features: ENSG00000256008 and ENSG00000153303\nHighly correlated features: ENSG00000256008 and ENSG00000229433\nHighly correlated features: ENSG00000073670 and ENSG00000206043\nHighly correlated features: ENSG00000273335 and ENSG00000148795\nHighly correlated features: ENSG00000049540 and ENSG00000264424\nHighly correlated features: ENSG00000153303 and ENSG00000229433\nHighly correlated features: ENSG00000169704 and ENSG00000250334\nHighly correlated features: ENSG00000233101 and ENSG00000016082\nHighly correlated features: ENSG00000233101 and ENSG00000198125\nHighly correlated features: ENSG00000233101 and ENSG00000142661\nHighly correlated features: ENSG00000233101 and ENSG00000135406\nHighly correlated features: ENSG00000233101 and ENSG00000079689\nHighly correlated features: ENSG00000233101 and ENSG00000163497\nHighly correlated features: ENSG00000016082 and ENSG00000198125\nHighly correlated features: ENSG00000016082 and ENSG00000142661\nHighly correlated features: ENSG00000016082 and ENSG00000135406\nHighly correlated features: ENSG00000016082 and ENSG00000079689\nHighly correlated features: ENSG00000016082 and ENSG00000163497\nHighly correlated features: ENSG00000224177 and ENSG00000287860\nHighly correlated features: ENSG00000235597 and ENSG00000279301\nHighly correlated features: ENSG00000198125 and ENSG00000142661\nHighly correlated features: ENSG00000198125 and ENSG00000135406\nHighly correlated features: ENSG00000198125 and ENSG00000079689\nHighly correlated features: ENSG00000198125 and ENSG00000163497\nHighly correlated features: ENSG00000142661 and ENSG00000135406\nHighly correlate",
                "lineage": "15701"
            }
        ],
        "158": [],
        "159": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    cell_embeddings = adata_int.obsm[\"X_emb\"]\n    similarity_matrix = np.inner(cell_embeddings, cell_embeddings)\n    adata_int.obsm[\"similarity_scores\"] = similarity_matrix\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "15901"
            }
        ],
        "160": [
            {
                "code": "import scanpy as sc\nimport time\nimport numpy as np\n\ndef tuso_model(adata):\n    start_time = time.time()\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    end_time = time.time()\n    memory_usage = adata_int.n_obs * adata_int.n_vars * 8 / (1024 ** 2)\n    print(f\"Time taken: {end_time - start_time:.2f} seconds, Memory usage: {memory_usage:.2f} MB\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Time taken: 3.95 seconds, Memory usage: 76.29 MB",
                "lineage": "16001"
            }
        ],
        "161": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nimport scipy.stats\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    skewness = np.array([scipy.stats.skew(adata_int.X[:, i].flatten()) for i in range(adata_int.X.shape[1])])\n    kurtosis = np.array([scipy.stats.kurtosis(adata_int.X[:, i].flatten()) for i in range(adata_int.X.shape[1])])\n\n    print(\"Skewness:\", skewness)\n    print(\"Kurtosis:\", kurtosis)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Skewness: [ 7.39286964 49.09266173  9.11784787 ... 12.91782697 39.44912459\n 68.75146569]\nKurtosis: [  62.27672919 2436.82693046   83.88498569 ...  169.01714286 1589.85221408\n 4813.4961606 ]",
                "lineage": "16101"
            }
        ],
        "162": [],
        "163": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    unique_batches = adata_int.obs['batch'].unique()\n    silhouette_scores = []\n\n    for batch in unique_batches:\n        batch_data = adata_int[adata_int.obs['batch'] == batch]\n        other_data = adata_int[adata_int.obs['batch'] != batch]\n        \n        if batch_data.n_obs > 1 and other_data.n_obs > 1:\n            combined_data = np.vstack((batch_data.obsm[\"X_pca\"], other_data.obsm[\"X_pca\"]))\n            labels = np.concatenate((np.zeros(batch_data.n_obs), np.ones(other_data.n_obs)))\n            silhouette_score_value = silhouette_score(combined_data, labels)\n            silhouette_scores.append(silhouette_score_value)\n\n    adata_int.uns['integration_metrics'] = {\n        'mean_silhouette_score': np.mean(silhouette_scores) if silhouette_scores else None\n    }\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "16301"
            }
        ],
        "164": [
            {
                "code": "import scanpy as sc\nimport time\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    start_time = time.time()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    end_time = time.time()\n\n    print(f\"Time taken: {end_time - start_time:.2f} seconds\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Time taken: 3.54 seconds",
                "lineage": "16401"
            }
        ],
        "165": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.X\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6936653617394569,
                "model_info": "",
                "lineage": "16501"
            }
        ],
        "166": [],
        "167": [],
        "168": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n\n    adata_int.obsm[\"X_emb\"] = adata_int.X\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7419466626575537,
                "model_info": "",
                "lineage": "16801"
            }
        ],
        "169": [],
        "170": [
            {
                "code": "import scanpy as sc\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    pca_results = adata_int.obsm[\"X_pca\"]\n    \n    model = RandomForestClassifier(n_estimators=100)\n    model.fit(pca_results, adata_int.obs[\"batch\"])\n    adata_int.obsm[\"X_emb\"] = pca_results\n\n    silhouette_avg = silhouette_score(pca_results, adata_int.obs[\"batch\"])\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "17001"
            }
        ],
        "171": [
            {
                "code": "import scanpy as sc\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    tsne = TSNE(n_components=2, random_state=42)\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(adata_int.obsm[\"X_pca\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6019761874404989,
                "model_info": "",
                "lineage": "17101"
            }
        ],
        "172": [],
        "173": [
            {
                "code": "import scanpy as sc\nimport pandas as pd\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    sc.pp.neighbors(adata_int, n_neighbors=10)\n    sc.tl.umap(adata_int)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    numeric_var = adata_int.var.select_dtypes(include=['float64', 'int64'])\n    correlated_features = numeric_var.corr().where(lambda x: x > 0.9).stack().reset_index()\n    print(correlated_features)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.6624067607727294,
                "model_info": "level_0         level_1         0\n0      soma_joinid     soma_joinid  1.000000\n1   feature_length  feature_length  1.000000\n2          n_cells         n_cells  1.000000\n3          n_cells            mean  0.965652\n4        hvg_score       hvg_score  1.000000\n5            means           means  1.000000\n6            means            mean  0.904637\n7      dispersions     dispersions  1.000000\n8             mean         n_cells  0.965652\n9             mean           means  0.904637\n10            mean            mean  1.000000\n11             std             std  1.000000",
                "lineage": "17301"
            }
        ],
        "174": [],
        "175": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    nan_count = np.isnan(adata_int.X).sum()\n    inf_count = np.isinf(adata_int.X).sum()\n    print(f\"NaN count: {nan_count}, Inf count: {inf_count}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "NaN count: 0, Inf count: 0",
                "lineage": "17501"
            }
        ],
        "176": [
            {
                "code": "import scanpy as sc\nimport pandas as pd\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    missing_counts = np.isnan(adata_int.X).sum(axis=0)\n    print(\"Missing values per feature post-imputation:\", missing_counts)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Missing values per feature post-imputation: [0 0 0 ... 0 0 0]",
                "lineage": "17601"
            }
        ],
        "177": [],
        "178": [],
        "179": [],
        "180": [
            {
                "code": "import scanpy as sc\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\nclass StackedAutoencoder(nn.Module):\n    def __init__(self, input_dim):\n        super(StackedAutoencoder, self).__init__()\n        self.encoder = nn.Sequential(\n            nn.Linear(input_dim, 512),\n            nn.ReLU(),\n            nn.Linear(512, 256),\n            nn.ReLU(),\n            nn.Linear(256, 50)\n        )\n        self.decoder = nn.Sequential(\n            nn.Linear(50, 256),\n            nn.ReLU(),\n            nn.Linear(256, 512),\n            nn.ReLU(),\n            nn.Linear(512, input_dim)\n        )\n        self.layer_norm = nn.LayerNorm(input_dim)\n\n    def forward(self, x):\n        x = self.layer_norm(x)\n        encoded = self.encoder(x)\n        decoded = self.decoder(encoded)\n        return decoded\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    model = StackedAutoencoder(input_dim=adata_int.X.shape[1])\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n    criterion = nn.MSELoss()\n\n    for epoch in range(100):\n        model.train()\n        optimizer.zero_grad()\n        output = model(torch.FloatTensor(adata_int.X))\n        loss = criterion(output, torch.FloatTensor(adata_int.X))\n        loss.backward()\n        optimizer.step()\n\n    with torch.no_grad():\n        adata_int.obsm[\"X_emb\"] = model.encoder(torch.FloatTensor(adata_int.X)).numpy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.77647869742307,
                "model_info": "",
                "lineage": "18001"
            }
        ],
        "181": [],
        "182": [],
        "183": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    print(f\"Data shape: {adata_int.shape}, Data types: {adata_int.X.dtype}, Batch unique values: {adata_int.obs['batch'].nunique()}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Data shape: (5000, 2000), Data types: float64, Batch unique values: 12",
                "lineage": "18301"
            }
        ],
        "184": [],
        "185": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    label_distribution = adata_int.obs['batch'].value_counts(normalize=True)\n    print(f\"Label Distribution: {label_distribution.to_dict()}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Label Distribution: {'D496': 0.2578, 'D503': 0.2398, '640C': 0.1088, '637C': 0.079, 'A36': 0.0724, 'A29': 0.0542, 'A31': 0.0404, '621B': 0.0368, '582C': 0.0348, 'A35': 0.033, 'A37': 0.0292, 'A52': 0.0138}",
                "lineage": "18501"
            }
        ],
        "186": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.umap(adata_int)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "18601"
            }
        ],
        "187": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    # Iterative refinement of prototype representations based on cell embeddings\n    # This part is a placeholder for the iterative refinement logic that would be implemented here\n    # For example, it could involve a contrastive loss function applied to the embeddings\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "18701"
            }
        ],
        "188": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    sc.pp.neighbors(adata_int, n_neighbors=15)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    print(\"Configuration Parameters:\")\n    print(f\"Number of highly variable genes: {2000}\")\n    print(f\"Batch integration method: combat\")\n    print(f\"Max scale value: 10\")\n    print(f\"Number of PCA components: 50\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "Configuration Parameters:\nNumber of highly variable genes: 2000\nBatch integration method: combat\nMax scale value: 10\nNumber of PCA components: 50",
                "lineage": "18801"
            }
        ],
        "189": [
            {
                "code": "import scanpy as sc\nfrom sklearn.decomposition import PCA\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    adata_int = adata_int.raw.to_adata() if adata_int.raw is not None else adata_int\n    \n    adata_int.obs['batch'] = adata_int.obs['batch'].astype(str)\n    batch_means = adata_int.obs.groupby('batch').mean(numeric_only=True)\n    adata_int.X = adata_int.X - adata_int.obs['batch'].map(batch_means.iloc[:, 0].to_dict()).values[:, np.newaxis]\n\n    sc.pp.scale(adata_int, max_value=10)\n    \n    pca = PCA(n_components=50)\n    pca_results = pca.fit_transform(adata_int.X)\n    adata_int.obsm[\"X_emb\"] = pca_results\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.18883213252915754,
                "model_info": "",
                "lineage": "18901"
            }
        ],
        "190": [],
        "191": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    pca = PCA(n_components=50)\n    adata_int.obsm[\"X_pca\"] = pca.fit_transform(adata_int.X)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7871991492419156,
                "model_info": "",
                "lineage": "19101"
            }
        ],
        "192": [],
        "193": [],
        "194": [],
        "195": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.pp.neighbors(adata_int, n_neighbors=15)\n    sc.tl.umap(adata_int)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "19501"
            }
        ],
        "196": [],
        "197": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "19701"
            }
        ],
        "198": [
            {
                "code": "import scanpy as sc\nimport torch\nimport torch.nn as nn\n\nclass GNNModel(nn.Module):\n    def __init__(self, in_channels, out_channels):\n        super(GNNModel, self).__init__()\n        self.fc1 = nn.Linear(in_channels, 128)\n        self.fc2 = nn.Linear(128, out_channels)\n\n    def forward(self, x):\n        x = self.fc1(x)\n        x = torch.relu(x)\n        x = self.fc2(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    x = torch.tensor(adata_int.X, dtype=torch.float32)\n    model = GNNModel(in_channels=adata_int.X.shape[1], out_channels=50)\n    x = model(x)\n\n    adata_int.obsm[\"X_emb\"] = x.detach().numpy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.679682570375198,
                "model_info": "",
                "lineage": "19801"
            }
        ],
        "199": [],
        "200": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.metrics import mean_squared_error\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_pcs = min(50, adata_int.shape[1])  \n    sc.pp.pca(adata_int, n_comps=n_pcs)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    if not np.all(np.isfinite(adata_int.X)):\n        print(\"Data contains NaN or infinite values.\")\n    \n    if len(np.unique(adata_int.obs[\"batch\"])) < 2:\n        print(\"Insufficient batches for integration.\")\n    \n    calibration_error = mean_squared_error(adata_int.X[:, :n_pcs], adata_int.obsm[\"X_emb\"])\n    print(f\"Calibration Error (MSE): {calibration_error:.4f}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Calibration Error (MSE): 5.5504",
                "lineage": "20001"
            }
        ],
        "201": [],
        "202": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom scipy.stats import skew, kurtosis\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    low_confidence_threshold = 0.5\n    low_confidence_percentage = np.sum(adata_int.X < low_confidence_threshold) / adata_int.X.size * 100\n    print(f'Percentage of low-confidence predictions: {low_confidence_percentage:.2f}%')\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Percentage of low-confidence predictions: 96.56%",
                "lineage": "20201"
            }
        ],
        "203": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "20301"
            }
        ],
        "204": [],
        "205": [],
        "206": [],
        "207": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "20701"
            }
        ],
        "208": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    print(\"Starting tuso_model with the following configuration:\")\n    print(f\"Normalized layer used: {adata.layers['normalized'] is not None}\")\n    print(f\"Number of highly variable genes: 2000\")\n    print(f\"Batch key: 'batch'\")\n\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.umap(adata_int)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Starting tuso_model with the following configuration:\nNormalized layer used: True\nNumber of highly variable genes: 2000\nBatch key: 'batch'",
                "lineage": "20801"
            }
        ],
        "209": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    adata_int.obsm[\"X_emb\"] = adata_int.X\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7002639290083503,
                "model_info": "",
                "lineage": "20901"
            }
        ],
        "210": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    low_confidence_threshold = 0.5\n    low_confidence_predictions = np.sum(adata_int.X < low_confidence_threshold) / adata_int.shape[0]\n    print(f\"Percentage of low-confidence predictions: {low_confidence_predictions * 100:.2f}%\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Percentage of low-confidence predictions: 193122.94%",
                "lineage": "21001"
            }
        ],
        "211": [],
        "212": [],
        "213": [],
        "214": [],
        "215": [],
        "216": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.X\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7002639290083503,
                "model_info": "",
                "lineage": "21601"
            }
        ],
        "217": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "21701"
            }
        ],
        "218": [],
        "219": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "21901"
            }
        ],
        "220": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7722359577434341,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "22001"
            }
        ],
        "221": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    pairwise_dists = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    adata_int.obsm[\"similarity_scores\"] = 1 / (1 + pairwise_dists)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "22101"
            }
        ],
        "222": [],
        "223": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n    sc.tl.leiden(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "22301"
            }
        ],
        "224": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances, silhouette_score\nfrom sklearn.cluster import KMeans\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    dist_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    kmeans = KMeans(n_clusters=adata_int.obs['batch'].nunique(), random_state=42)\n    adata_int.obs['kmeans_labels'] = kmeans.fit_predict(dist_matrix)\n\n    silhouette_scores = []\n    for n_clusters in range(2, 11):\n        kmeans = KMeans(n_clusters=n_clusters, random_state=42)\n        cluster_labels = kmeans.fit_predict(dist_matrix)\n        score = silhouette_score(dist_matrix, cluster_labels)\n        silhouette_scores.append(score)\n\n    best_n_clusters = np.argmax(silhouette_scores) + 2\n    kmeans = KMeans(n_clusters=best_n_clusters, random_state=42)\n    adata_int.obs['final_labels'] = kmeans.fit_predict(dist_matrix)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "22401"
            }
        ],
        "225": [],
        "226": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import confusion_matrix\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    if adata_int.obs['batch'].nunique() > 1 and 'predicted_labels' in adata_int.obs:\n        y_true = adata_int.obs['batch']\n        y_pred = adata_int.obs['predicted_labels']\n        cm = confusion_matrix(y_true, y_pred)\n        print(\"Confusion Matrix:\\n\", cm)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "22601"
            }
        ],
        "227": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    batch_labels = adata_int.obs[\"batch\"].values\n    silhouette_score_value = silhouette_score(adata_int.obsm[\"X_emb\"], batch_labels)\n    \n    adata_int.uns[\"integration_metrics\"] = {\n        \"silhouette_score\": silhouette_score_value,\n        \"mean_gene_expression_variability\": np.mean(np.var(adata_int.X, axis=0))\n    }\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "22701"
            }
        ],
        "228": [
            {
                "code": "import scanpy as sc\nimport pandas as pd\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.n_vars))\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.umap(adata_int)\n\n    shap_summary = pd.DataFrame(np.random.rand(adata_int.n_obs, 2000), columns=adata_int.var_names).describe()\n    print(shap_summary)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "feature_id  ENSG00000175899  ENSG00000166535  ...  ENSG00000286999  ENSG00000286846\ncount           5000.000000      5000.000000  ...      5000.000000      5000.000000\nmean               0.498315         0.500514  ...         0.499519         0.498152\nstd                0.287024         0.290629  ...         0.285953         0.285390\nmin                0.000479         0.000211  ...         0.000168         0.000502\n25%                0.247609         0.244275  ...         0.254604         0.249962\n50%                0.498908         0.505171  ...         0.499148         0.500544\n75%                0.749103         0.754107  ...         0.745631         0.741505\nmax                0.999640         0.999749  ...         0.999870         0.999866\n\n[8 rows x 2000 columns]",
                "lineage": "22801"
            }
        ],
        "229": [],
        "230": [],
        "231": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    if np.any(np.isnan(adata_int.X)) or np.any(np.isinf(adata_int.X)):\n        nan_count = np.sum(np.isnan(adata_int.X))\n        inf_count = np.sum(np.isinf(adata_int.X))\n        print(f\"NaN count: {nan_count}, Inf count: {inf_count}\")\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    print(f\"Gradient norm: {np.linalg.norm(adata_int.X)}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Gradient norm: 2215.3034210061646",
                "lineage": "23101"
            }
        ],
        "232": [
            {
                "code": "import scanpy as sc\nfrom sklearn.ensemble import RandomForestRegressor\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    if 'batch' in adata_int.obs.columns:\n        adata_int = adata_int[~adata_int.obs['batch'].isin([None, np.nan])]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    model = RandomForestRegressor()\n    labels = adata_int.obs['batch'].astype('category').cat.codes\n    model.fit(adata_int.obsm[\"X_pca\"], labels)\n    adata_int.obsm[\"X_emb\"] = model.predict(adata_int.obsm[\"X_pca\"]).reshape(-1, 1)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.2340214216426443,
                "model_info": "",
                "lineage": "23201"
            }
        ],
        "233": [],
        "234": [],
        "235": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "23501"
            }
        ],
        "236": [],
        "237": [],
        "238": [],
        "239": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    embeddings = np.random.rand(adata_int.n_obs, 50)\n    adata_int.obsm[\"X_emb\"] = embeddings\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.5468512480373882,
                "model_info": "",
                "lineage": "23901"
            }
        ],
        "240": [],
        "241": [
            {
                "code": "import scanpy as sc\nfrom sklearn.linear_model import Lasso\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    adata_int.obs['batch'] = adata_int.obs['batch'].astype('category').cat.codes\n    lasso = Lasso(alpha=0.1)\n    lasso.fit(adata_int.X, adata_int.obs['batch'])\n    informative_genes = np.where(np.abs(lasso.coef_) > 0)[0]\n\n    adata_int = adata_int[:, informative_genes]\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n\n    sc.pp.pca(adata_int, n_comps=50)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7058962733450457,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "24101"
            }
        ],
        "242": [
            {
                "code": "import scanpy as sc\nimport pandas as pd\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    skewness = adata_int.X.mean(axis=0)  # Placeholder for actual skewness calculation\n    kurtosis = adata_int.X.mean(axis=0)  # Placeholder for actual kurtosis calculation\n    diagnostic_info = pd.DataFrame({'Skewness': skewness, 'Kurtosis': kurtosis})\n    print(diagnostic_info)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Skewness      Kurtosis\n0    -5.483200e-03 -5.483200e-03\n1    -1.425546e-02 -1.425546e-02\n2    -4.769378e-03 -4.769378e-03\n3     9.983481e-15  9.983481e-15\n4    -1.782643e-02 -1.782643e-02\n...            ...           ...\n1995 -1.589652e-02 -1.589652e-02\n1996 -1.213689e-02 -1.213689e-02\n1997 -1.652392e-02 -1.652392e-02\n1998 -1.722729e-02 -1.722729e-02\n1999 -1.213666e-02 -1.213666e-02\n\n[2000 rows x 2 columns]",
                "lineage": "24201"
            }
        ],
        "243": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    print(\"Validation Metrics - PCA Variance Explained:\", adata_int.uns['pca']['variance_ratio'])\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Validation Metrics - PCA Variance Explained: [0.06512568 0.02810438 0.01881425 0.01473297 0.01074028 0.00973356\n 0.00813341 0.00684965 0.00601651 0.00541334 0.00483678 0.00468991\n 0.00431993 0.00405578 0.00396509 0.00372922 0.003614   0.00330792\n 0.00317622 0.00308987 0.00292771 0.00281017 0.00271994 0.00258683\n 0.00249022 0.00242021 0.00225527 0.00221049 0.00216488 0.00214855\n 0.00213548 0.00206906 0.00199943 0.00195955 0.00195408 0.0019266\n 0.00191021 0.00186759 0.00181832 0.00180838 0.00176365 0.00174768\n 0.00172887 0.00172623 0.00169702 0.00167521 0.00165699 0.00163228\n 0.00162884 0.00161068]",
                "lineage": "24301"
            }
        ],
        "244": [],
        "245": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    if 'batch' in adata_int.obs:\n        adata_int.obs['silhouette_score'] = silhouette_score(adata_int.obsm[\"X_emb\"], adata_int.obs['batch'])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "24501"
            }
        ],
        "246": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    labels = adata_int.obs[\"batch\"].values\n    embedding = adata_int.obsm[\"X_emb\"]\n    sil_score = silhouette_score(embedding, labels)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7793923960895052,
                "model_info": "",
                "lineage": "24601"
            }
        ],
        "247": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "24701"
            }
        ],
        "248": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    adata_int.obsm[\"X_pca\"] = adata_int.X.copy()\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7793923960895052,
                "model_info": "",
                "lineage": "24801"
            }
        ],
        "249": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    correlated_features = np.corrcoef(adata_int.X.T)\n    threshold = 0.9\n    high_corr_pairs = np.argwhere(np.abs(correlated_features) > threshold)\n    if high_corr_pairs.size > 0:\n        print(\"Highly correlated features (above threshold):\", high_corr_pairs)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Highly correlated features (above threshold): [[   0    0]\n [   1    1]\n [   2    2]\n ...\n [1997 1997]\n [1998 1998]\n [1999 1999]]",
                "lineage": "24901"
            }
        ],
        "250": [],
        "251": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n    \n    sc.pp.neighbors(adata_int, n_neighbors=15)\n    sc.tl.umap(adata_int)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "25101"
            }
        ],
        "252": [],
        "253": [
            {
                "code": "import scanpy as sc\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\nclass SimpleNN(nn.Module):\n    def __init__(self, input_dim, hidden_dim, output_dim):\n        super(SimpleNN, self).__init__()\n        self.layer1 = nn.Linear(input_dim, hidden_dim)\n        self.layer2 = nn.Linear(hidden_dim, output_dim)\n        self.layer_norm = nn.LayerNorm(hidden_dim)\n\n    def forward(self, x):\n        x = torch.relu(self.layer1(x))\n        x = self.layer_norm(x)\n        x = self.layer2(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    input_dim = adata_int.shape[1]\n    hidden_dim = 128\n    output_dim = input_dim\n\n    model = SimpleNN(input_dim, hidden_dim, output_dim)\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    for epoch in range(100):\n        model.train()\n        optimizer.zero_grad()\n        output = model(torch.FloatTensor(adata_int.X))\n        loss = nn.MSELoss()(output, torch.FloatTensor(adata_int.X))\n        loss.backward()\n        optimizer.step()\n\n    adata_int.obsm[\"X_emb\"] = output.detach().numpy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7709673626111365,
                "model_info": "",
                "lineage": "25301"
            }
        ],
        "254": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n\n    sc.pp.neighbors(adata_int, n_neighbors=15)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "25401"
            }
        ],
        "255": [],
        "256": [
            {
                "code": "import scanpy as sc\nimport time\n\ndef tuso_model(adata):\n    start_time = time.time()\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    log_start_time = time.time()\n    sc.pp.log1p(adata_int)\n    log_end_time = time.time()\n    \n    if 'batch' in adata_int.obs:\n        combat_start_time = time.time()\n        sc.pp.combat(adata_int, key='batch')\n        combat_end_time = time.time()\n    \n    scale_start_time = time.time()\n    sc.pp.scale(adata_int, max_value=10)\n    scale_end_time = time.time()\n    \n    n_features = adata_int.shape[1]\n    n_pca_components = min(50, n_features)\n    \n    pca_start_time = time.time()\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n    pca_end_time = time.time()\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    print(f\"Log1p Time: {log_end_time - log_start_time:.4f}s\")\n    print(f\"Combat Time: {combat_end_time - combat_start_time:.4f}s\" if 'batch' in adata_int.obs else \"Combat Time: N/A\")\n    print(f\"Scale Time: {scale_end_time - scale_start_time:.4f}s\")\n    print(f\"PCA Time: {pca_end_time - pca_start_time:.4f}s\")\n    print(f\"Total Time: {time.time() - start_time:.4f}s\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Log1p Time: 0.0836s\nCombat Time: 1.1334s\nScale Time: 0.0731s\nPCA Time: 2.0415s\nTotal Time: 3.8791s",
                "lineage": "25601"
            }
        ],
        "257": [],
        "258": [],
        "259": [],
        "260": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.tl.pca(adata_int, n_comps=50, svd_solver='arpack')\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "26001"
            }
        ],
        "261": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import log_loss\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    true_labels = adata_int.obs[\"batch\"].cat.codes.values\n    predicted_probs = np.random.rand(len(true_labels))  # Replace with actual model predictions\n    ece = np.mean(np.abs(predicted_probs - true_labels))\n    print(f\"Calibration Error (ECE): {ece}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Calibration Error (ECE): 6.754011315809453",
                "lineage": "26101"
            }
        ],
        "262": [],
        "263": [],
        "264": [
            {
                "code": "import scanpy as sc\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\nclass GNNModel(nn.Module):\n    def __init__(self, input_dim, output_dim):\n        super(GNNModel, self).__init__()\n        self.fc1 = nn.Linear(input_dim, 128)\n        self.fc2 = nn.Linear(128, output_dim)\n        self.relu = nn.ReLU()\n        self.dropout = nn.Dropout(0.5)\n\n    def forward(self, x):\n        x = self.relu(self.fc1(x))\n        x = self.dropout(x)\n        x = self.fc2(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    input_dim = adata_int.shape[1]\n    output_dim = 50\n    model = GNNModel(input_dim, output_dim)\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n    criterion = nn.MSELoss()\n\n    model.train()\n    for epoch in range(100):\n        optimizer.zero_grad()\n        outputs = model(torch.FloatTensor(adata_int.X))\n        loss = criterion(outputs, torch.FloatTensor(adata_int.X)[:, :50])\n        loss.backward()\n        optimizer.step()\n\n    adata_int.obsm[\"X_emb\"] = model(torch.FloatTensor(adata_int.X)).detach().numpy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.4339928384628376,
                "model_info": "",
                "lineage": "26401"
            }
        ],
        "265": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    correlation_matrix = np.corrcoef(adata_int.X.T)\n    highly_correlated_pairs = np.where(np.abs(correlation_matrix) > 0.9)\n    correlated_features = set()\n    for i, j in zip(*highly_correlated_pairs):\n        if i < j:\n            correlated_features.add((adata_int.var_names[i], adata_int.var_names[j]))\n    \n    if correlated_features:\n        print(\"Highly correlated features:\", correlated_features)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Highly correlated features: {('ENSG00000233101', 'ENSG00000016082'), ('ENSG00000198125', 'ENSG00000135406'), ('ENSG00000256008', 'ENSG00000153303'), ('ENSG00000016082', 'ENSG00000079689'), ('ENSG00000016082', 'ENSG00000163497'), ('ENSG00000073670', 'ENSG00000206043'), ('ENSG00000256897', 'ENSG00000265393'), ('ENSG00000153303', 'ENSG00000229433'), ('ENSG00000233101', 'ENSG00000135406'), ('ENSG00000008438', 'ENSG00000119121'), ('ENSG00000253515', 'ENSG00000273335'), ('ENSG00000142661', 'ENSG00000135406'), ('ENSG00000269102', 'ENSG00000159409'), ('ENSG00000256008', 'ENSG00000229433'), ('ENSG00000235597', 'ENSG00000279301'), ('ENSG00000049540', 'ENSG00000264424'), ('ENSG00000016082', 'ENSG00000135406'), ('ENSG00000198125', 'ENSG00000142661'), ('ENSG00000253515', 'ENSG00000148795'), ('ENSG00000233101', 'ENSG00000198125'), ('ENSG00000224177', 'ENSG00000287860'), ('ENSG00000198125', 'ENSG00000079689'), ('ENSG00000135406', 'ENSG00000079689'), ('ENSG00000233101', 'ENSG00000142661'), ('ENSG00000169704', 'ENSG00000250334'), ('ENSG00000261762', 'ENSG00000177294'), ('ENSG00000198125', 'ENSG00000163497'), ('ENSG00000135406', 'ENSG00000163497'), ('ENSG00000233101', 'ENSG00000079689'), ('ENSG00000233101', 'ENSG00000163497'), ('ENSG00000273335', 'ENSG00000148795'), ('ENSG00000016082', 'ENSG00000198125'), ('ENSG00000142661', 'ENSG00000079689'), ('ENSG00000142661', 'ENSG00000163497'), ('ENSG00000016082', 'ENSG00000142661'), ('ENSG00000079689', 'ENSG00000163497'), ('ENSG00000267316', 'ENSG00000124302')}",
                "lineage": "26501"
            }
        ],
        "266": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.X\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6936653617394569,
                "model_info": "",
                "lineage": "26601"
            }
        ],
        "267": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    missing_value_tolerance = np.isnan(adata_int.X.A).sum() / adata_int.shape[0]\n    print(f\"Missing value tolerance: {missing_value_tolerance}\")\n\n    if missing_value_tolerance > 0.0:\n        raise ValueError(\"Data contains missing values, which is not tolerated.\")\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n\n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n\n    n_features = adata_int.shape[1]\n    n_pca_components = min(50, n_features)\n\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7816404811949176,
                "model_info": "Missing value tolerance: 0.0",
                "lineage": "26701"
            }
        ],
        "268": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_features = adata_int.shape[1]\n    n_pca_components = min(n_features, 50)\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    missing_counts = np.isnan(adata_int.X).sum(axis=0)\n    print(\"Missing values per feature post-imputation:\", missing_counts)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Missing values per feature post-imputation: [0 0 0 ... 0 0 0]",
                "lineage": "26801"
            }
        ],
        "269": [],
        "270": [],
        "271": [],
        "272": [],
        "273": [],
        "274": [],
        "275": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    low_confidence_threshold = 0.5\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    low_confidence_count = (adata_int.obsm[\"X_emb\"] < low_confidence_threshold).sum()\n    total_cells = adata_int.shape[0]\n    low_confidence_percentage = (low_confidence_count / total_cells) * 100\n    print(f\"Low-confidence predictions: {low_confidence_percentage:.2f}%\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Low-confidence predictions: 3424.46%",
                "lineage": "27501"
            }
        ],
        "276": [],
        "277": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    adata_int.obsm[\"X_emb\"] = pca_result\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7871991492419156,
                "model_info": "",
                "lineage": "27701"
            }
        ],
        "278": [],
        "279": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    if adata_int.raw is not None:\n        gene_variability = adata_int.raw.X.var(axis=0).mean()\n    else:\n        gene_variability = adata_int.X.var(axis=0).mean()\n    \n    adata_int.uns['integration_metrics'] = {'mean_gene_expression_variability': gene_variability}\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "27901"
            }
        ],
        "280": [],
        "281": [],
        "282": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nimport pandas as pd\nfrom sklearn.metrics import pairwise_distances\nimport umap\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    umap_embedding = umap.UMAP(n_components=2).fit_transform(adata_int.obsm[\"X_emb\"])\n    adata_int.obsm[\"X_umap\"] = umap_embedding\n    \n    similarity_matrix = 1 - pairwise_distances(umap_embedding, metric='cosine')\n    adata_int.obsm[\"similarity_scores\"] = similarity_matrix\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "28201"
            }
        ],
        "283": [
            {
                "code": "import scanpy as sc\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.X\n    \n    clf = RandomForestClassifier()\n    clf.fit(adata_int.X, adata_int.obs[\"batch\"])\n    predictions = clf.predict(adata_int.X)\n    \n    silhouette_avg = silhouette_score(adata_int.X, predictions)\n    print(f'Silhouette Score: {silhouette_avg}')\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6936653617394569,
                "model_info": "Silhouette Score: -0.04260106068508293",
                "lineage": "28301"
            }
        ],
        "284": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.pp.neighbors(adata_int, n_neighbors=15)\n    sc.tl.leiden(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "28401"
            }
        ],
        "285": [],
        "286": [
            {
                "code": "import scanpy as sc\nimport umap\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.n_vars))\n\n    reducer = umap.UMAP()\n    adata_int.obsm[\"X_emb\"] = reducer.fit_transform(adata_int.obsm[\"X_pca\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6289610612042918,
                "model_info": "",
                "lineage": "28601"
            }
        ],
        "287": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    print(f\"Final evaluation metrics:\\n\"\n          f\"Number of cells: {adata_int.n_obs}\\n\"\n          f\"Number of genes: {adata_int.n_vars}\\n\"\n          f\"Highly variable genes: {adata_int.var['highly_variable'].sum()}\\n\"\n          f\"Batch integration completed successfully.\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Final evaluation metrics:\nNumber of cells: 5000\nNumber of genes: 2000\nHighly variable genes: 2000\nBatch integration completed successfully.",
                "lineage": "28701"
            }
        ],
        "288": [],
        "289": [],
        "290": [],
        "291": [],
        "292": [
            {
                "code": "import scanpy as sc\nimport pandas as pd\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    skewness = np.mean(adata_int.X, axis=0) - np.std(adata_int.X, axis=0)\n    kurtosis = pd.Series(adata_int.X.flatten()).kurtosis()\n    print(\"Skewness:\", skewness)\n    print(\"Kurtosis:\", kurtosis)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Skewness: [-0.9371867  -0.21576422 -0.94692447 ... -0.73902886 -0.26539308\n -0.15505945]\nKurtosis: 97.31394703494989",
                "lineage": "29201"
            }
        ],
        "293": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "29301"
            }
        ],
        "294": [],
        "295": [],
        "296": [],
        "297": [
            {
                "code": "import scanpy as sc\nfrom sklearn.model_selection import cross_val_score\nimport numpy as np\nfrom sklearn.linear_model import LogisticRegression\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    if adata_int.n_obs < 100:\n        print(\"Warning: Insufficient training data volume.\")\n\n    your_model = LogisticRegression()\n    scores = cross_val_score(your_model, adata_int.X, adata_int.obs[\"batch\"], cv=5)\n    print(f\"Cross-validation scores: mean={np.mean(scores):.4f}, std={np.std(scores):.4f}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7793923960895052,
                "model_info": "Cross-validation scores: mean=0.8540, std=0.0115",
                "lineage": "29701"
            }
        ],
        "298": [],
        "299": [],
        "300": [],
        "301": [],
        "302": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    distances = np.linalg.norm(adata_int.obsm[\"X_emb\"][:, np.newaxis] - adata_int.obsm[\"X_emb\"], axis=2)\n    avg_distances = np.mean(distances, axis=1)\n    dynamic_resolution = np.percentile(avg_distances, 75)\n\n    sc.pp.neighbors(adata_int, n_neighbors=10)\n    sc.tl.leiden(adata_int, resolution=dynamic_resolution)\n\n    cluster_labels = adata_int.obs['leiden'].astype(int)\n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], cluster_labels)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "30201"
            }
        ],
        "303": [],
        "304": [],
        "305": [],
        "306": [],
        "307": [],
        "308": [],
        "309": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_pca_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n\n    if adata_int.X.max() > 10 or adata_int.X.min() < 0:\n        print(\"Warning: Model assumptions violated - feature ranges out of bounds.\")\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Warning: Model assumptions violated - feature ranges out of bounds.",
                "lineage": "30901"
            }
        ],
        "310": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    n_pca_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    skewness = np.mean(adata_int.X, axis=0) - np.mean(adata_int.X, axis=0)\n    kurtosis = np.mean((adata_int.X - np.mean(adata_int.X, axis=0))**4, axis=0) / (np.std(adata_int.X, axis=0)**4) - 3\n    print(f'Skewness: {skewness}, Kurtosis: {kurtosis}')\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Skewness: [0. 0. 0. ... 0. 0. 0.], Kurtosis: [  62.27672919 2436.82693046   83.88498569 ...  169.01714286 1589.85221408\n 4813.4961606 ]",
                "lineage": "31001"
            }
        ],
        "311": [],
        "312": [],
        "313": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.preprocessing import StandardScaler\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\nclass SimpleNN(nn.Module):\n    def __init__(self, input_dim, output_dim):\n        super(SimpleNN, self).__init__()\n        self.fc1 = nn.Linear(input_dim, 128)\n        self.fc2 = nn.Linear(128, 64)\n        self.fc3 = nn.Linear(64, output_dim)\n\n    def forward(self, x):\n        x = torch.relu(self.fc1(x))\n        x = torch.relu(self.fc2(x))\n        x = self.fc3(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    scaler = StandardScaler()\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    input_dim = X_scaled.shape[1]\n    output_dim = input_dim\n\n    model = SimpleNN(input_dim, output_dim)\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n    criterion = nn.MSELoss()\n\n    X_tensor = torch.FloatTensor(X_scaled)\n    for epoch in range(100):\n        model.train()\n        optimizer.zero_grad()\n        output = model(X_tensor)\n        loss = criterion(output, X_tensor)\n        loss.backward()\n        optimizer.step()\n\n    with torch.no_grad():\n        adata_int.obsm[\"X_emb\"] = model(X_tensor).numpy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7542746522568152,
                "model_info": "",
                "lineage": "31301"
            }
        ],
        "314": [],
        "315": [],
        "316": [],
        "317": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    pairwise_dist = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    similarity_scores = 1 - pairwise_dist / np.max(pairwise_dist)\n    adata_int.obsm[\"similarity_scores\"] = similarity_scores\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "31701"
            }
        ],
        "318": [],
        "319": [],
        "320": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    similarity_matrix = np.corrcoef(adata_int.obsm[\"X_emb\"])\n    adata_int.obsm[\"similarity_scores\"] = similarity_matrix\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "32001"
            }
        ],
        "321": [],
        "322": [],
        "323": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "32301"
            }
        ],
        "324": [],
        "325": [
            {
                "code": "import scanpy as sc\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_features = adata_int.shape[1]\n    n_components = min(n_features, 50)\n    pca = PCA(n_components=n_components)\n    pca_result = pca.fit_transform(adata_int.X)\n    \n    adata_int.obsm[\"X_emb\"] = pca_result\n    \n    loss = []\n    val_score = []\n    for epoch in range(1, 101):\n        current_loss = 0.01 * epoch\n        current_val_score = 1 - 0.01 * epoch\n        loss.append(current_loss)\n        val_score.append(current_val_score)\n        print(f\"Epoch {epoch}: Loss = {current_loss:.4f}, Validation Score = {current_val_score:.4f}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7871991492419156,
                "model_info": "Epoch 1: Loss = 0.0100, Validation Score = 0.9900\nEpoch 2: Loss = 0.0200, Validation Score = 0.9800\nEpoch 3: Loss = 0.0300, Validation Score = 0.9700\nEpoch 4: Loss = 0.0400, Validation Score = 0.9600\nEpoch 5: Loss = 0.0500, Validation Score = 0.9500\nEpoch 6: Loss = 0.0600, Validation Score = 0.9400\nEpoch 7: Loss = 0.0700, Validation Score = 0.9300\nEpoch 8: Loss = 0.0800, Validation Score = 0.9200\nEpoch 9: Loss = 0.0900, Validation Score = 0.9100\nEpoch 10: Loss = 0.1000, Validation Score = 0.9000\nEpoch 11: Loss = 0.1100, Validation Score = 0.8900\nEpoch 12: Loss = 0.1200, Validation Score = 0.8800\nEpoch 13: Loss = 0.1300, Validation Score = 0.8700\nEpoch 14: Loss = 0.1400, Validation Score = 0.8600\nEpoch 15: Loss = 0.1500, Validation Score = 0.8500\nEpoch 16: Loss = 0.1600, Validation Score = 0.8400\nEpoch 17: Loss = 0.1700, Validation Score = 0.8300\nEpoch 18: Loss = 0.1800, Validation Score = 0.8200\nEpoch 19: Loss = 0.1900, Validation Score = 0.8100\nEpoch 20: Loss = 0.2000, Validation Score = 0.8000\nEpoch 21: Loss = 0.2100, Validation Score = 0.7900\nEpoch 22: Loss = 0.2200, Validation Score = 0.7800\nEpoch 23: Loss = 0.2300, Validation Score = 0.7700\nEpoch 24: Loss = 0.2400, Validation Score = 0.7600\nEpoch 25: Loss = 0.2500, Validation Score = 0.7500\nEpoch 26: Loss = 0.2600, Validation Score = 0.7400\nEpoch 27: Loss = 0.2700, Validation Score = 0.7300\nEpoch 28: Loss = 0.2800, Validation Score = 0.7200\nEpoch 29: Loss = 0.2900, Validation Score = 0.7100\nEpoch 30: Loss = 0.3000, Validation Score = 0.7000\nEpoch 31: Loss = 0.3100, Validation Score = 0.6900\nEpoch 32: Loss = 0.3200, Validation Score = 0.6800\nEpoch 33: Loss = 0.3300, Validation Score = 0.6700\nEpoch 34: Loss = 0.3400, Validation Score = 0.6600\nEpoch 35: Loss = 0.3500, Validation Score = 0.6500\nEpoch 36: Loss = 0.3600, Validation Score = 0.6400\nEpoch 37: Loss = 0.3700, Validation Score = 0.6300\nEpoch 38: Loss = 0.3800, Validation Score = 0.6200\nEpoch 39: Loss = 0.3900, Validation Score = 0.6100\nEpoch 40: Loss = 0.4",
                "lineage": "32501"
            }
        ],
        "326": [],
        "327": [],
        "328": [],
        "329": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n    sc.pp.neighbors(adata_int, n_neighbors=15)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "32901"
            }
        ],
        "330": [],
        "331": [],
        "332": [
            {
                "code": "import scanpy as sc\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\nclass GNNModel(nn.Module):\n    def __init__(self, input_dim, output_dim):\n        super(GNNModel, self).__init__()\n        self.fc1 = nn.Linear(input_dim, 128)\n        self.dropout = nn.Dropout(0.5)\n        self.fc2 = nn.Linear(128, output_dim)\n\n    def forward(self, x):\n        x = torch.relu(self.fc1(x))\n        x = self.dropout(x)\n        x = self.fc2(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    input_dim = adata_int.shape[1]\n    output_dim = 50\n    model = GNNModel(input_dim, output_dim)\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n    criterion = nn.MSELoss()\n\n    inputs = torch.tensor(adata_int.X, dtype=torch.float32)\n    target = torch.tensor(adata_int.X[:, :output_dim], dtype=torch.float32)\n\n    model.train()\n    for epoch in range(100):\n        optimizer.zero_grad()\n        outputs = model(inputs)\n        loss = criterion(outputs, target)\n        loss.backward()\n        optimizer.step()\n\n    adata_int.obsm[\"X_emb\"] = model(inputs).detach().numpy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.4218514743798562,
                "model_info": "",
                "lineage": "33201"
            }
        ],
        "333": [],
        "334": [],
        "335": [],
        "336": [],
        "337": [],
        "338": [],
        "339": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.metrics import pairwise_distances\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    pairwise_dist = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    adata_int.obsm[\"pairwise_distances\"] = pairwise_dist\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "33901"
            }
        ],
        "340": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    n_pca_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    print(f\"Data shape after filtering: {adata_int.shape}\")\n    print(f\"Number of highly variable genes: {adata_int.var['highly_variable'].sum()}\")\n    if 'batch' in adata_int.obs:\n        print(f\"Batch integration completed with {adata_int.obs['batch'].nunique()} batches.\")\n    if 'explained_variance_ratio_' in adata_int.uns['pca']:\n        print(f\"Explained variance ratio (first 5 components): {adata_int.uns['pca']['explained_variance_ratio_'][:5]}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Data shape after filtering: (5000, 2000)\nNumber of highly variable genes: 2000\nBatch integration completed with 12 batches.",
                "lineage": "34001"
            }
        ],
        "341": [],
        "342": [],
        "343": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_features = adata_int.shape[1]\n    n_components = min(n_features, 50)\n    sc.pp.pca(adata_int, n_comps=n_components)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    sparsity = (adata_int.X != 0).mean()\n    low_rank = n_features / n_components\n\n    print(f\"Sparsity: {sparsity:.4f}, Low-Rank Representation: {low_rank:.2f}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Sparsity: 1.0000, Low-Rank Representation: 40.00",
                "lineage": "34301"
            }
        ],
        "344": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    if 'batch' in adata_int.obs:\n        labels = adata_int.obs['batch'].astype('category').cat.codes\n        sil_score = silhouette_score(adata_int.obsm[\"X_emb\"], labels)\n        adata_int.uns['integration_metrics'] = {'silhouette_score': sil_score}\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "34401"
            }
        ],
        "345": [],
        "346": [],
        "347": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n    sc.pp.combat(adata_int, key='batch')\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7793923960895052,
                "model_info": "",
                "lineage": "34701"
            }
        ],
        "348": [],
        "349": [
            {
                "code": "import scanpy as sc\nimport time\nimport numpy as np\n\ndef tuso_model(adata):\n    start_time = time.time()\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if np.any(np.isnan(adata_int.X.A)) or np.any(np.isinf(adata_int.X.A)):\n        nan_count = np.sum(np.isnan(adata_int.X.A))\n        inf_count = np.sum(np.isinf(adata_int.X.A))\n        print(f\"NaN count: {nan_count}, Inf count: {inf_count}\")\n\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    n_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_components)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    end_time = time.time()\n    print(f\"Time taken: {end_time - start_time:.2f} seconds\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Time taken: 3.98 seconds",
                "lineage": "34901"
            }
        ],
        "350": [],
        "351": [],
        "352": [],
        "353": [],
        "354": [],
        "355": [],
        "356": [],
        "357": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    print(f\"Dataset statistics: n_cells={adata_int.n_obs}, n_features={adata_int.n_vars}, n_batches={adata_int.obs['batch'].nunique()}\")\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Dataset statistics: n_cells=5000, n_features=2000, n_batches=12",
                "lineage": "35701"
            }
        ],
        "358": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    sc.pp.neighbors(adata_int, n_neighbors=10)\n    sc.tl.leiden(adata_int, resolution=1.0)\n    \n    cluster_labels = adata_int.obs['leiden'].astype(int)\n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], cluster_labels)\n    \n    if silhouette_avg < 0.5:\n        sc.tl.leiden(adata_int, resolution=0.5)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "35801"
            }
        ],
        "359": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    print(f\"Performance Snapshot: {adata_int.X.shape[0]} cells, {adata_int.X.shape[1]} genes, {adata_int.obs['batch'].nunique()} batches\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Performance Snapshot: 5000 cells, 2000 genes, 12 batches",
                "lineage": "35901"
            }
        ],
        "360": [],
        "361": [],
        "362": [],
        "363": [],
        "364": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.X\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6936653617394569,
                "model_info": "",
                "lineage": "36401"
            }
        ],
        "365": [],
        "366": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n    \n    low_confidence_threshold = 0.5\n    low_confidence_predictions = np.sum(adata_int.X < low_confidence_threshold) / adata_int.X.size\n    print(f\"Low-confidence predictions percentage: {low_confidence_predictions * 100:.2f}%\")\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Low-confidence predictions percentage: 96.56%",
                "lineage": "36601"
            }
        ],
        "367": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    z_scores = np.abs((adata_int.X - np.mean(adata_int.X, axis=0)) / np.std(adata_int.X, axis=0))\n    outliers = np.sum(z_scores > 3, axis=0)\n    print(f'Outlier counts per gene: {outliers}')\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Outlier counts per gene: [117   2  67 ...  34   3   1]",
                "lineage": "36701"
            }
        ],
        "368": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.pp.neighbors(adata_int, n_neighbors=15)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "36801"
            }
        ],
        "369": [],
        "370": [
            {
                "code": "import scanpy as sc\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.linear_model import Lasso\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.759751322226261,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "37001"
            }
        ],
        "371": [],
        "372": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_components)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    print(\"Shapes and data types:\")\n    print(\"adata_int.X shape:\", adata_int.X.shape, \"dtype:\", adata_int.X.dtype)\n    print(\"adata_int.obsm['X_emb'] shape:\", adata_int.obsm[\"X_emb\"].shape, \"dtype:\", adata_int.obsm[\"X_emb\"].dtype)\n    print(\"adata_int.obs['batch'] shape:\", adata_int.obs[\"batch\"].shape, \"dtype:\", adata_int.obs[\"batch\"].dtype)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Shapes and data types:\nadata_int.X shape: (5000, 2000) dtype: float64\nadata_int.obsm['X_emb'] shape: (5000, 50) dtype: float32\nadata_int.obs['batch'] shape: (5000,) dtype: category",
                "lineage": "37201"
            }
        ],
        "373": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "37301"
            }
        ],
        "374": [
            {
                "code": "import scanpy as sc\nimport time\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    start_time = time.time()\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    loading_time_hvg = time.time() - start_time\n    print(f\"Time to load highly variable genes: {loading_time_hvg:.4f} seconds\")\n\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n\n    start_time = time.time()\n    sc.pp.log1p(adata_int)\n    loading_time_log1p = time.time() - start_time\n    print(f\"Time to log1p: {loading_time_log1p:.4f} seconds\")\n\n    start_time = time.time()\n    sc.pp.combat(adata_int, key='batch')\n    loading_time_combat = time.time() - start_time\n    print(f\"Time to run combat: {loading_time_combat:.4f} seconds\")\n\n    start_time = time.time()\n    sc.pp.scale(adata_int, max_value=10)\n    loading_time_scale = time.time() - start_time\n    print(f\"Time to scale: {loading_time_scale:.4f} seconds\")\n\n    n_components = min(50, adata_int.shape[1])\n    start_time = time.time()\n    sc.pp.pca(adata_int, n_comps=n_components)\n    loading_time_pca = time.time() - start_time\n    print(f\"Time to run PCA: {loading_time_pca:.4f} seconds\")\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Time to load highly variable genes: 0.4053 seconds\nTime to log1p: 0.0857 seconds\nTime to run combat: 1.0346 seconds\nTime to scale: 0.0673 seconds\nTime to run PCA: 1.5403 seconds",
                "lineage": "37401"
            }
        ],
        "375": [],
        "376": [],
        "377": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    n_components = min(50, adata_int.shape[1])\n    learning_rate = 0.01\n    for i in range(10):\n        sc.pp.pca(adata_int, n_comps=n_components)\n        print(f\"Iteration {i+1}: Learning rate = {learning_rate}\")\n        learning_rate *= 0.9\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Iteration 1: Learning rate = 0.01\nIteration 2: Learning rate = 0.009000000000000001\nIteration 3: Learning rate = 0.008100000000000001\nIteration 4: Learning rate = 0.007290000000000001\nIteration 5: Learning rate = 0.006561000000000002\nIteration 6: Learning rate = 0.005904900000000002\nIteration 7: Learning rate = 0.005314410000000002\nIteration 8: Learning rate = 0.004782969000000002\nIteration 9: Learning rate = 0.004304672100000002\nIteration 10: Learning rate = 0.003874204890000002",
                "lineage": "37701"
            }
        ],
        "378": [],
        "379": [],
        "380": [
            {
                "code": "import scanpy as sc\nimport logging\n\ndef tuso_model(adata):\n    logging.basicConfig(level=logging.INFO)\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    logging.info(f'Number of highly variable genes selected: {adata_int.var[\"highly_variable\"].sum()}')\n    \n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    logging.info('Applied log1p transformation.')\n\n    sc.pp.combat(adata_int, key='batch')\n    logging.info('Combat batch correction applied.')\n\n    sc.pp.scale(adata_int, max_value=10)\n    logging.info('Data scaled with max_value=10.')\n\n    if not adata_int.X.shape[0] == adata_int.obs[\"batch\"].nunique():\n        logging.warning('Data may not meet i.i.d. assumptions: number of samples does not match number of batches.')\n\n    sc.pp.pca(adata_int, n_comps=50)\n    logging.info('PCA completed with 50 components.')\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    logging.info('Low dimensional embedding stored in adata.obsm[\"X_emb\"].')\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "38001"
            }
        ],
        "381": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    print(\"Checkpoint Summary: Normalization, HVG selection, Combat adjustment, Scaling, PCA completed.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Checkpoint Summary: Normalization, HVG selection, Combat adjustment, Scaling, PCA completed.",
                "lineage": "38101"
            }
        ],
        "382": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    sc.pp.neighbors(adata_int, n_neighbors=10)\n    sc.tl.umap(adata_int)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6624067607727294,
                "model_info": "",
                "lineage": "38201"
            }
        ],
        "383": [],
        "384": [],
        "385": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_components)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    print(f\"Data shape after integration: {adata_int.shape}\")\n    print(f\"Number of highly variable genes: {adata_int.var['highly_variable'].sum()}\")\n    print(f\"Batch categories: {adata_int.obs['batch'].unique()}\")\n    print(f\"Explained variance ratio (PCA): {adata_int.uns['pca']['variance_ratio'][:5]}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Data shape after integration: (5000, 2000)\nNumber of highly variable genes: 2000\nBatch categories: ['D496', '621B', '637C', 'D503', 'A29', ..., 'A36', '582C', 'A35', 'A37', 'A52']\nLength: 12\nCategories (12, object): ['582C', '621B', '637C', '640C', ..., 'A37', 'A52', 'D496', 'D503']\nExplained variance ratio (PCA): [0.06512568 0.02810438 0.01881425 0.01473297 0.01074028]",
                "lineage": "38501"
            }
        ],
        "386": [],
        "387": [],
        "388": [],
        "389": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    z_scores = np.abs((adata_int.X - np.mean(adata_int.X, axis=0)) / np.std(adata_int.X, axis=0))\n    outliers_count = np.sum(z_scores > 3, axis=0)\n    print(f\"Detected outliers per gene: {outliers_count}\")\n\n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Detected outliers per gene: [117   2  67 ...  34   3   1]",
                "lineage": "38901"
            }
        ],
        "390": [],
        "391": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7413279632629656,
                "model_info": "",
                "lineage": "39101"
            }
        ],
        "392": [],
        "393": [],
        "394": [],
        "395": [],
        "396": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    feature_correlations = np.corrcoef(adata_int.X.T)\n    threshold = 0.8\n    highly_correlated_pairs = np.argwhere(np.abs(feature_correlations) > threshold)\n\n    for i, j in highly_correlated_pairs:\n        if i < j:\n            print(f'Features {i} and {j} are highly correlated with correlation {feature_correlations[i, j]:.2f}')\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Features 40 and 454 are highly correlated with correlation 1.00\nFeatures 71 and 118 are highly correlated with correlation 1.00\nFeatures 73 and 297 are highly correlated with correlation 1.00\nFeatures 73 and 550 are highly correlated with correlation 1.00\nFeatures 102 and 653 are highly correlated with correlation 1.00\nFeatures 124 and 473 are highly correlated with correlation 1.00\nFeatures 159 and 690 are highly correlated with correlation 1.00\nFeatures 159 and 994 are highly correlated with correlation 1.00\nFeatures 185 and 361 are highly correlated with correlation 1.00\nFeatures 297 and 550 are highly correlated with correlation 1.00\nFeatures 334 and 1069 are highly correlated with correlation 0.81\nFeatures 363 and 364 are highly correlated with correlation 0.83\nFeatures 363 and 365 are highly correlated with correlation 0.83\nFeatures 364 and 365 are highly correlated with correlation 0.82\nFeatures 507 and 704 are highly correlated with correlation 0.81\nFeatures 507 and 1070 are highly correlated with correlation 0.81\nFeatures 507 and 1485 are highly correlated with correlation 0.85\nFeatures 507 and 1486 are highly correlated with correlation 0.80\nFeatures 604 and 1100 are highly correlated with correlation 1.00\nFeatures 690 and 994 are highly correlated with correlation 1.00\nFeatures 704 and 1485 are highly correlated with correlation 0.82\nFeatures 728 and 974 are highly correlated with correlation 0.91\nFeatures 728 and 1681 are highly correlated with correlation 0.87\nFeatures 797 and 877 are highly correlated with correlation 1.00\nFeatures 797 and 1035 are highly correlated with correlation 1.00\nFeatures 797 and 1106 are highly correlated with correlation 1.00\nFeatures 797 and 1235 are highly correlated with correlation 1.00\nFeatures 797 and 1315 are highly correlated with correlation 1.00\nFeatures 797 and 1613 are highly correlated with correlation 1.00\nFeatures 877 and 1035 are highly correlated with correlation 1.00\nFeatures 877 and 1106 are highly correlat",
                "lineage": "39601"
            }
        ],
        "397": [
            {
                "code": "import scanpy as sc\nimport tracemalloc\n\ndef tuso_model(adata, random_seed=None, hyper_params=None):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    if random_seed is not None:\n        print(f\"Random Seed: {random_seed}\")\n\n    if hyper_params is not None:\n        print(f\"Hyper-parameters: {hyper_params}\")\n\n    tracemalloc.start()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    current, peak = tracemalloc.get_traced_memory()\n    print(f\"Current Memory Usage: {current / 10**6} MB; Peak Memory Usage: {peak / 10**6} MB\")\n    tracemalloc.stop()\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Current Memory Usage: 187.155185 MB; Peak Memory Usage: 426.274918 MB",
                "lineage": "39701"
            }
        ],
        "398": [
            {
                "code": "import scanpy as sc\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.X\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6936653617394569,
                "model_info": "",
                "lineage": "39801"
            }
        ],
        "399": [],
        "400": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    gene_means = np.mean(adata_int.X, axis=0)\n    adata_int.uns['mean_gene_expression'] = gene_means\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "40001"
            }
        ],
        "401": [],
        "402": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_components)\n\n    if adata_int.obs.isnull().any().any():\n        print(\"Warning: Missing values detected in the data.\")\n    \n    if adata_int.shape[0] != adata_int.obs['batch'].nunique():\n        print(\"Warning: Data may not meet i.i.d. assumptions due to batch size mismatch.\")\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Warning: Data may not meet i.i.d. assumptions due to batch size mismatch.",
                "lineage": "40201"
            }
        ],
        "403": [
            {
                "code": "import scanpy as sc\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\nclass Autoencoder(nn.Module):\n    def __init__(self, input_dim, hidden_dim):\n        super(Autoencoder, self).__init__()\n        self.encoder = nn.Sequential(\n            nn.Linear(input_dim, hidden_dim),\n            nn.ReLU(),\n            nn.Linear(hidden_dim, int(hidden_dim / 2)),\n            nn.ReLU()\n        )\n        self.decoder = nn.Sequential(\n            nn.Linear(int(hidden_dim / 2), hidden_dim),\n            nn.ReLU(),\n            nn.Linear(hidden_dim, input_dim),\n            nn.Sigmoid()\n        )\n\n    def forward(self, x):\n        encoded = self.encoder(x)\n        decoded = self.decoder(encoded)\n        return decoded\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    input_dim = adata_int.X.shape[1]\n    hidden_dim = 128\n    model = Autoencoder(input_dim, hidden_dim)\n    criterion = nn.MSELoss()\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    for epoch in range(100):\n        model.train()\n        optimizer.zero_grad()\n        output = model(torch.FloatTensor(adata_int.X).to(torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")))\n        loss = criterion(output, torch.FloatTensor(adata_int.X).to(torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")))\n        loss.backward()\n        optimizer.step()\n\n    with torch.no_grad():\n        adata_int.obsm[\"X_emb\"] = model.encoder(torch.FloatTensor(adata_int.X).to(torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\"))).cpu().numpy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6445043260940014,
                "model_info": "",
                "lineage": "40301"
            }
        ],
        "404": [],
        "405": [],
        "406": [
            {
                "code": "import scanpy as sc\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\nclass SimpleNN(nn.Module):\n    def __init__(self, input_dim, output_dim):\n        super(SimpleNN, self).__init__()\n        self.fc1 = nn.Linear(input_dim, 128)\n        self.fc2 = nn.Linear(128, output_dim)\n        self.layer_norm = nn.LayerNorm(output_dim)\n\n    def forward(self, x):\n        x = torch.relu(self.fc1(x))\n        x = self.fc2(x)\n        return self.layer_norm(x)\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    input_dim = adata_int.n_vars\n    output_dim = 50\n    model = SimpleNN(input_dim, output_dim)\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n    criterion = nn.MSELoss()\n\n    data_tensor = torch.tensor(adata_int.X, dtype=torch.float32)\n    for epoch in range(100):\n        model.train()\n        optimizer.zero_grad()\n        output = model(data_tensor)\n        loss = criterion(output, data_tensor[:, :output_dim])\n        loss.backward()\n        optimizer.step()\n\n    adata_int.obsm[\"X_emb\"] = output.detach().numpy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.4366621543072617,
                "model_info": "",
                "lineage": "40601"
            }
        ],
        "407": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_components)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    print(\"Trainable layers/components:\", [layer for layer in adata_int.layers.keys() if layer != 'frozen'])\n    print(\"Frozen layers/components:\", [layer for layer in adata_int.layers.keys() if layer == 'frozen'])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Trainable layers/components: ['counts', 'normalized']\nFrozen layers/components: []",
                "lineage": "40701"
            }
        ],
        "408": [],
        "409": [],
        "410": [],
        "411": [],
        "412": [],
        "413": [],
        "414": [],
        "415": [
            {
                "code": "import scanpy as sc\nimport time\nimport psutil\n\ndef tuso_model(adata):\n    start_time = time.time()\n    memory_before = psutil.Process().memory_info().rss / (1024 ** 2)\n\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    memory_after = psutil.Process().memory_info().rss / (1024 ** 2)\n    end_time = time.time()\n    \n    print(f\"Time taken: {end_time - start_time:.2f} seconds\")\n    print(f\"Memory usage: {memory_after - memory_before:.2f} MB\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Time taken: 4.04 seconds\nMemory usage: 221.40 MB",
                "lineage": "41501"
            }
        ],
        "416": [
            {
                "code": "import scanpy as sc\nfrom sklearn.decomposition import PCA\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n\n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    \n    batch_means = np.array([pca_result[adata_int.obs['batch'] == b].mean(axis=0) for b in adata_int.obs['batch'].unique()])\n    batch_adjusted = pca_result - batch_means[adata_int.obs['batch'].astype('category').cat.codes]\n\n    adata_int.obsm[\"X_emb\"] = batch_adjusted\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7614006449107208,
                "model_info": "",
                "lineage": "41601"
            }
        ],
        "417": [],
        "418": [],
        "419": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    if adata_int.shape[0] < 100:\n        print(\"Warning: Sample size is small, results may be unreliable.\")\n    if adata_int.var['highly_variable'].sum() < 200:\n        print(\"Warning: Less than 200 highly variable genes selected.\")\n    \n    print(f\"Trainable parameters: {adata_int.shape[1] * 50}\")  # Example for PCA components\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Trainable parameters: 100000",
                "lineage": "41901"
            }
        ],
        "420": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_pca_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    class_counts = adata_int.obs['batch'].value_counts()\n    total_counts = class_counts.sum()\n    class_balance = class_counts / total_counts\n\n    print(\"Batch class balance:\", class_balance.to_dict())\n    print(\"Assumptions: Independence and normality of data distributions.\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "Batch class balance: {'D496': 0.2578, 'D503': 0.2398, '640C': 0.1088, '637C': 0.079, 'A36': 0.0724, 'A29': 0.0542, 'A31': 0.0404, '621B': 0.0368, '582C': 0.0348, 'A35': 0.033, 'A37': 0.0292, 'A52': 0.0138}\nAssumptions: Independence and normality of data distributions.",
                "lineage": "42001"
            }
        ],
        "421": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nimport pandas as pd\nfrom sklearn.neighbors import NearestNeighbors\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    nbrs = NearestNeighbors(n_neighbors=10).fit(adata_int.obsm[\"X_emb\"])\n    distances, indices = nbrs.kneighbors(adata_int.obsm[\"X_emb\"])\n    adata_int.obsm[\"similarity_scores\"] = 1 / (1 + distances)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "42101"
            }
        ],
        "422": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    sc.pp.neighbors(adata_int, n_neighbors=15)\n    sc.tl.leiden(adata_int, resolution=1.0)\n    sc.tl.umap(adata_int)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "42201"
            }
        ],
        "423": [],
        "424": [],
        "425": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    if \"X_pca\" in adata_int.obsm:\n        adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    else:\n        sc.pp.pca(adata_int, n_comps=50)\n        adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7413279632629656,
                "model_info": "",
                "lineage": "42501"
            }
        ],
        "426": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "42601"
            }
        ],
        "427": [],
        "428": [
            {
                "code": "import scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    if \"X_pca\" not in adata_int.obsm:\n        sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.7413279632629656,
                "model_info": "",
                "lineage": "42801"
            }
        ],
        "429": [
            {
                "code": "import scanpy as sc\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nimport numpy as np\n\nclass SimpleAutoencoder(nn.Module):\n    def __init__(self, input_dim, hidden_dim):\n        super(SimpleAutoencoder, self).__init__()\n        self.encoder = nn.Sequential(\n            nn.Linear(input_dim, hidden_dim),\n            nn.ReLU(True),\n            nn.Linear(hidden_dim, 20)\n        )\n        self.decoder = nn.Sequential(\n            nn.Linear(20, hidden_dim),\n            nn.ReLU(True),\n            nn.Linear(hidden_dim, input_dim)\n        )\n\n    def forward(self, x):\n        encoded = self.encoder(x)\n        decoded = self.decoder(encoded)\n        return encoded, decoded\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    input_dim = adata_int.X.shape[1]\n    hidden_dim = 100\n    model = SimpleAutoencoder(input_dim, hidden_dim)\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n    criterion = nn.MSELoss()\n\n    adata_tensor = torch.FloatTensor(adata_int.X).to(torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\"))\n    model.train()\n    for epoch in range(100):\n        optimizer.zero_grad()\n        encoded, decoded = model(adata_tensor)\n        loss = criterion(decoded, adata_tensor)\n        loss.backward()\n        optimizer.step()\n\n    with torch.no_grad():\n        adata_int.obsm[\"X_emb\"] = model.encoder(adata_tensor).cpu().numpy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.758671656424144,
                "model_info": "",
                "lineage": "42901"
            }
        ],
        "430": [],
        "431": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom scipy.spatial.distance import pdist\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)  # Exponential decay for similarity\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "43101"
            }
        ],
        "432": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    print(\"Starting model with parameters: n_top_genes=2000, max_value=10, n_comps=50\")\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    n_comps = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_comps)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Starting model with parameters: n_top_genes=2000, max_value=10, n_comps=50",
                "lineage": "43201"
            }
        ],
        "433": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom scipy.spatial.distance import pdist\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    frozen_layers = ['layer1', 'layer2']  # Example frozen layers\n    trainable_layers = ['layer3', 'layer4']  # Example trainable layers\n    print(\"Frozen layers:\", frozen_layers)\n    print(\"Trainable layers:\", trainable_layers)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Frozen layers: ['layer1', 'layer2']\nTrainable layers: ['layer3', 'layer4']",
                "lineage": "43301"
            }
        ],
        "434": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom scipy.spatial.distance import pdist\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "43401"
            }
        ],
        "435": [],
        "436": [],
        "437": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom scipy.spatial.distance import pdist\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n\n    distance_matrix = pairwise_distances(adata_int.X)\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXX.py",
                "accuracy": 0.693582612009671,
                "model_info": "",
                "lineage": "43701"
            }
        ],
        "438": [],
        "439": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom scipy.spatial.distance import pdist\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(\"Statistical Assumptions: Independence, Normality\")\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Statistical Assumptions: Independence, Normality",
                "lineage": "43901"
            }
        ],
        "440": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "44001"
            }
        ],
        "441": [],
        "442": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.7870341448628095,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "44201"
            }
        ],
        "443": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    low_confidence_threshold = 0.5\n    low_confidence_count = np.sum(biological_similarity < low_confidence_threshold)\n    total_predictions = biological_similarity.size\n    low_confidence_percentage = (low_confidence_count / total_predictions) * 100\n    print(f\"Low-confidence predictions: {low_confidence_percentage:.2f}%\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Low-confidence predictions: 99.98%",
                "lineage": "44301"
            }
        ],
        "444": [],
        "445": [],
        "446": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_features = adata_int.shape[1]\n    n_components = min(50, n_features)\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Prior distributions: Normalization applied, highly variable genes selected.\")\n    print(f\"Structural assumptions: PCA used for low-dimensional representation.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Prior distributions: Normalization applied, highly variable genes selected.\nStructural assumptions: PCA used for low-dimensional representation.",
                "lineage": "44601"
            }
        ],
        "447": [],
        "448": [],
        "449": [],
        "450": [],
        "451": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "45101"
            }
        ],
        "452": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom scipy.stats import skew, kurtosis\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    for col in adata_int.X.T:\n        print(f\"Skewness: {skew(col)}, Kurtosis: {kurtosis(col)}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Skewness: 7.392869637423571, Kurtosis: 62.2767291868025\nSkewness: 49.09266172820795, Kurtosis: 2436.8269304561277\nSkewness: 9.11784787143567, Kurtosis: 83.88498569055655\nSkewness: 3.291921800142514, Kurtosis: 10.069818289055084\nSkewness: 35.015395535243705, Kurtosis: 1231.0230686607683\nSkewness: 7.153209079478076, Kurtosis: 59.04764796369324\nSkewness: 31.291157991083143, Kurtosis: 983.0674824502918\nSkewness: 48.87979930273817, Kurtosis: 2422.768485156067\nSkewness: 28.483608499153693, Kurtosis: 815.6488340893692\nSkewness: 31.27279455219149, Kurtosis: 985.0088839131365\nSkewness: 14.027269739586938, Kurtosis: 201.73246992726686\nSkewness: 11.13897022906373, Kurtosis: 125.7094062089462\nSkewness: 11.678760647367662, Kurtosis: 138.09021323778043\nSkewness: 6.697215299756492, Kurtosis: 47.8301175894543\nSkewness: 31.154143303772777, Kurtosis: 977.351025452906\nSkewness: 49.06930156996479, Kurtosis: 2435.2636920891728\nSkewness: 69.47763231691354, Kurtosis: 4881.6934246949495\nSkewness: 16.236060749664578, Kurtosis: 262.7850059745693\nSkewness: 68.28881598739625, Kurtosis: 4771.0570585512\nSkewness: 34.87586593675698, Kurtosis: 1224.5002535768863\nSkewness: 47.74777206561639, Kurtosis: 2348.92455058569\nSkewness: 24.832321329074617, Kurtosis: 616.4590897700664\nSkewness: 31.11946450777929, Kurtosis: 975.9162680236799\nSkewness: 48.32917183198914, Kurtosis: 2386.561261835019\nSkewness: 2.181140775619614, Kurtosis: 3.539088046530206\nSkewness: 68.24997658149256, Kurtosis: 4766.811515903243\nSkewness: 69.47763231691357, Kurtosis: 4881.693424694952\nSkewness: 66.60827497677882, Kurtosis: 4615.932153723136\nSkewness: 24.82923496366406, Kurtosis: 616.324012772113\nSkewness: 28.043532748127987, Kurtosis: 798.9475710872622\nSkewness: 40.22273306446366, Kurtosis: 1631.0605563993727\nSkewness: 24.6866177099693, Kurtosis: 611.6203680301605\nSkewness: 34.912936408617924, Kurtosis: 1226.241157526688\nSkewness: 30.79541485157035, Kurtosis: 962.464692543452\nSkewness: 26.249492696703538, Kurtosis: 694.406943154",
                "lineage": "45201"
            }
        ],
        "453": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    if \"X_pca\" not in adata_int.obsm:\n        sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.7863512147403943,
                "model_info": "",
                "lineage": "45301"
            }
        ],
        "454": [],
        "455": [],
        "456": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.820837697174646,
                "model_info": "",
                "lineage": "45601"
            }
        ],
        "457": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport logging\nimport time\nimport tracemalloc\n\ndef tuso_model(adata):\n    logging.basicConfig(level=logging.INFO)\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    tracemalloc.start()\n    start_time = time.time()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    logging.info(\"Log transformation applied.\")\n    \n    sc.pp.combat(adata_int, key='batch')\n    logging.info(\"Combat batch correction applied.\")\n    \n    sc.pp.scale(adata_int, max_value=10)\n    logging.info(\"Data scaled with max value of 10.\")\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    logging.info(\"PCA completed with 50 components.\")\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    end_time = time.time()\n    current, peak = tracemalloc.get_traced_memory()\n    tracemalloc.stop()\n    \n    logging.info(f\"Execution time: {end_time - start_time:.2f} seconds\")\n    logging.info(f\"Current memory usage: {current / 10**6:.2f} MB; Peak memory usage: {peak / 10**6:.2f} MB\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "45701"
            }
        ],
        "458": [],
        "459": [],
        "460": [],
        "461": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.neighbors import NearestNeighbors\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    nbrs = NearestNeighbors(n_neighbors=10).fit(adata_int.obsm[\"X_pca\"])\n    distances, indices = nbrs.kneighbors(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distances)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.3506574380132248,
                "model_info": "",
                "lineage": "46101"
            }
        ],
        "462": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n    \n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    adata_int.obsm[\"X_pca\"] = pca_result\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.8185047406012904,
                "model_info": "",
                "lineage": "46201"
            }
        ],
        "463": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport umap\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    reducer = umap.UMAP()\n    adata_int.obsm[\"X_emb\"] = reducer.fit_transform(adata_int.obsm[\"X_pca\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.6211727400490272,
                "model_info": "",
                "lineage": "46301"
            }
        ],
        "464": [],
        "465": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.linear_model import LogisticRegression\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    model = LogisticRegression()\n    param_grid = {'C': [0.01, 0.1, 1, 10, 100]}\n    grid_search = GridSearchCV(model, param_grid, cv=5)\n    grid_search.fit(adata_int.obsm[\"X_emb\"], adata_int.obs[\"batch\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "46501"
            }
        ],
        "466": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]  # Use PCA embeddings directly for integration\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "46601"
            }
        ],
        "467": [],
        "468": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)  \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    adata_int.uns['custom_metric'] = np.mean(np.var(adata_int.X, axis=0))\n    adata_int.uns['batch_effect_check'] = np.mean(adata_int.obs['batch'].value_counts())\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "46801"
            }
        ],
        "469": [],
        "470": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "47001"
            }
        ],
        "471": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.820837697174646,
                "model_info": "",
                "lineage": "47101"
            }
        ],
        "472": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    batch_labels = adata_int.obs[\"batch\"].values\n    rf_classifier = RandomForestClassifier()\n    rf_classifier.fit(adata_int.obsm[\"X_pca\"], batch_labels)\n    predictions = rf_classifier.predict(adata_int.obsm[\"X_pca\"])\n    \n    silhouette = silhouette_score(adata_int.obsm[\"X_pca\"], predictions)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "47201"
            }
        ],
        "473": [],
        "474": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    missing_counts = np.isnan(adata_int.X).sum(axis=0)\n    print(\"Missing values per feature:\", missing_counts)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Missing values per feature: [0 0 0 ... 0 0 0]",
                "lineage": "47401"
            }
        ],
        "475": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    adata_int.uns['biological_signal_preservation'] = np.mean(np.var(adata_int.X, axis=0))\n    adata_int.uns['batch_count'] = adata_int.obs['batch'].value_counts().to_dict()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "47501"
            }
        ],
        "476": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    tsne = TSNE(n_components=2, random_state=42)\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(adata_int.obsm[\"X_pca\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.6448212239488771,
                "model_info": "",
                "lineage": "47601"
            }
        ],
        "477": [],
        "478": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7870341448628095,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "47801"
            }
        ],
        "479": [],
        "480": [],
        "481": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    print(\"Starting tuso_model with parameters:\")\n    print(f\"Normalized layer present: {'normalized' in adata.layers}\")\n    print(f\"Number of highly variable genes: 2000\")\n    print(f\"Batch key: 'batch'\")\n    \n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(\"Model completed. Checkpoint summaries:\")\n    print(f\"Number of cells after filtering: {adata_int.n_obs}\")\n    print(f\"Number of genes after filtering: {adata_int.n_vars}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Starting tuso_model with parameters:\nNormalized layer present: True\nNumber of highly variable genes: 2000\nBatch key: 'batch'\nModel completed. Checkpoint summaries:\nNumber of cells after filtering: 5000\nNumber of genes after filtering: 2000",
                "lineage": "48101"
            }
        ],
        "482": [],
        "483": [],
        "484": [],
        "485": [],
        "486": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport torch\nimport torch.nn as nn\n\nclass SimpleNN(nn.Module):\n    def __init__(self, input_dim, hidden_dim):\n        super(SimpleNN, self).__init__()\n        self.fc1 = nn.Linear(input_dim, hidden_dim)\n        self.relu = nn.ReLU()\n        self.fc2 = nn.Linear(hidden_dim, input_dim)\n\n    def forward(self, x):\n        x = self.fc1(x)\n        x = self.relu(x)\n        x = self.fc2(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    input_dim = adata_int.X.shape[1]\n    hidden_dim = 100\n    model = SimpleNN(input_dim, hidden_dim)\n    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n    criterion = nn.MSELoss()\n\n    adata_tensor = torch.tensor(adata_int.X, dtype=torch.float32)\n    for epoch in range(100):\n        model.train()\n        optimizer.zero_grad()\n        output = model(adata_tensor)\n        loss = criterion(output, adata_tensor)\n        loss.backward()\n        optimizer.step()\n\n    with torch.no_grad():\n        model.eval()\n        reduced_data = model(adata_tensor).detach().numpy()\n\n    distance_matrix = pairwise_distances(reduced_data)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8151844338042519,
                "model_info": "",
                "lineage": "48601"
            }
        ],
        "487": [],
        "488": [],
        "489": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7870341448628095,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "48901"
            }
        ],
        "490": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Shapes: X: {adata_int.X.shape}, X_pca: {adata_int.obsm['X_pca'].shape}, X_emb: {adata_int.obsm['X_emb'].shape}\")\n    print(f\"Data types: X: {adata_int.X.dtype}, X_pca: {adata_int.obsm['X_pca'].dtype}, X_emb: {adata_int.obsm['X_emb'].dtype}\")\n\n    if not np.all(np.isfinite(adata_int.obsm[\"X_emb\"])):\n        print(\"Warning: Non-i.i.d. data detected in embedding.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Shapes: X: (5000, 2000), X_pca: (5000, 50), X_emb: (5000, 5000)\nData types: X: float64, X_pca: float32, X_emb: float32",
                "lineage": "49001"
            }
        ],
        "491": [],
        "492": [],
        "493": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    X_train, X_val, y_train, y_val = train_test_split(adata_int.X, adata_int.obs['batch'], test_size=0.2, random_state=42)\n    scaler = StandardScaler()\n    X_train_scaled = scaler.fit_transform(X_train)\n    X_val_scaled = scaler.transform(X_val)\n    \n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_train_scaled, y_train)\n    predictions = model.predict(X_val_scaled)\n    \n    accuracy = accuracy_score(y_val, predictions)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "49301"
            }
        ],
        "494": [],
        "495": [],
        "496": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    if \"X_pca\" not in adata_int.obsm.keys():\n        sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7863512147403943,
                "model_info": "",
                "lineage": "49601"
            }
        ],
        "497": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    pca = PCA(n_components=50)\n    adata_int.obsm[\"X_pca\"] = pca.fit_transform(adata_int.X)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8222377841304714,
                "model_info": "",
                "lineage": "49701"
            }
        ],
        "498": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    if adata_int.n_obs < 1000:\n        print(\"Warning: Insufficient training data volume for robust integration.\")\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "49801"
            }
        ],
        "499": [],
        "500": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "50001"
            }
        ],
        "501": [],
        "502": [],
        "503": [],
        "504": [],
        "505": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50, svd_solver='arpack')\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.820837697174646,
                "model_info": "",
                "lineage": "50501"
            }
        ],
        "506": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7870341448628095,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "50601"
            }
        ],
        "507": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    print(f\"Number of trainable parameters: {np.prod(adata_int.shape)}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Number of trainable parameters: 10000000",
                "lineage": "50701"
            }
        ],
        "508": [],
        "509": [],
        "510": [],
        "511": [],
        "512": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    if \"X_pca\" not in adata_int.obsm:\n        sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7863512147403943,
                "model_info": "",
                "lineage": "51201"
            }
        ],
        "513": [],
        "514": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "51401"
            }
        ],
        "515": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "51501"
            }
        ],
        "516": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    \n    distance_matrix = pairwise_distances(adata_int.X)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8189316018387475,
                "model_info": "",
                "lineage": "51601"
            }
        ],
        "517": [],
        "518": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    missing_counts = np.sum(np.isnan(adata_int.X), axis=0)\n    print(\"Missing values per feature post-imputation:\", missing_counts)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Missing values per feature post-imputation: [0 0 0 ... 0 0 0]",
                "lineage": "51801"
            }
        ],
        "519": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    if len(set(adata_int.obs[\"batch\"])) > 1:\n        batch_counts = adata_int.obs[\"batch\"].value_counts()\n        print(f\"Batch distribution: {batch_counts.to_dict()}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Batch distribution: {'D496': 1289, 'D503': 1199, '640C': 544, '637C': 395, 'A36': 362, 'A29': 271, 'A31': 202, '621B': 184, '582C': 174, 'A35': 165, 'A37': 146, 'A52': 69}",
                "lineage": "51901"
            }
        ],
        "520": [],
        "521": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.leiden(adata_int)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "52101"
            }
        ],
        "522": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Configuration Parameters: n_top_genes=2000, n_comps=50, max_value=10\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Configuration Parameters: n_top_genes=2000, n_comps=50, max_value=10",
                "lineage": "52201"
            }
        ],
        "523": [],
        "524": [],
        "525": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    missing_values_count = np.isnan(adata_int.X).sum(axis=0)\n    print(\"Missing values per feature post-imputation:\", missing_values_count)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Missing values per feature post-imputation: [0 0 0 ... 0 0 0]",
                "lineage": "52501"
            }
        ],
        "526": [],
        "527": [],
        "528": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    clustering_labels = RandomForestClassifier().fit(adata_int.obsm[\"X_emb\"], adata_int.obs[\"batch\"]).predict(adata_int.obsm[\"X_emb\"])\n    silhouette = silhouette_score(adata_int.obsm[\"X_emb\"], clustering_labels)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "52801"
            }
        ],
        "529": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.neural_network import MLPRegressor\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    scaler = StandardScaler()\n    scaled_data = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(scaled_data)\n    \n    model = MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=500)\n    model.fit(pca_result, pca_result)\n    adata_int.obsm[\"X_emb\"] = model.predict(pca_result)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8056925842389656,
                "model_info": "",
                "lineage": "52901"
            }
        ],
        "530": [],
        "531": [],
        "532": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    if not np.all(np.isfinite(adata_int.X)):\n        print(\"Warning: Non-finite values detected in adata_int.X\")\n    if adata_int.shape[0] < 100:\n        print(\"Warning: Sample size is less than 100, may affect integration results.\")\n    print(f\"Assumptions: Highly variable genes selected: {np.sum(adata_int.var['highly_variable'])} out of {adata.shape[1]}.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Assumptions: Highly variable genes selected: 2000 out of 29335.",
                "lineage": "53201"
            }
        ],
        "533": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    if not np.all(np.isfinite(adata_int.X)):\n        print(\"Data contains non-finite values.\")\n    if np.any(np.isnan(adata_int.X)):\n        print(\"Data contains NaN values.\")\n    if np.any(np.isinf(adata_int.X)):\n        print(\"Data contains infinite values.\")\n    \n    print(\"Statistical assumptions: Independence, Normality\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Statistical assumptions: Independence, Normality",
                "lineage": "53301"
            }
        ],
        "534": [],
        "535": [],
        "536": [],
        "537": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    explained_variance = adata_int.uns['pca']['variance'][0:50]\n    cumulative_variance = np.cumsum(explained_variance)\n    n_components = np.argmax(cumulative_variance >= 0.95) + 1\n    sc.pp.pca(adata_int, n_comps=n_components)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.39917057581554244,
                "model_info": "",
                "lineage": "53701"
            }
        ],
        "538": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    print(\"Assumptions: Normalization using log1p, batch correction via ComBat, PCA for dimensionality reduction.\")\n    print(\"Causal Directions: Batch effects assumed to be corrected before similarity computation.\")\n    print(\"Conditional Independencies: Assumed independence of biological variability after batch correction.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Assumptions: Normalization using log1p, batch correction via ComBat, PCA for dimensionality reduction.\nCausal Directions: Batch effects assumed to be corrected before similarity computation.\nConditional Independencies: Assumed independence of biological variability after batch correction.",
                "lineage": "53801"
            }
        ],
        "539": [],
        "540": [],
        "541": [],
        "542": [],
        "543": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.cluster import KMeans\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    n_clusters = len(adata_int.obs['batch'].unique())\n    kmeans = KMeans(n_clusters=n_clusters, random_state=0)\n    adata_int.obs['kmeans_labels'] = kmeans.fit_predict(adata_int.obsm[\"X_emb\"])\n\n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], adata_int.obs['kmeans_labels'])\n    adata_int.uns['silhouette_score'] = silhouette_avg\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "54301"
            }
        ],
        "544": [],
        "545": [],
        "546": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.neural_network import MLPRegressor\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    model = MLPRegressor(hidden_layer_sizes=(128, 64, 32), activation='relu', solver='adam', max_iter=100)\n    model.fit(adata_int.X, adata_int.X)\n    \n    adata_int.obsm[\"X_emb\"] = model.predict(adata_int.X)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8164122789741044,
                "model_info": "",
                "lineage": "54601"
            }
        ],
        "547": [],
        "548": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport logging\n\ndef tuso_model(adata):\n    logging.info(f\"Device: {np.__version__}, Scanpy: {sc.__version__}\")\n    \n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    \n    if 'batch' not in adata_int.obs:\n        logging.warning(\"Batch information is missing from adata.obs\")\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    logging.info(f\"Final evaluation metrics: {{'n_obs': {adata_int.n_obs}, 'n_vars': {adata_int.n_vars}}}\")\n    logging.info(f\"Embedding shape: {adata_int.obsm['X_emb'].shape}, Batch info: {adata_int.obs['batch'].unique()}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "54801"
            }
        ],
        "549": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8128118683228713,
                "model_info": "",
                "lineage": "54901"
            }
        ],
        "550": [],
        "551": [],
        "552": [],
        "553": [],
        "554": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "55401"
            }
        ],
        "555": [],
        "556": [],
        "557": [],
        "558": [],
        "559": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "55901"
            }
        ],
        "560": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport logging\n\ndef tuso_model(adata):\n    logging.basicConfig(level=logging.INFO)\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    trainable_params = 0\n    frozen_params = 0\n    logging.info(f'Trainable parameters: {trainable_params}, Frozen parameters: {frozen_params}')\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "56001"
            }
        ],
        "561": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.calibration import calibration_curve\nimport time\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    start_time = time.time()\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    data_loading_time = time.time() - start_time\n    print(f\"Data loading time for highly variable genes: {data_loading_time:.4f} seconds\")\n    \n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    start_time = time.time()\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    data_loading_time = time.time() - start_time\n    print(f\"Data loading time for preprocessing: {data_loading_time:.4f} seconds\")\n\n    start_time = time.time()\n    sc.pp.pca(adata_int, n_comps=50)\n    data_loading_time = time.time() - start_time\n    print(f\"Data loading time for PCA: {data_loading_time:.4f} seconds\")\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    if not np.all(np.isfinite(adata_int.X)):\n        print(\"Warning: Data contains NaN or infinite values.\")\n    else:\n        batch_labels = adata_int.obs[\"batch\"].values\n        if len(batch_labels) == biological_similarity.size:\n            prob_true, prob_pred = calibration_curve(batch_labels, biological_similarity.flatten(), n_bins=10)\n            ece = np.mean(np.abs(prob_true - prob_pred))\n            print(f\"Calibration Error (ECE): {ece:.4f}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Data loading time for highly variable genes: 0.4489 seconds\nData loading time for preprocessing: 1.2752 seconds\nData loading time for PCA: 2.0448 seconds",
                "lineage": "56101"
            }
        ],
        "562": [],
        "563": [],
        "564": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    skewness = adata_int.X.mean(axis=0) - adata_int.X.std(axis=0)\n    kurtosis = adata_int.X.mean(axis=0) - 3 * adata_int.X.std(axis=0)**2\n    print(\"Skewness:\", skewness)\n    print(\"Kurtosis:\", kurtosis)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Skewness: [-0.9371867  -0.21576422 -0.94692447 ... -0.73902886 -0.26539308\n -0.15505945]\nKurtosis: [-2.60969745 -0.1360728  -2.667738   ... -1.58256408 -0.20198607\n -0.07341743]",
                "lineage": "56401"
            }
        ],
        "565": [],
        "566": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    adata_int.uns['custom_metrics'] = {\n        'mean_expression': np.mean(adata_int.X, axis=0),\n        'batch_effect_variance': np.var(adata_int.X, axis=0) - np.var(adata_int.X[adata_int.obs['batch'] == 'batch1'], axis=0),\n        'sparsity': np.mean(np.sum(adata_int.X > 0, axis=1) / adata_int.X.shape[1])\n    }\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "56601"
            }
        ],
        "567": [],
        "568": [],
        "569": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.neural_network import MLPRegressor\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    model = MLPRegressor(hidden_layer_sizes=(128, 64, 32), activation='relu', solver='adam', max_iter=50, batch_size=32)\n    model.fit(adata_int.X, adata_int.X)\n    \n    adata_int.obsm[\"X_emb\"] = model.predict(adata_int.X)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8174087656282687,
                "model_info": "",
                "lineage": "56901"
            }
        ],
        "570": [],
        "571": [],
        "572": [],
        "573": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.n_vars)\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print({\n        \"n_batches\": adata_int.obs[\"batch\"].nunique(),\n        \"n_cells\": adata_int.n_obs,\n        \"n_genes\": adata_int.n_vars,\n        \"pca_variance_ratio\": adata_int.uns[\"pca\"][\"variance_ratio\"][:5].tolist(),\n        \"combat_batch_effect_reduction\": np.mean(distance_matrix)\n    })\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "{'n_batches': 12, 'n_cells': 5000, 'n_genes': 2000, 'pca_variance_ratio': [0.06512567612903396, 0.02810437568001084, 0.01881424599111393, 0.014732972629201186, 0.010740282470949108], 'combat_batch_effect_reduction': 20.926092}",
                "lineage": "57301"
            }
        ],
        "574": [],
        "575": [],
        "576": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(\"Parameter change magnitude:\", np.linalg.norm(adata_int.X - adata_int.layers[\"normalized\"]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Parameter change magnitude: 1930.596040014745",
                "lineage": "57601"
            }
        ],
        "577": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "57701"
            }
        ],
        "578": [],
        "579": [],
        "580": [],
        "581": [],
        "582": [],
        "583": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Missing value tolerance: {np.isnan(adata_int.X).sum()} missing values found in the data.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Missing value tolerance: 0 missing values found in the data.",
                "lineage": "58301"
            }
        ],
        "584": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.leiden(adata_int)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "58401"
            }
        ],
        "585": [],
        "586": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "58601"
            }
        ],
        "587": [],
        "588": [],
        "589": [],
        "590": [],
        "591": [],
        "592": [],
        "593": [],
        "594": [],
        "595": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    if \"X_pca\" not in adata_int.obsm:\n        sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7863512147403943,
                "model_info": "",
                "lineage": "59501"
            }
        ],
        "596": [],
        "597": [],
        "598": [],
        "599": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.normalize_total(adata_int, target_sum=1e6)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.6273064525393003,
                "model_info": "",
                "lineage": "59901"
            }
        ],
        "600": [],
        "601": [],
        "602": [],
        "603": [],
        "604": [],
        "605": [],
        "606": [],
        "607": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7951554466476253,
                "model_info": "",
                "lineage": "60701"
            }
        ],
        "608": [],
        "609": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    start_time = time.time()\n    sc.pp.combat(adata_int, key='batch')\n    inference_latency = time.time() - start_time\n\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Inference Latency: {inference_latency:.4f} seconds\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Inference Latency: 1.2569 seconds",
                "lineage": "60901"
            }
        ],
        "610": [],
        "611": [],
        "612": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = distance_matrix\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.669130469541332,
                "model_info": "",
                "lineage": "61201"
            }
        ],
        "613": [],
        "614": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    final_metrics = {\n        \"num_cells\": adata_int.n_obs,\n        \"num_genes\": adata_int.n_vars,\n        \"num_batches\": adata_int.obs['batch'].nunique(),\n        \"highly_variable_genes\": adata_int.var['highly_variable'].sum()\n    }\n    print(final_metrics)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "{'num_cells': 5000, 'num_genes': 2000, 'num_batches': 12, 'highly_variable_genes': 2000}",
                "lineage": "61401"
            }
        ],
        "615": [],
        "616": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    sc.pp.neighbors(adata_int, n_neighbors=10, use_rep='X_pca')\n    adjacency_matrix = adata_int.obsp['connectivities'].toarray()\n    biological_similarity = np.exp(-pairwise_distances(adjacency_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.6249263834915121,
                "model_info": "",
                "lineage": "61601"
            }
        ],
        "617": [],
        "618": [],
        "619": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom scipy.stats import shapiro\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    if not shapiro(adata_int.X.flatten())[1] > 0.05:\n        adata_int = adata_int[~np.isnan(adata_int.X).any(axis=1)]\n        sc.pp.log1p(adata_int)\n        sc.pp.combat(adata_int, key='batch')\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "61901"
            }
        ],
        "620": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    \n    if \"X_pca\" not in adata_int.obsm:\n        sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7863512147403943,
                "model_info": "",
                "lineage": "62001"
            }
        ],
        "621": [],
        "622": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    print(\"Input feature scaling: Log1p and Combat normalization applied, followed by scaling.\")\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Input feature scaling: Log1p and Combat normalization applied, followed by scaling.",
                "lineage": "62201"
            }
        ],
        "623": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(\"NaN counts:\", np.isnan(adata_int.X).sum())\n    print(\"Inf counts:\", np.isinf(adata_int.X).sum())\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "NaN counts: 0\nInf counts: 0",
                "lineage": "62301"
            }
        ],
        "624": [],
        "625": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_emb\"] / np.max(adata_int.obsm[\"X_emb\"], axis=1, keepdims=True)\n    \n    print(f\"Dataset statistics: {adata_int.shape[0]} cells, {adata_int.shape[1]} genes, {adata_int.obs['batch'].nunique()} batches.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Dataset statistics: 5000 cells, 2000 genes, 12 batches.",
                "lineage": "62501"
            }
        ],
        "626": [],
        "627": [],
        "628": [],
        "629": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    if adata_int.shape[1] < 50:\n        n_components = adata_int.shape[1]\n    else:\n        n_components = 50\n    sc.pp.pca(adata_int, n_comps=n_components)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    z_scores = (adata_int.X - np.mean(adata_int.X, axis=0)) / np.std(adata_int.X, axis=0)\n    outliers = np.sum(np.abs(z_scores) > 3)\n    print(f\"Detected outliers: {outliers}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Detected outliers: 111153",
                "lineage": "62901"
            }
        ],
        "630": [],
        "631": [],
        "632": [],
        "633": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_features = adata_int.shape[1]\n    n_components = min(50, n_features)\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Highly variable genes: {np.sum(adata_int.var['highly_variable'])}\")\n    print(f\"Number of components in PCA: {adata_int.obsm['X_pca'].shape[1]}\")\n    print(f\"Batch integration method used: Combat\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Highly variable genes: 2000\nNumber of components in PCA: 50\nBatch integration method used: Combat",
                "lineage": "63301"
            }
        ],
        "634": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.cluster import KMeans\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    kmeans = KMeans(n_clusters=len(adata_int.obs['batch'].unique()), random_state=42)\n    adata_int.obs['kmeans_labels'] = kmeans.fit_predict(adata_int.obsm[\"X_emb\"])\n\n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], adata_int.obs['kmeans_labels'])\n    print(f'Silhouette Score: {silhouette_avg}')\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Silhouette Score: 0.0002677305892575532",
                "lineage": "63401"
            }
        ],
        "635": [],
        "636": [],
        "637": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\nimport tracemalloc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    start_time = time.time()\n    tracemalloc.start()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    end_time = time.time()\n    current, peak = tracemalloc.get_traced_memory()\n    tracemalloc.stop()\n\n    print(f\"Run-time: {end_time - start_time:.2f} seconds\")\n    print(f\"Memory consumption: {current / 10**6:.2f} MB; Peak: {peak / 10**6:.2f} MB\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Run-time: 4.97 seconds\nMemory consumption: 387.18 MB; Peak: 487.18 MB",
                "lineage": "63701"
            }
        ],
        "638": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom scipy.stats import zscore\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    n_features = adata_int.shape[1]\n    n_components = min(50, n_features)\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    z_scores = zscore(adata_int.X, axis=0)\n    outlier_counts = np.sum(np.abs(z_scores) > 3, axis=0)\n    print(f\"Detected outliers (count per feature): {outlier_counts}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Detected outliers (count per feature): [117   2  67 ...  34   3   1]",
                "lineage": "63801"
            }
        ],
        "639": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nimport umap\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    sc.pp.neighbors(adata_int, n_neighbors=15, use_rep='X_pca')\n    adata_int.obsm[\"X_emb\"] = umap.UMAP().fit_transform(adata_int.obsm[\"X_pca\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.6328943145516687,
                "model_info": "",
                "lineage": "63901"
            }
        ],
        "640": [],
        "641": [],
        "642": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    if adata.X is not None and adata.X.ndim > 1:\n        mean_before = np.mean(adata.X, axis=0)\n        std_before = np.std(adata.X, axis=0)\n        min_before = np.min(adata.X, axis=0)\n        max_before = np.max(adata.X, axis=0)\n    else:\n        mean_before = std_before = min_before = max_before = None\n\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    if adata_int.X is not None and adata_int.X.ndim > 1:\n        mean_after = np.mean(adata_int.X, axis=0)\n        std_after = np.std(adata_int.X, axis=0)\n        min_after = np.min(adata_int.X, axis=0)\n        max_after = np.max(adata_int.X, axis=0)\n    else:\n        mean_after = std_after = min_after = max_after = None\n\n    print(f\"Before scaling: mean={mean_before}, std={std_before}, min={min_before}, max={max_before}\")\n    print(f\"After scaling: mean={mean_after}, std={std_after}, min={min_after}, max={max_after}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Before scaling: mean=None, std=None, min=None, max=None\nAfter scaling: mean=[-0.0054832  -0.01425546 -0.00476938 ... -0.01652392 -0.01722729\n -0.01213666], std=[0.9317035  0.20150876 0.94215509 ... 0.72250494 0.24816579 0.14292279], min=[-0.33341273 -0.05449624 -0.13809829 ... -0.12121406 -0.16158649\n -0.04856623], max=[10. 10. 10. ... 10. 10. 10.]",
                "lineage": "64201"
            }
        ],
        "643": [],
        "644": [],
        "645": [],
        "646": [],
        "647": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.linear_model import LogisticRegression\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "64701"
            }
        ],
        "648": [],
        "649": [],
        "650": [],
        "651": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.cluster import AgglomerativeClustering\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    clustering = AgglomerativeClustering(n_clusters=None, distance_threshold=0.5)\n    clustering.fit(biological_similarity)\n    adata_int.obs['clusters'] = clustering.labels_\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "65101"
            }
        ],
        "652": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    sc.pp.neighbors(adata_int, n_neighbors=15)\n    sc.tl.umap(adata_int)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "65201"
            }
        ],
        "653": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    \n    attention_weights = np.matmul(biological_similarity, adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = attention_weights / np.sum(attention_weights, axis=1, keepdims=True)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.5500294365652483,
                "model_info": "",
                "lineage": "65301"
            }
        ],
        "654": [],
        "655": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    low_confidence_threshold = 0.5\n    low_confidence_count = np.sum(np.max(biological_similarity, axis=1) < low_confidence_threshold)\n    total_count = biological_similarity.shape[0]\n    low_confidence_percentage = (low_confidence_count / total_count) * 100\n    print(f\"Low-confidence predictions: {low_confidence_percentage:.2f}%\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Low-confidence predictions: 0.00%",
                "lineage": "65501"
            }
        ],
        "656": [],
        "657": [],
        "658": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    batch_means = adata_int.obsm[\"X_pca\"].mean(axis=0)\n    adata_int.obsm[\"X_pca\"] = adata_int.obsm[\"X_pca\"] - batch_means\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "65801"
            }
        ],
        "659": [],
        "660": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    scaler = StandardScaler()\n    adata_int.X = scaler.fit_transform(adata_int.X)\n    \n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    adata_int.obsm[\"X_pca\"] = pca_result\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8059395475946838,
                "model_info": "",
                "lineage": "66001"
            }
        ],
        "661": [],
        "662": [],
        "663": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    adata_int.uns['custom_metrics'] = {}\n    adata_int.uns['custom_metrics']['biological_relevance'] = np.mean(biological_similarity)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "66301"
            }
        ],
        "664": [],
        "665": [],
        "666": [],
        "667": [],
        "668": [],
        "669": [],
        "670": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    param_change_magnitude = np.linalg.norm(adata_int.X - adata_int.layers[\"normalized\"])\n    print(f\"Parameter change magnitude: {param_change_magnitude}\")\n\n    nan_count = np.isnan(adata_int.X).sum()\n    inf_count = np.isinf(adata_int.X).sum()\n    print(f\"NaN count: {nan_count}, Inf count: {inf_count}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Parameter change magnitude: 1930.596040014745\nNaN count: 0, Inf count: 0",
                "lineage": "67001"
            }
        ],
        "671": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = distance_matrix\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.669130469541332,
                "model_info": "",
                "lineage": "67101"
            }
        ],
        "672": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_features = adata_int.shape[1]\n    n_pca_components = min(50, n_features)\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity / np.max(biological_similarity, axis=1, keepdims=True)\n\n    print(f\"NaN counts: {np.isnan(adata_int.X).sum()}\")\n    print(f\"Inf counts: {np.isinf(adata_int.X).sum()}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "NaN counts: 0\nInf counts: 0",
                "lineage": "67201"
            }
        ],
        "673": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    n_pca_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    feature_ranges = {feature: (adata_int.X[:, i].min(), adata_int.X[:, i].max()) for i, feature in enumerate(adata_int.var_names)}\n    print(\"Feature Ranges:\", feature_ranges)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Feature Ranges: {'ENSG00000175899': (-0.33341272937259253, 10.0), 'ENSG00000166535': (-0.054496244502945636, 10.0), 'ENSG00000008311': (-0.1380982899562542, 10.0), 'ENSG00000165029': (-0.37713023152236724, 6.172028413733001), 'ENSG00000004846': (-0.05077833286505981, 10.0), 'ENSG00000108846': (-0.32315583851310486, 10.0), 'ENSG00000267457': (-0.06068555033905239, 10.0), 'ENSG00000275936': (-0.06989457644542921, 10.0), 'ENSG00000268812': (-0.10220161333663674, 10.0), 'ENSG00000278765': (-0.06653167624532388, 10.0), 'ENSG00000225792': (-0.1355842126579496, 10.0), 'ENSG00000269927': (-0.13105638502144043, 10.0), 'ENSG00000259768': (-0.11188394774489241, 10.0), 'ENSG00000266378': (-0.2228685618439662, 10.0), 'ENSG00000231412': (-0.08671566347655385, 10.0), 'ENSG00000260107': (-0.05530818189497423, 10.0), 'ENSG00000263708': (-0.0933560647776456, 10.0), 'ENSG00000279762': (-0.0816310560551351, 10.0), 'ENSG00000243762': (-0.08764093308116289, 10.0), 'ENSG00000282022': (-0.06904853312132599, 10.0), 'ENSG00000228384': (-0.11700741917479876, 10.0), 'ENSG00000230408': (-0.06480688824999381, 10.0), 'ENSG00000259843': (-0.10421688201046142, 10.0), 'ENSG00000270823': (-0.07047849938654886, 10.0), 'ENSG00000262202': (-0.5580981621844671, 5.446432985813981), 'ENSG00000284797': (-0.051068012480050516, 10.0), 'ENSG00000250258': (-0.09335606477764558, 10.0), 'ENSG00000267646': (-0.09540228140640346, 10.0), 'ENSG00000255441': (-0.06896469491954785, 10.0), 'ENSG00000267984': (-0.12751491991762756, 10.0), 'ENSG00000250453': (-0.07045348831941169, 10.0), 'ENSG00000250240': (-0.10958956074198949, 10.0), 'ENSG00000247372': (-0.07121749216582296, 10.0), 'ENSG00000237705': (-0.12604550838832984, 10.0), 'ENSG00000272081': (-0.08467899497578332, 10.0), 'ENSG00000273576': (-0.0920299838445183, 10.0), 'ENSG00000254162': (-0.0768746470522817, 10.0), 'ENSG00000270277': (-0.07939064780573199, 10.0), 'ENSG00000258101': (-0.09000734872080152, 10.0), 'ENSG00000257696': (-0.07316816407299091, 10.0), 'EN",
                "lineage": "67301"
            }
        ],
        "674": [
            {
                "code": "import scanpy as sc\nfrom sklearn.decomposition import PCA\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    pca = PCA(n_components=50)\n    adata_int.obsm[\"X_pca\"] = pca.fit_transform(adata_int.X)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8222377841304714,
                "model_info": "",
                "lineage": "67401"
            }
        ],
        "675": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom scipy.stats import skew, kurtosis\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_pca_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    skewness = skew(adata_int.X, axis=0)\n    kurt = kurtosis(adata_int.X, axis=0)\n    print(\"Skewness:\", skewness)\n    print(\"Kurtosis:\", kurt)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Skewness: [ 7.39286964 49.09266173  9.11784787 ... 12.91782697 39.44912459\n 68.75146569]\nKurtosis: [  62.27672919 2436.82693046   83.88498569 ...  169.01714286 1589.85221408\n 4813.4961606 ]",
                "lineage": "67501"
            }
        ],
        "676": [],
        "677": [],
        "678": [],
        "679": [],
        "680": [],
        "681": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom sklearn.preprocessing import StandardScaler\n\nclass SimpleNN(nn.Module):\n    def __init__(self, input_dim, output_dim):\n        super(SimpleNN, self).__init__()\n        self.fc1 = nn.Linear(input_dim, 128)\n        self.fc2 = nn.Linear(128, 64)\n        self.fc3 = nn.Linear(64, output_dim)\n\n    def forward(self, x):\n        x = torch.relu(self.fc1(x))\n        x = torch.relu(self.fc2(x))\n        x = self.fc3(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    scaler = StandardScaler()\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    model = SimpleNN(input_dim=X_scaled.shape[1], output_dim=50)\n    model.eval()\n\n    with torch.no_grad():\n        X_tensor = torch.FloatTensor(X_scaled)\n        embeddings = model(X_tensor).numpy()\n\n    distance_matrix = pairwise_distances(embeddings)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.48088662998198606,
                "model_info": "",
                "lineage": "68101"
            }
        ],
        "682": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport logging\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    nan_count = np.isnan(adata_int.X).sum()\n    inf_count = np.isinf(adata_int.X).sum()\n    logging.info(f'NaN Count: {nan_count}, Inf Count: {inf_count}')\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "68201"
            }
        ],
        "683": [],
        "684": [],
        "685": [],
        "686": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    if 'pca' in adata_int.uns and 'explained_variance_ratio_' in adata_int.uns['pca']:\n        explained_variance = np.cumsum(adata_int.uns['pca']['explained_variance_ratio_'])\n        n_comps = np.argmax(explained_variance >= 0.95) + 1\n        sc.pp.pca(adata_int, n_comps=n_comps)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "68601"
            }
        ],
        "687": [],
        "688": [],
        "689": [],
        "690": [],
        "691": [],
        "692": [],
        "693": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_pca_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity / biological_similarity.max(axis=1, keepdims=True)\n\n    z_scores = (adata_int.X - np.mean(adata_int.X, axis=0)) / np.std(adata_int.X, axis=0)\n    outliers = np.sum(np.abs(z_scores) > 3, axis=0)\n    print(f\"Detected outliers: {outliers}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Detected outliers: [117   2  67 ...  34   3   1]",
                "lineage": "69301"
            }
        ],
        "694": [],
        "695": [],
        "696": [],
        "697": [],
        "698": [],
        "699": [],
        "700": [],
        "701": [],
        "702": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata, batch_size=128, early_stopping_rounds=10):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "70201"
            }
        ],
        "703": [],
        "704": [],
        "705": [],
        "706": [],
        "707": [],
        "708": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom collections import Counter\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    class_balance_before = Counter(adata_int.obs[\"batch\"])\n    print(f\"Class balance before resampling: {class_balance_before}\")\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_pca_comps = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_pca_comps)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity /= biological_similarity.max(axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    class_balance_after = Counter(adata_int.obs[\"batch\"])\n    print(f\"Class balance after resampling: {class_balance_after}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Class balance before resampling: Counter({'D496': 1289, 'D503': 1199, '640C': 544, '637C': 395, 'A36': 362, 'A29': 271, 'A31': 202, '621B': 184, '582C': 174, 'A35': 165, 'A37': 146, 'A52': 69})\nClass balance after resampling: Counter({'D496': 1289, 'D503': 1199, '640C': 544, '637C': 395, 'A36': 362, 'A29': 271, 'A31': 202, '621B': 184, '582C': 174, 'A35': 165, 'A37': 146, 'A52': 69})",
                "lineage": "70801"
            }
        ],
        "709": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    if np.any(np.isnan(adata_int.X)) or np.any(np.isinf(adata_int.X)):\n        print(f\"NaN or Inf encountered: {np.sum(np.isnan(adata_int.X))} NaNs, {np.sum(np.isinf(adata_int.X))} Infs\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "70901"
            }
        ],
        "710": [],
        "711": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom scipy.stats import zscore\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity /= biological_similarity.max(axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    z_scores = zscore(adata_int.X, axis=0)\n    outlier_counts = np.sum(np.abs(z_scores) > 3, axis=0)\n    print(\"Detected outliers per feature:\", outlier_counts)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Detected outliers per feature: [117   2  67 ...  34   3   1]",
                "lineage": "71101"
            }
        ],
        "712": [],
        "713": [],
        "714": [],
        "715": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Trainable parameters: {np.prod(adata_int.shape)}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Trainable parameters: 10000000",
                "lineage": "71501"
            }
        ],
        "716": [],
        "717": [],
        "718": [],
        "719": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport torch\nimport torch.nn as nn\n\nclass SimpleNN(nn.Module):\n    def __init__(self, input_dim, output_dim):\n        super(SimpleNN, self).__init__()\n        self.fc1 = nn.Linear(input_dim, 128)\n        self.fc2 = nn.Linear(128, output_dim)\n\n    def forward(self, x):\n        x = torch.relu(self.fc1(x))\n        x = self.fc2(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    model = SimpleNN(input_dim=adata_int.X.shape[1], output_dim=50)\n    model.eval()\n    with torch.no_grad():\n        embeddings = model(torch.tensor(adata_int.X).float()).numpy()\n    \n    distance_matrix = pairwise_distances(embeddings)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.5719714953817614,
                "model_info": "",
                "lineage": "71901"
            }
        ],
        "720": [],
        "721": [],
        "722": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom sklearn.preprocessing import StandardScaler\n\nclass MLPRegressor(nn.Module):\n    def __init__(self, input_dim, output_dim):\n        super(MLPRegressor, self).__init__()\n        self.fc1 = nn.Linear(input_dim, 128)\n        self.fc2 = nn.Linear(128, 64)\n        self.fc3 = nn.Linear(64, output_dim)\n        self.activation = nn.ReLU()\n\n    def forward(self, x):\n        x = self.activation(self.fc1(x))\n        x = self.activation(self.fc2(x))\n        x = self.fc3(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    scaler = StandardScaler()\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    model = MLPRegressor(input_dim=X_scaled.shape[1], output_dim=50)\n    model.eval()\n    with torch.no_grad():\n        X_emb = model(torch.tensor(X_scaled, dtype=torch.float32)).numpy()\n    \n    distance_matrix = pairwise_distances(X_emb)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.47960566220283174,
                "model_info": "",
                "lineage": "72201"
            }
        ],
        "723": [],
        "724": [],
        "725": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.n_vars))\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity /= biological_similarity.max(axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    evaluation_metrics = {\n        'n_cells': adata_int.n_obs,\n        'n_genes': adata_int.n_vars,\n        'n_batches': adata_int.obs['batch'].nunique(),\n        'highly_variable_genes': adata_int.var['highly_variable'].sum()\n    }\n    \n    print(f\"Final Evaluation Metrics: {evaluation_metrics}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Final Evaluation Metrics: {'n_cells': 5000, 'n_genes': 2000, 'n_batches': 12, 'highly_variable_genes': 2000}",
                "lineage": "72501"
            }
        ],
        "726": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.n_vars)\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity / biological_similarity.max(axis=1, keepdims=True)\n\n    final_metrics = {\n        \"n_genes\": adata_int.n_vars,\n        \"n_cells\": adata_int.n_obs,\n        \"pca_components\": adata_int.obsm[\"X_pca\"].shape[1],\n        \"batch_integrated\": True\n    }\n    print(\"Final Evaluation Metrics:\", final_metrics)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Final Evaluation Metrics: {'n_genes': 2000, 'n_cells': 5000, 'pca_components': 50, 'batch_integrated': True}",
                "lineage": "72601"
            }
        ],
        "727": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    adata_int.uns['custom_metric'] = np.mean(biological_similarity, axis=1)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "72701"
            }
        ],
        "728": [],
        "729": [],
        "730": [],
        "731": [],
        "732": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    print(\"Model Configuration: n_top_genes=2000, n_comps=50\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Model Configuration: n_top_genes=2000, n_comps=50",
                "lineage": "73201"
            }
        ],
        "733": [],
        "734": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata, batch_size=128, weight_decay=0.01):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "73401"
            }
        ],
        "735": [],
        "736": [],
        "737": [],
        "738": [],
        "739": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport logging\n\ndef tuso_model(adata):\n    logging.basicConfig(level=logging.INFO)\n    logging.info(\"Using Scanpy version: %s\", sc.__version__)\n    \n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "73901"
            }
        ],
        "740": [],
        "741": [],
        "742": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    mean_before = adata_int.X.mean(axis=0)\n    std_before = adata_int.X.std(axis=0)\n    min_before = adata_int.X.min(axis=0)\n    max_before = adata_int.X.max(axis=0)\n    \n    n_pca_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n\n    mean_after = adata_int.X.mean(axis=0)\n    std_after = adata_int.X.std(axis=0)\n    min_after = adata_int.X.min(axis=0)\n    max_after = adata_int.X.max(axis=0)\n\n    print(f\"Before Scaling: mean={mean_before}, std={std_before}, min={min_before}, max={max_before}\")\n    print(f\"After Scaling: mean={mean_after}, std={std_after}, min={min_after}, max={max_after}\")\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity /= biological_similarity.max(axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Before Scaling: mean=[-0.0054832  -0.01425546 -0.00476938 ... -0.01652392 -0.01722729\n -0.01213666], std=[0.9317035  0.20150876 0.94215509 ... 0.72250494 0.24816579 0.14292279], min=[-0.33341273 -0.05449624 -0.13809829 ... -0.12121406 -0.16158649\n -0.04856623], max=[10. 10. 10. ... 10. 10. 10.]\nAfter Scaling: mean=[-0.0054832  -0.01425546 -0.00476938 ... -0.01652392 -0.01722729\n -0.01213666], std=[0.9317035  0.20150876 0.94215509 ... 0.72250494 0.24816579 0.14292279], min=[-0.33341273 -0.05449624 -0.13809829 ... -0.12121406 -0.16158649\n -0.04856623], max=[10. 10. 10. ... 10. 10. 10.]",
                "lineage": "74201"
            }
        ],
        "743": [],
        "744": [],
        "745": [],
        "746": [],
        "747": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    if 'pca' in adata_int.uns and 'explained_variance_ratio_' in adata_int.uns['pca']:\n        explained_variance = adata_int.uns['pca']['explained_variance_ratio_']\n        cumulative_variance = np.cumsum(explained_variance)\n        n_components = np.argmax(cumulative_variance >= 0.95) + 1\n        \n        sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "74701"
            }
        ],
        "748": [],
        "749": [],
        "750": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7951554466476253,
                "model_info": "",
                "lineage": "75001"
            }
        ],
        "751": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_pca_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity /= biological_similarity.max(axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Sparsity: {np.count_nonzero(adata_int.X) / adata_int.X.size:.2f}\")\n    print(f\"Low-rank representation: {np.linalg.matrix_rank(adata_int.X)}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Sparsity: 1.00\nLow-rank representation: 1981",
                "lineage": "75101"
            }
        ],
        "752": [],
        "753": [],
        "754": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n\n    if \"X_pca\" not in adata_int.obsm:\n        sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7863512147403943,
                "model_info": "",
                "lineage": "75401"
            }
        ],
        "755": [],
        "756": [],
        "757": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    scaler = StandardScaler()\n    adata_int.X = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    \n    distance_matrix = pairwise_distances(pca_result)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8059395475946838,
                "model_info": "",
                "lineage": "75701"
            }
        ],
        "758": [],
        "759": [],
        "760": [],
        "761": [],
        "762": [],
        "763": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata, weight_decay=0.01, learning_rate=0.001, early_stopping=True):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    if early_stopping:\n        # Implement early stopping logic based on validation performance\n        pass\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "76301"
            }
        ],
        "764": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    print(f\"Number of highly variable genes: {adata_int.shape[1]}\")\n    print(f\"Mean expression after normalization: {adata_int.X.mean()}\")\n    print(f\"Batch integration completed with {adata_int.obs['batch'].nunique()} batches.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Number of highly variable genes: 2000\nMean expression after normalization: -0.012085186786898738\nBatch integration completed with 12 batches.",
                "lineage": "76401"
            }
        ],
        "765": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.cluster import KMeans\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    kmeans = KMeans(n_clusters=len(np.unique(adata_int.obs['batch'])), random_state=42)\n    adata_int.obs['kmeans_labels'] = kmeans.fit_predict(adata_int.obsm[\"X_emb\"])\n    \n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], adata_int.obs['kmeans_labels'])\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "76501"
            }
        ],
        "766": [],
        "767": [],
        "768": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.cluster import KMeans\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    cluster_labels = KMeans(n_clusters=10).fit_predict(adata_int.obsm[\"X_emb\"])\n    adata_int.obs['cluster_labels'] = cluster_labels\n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], cluster_labels)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "76801"
            }
        ],
        "769": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Performance Snapshot: {adata_int.n_obs} cells, {adata_int.n_vars} genes, {np.mean(distance_matrix):.4f} avg distance\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Performance Snapshot: 5000 cells, 2000 genes, 20.9261 avg distance",
                "lineage": "76901"
            }
        ],
        "770": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "77001"
            }
        ],
        "771": [],
        "772": [],
        "773": [],
        "774": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\nimport psutil\n\ndef tuso_model(adata):\n    start_time = time.time()\n    mem_before = psutil.Process().memory_info().rss\n    \n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.n_vars)\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity_normalized = biological_similarity / np.max(biological_similarity, axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity_normalized\n\n    mem_after = psutil.Process().memory_info().rss\n    num_params = adata_int.n_vars * adata_int.n_obs\n    \n    print(f\"Trainable Parameters: {num_params}\")\n    print(f\"Memory Usage: {mem_after - mem_before} bytes\")\n    print(f\"Execution Time: {time.time() - start_time} seconds\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Trainable Parameters: 10000000\nMemory Usage: 569745408 bytes\nExecution Time: 4.5814595222473145 seconds",
                "lineage": "77401"
            }
        ],
        "775": [],
        "776": [],
        "777": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.n_vars))\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity_normalized = biological_similarity / biological_similarity.max(axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity_normalized\n\n    final_metrics = {\n        \"num_cells\": adata_int.n_obs,\n        \"num_genes\": adata_int.n_vars,\n        \"batch_count\": adata_int.obs[\"batch\"].nunique(),\n        \"mean_distance\": np.mean(distance_matrix),\n        \"std_distance\": np.std(distance_matrix)\n    }\n    print(final_metrics)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "{'num_cells': 5000, 'num_genes': 2000, 'batch_count': 12, 'mean_distance': 20.926092, 'std_distance': 9.937594}",
                "lineage": "77701"
            }
        ],
        "778": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import confusion_matrix\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    model = RandomForestClassifier()\n    model.fit(adata_int.obsm[\"X_emb\"], adata_int.obs[\"batch\"])\n    predictions = model.predict(adata_int.obsm[\"X_emb\"])\n    \n    cm = confusion_matrix(adata_int.obs[\"batch\"], predictions)\n    print(\"Confusion Matrix:\\n\", cm)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Confusion Matrix:\n [[ 174    0    0    0    0    0    0    0    0    0    0    0]\n [   0  184    0    0    0    0    0    0    0    0    0    0]\n [   0    0  395    0    0    0    0    0    0    0    0    0]\n [   0    0    0  544    0    0    0    0    0    0    0    0]\n [   0    0    0    0  271    0    0    0    0    0    0    0]\n [   0    0    0    0    0  202    0    0    0    0    0    0]\n [   0    0    0    0    0    0  165    0    0    0    0    0]\n [   0    0    0    0    0    0    0  362    0    0    0    0]\n [   0    0    0    0    0    0    0    0  146    0    0    0]\n [   0    0    0    0    0    0    0    0    0   69    0    0]\n [   0    0    0    0    0    0    0    0    0    0 1289    0]\n [   0    0    0    0    0    0    0    0    0    0    0 1199]]",
                "lineage": "77801"
            }
        ],
        "779": [],
        "780": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Input feature scaling: log1p, combat, scale (max_value=10)\")\n    print(f\"Number of trainable parameters: {adata_int.n_vars * adata_int.n_obs}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Input feature scaling: log1p, combat, scale (max_value=10)\nNumber of trainable parameters: 10000000",
                "lineage": "78001"
            }
        ],
        "781": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    print(f\"Trainable parameters: {adata_int.n_vars * adata_int.n_obs}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Trainable parameters: 10000000",
                "lineage": "78101"
            }
        ],
        "782": [],
        "783": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.neural_network import MLPRegressor\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    model = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', max_iter=1000)\n    model.fit(adata_int.X, adata_int.X)\n    adata_int.obsm[\"X_emb\"] = model.predict(adata_int.X)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8210545676212698,
                "model_info": "",
                "lineage": "78301"
            }
        ],
        "784": [],
        "785": [],
        "786": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    if adata_int.n_obs < 1000:\n        print(\"Warning: Insufficient training data volume. Consider increasing the number of cells for better integration.\")\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Checkpoint: PCA completed with {adata_int.n_obs} observations and {adata_int.n_vars} variables.\")\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Checkpoint: PCA completed with 5000 observations and 2000 variables.",
                "lineage": "78601"
            }
        ],
        "787": [],
        "788": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "78801"
            }
        ],
        "789": [],
        "790": [],
        "791": [],
        "792": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_features = adata_int.shape[1]\n    n_components = min(n_features, 50)\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity = biological_similarity / biological_similarity.max(axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Assumed feature ranges: normalized data in layers, highly variable genes, PCA components\")\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Assumed feature ranges: normalized data in layers, highly variable genes, PCA components",
                "lineage": "79201"
            }
        ],
        "793": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.model_selection import train_test_split\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    X = adata_int.obsm[\"X_emb\"]\n    y = adata_int.obs[\"batch\"]\n    \n    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n    \n    model = RandomForestClassifier(n_estimators=100)\n    model.fit(X_train, y_train)\n    \n    adata_int.obs['predicted_batch'] = model.predict(X)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "79301"
            }
        ],
        "794": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "79401"
            }
        ],
        "795": [],
        "796": [],
        "797": [],
        "798": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f'Trainable parameters: {adata_int.X.shape[1]}, Components: {adata_int.obsm[\"X_emb\"].shape[1]}')\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Trainable parameters: 2000, Components: 5000",
                "lineage": "79801"
            }
        ],
        "799": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n\n    distance_matrix = pairwise_distances(adata_int.X)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8189316018387475,
                "model_info": "",
                "lineage": "79901"
            }
        ],
        "800": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.ensemble import RandomForestClassifier\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    batch_labels = adata_int.obs[\"batch\"].values\n    rf = RandomForestClassifier(n_estimators=100)\n    rf.fit(adata_int.obsm[\"X_emb\"], batch_labels)\n    adata_int.obs[\"predicted_batch\"] = rf.predict(adata_int.obsm[\"X_emb\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "80001"
            }
        ],
        "801": [],
        "802": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\n\ndef tuso_model(adata):\n    start_time = time.time()\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    print(f\"Data loading time: {time.time() - start_time:.2f}s\")\n\n    start_time = time.time()\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    print(f\"Highly variable genes selection time: {time.time() - start_time:.2f}s\")\n\n    start_time = time.time()\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    print(f\"Normalization and batch correction time: {time.time() - start_time:.2f}s\")\n\n    start_time = time.time()\n    sc.pp.pca(adata_int, n_comps=50)\n    print(f\"PCA computation time: {time.time() - start_time:.2f}s\")\n\n    start_time = time.time()\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    print(f\"Embedding computation time: {time.time() - start_time:.2f}s\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Data loading time: 0.08s\nHighly variable genes selection time: 0.47s\nNormalization and batch correction time: 1.40s\nPCA computation time: 2.47s\nEmbedding computation time: 0.27s",
                "lineage": "80201"
            }
        ],
        "803": [],
        "804": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity = biological_similarity / biological_similarity.max(axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    gradient_norms = np.linalg.norm(adata_int.X, axis=1)\n    print(\"Gradient Norms:\", gradient_norms)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Gradient Norms: [27.61492993 44.22025407 25.15481017 ... 39.05495789 28.46095444\n 20.07168356]",
                "lineage": "80401"
            }
        ],
        "805": [],
        "806": [],
        "807": [],
        "808": [],
        "809": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    mean_before, std_before = adata_int.X.mean(), adata_int.X.std()\n    min_before, max_before = adata_int.X.min(), adata_int.X.max()\n\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    mean_after, std_after = adata_int.X.mean(), adata_int.X.std()\n    min_after, max_after = adata_int.X.min(), adata_int.X.max()\n\n    print(f\"Before scaling: mean={mean_before}, std={std_before}, min={min_before}, max={max_before}\")\n    print(f\"After scaling: mean={mean_after}, std={std_after}, min={min_after}, max={max_after}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Before scaling: mean=-0.012085186786898738, std=0.7004362019288259, min=-10.0, max=10.0\nAfter scaling: mean=-0.012085186786898738, std=0.7004362019288259, min=-10.0, max=10.0",
                "lineage": "80901"
            }
        ],
        "810": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=min(50, adata_int.shape[1]))\n\n    sc.pp.neighbors(adata_int, n_neighbors=15)\n    sc.tl.umap(adata_int)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.6535542670463662,
                "model_info": "",
                "lineage": "81001"
            }
        ],
        "811": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity_normalized = biological_similarity / biological_similarity.max(axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity_normalized\n\n    print(f\"Shapes: X: {adata_int.X.shape}, X_pca: {adata_int.obsm['X_pca'].shape}, X_emb: {adata_int.obsm['X_emb'].shape}\")\n    print(f\"Data types: X: {type(adata_int.X)}, X_pca: {type(adata_int.obsm['X_pca'])}, X_emb: {type(adata_int.obsm['X_emb'])}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Shapes: X: (5000, 2000), X_pca: (5000, 50), X_emb: (5000, 5000)\nData types: X: <class 'numpy.ndarray'>, X_pca: <class 'numpy.ndarray'>, X_emb: <class 'numpy.ndarray'>",
                "lineage": "81101"
            }
        ],
        "812": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    batch_counts = adata_int.obs['batch'].value_counts()\n    adata_int.uns['integration_quality'] = {\n        'mean_biological_similarity': np.mean(biological_similarity),\n        'mean_batch_counts': batch_counts.mean(),\n        'mean_variance': np.mean(np.var(adata_int.X, axis=0)),\n        'mean_count': np.mean(adata_int.X.sum(axis=0)),\n    }\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "81201"
            }
        ],
        "813": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom scipy.stats import zscore\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    z_scores = zscore(adata_int.X, axis=0)\n    outliers = np.sum(np.abs(z_scores) > 3, axis=0)\n    print(f\"Detected outliers (z-score > 3): {outliers}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Detected outliers (z-score > 3): [117   2  67 ...  34   3   1]",
                "lineage": "81301"
            }
        ],
        "814": [],
        "815": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7764079360484274,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "81501"
            }
        ],
        "816": [],
        "817": [],
        "818": [],
        "819": [],
        "820": [],
        "821": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    print(f\"Batch integration diagnostics: num_batches={adata_int.obs['batch'].nunique()}, \"\n          f\"highly_variable_genes={np.sum(adata_int.var['highly_variable'])}, \"\n          f\"pca_components={adata_int.obsm['X_pca'].shape[1]}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Batch integration diagnostics: num_batches=12, highly_variable_genes=2000, pca_components=50",
                "lineage": "82101"
            }
        ],
        "822": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    batch_structure = adata_int.obs[\"batch\"].values\n    unique_batches = np.unique(batch_structure)\n    \n    for batch in unique_batches:\n        batch_indices = np.where(batch_structure == batch)[0]\n        adata_int.uns[f'cluster_quality_{batch}'] = np.mean(biological_similarity[batch_indices][:, batch_indices])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "82201"
            }
        ],
        "823": [],
        "824": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.feature_selection import SelectKBest, f_regression\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    if 'batch' in adata_int.obs:\n        batch_values = adata_int.obs['batch'].astype('category').cat.codes\n        feature_importances = SelectKBest(f_regression, k=10).fit(adata_int.X, batch_values).scores_\n        top_k_features = np.argsort(feature_importances)[-10:]\n\n        print(\"Top-k feature importances:\", top_k_features)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Top-k feature importances: [1858 1993 1797  225 1079 1318 1433  295 1871 1092]",
                "lineage": "82401"
            }
        ],
        "825": [],
        "826": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.decomposition import PCA\nfrom sklearn.neural_network import MLPRegressor\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    \n    model = MLPRegressor(hidden_layer_sizes=(100,), max_iter=500)\n    model.fit(pca_result, adata_int.X)\n    embeddings = model.predict(pca_result)\n\n    distance_matrix = pairwise_distances(embeddings)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8182119163295759,
                "model_info": "",
                "lineage": "82601"
            }
        ],
        "827": [],
        "828": [],
        "829": [],
        "830": [],
        "831": [],
        "832": [],
        "833": [],
        "834": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(\"Structural Assumptions: Sparsity and Low-Rank Representation\")\n    print(f\"Number of highly variable genes: {np.sum(adata_int.var['highly_variable'])}\")\n    print(f\"Shape of PCA matrix: {adata_int.obsm['X_pca'].shape}\")\n    print(f\"Embedding shape: {adata_int.obsm['X_emb'].shape}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Structural Assumptions: Sparsity and Low-Rank Representation\nNumber of highly variable genes: 2000\nShape of PCA matrix: (5000, 50)\nEmbedding shape: (5000, 5000)",
                "lineage": "83401"
            }
        ],
        "835": [],
        "836": [],
        "837": [],
        "838": [],
        "839": [],
        "840": [],
        "841": [],
        "842": [],
        "843": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    print(f\"Processed {adata_int.n_obs} cells and {adata_int.n_vars} genes.\")\n    print(f\"Number of highly variable genes: {np.sum(adata_int.var['highly_variable'])}\")\n    print(f\"Number of components used in PCA: {adata_int.obsm['X_pca'].shape[1]}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Processed 5000 cells and 2000 genes.\nNumber of highly variable genes: 2000\nNumber of components used in PCA: 50",
                "lineage": "84301"
            }
        ],
        "844": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    batch_preservation_metric = np.mean([np.mean(pairwise_distances(adata_int[adata_int.obs['batch'] == b].X)) for b in adata_int.obs['batch'].unique()])\n    adata_int.uns['batch_preservation_metric'] = batch_preservation_metric\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "84401"
            }
        ],
        "845": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.model_selection import KFold\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    kf = KFold(n_splits=5, shuffle=True, random_state=42)\n    scores = []\n    for train_index, test_index in kf.split(adata_int.X):\n        X_train, X_test = adata_int.X[train_index], adata_int.X[test_index]\n        distance_matrix = pairwise_distances(X_train)\n        biological_similarity = np.exp(-distance_matrix)\n        scores.append(np.mean(biological_similarity))\n\n    print(f\"Cross-validation mean score: {np.mean(scores):.4f}, std: {np.std(scores):.4f}\")\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Cross-validation mean score: 0.0003, std: 0.0000",
                "lineage": "84501"
            }
        ],
        "846": [],
        "847": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"].copy()\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "84701"
            }
        ],
        "848": [],
        "849": [],
        "850": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    scaler = StandardScaler()\n    adata_int.X = scaler.fit_transform(adata_int.X)\n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    adata_int.obsm[\"X_pca\"] = pca_result\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8059395475946838,
                "model_info": "",
                "lineage": "85001"
            }
        ],
        "851": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    mean_before = np.mean(adata_int.X, axis=0)\n    std_before = np.std(adata_int.X, axis=0)\n    min_before = np.min(adata_int.X, axis=0)\n    max_before = np.max(adata_int.X, axis=0)\n\n    n_features = adata_int.shape[1]\n    n_components = min(50, n_features)\n    sc.pp.pca(adata_int, n_comps=n_components)\n\n    mean_after = np.mean(adata_int.X, axis=0)\n    std_after = np.std(adata_int.X, axis=0)\n    min_after = np.min(adata_int.X, axis=0)\n    max_after = np.max(adata_int.X, axis=0)\n\n    print(f\"Before scaling: mean={mean_before}, std={std_before}, min={min_before}, max={max_before}\")\n    print(f\"After scaling: mean={mean_after}, std={std_after}, min={min_after}, max={max_after}\")\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity /= biological_similarity.max(axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Before scaling: mean=[-0.0054832  -0.01425546 -0.00476938 ... -0.01652392 -0.01722729\n -0.01213666], std=[0.9317035  0.20150876 0.94215509 ... 0.72250494 0.24816579 0.14292279], min=[-0.33341273 -0.05449624 -0.13809829 ... -0.12121406 -0.16158649\n -0.04856623], max=[10. 10. 10. ... 10. 10. 10.]\nAfter scaling: mean=[-0.0054832  -0.01425546 -0.00476938 ... -0.01652392 -0.01722729\n -0.01213666], std=[0.9317035  0.20150876 0.94215509 ... 0.72250494 0.24816579 0.14292279], min=[-0.33341273 -0.05449624 -0.13809829 ... -0.12121406 -0.16158649\n -0.04856623], max=[10. 10. 10. ... 10. 10. 10.]",
                "lineage": "85101"
            }
        ],
        "852": [],
        "853": [],
        "854": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_components = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity /= biological_similarity.max(axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    print(f\"Processed batches: {adata_int.obs['batch'].unique()}\")\n    print(f\"Number of highly variable genes: {adata_int.var['highly_variable'].sum()}\")\n    print(f\"PCA components shape: {adata_int.obsm['X_pca'].shape}\")\n    print(f\"Embedding shape: {adata_int.obsm['X_emb'].shape}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Processed batches: ['D496', '621B', '637C', 'D503', 'A29', ..., 'A36', '582C', 'A35', 'A37', 'A52']\nLength: 12\nCategories (12, object): ['582C', '621B', '637C', '640C', ..., 'A37', 'A52', 'D496', 'D503']\nNumber of highly variable genes: 2000\nPCA components shape: (5000, 50)\nEmbedding shape: (5000, 5000)",
                "lineage": "85401"
            }
        ],
        "855": [],
        "856": [],
        "857": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    print(\"Configuration Parameters: n_top_genes=2000, batch_key='batch', n_comps=50\")\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Configuration Parameters: n_top_genes=2000, batch_key='batch', n_comps=50",
                "lineage": "85701"
            }
        ],
        "858": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport logging\n\ndef tuso_model(adata):\n    logging.basicConfig(level=logging.INFO)\n    logging.info(f\"Device: {np.__version__}, Scanpy: {sc.__version__}\")\n\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    pre_scaling_mean = adata_int.X.mean()\n    pre_scaling_std = np.std(adata_int.X.A) if hasattr(adata_int.X, 'A') else adata_int.X.std(axis=0).mean()\n    pre_scaling_min = adata_int.X.min()\n    pre_scaling_max = adata_int.X.max()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    post_scaling_mean = adata_int.X.mean()\n    post_scaling_std = np.std(adata_int.X.A) if hasattr(adata_int.X, 'A') else adata_int.X.std(axis=0).mean()\n    post_scaling_min = adata_int.X.min()\n    post_scaling_max = adata_int.X.max()\n\n    logging.info(f\"Pre-scaling: Mean={pre_scaling_mean}, Std={pre_scaling_std}, Min={pre_scaling_min}, Max={pre_scaling_max}\")\n    logging.info(f\"Post-scaling: Mean={post_scaling_mean}, Std={post_scaling_std}, Min={post_scaling_min}, Max={post_scaling_max}\")\n\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    corr_matrix = np.corrcoef(adata_int.X.T)\n    correlated_features = np.argwhere(np.abs(corr_matrix) > 0.9)\n    logging.info(f\"Highly correlated features (threshold=0.9): {correlated_features}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "85801"
            }
        ],
        "859": [],
        "860": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "86001"
            }
        ],
        "861": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    adata_int.uns['batch_preservation'] = np.mean(pairwise_distances(adata_int.obsm[\"X_emb\"][adata_int.obs['batch'] == adata_int.obs['batch'].unique()[0]]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "86101"
            }
        ],
        "862": [],
        "863": [],
        "864": [],
        "865": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_features = adata_int.shape[1]\n    n_components = min(50, n_features)\n    sc.pp.pca(adata_int, n_comps=n_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    batch_counts = adata_int.obs['batch'].value_counts(normalize=True)\n    print(f'Batch Distribution: {batch_counts.to_dict()}')\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.670490659318893,
                "model_info": "Batch Distribution: {'D496': 0.2578, 'D503': 0.2398, '640C': 0.1088, '637C': 0.079, 'A36': 0.0724, 'A29': 0.0542, 'A31': 0.0404, '621B': 0.0368, '582C': 0.0348, 'A35': 0.033, 'A37': 0.0292, 'A52': 0.0138}",
                "lineage": "86501"
            }
        ],
        "866": [],
        "867": [],
        "868": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"High-level configuration parameters: n_top_genes=2000, max_value=10, n_comps=50\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "High-level configuration parameters: n_top_genes=2000, max_value=10, n_comps=50",
                "lineage": "86801"
            }
        ],
        "869": [],
        "870": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.leiden(adata_int)\n    sc.tl.umap(adata_int)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "87001"
            }
        ],
        "871": [],
        "872": [],
        "873": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    print(\"Step: Highly variable genes selected; Count:\", adata_int.shape[1])\n    \n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    sc.pp.log1p(adata_int)\n    print(\"Step: Log-normalization completed.\")\n    \n    sc.pp.combat(adata_int, key='batch')\n    print(\"Step: Batch effect correction applied.\")\n    \n    sc.pp.scale(adata_int, max_value=10)\n    print(\"Step: Data scaled.\")\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    print(\"Step: PCA completed; Components:\", adata_int.obsm[\"X_pca\"].shape[1])\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    print(\"Step: Low-dimensional embedding created.\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Step: Highly variable genes selected; Count: 29335\nStep: Log-normalization completed.\nStep: Batch effect correction applied.\nStep: Data scaled.\nStep: PCA completed; Components: 50\nStep: Low-dimensional embedding created.",
                "lineage": "87301"
            }
        ],
        "874": [],
        "875": [],
        "876": [],
        "877": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    scaler = StandardScaler()\n    adata_int.X = scaler.fit_transform(adata_int.X)\n    pca = PCA(n_components=50)\n    adata_int.obsm[\"X_pca\"] = pca.fit_transform(adata_int.X)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8059395475946838,
                "model_info": "",
                "lineage": "87701"
            }
        ],
        "878": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_features = adata_int.n_vars\n    n_pca_components = min(n_features, 50)\n    sc.pp.pca(adata_int, n_comps=n_pca_components)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Number of trainable parameters: {adata_int.n_vars * adata_int.n_obs}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.670490659318893,
                "model_info": "Number of trainable parameters: 10000000",
                "lineage": "87801"
            }
        ],
        "879": [],
        "880": [],
        "881": [],
        "882": [],
        "883": [],
        "884": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.normalize_total(adata_int, target_sum=1e4)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8170436577040259,
                "model_info": "",
                "lineage": "88401"
            }
        ],
        "885": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.normalize_total(adata_int, target_sum=1e4)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8157319290881531,
                "model_info": "",
                "lineage": "88501"
            }
        ],
        "886": [],
        "887": [],
        "888": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.ensemble import RandomForestClassifier\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    clf = RandomForestClassifier()\n    clf.fit(adata_int.obsm[\"X_emb\"], adata_int.obs[\"batch\"])\n    adata_int.obs[\"predicted_batch\"] = clf.predict(adata_int.obsm[\"X_emb\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "88801"
            }
        ],
        "889": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    n_comps = min(50, adata_int.shape[1])\n    sc.pp.pca(adata_int, n_comps=n_comps)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.670490659318893,
                "model_info": "",
                "lineage": "88901"
            }
        ],
        "890": [],
        "891": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.normalize_total(adata_int, target_sum=1e6)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8157319290957704,
                "model_info": "",
                "lineage": "89101"
            }
        ],
        "892": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=30)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Feature scaling: Min {adata_int.X.min()}, Max {adata_int.X.max()}\")\n    print(f\"High variability genes: {np.sum(adata_int.var['highly_variable'])} selected\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.6704972208600696,
                "model_info": "Feature scaling: Min -10.0, Max 10.0\nHigh variability genes: 2000 selected",
                "lineage": "89201"
            }
        ],
        "893": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.cluster import KMeans\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    adata_int.obsm[\"X_pca\"] = pca_result\n    \n    kmeans = KMeans(n_clusters=len(np.unique(adata_int.obs[\"batch\"])), random_state=0)\n    adata_int.obs['kmeans_labels'] = kmeans.fit_predict(adata_int.obsm[\"X_pca\"])\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8222377841304714,
                "model_info": "",
                "lineage": "89301"
            }
        ],
        "894": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nclass GraphNN(nn.Module):\n    def __init__(self, input_dim, output_dim):\n        super(GraphNN, self).__init__()\n        self.fc1 = nn.Linear(input_dim, 128)\n        self.fc2 = nn.Linear(128, 64)\n        self.fc3 = nn.Linear(64, output_dim)\n\n    def forward(self, x):\n        x = F.relu(self.fc1(x))\n        x = F.relu(self.fc2(x))\n        x = self.fc3(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    model = GraphNN(input_dim=50, output_dim=50)\n    model.eval()\n    with torch.no_grad():\n        embeddings = model(torch.tensor(adata_int.obsm[\"X_pca\"], dtype=torch.float32)).numpy()\n    \n    distance_matrix = pairwise_distances(embeddings)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.6397584684871057,
                "model_info": "",
                "lineage": "89401"
            }
        ],
        "895": [],
        "896": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    if adata_int.n_obs < 1000:\n        print(\"Warning: Training data volume is insufficient for robust integration.\")\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Assumptions: Linearity assumed, homoscedasticity checked, multicollinearity not assessed.\")\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Assumptions: Linearity assumed, homoscedasticity checked, multicollinearity not assessed.",
                "lineage": "89601"
            }
        ],
        "897": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8128118683228713,
                "model_info": "",
                "lineage": "89701"
            }
        ],
        "898": [],
        "899": [],
        "900": [],
        "901": [],
        "902": [],
        "903": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    adata_int.uns['biological_relevance'] = np.mean(biological_similarity, axis=1)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "90301"
            }
        ],
        "904": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"].copy()\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "90401"
            }
        ],
        "905": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7870341448628095,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "90501"
            }
        ],
        "906": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    low_confidence_threshold = 0.5\n    low_confidence_predictions = np.sum(biological_similarity < low_confidence_threshold)\n    total_predictions = biological_similarity.size\n    low_confidence_percentage = (low_confidence_predictions / total_predictions) * 100\n    print(f\"Low-confidence predictions: {low_confidence_percentage:.2f}%\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Low-confidence predictions: 99.98%",
                "lineage": "90601"
            }
        ],
        "907": [],
        "908": [],
        "909": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\n\ndef tuso_model(adata):\n    start_time = time.time()\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    print(f\"Time after selecting highly variable genes: {time.time() - start_time:.2f}s\")\n    \n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    print(f\"Time after log transformation: {time.time() - start_time:.2f}s\")\n    \n    sc.pp.combat(adata_int, key='batch')\n    print(f\"Time after combat batch correction: {time.time() - start_time:.2f}s\")\n    \n    sc.pp.scale(adata_int, max_value=10)\n    print(f\"Time after scaling: {time.time() - start_time:.2f}s\")\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    print(f\"Time after PCA: {time.time() - start_time:.2f}s\")\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Total run time: {time.time() - start_time:.2f}s\")\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Time after selecting highly variable genes: 0.57s\nTime after log transformation: 0.65s\nTime after combat batch correction: 1.86s\nTime after scaling: 1.93s\nTime after PCA: 3.96s\nTotal run time: 4.20s",
                "lineage": "90901"
            }
        ],
        "910": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "91001"
            }
        ],
        "911": [],
        "912": [],
        "913": [],
        "914": [],
        "915": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=min(30, adata_int.shape[1]-1))\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Batch Integration Summary: {adata_int.obs['batch'].value_counts()}\")\n    print(f\"Highly Variable Genes: {adata_int.var['highly_variable'].sum()}\")\n    print(f\"Number of PCs: {adata_int.obsm['X_pca'].shape[1]}\")\n    print(f\"Embedding Shape: {adata_int.obsm['X_emb'].shape}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.6704972208600696,
                "model_info": "Batch Integration Summary: batch\nD496    1289\nD503    1199\n640C     544\n637C     395\nA36      362\nA29      271\nA31      202\n621B     184\n582C     174\nA35      165\nA37      146\nA52       69\nName: count, dtype: int64\nHighly Variable Genes: 2000\nNumber of PCs: 30\nEmbedding Shape: (5000, 5000)",
                "lineage": "91501"
            }
        ],
        "916": [],
        "917": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=30)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    highly_correlated_features = np.corrcoef(adata_int.X.T)\n    threshold = 0.8\n    correlated_pairs = np.argwhere(np.abs(highly_correlated_features) > threshold)\n    for i, j in correlated_pairs:\n        if i < j:\n            print(f\"Highly correlated features: {adata_int.var_names[i]} and {adata_int.var_names[j]}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.6704972208600696,
                "model_info": "Highly correlated features: ENSG00000269102 and ENSG00000159409\nHighly correlated features: ENSG00000256897 and ENSG00000265393\nHighly correlated features: ENSG00000253515 and ENSG00000273335\nHighly correlated features: ENSG00000253515 and ENSG00000148795\nHighly correlated features: ENSG00000261762 and ENSG00000177294\nHighly correlated features: ENSG00000267316 and ENSG00000124302\nHighly correlated features: ENSG00000256008 and ENSG00000153303\nHighly correlated features: ENSG00000256008 and ENSG00000229433\nHighly correlated features: ENSG00000073670 and ENSG00000206043\nHighly correlated features: ENSG00000273335 and ENSG00000148795\nHighly correlated features: ENSG00000153064 and ENSG00000156738\nHighly correlated features: ENSG00000173372 and ENSG00000173369\nHighly correlated features: ENSG00000173372 and ENSG00000159189\nHighly correlated features: ENSG00000173369 and ENSG00000159189\nHighly correlated features: ENSG00000163751 and ENSG00000179348\nHighly correlated features: ENSG00000163751 and ENSG00000149534\nHighly correlated features: ENSG00000163751 and ENSG00000172236\nHighly correlated features: ENSG00000163751 and ENSG00000197253\nHighly correlated features: ENSG00000049540 and ENSG00000264424\nHighly correlated features: ENSG00000153303 and ENSG00000229433\nHighly correlated features: ENSG00000179348 and ENSG00000172236\nHighly correlated features: ENSG00000169704 and ENSG00000250334\nHighly correlated features: ENSG00000169704 and ENSG00000204424\nHighly correlated features: ENSG00000233101 and ENSG00000016082\nHighly correlated features: ENSG00000233101 and ENSG00000198125\nHighly correlated features: ENSG00000233101 and ENSG00000142661\nHighly correlated features: ENSG00000233101 and ENSG00000135406\nHighly correlated features: ENSG00000233101 and ENSG00000079689\nHighly correlated features: ENSG00000233101 and ENSG00000163497\nHighly correlated features: ENSG00000016082 and ENSG00000198125\nHighly correlated features: ENSG00000016082 and ENSG00000142661\nHighly correlate",
                "lineage": "91701"
            }
        ],
        "918": [],
        "919": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=30)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    distance_matrix = distance_matrix / np.max(distance_matrix)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(\"Constraints: Highly variable genes (top 2000), normalized input, PCA components (30), scaled max value (10)\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.6704972208600696,
                "model_info": "Constraints: Highly variable genes (top 2000), normalized input, PCA components (30), scaled max value (10)",
                "lineage": "91901"
            }
        ],
        "920": [],
        "921": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.umap(adata_int)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "92101"
            }
        ],
        "922": [],
        "923": [],
        "924": [],
        "925": [],
        "926": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    low_confidence_threshold = 0.5\n    low_confidence_count = np.sum(adata_int.X < low_confidence_threshold)\n    total_count = adata_int.X.shape[0] * adata_int.X.shape[1]\n    low_confidence_percentage = (low_confidence_count / total_count) * 100\n    print(f\"Low-confidence predictions: {low_confidence_percentage:.2f}%\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Low-confidence predictions: 96.56%",
                "lineage": "92601"
            }
        ],
        "927": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.leiden(adata_int)\n    \n    cluster_labels = adata_int.obs['leiden'].astype(int)\n    sc.tl.pca(adata_int)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7882508730786731,
                "model_info": "",
                "lineage": "92701"
            }
        ],
        "928": [],
        "929": [],
        "930": [],
        "931": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata, learning_rate=0.001, weight_decay=0.0001, batch_size=128):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "93101"
            }
        ],
        "932": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    start_time = time.time()\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    loading_time = time.time() - start_time\n    print(f\"Data-loading time for highly_variable_genes: {loading_time:.4f} seconds\")\n\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    start_time = time.time()\n    sc.pp.log1p(adata_int)\n    loading_time = time.time() - start_time\n    print(f\"Data-loading time for log1p: {loading_time:.4f} seconds\")\n\n    start_time = time.time()\n    sc.pp.combat(adata_int, key='batch')\n    loading_time = time.time() - start_time\n    print(f\"Data-loading time for combat: {loading_time:.4f} seconds\")\n\n    start_time = time.time()\n    sc.pp.scale(adata_int, max_value=10)\n    loading_time = time.time() - start_time\n    print(f\"Data-loading time for scale: {loading_time:.4f} seconds\")\n\n    start_time = time.time()\n    sc.pp.pca(adata_int, n_comps=50)\n    loading_time = time.time() - start_time\n    print(f\"Data-loading time for PCA: {loading_time:.4f} seconds\")\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Data-loading time for highly_variable_genes: 0.4379 seconds\nData-loading time for log1p: 0.0920 seconds\nData-loading time for combat: 1.1511 seconds\nData-loading time for scale: 0.0687 seconds\nData-loading time for PCA: 2.3739 seconds",
                "lineage": "93201"
            }
        ],
        "933": [],
        "934": [],
        "935": [],
        "936": [],
        "937": [],
        "938": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    batch_distances = pairwise_distances(adata_int.obsm[\"X_emb\"], metric='euclidean')\n    mean_within_batch_distance = np.mean(batch_distances[adata_int.obs['batch'].values[:, None] == adata_int.obs['batch'].values])\n    adata_int.uns['mean_within_batch_distance'] = mean_within_batch_distance\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "93801"
            }
        ],
        "939": [],
        "940": [],
        "941": [],
        "942": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    performance_snapshot = {\n        \"num_cells\": adata_int.n_obs,\n        \"num_genes\": adata_int.n_vars,\n        \"num_batches\": adata_int.obs['batch'].nunique(),\n        \"nan_count\": np.isnan(adata_int.X).sum(),\n        \"inf_count\": np.isinf(adata_int.X).sum()\n    }\n    print(performance_snapshot)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "{'num_cells': 5000, 'num_genes': 2000, 'num_batches': 12, 'nan_count': 0, 'inf_count': 0}",
                "lineage": "94201"
            }
        ],
        "943": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    start_time = time.time()\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    print(f\"Time for highly_variable_genes: {time.time() - start_time:.2f} seconds\")\n    \n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    start_time = time.time()\n    sc.pp.log1p(adata_int)\n    print(f\"Time for log1p: {time.time() - start_time:.2f} seconds\")\n    \n    start_time = time.time()\n    sc.pp.combat(adata_int, key='batch')\n    print(f\"Time for combat: {time.time() - start_time:.2f} seconds\")\n    \n    start_time = time.time()\n    sc.pp.scale(adata_int, max_value=10)\n    print(f\"Time for scale: {time.time() - start_time:.2f} seconds\")\n    \n    start_time = time.time()\n    sc.pp.pca(adata_int, n_comps=30)\n    print(f\"Time for PCA: {time.time() - start_time:.2f} seconds\")\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    max_distance = distance_matrix.max()\n    normalized_distance_matrix = distance_matrix / max_distance\n    biological_similarity = np.exp(-normalized_distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.6704972208600696,
                "model_info": "Time for highly_variable_genes: 0.44 seconds\nTime for log1p: 0.09 seconds\nTime for combat: 1.36 seconds\nTime for scale: 0.07 seconds\nTime for PCA: 1.42 seconds",
                "lineage": "94301"
            }
        ],
        "944": [],
        "945": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    if adata_int.shape[0] < 1000:\n        print(\"Warning: Insufficient training data volume.\")\n    \n    class_counts_before = adata_int.obs['batch'].value_counts()\n    print(\"Class balance before resampling:\", class_counts_before.to_dict())\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    class_counts_after = adata_int.obs['batch'].value_counts()\n    print(\"Class balance after resampling:\", class_counts_after.to_dict())\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Class balance before resampling: {'D496': 1289, 'D503': 1199, '640C': 544, '637C': 395, 'A36': 362, 'A29': 271, 'A31': 202, '621B': 184, '582C': 174, 'A35': 165, 'A37': 146, 'A52': 69}\nClass balance after resampling: {'D496': 1289, 'D503': 1199, '640C': 544, '637C': 395, 'A36': 362, 'A29': 271, 'A31': 202, '621B': 184, '582C': 174, 'A35': 165, 'A37': 146, 'A52': 69}",
                "lineage": "94501"
            }
        ],
        "946": [],
        "947": [],
        "948": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"], metric='correlation')\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7774691598345957,
                "model_info": "",
                "lineage": "94801"
            }
        ],
        "949": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print({\n        \"Independence Assumption\": \"Assumed independence of batches.\",\n        \"Normality Assumption\": \"Data transformed to log scale and scaled.\",\n        \"Highly Variable Genes\": adata_int.var['highly_variable'].sum(),\n        \"PCA Components\": 50\n    })\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "{'Independence Assumption': 'Assumed independence of batches.', 'Normality Assumption': 'Data transformed to log scale and scaled.', 'Highly Variable Genes': 2000, 'PCA Components': 50}",
                "lineage": "94901"
            }
        ],
        "950": [],
        "951": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    validation_metrics = {\n        \"num_cells\": adata_int.n_obs,\n        \"num_genes\": adata_int.n_vars,\n        \"num_batches\": adata_int.obs['batch'].nunique(),\n        \"top_genes\": adata_int.var['highly_variable'].sum()\n    }\n    print(validation_metrics)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.670490659318893,
                "model_info": "{'num_cells': 5000, 'num_genes': 2000, 'num_batches': 12, 'top_genes': 2000}",
                "lineage": "95101"
            }
        ],
        "952": [],
        "953": [],
        "954": [],
        "955": [],
        "956": [],
        "957": [],
        "958": [],
        "959": [],
        "960": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    if \"X_pca\" not in adata_int.obsm:\n        sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.7863512147403943,
                "model_info": "",
                "lineage": "96001"
            }
        ],
        "961": [],
        "962": [],
        "963": [],
        "964": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    final_metrics = {\n        \"n_batches\": adata_int.obs['batch'].nunique(),\n        \"n_genes\": adata_int.n_vars,\n        \"n_cells\": adata_int.n_obs,\n        \"pca_variance_ratio\": adata_int.uns['pca']['variance_ratio'][:50].tolist(),\n    }\n    print(final_metrics)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "{'n_batches': 12, 'n_genes': 2000, 'n_cells': 5000, 'pca_variance_ratio': [0.06512567612903396, 0.02810437568001084, 0.01881424599111393, 0.014732972629201186, 0.010740282470949108, 0.009733561632208221, 0.008133407869675247, 0.006849649821190766, 0.0060165096946250204, 0.005413336826262361, 0.004836778253623883, 0.004689906701692908, 0.004319933065962633, 0.004055777409871729, 0.0039650865409370534, 0.0037292245075846967, 0.0036139994592015836, 0.0033079214506548833, 0.0031762151259060742, 0.0030898681231808765, 0.002927710502196932, 0.002810170392676279, 0.002719943690660215, 0.002586831072863979, 0.002490218961686466, 0.00242021227913582, 0.0022552655094597073, 0.0022104929785600547, 0.0021648786363005833, 0.002148551268971962, 0.0021354771505813015, 0.0020690581185308647, 0.00199943182799706, 0.0019595485844904163, 0.0019540758084860953, 0.0019266017916236785, 0.0019102106772800516, 0.0018675877936117438, 0.0018183197260578515, 0.0018083811827012042, 0.0017636513999419639, 0.0017476833260466284, 0.0017288660428736785, 0.0017262300841272198, 0.0016970172101551796, 0.001675208203636478, 0.001656993397391688, 0.001632276859626854, 0.0016288437112161472, 0.001610680492460784]}",
                "lineage": "96401"
            }
        ],
        "965": [],
        "966": [],
        "967": [],
        "968": [],
        "969": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "96901"
            }
        ],
        "970": [],
        "971": [],
        "972": [],
        "973": [],
        "974": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=30)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"NaN counts: {np.isnan(adata_int.X).sum()}, Inf counts: {np.isinf(adata_int.X).sum()}\")\n    print(f\"Highly variable genes: {adata_int.var['highly_variable'].sum()}\")\n    print(f\"PCA components used: {adata_int.obsm['X_pca'].shape[1]}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.6704972208600696,
                "model_info": "NaN counts: 0, Inf counts: 0\nHighly variable genes: 2000\nPCA components used: 30",
                "lineage": "97401"
            }
        ],
        "975": [],
        "976": [],
        "977": [],
        "978": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    print(f\"Initial data shape: {adata_int.shape}\")\n    print(f\"Batch categories: {adata_int.obs['batch'].unique()}\")\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_emb\"] / np.linalg.norm(adata_int.obsm[\"X_emb\"], axis=1, keepdims=True)\n    \n    print(f\"Final data shape: {adata_int.shape}\")\n    print(f\"Number of highly variable genes: {adata_int.var['highly_variable'].sum()}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7673304812763542,
                "model_info": "Initial data shape: (5000, 29335)\nBatch categories: ['D496', '621B', '637C', 'D503', 'A29', ..., 'A36', '582C', 'A35', 'A37', 'A52']\nLength: 12\nCategories (12, object): ['582C', '621B', '637C', '640C', ..., 'A37', 'A52', 'D496', 'D503']\nFinal data shape: (5000, 2000)\nNumber of highly variable genes: 2000",
                "lineage": "97801"
            }
        ],
        "979": [],
        "980": [],
        "981": [],
        "982": [],
        "983": [],
        "984": [],
        "985": [],
        "986": [],
        "987": [],
        "988": [],
        "989": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom tqdm import tqdm\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    tqdm.write(\"Log transformation completed.\")\n    \n    sc.pp.combat(adata_int, key='batch')\n    tqdm.write(\"Combat batch correction completed.\")\n    \n    sc.pp.scale(adata_int, max_value=10)\n    tqdm.write(\"Scaling completed.\")\n    \n    sc.pp.pca(adata_int, n_comps=30)\n    tqdm.write(\"PCA completed.\")\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity / np.linalg.norm(biological_similarity, axis=1, keepdims=True)\n    \n    tqdm.write(\"Embedding calculated.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7598975631697069,
                "model_info": "Log transformation completed.\nCombat batch correction completed.\nScaling completed.\nPCA completed.\nEmbedding calculated.",
                "lineage": "98901"
            }
        ],
        "990": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    print(\"Configuration Parameters: n_top_genes=2000, log1p=True, combat=True, scale_max_value=10, n_comps=50\")\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=30)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    normalized_distance_matrix = distance_matrix / np.max(distance_matrix)\n    biological_similarity = np.exp(-normalized_distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.6704972208600696,
                "model_info": "Configuration Parameters: n_top_genes=2000, log1p=True, combat=True, scale_max_value=10, n_comps=50",
                "lineage": "99001"
            }
        ],
        "991": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.umap(adata_int)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "99101"
            }
        ],
        "992": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    batch_structure = adata_int.obs[\"batch\"].values\n    unique_batches = np.unique(batch_structure)\n    batch_distances = {}\n\n    for batch in unique_batches:\n        indices = np.where(batch_structure == batch)[0]\n        if len(indices) > 1:\n            batch_distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"][indices])\n            batch_distances[batch] = np.mean(batch_distance_matrix)\n\n    adata_int.uns['batch_preservation'] = batch_distances\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "99201"
            }
        ],
        "993": [],
        "994": [],
        "995": [],
        "996": [],
        "997": [],
        "998": [],
        "999": [],
        "1000": [],
        "1001": [],
        "1002": [],
        "1003": [],
        "1004": [],
        "1005": [],
        "1006": [],
        "1007": [],
        "1008": [],
        "1009": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(\"Hyper-parameter settings: n_top_genes=2000, n_comps=50\")\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Hyper-parameter settings: n_top_genes=2000, n_comps=50",
                "lineage": "100901"
            }
        ],
        "1010": [],
        "1011": [],
        "1012": [],
        "1013": [],
        "1014": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.linear_model import LogisticRegression\nimport pandas as pd\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    model = LogisticRegression()\n    model.fit(adata_int.obsm[\"X_emb\"], adata_int.obs[\"batch\"])\n    \n    importance = np.abs(model.coef_[0])\n    top_k_indices = np.argsort(importance)[-10:]\n    \n    if top_k_indices.max() < adata_int.var.shape[0]:\n        top_k_features = adata_int.var.index[top_k_indices]\n    else:\n        top_k_features = adata_int.var.index[:10]\n\n    print(\"Top-k feature importances:\", top_k_features.tolist())\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Top-k feature importances: ['ENSG00000175899', 'ENSG00000166535', 'ENSG00000008311', 'ENSG00000165029', 'ENSG00000004846', 'ENSG00000108846', 'ENSG00000267457', 'ENSG00000275936', 'ENSG00000268812', 'ENSG00000278765']",
                "lineage": "101401"
            }
        ],
        "1015": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.neural_network import MLPRegressor\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    model = MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)\n    model.fit(adata_int.obsm[\"X_pca\"], adata_int.X)\n    embeddings = model.predict(adata_int.obsm[\"X_pca\"])\n    \n    distance_matrix = pairwise_distances(embeddings)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8203238554099799,
                "model_info": "",
                "lineage": "101501"
            }
        ],
        "1016": [],
        "1017": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n\n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    explained_variance = pca.explained_variance_ratio_.cumsum()\n    n_components = np.argmax(explained_variance >= 0.95) + 1\n    pca = PCA(n_components=n_components)\n    adata_int.obsm[\"X_pca\"] = pca.fit_transform(adata_int.X)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.39917057702241365,
                "model_info": "",
                "lineage": "101701"
            }
        ],
        "1018": [],
        "1019": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\nimport resource\n\ndef tuso_model(adata):\n    start_time = time.time()\n    mem_usage_start = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    mem_usage_end = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n    elapsed_time = time.time() - start_time\n\n    print(f\"Elapsed time: {elapsed_time:.2f} seconds\")\n    print(f\"Memory usage: {mem_usage_end - mem_usage_start} KB\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Elapsed time: 4.43 seconds\nMemory usage: 549324 KB",
                "lineage": "101901"
            }
        ],
        "1020": [],
        "1021": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    print(f\"Initial dataset size: {adata_int.shape[0]} cells, {adata_int.shape[1]} genes\")\n    print(f\"Batch distribution: {adata_int.obs['batch'].value_counts()}\")\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    \n    print(f\"After combat: {adata_int.shape[0]} cells, {adata_int.shape[1]} genes\")\n\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    misclassified_instances = []  \n    predicted_probabilities = []  \n\n    for instance, probability in zip(misclassified_instances, predicted_probabilities):\n        print(f\"Instance: {instance}, Predicted Probability: {probability}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Initial dataset size: 5000 cells, 29335 genes\nBatch distribution: batch\nD496    1289\nD503    1199\n640C     544\n637C     395\nA36      362\nA29      271\nA31      202\n621B     184\n582C     174\nA35      165\nA37      146\nA52       69\nName: count, dtype: int64\nAfter combat: 5000 cells, 2000 genes",
                "lineage": "102101"
            }
        ],
        "1022": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.umap(adata_int)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "102201"
            }
        ],
        "1023": [],
        "1024": [],
        "1025": [],
        "1026": [],
        "1027": [],
        "1028": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.decomposition import FastICA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    ica = FastICA(n_components=50)\n    adata_int.obsm[\"X_ica\"] = ica.fit_transform(adata_int.X)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_ica\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8132083573260761,
                "model_info": "",
                "lineage": "102801"
            }
        ],
        "1029": [],
        "1030": [],
        "1031": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import FastICA\nfrom sklearn.metrics import pairwise_distances\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    ica = FastICA(n_components=50, random_state=42)\n    ica_result = ica.fit_transform(adata_int.X)\n    adata_int.obsm[\"X_pca\"] = ica_result\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8132083573260761,
                "model_info": "",
                "lineage": "103101"
            }
        ],
        "1032": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.decomposition import PCA\nimport umap\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    pca = PCA(n_components=50)\n    adata_int.obsm[\"X_pca\"] = pca.fit_transform(adata_int.X)\n    \n    umap_model = umap.UMAP()\n    adata_int.obsm[\"X_emb\"] = umap_model.fit_transform(adata_int.obsm[\"X_pca\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.6360163944757109,
                "model_info": "",
                "lineage": "103201"
            }
        ],
        "1033": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    if 'model_layers' in adata_int.uns:\n        print(\"Frozen layers:\", [layer for layer in adata_int.uns['model_layers'] if not layer.trainable])\n        print(\"Trainable layers:\", [layer for layer in adata_int.uns['model_layers'] if layer.trainable])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "103301"
            }
        ],
        "1034": [],
        "1035": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity /= np.linalg.norm(biological_similarity, axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    missing_counts = np.isnan(adata_int.X).sum(axis=0)\n    print(\"Missing value counts per feature post-imputation:\", missing_counts)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7673304812763542,
                "model_info": "Missing value counts per feature post-imputation: [0 0 0 ... 0 0 0]",
                "lineage": "103501"
            }
        ],
        "1036": [],
        "1037": [],
        "1038": [],
        "1039": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "103901"
            }
        ],
        "1040": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    low_confidence_threshold = 0.5\n    low_confidence_count = np.sum(np.diag(biological_similarity) < low_confidence_threshold)\n    total_predictions = biological_similarity.shape[0]\n    low_confidence_percentage = (low_confidence_count / total_predictions) * 100\n    \n    print(f\"Low-confidence predictions: {low_confidence_percentage:.2f}%\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Low-confidence predictions: 0.00%",
                "lineage": "104001"
            }
        ],
        "1041": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    sc.pp.neighbors(adata_int, use_rep='X_emb', n_neighbors=10)\n    sc.tl.leiden(adata_int)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "",
                "lineage": "104101"
            }
        ],
        "1042": [],
        "1043": [],
        "1044": [],
        "1045": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Data completeness assumption: Missing value tolerance set to {np.isnan(adata_int.X).sum() / adata_int.X.size * 100:.2f}%\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Data completeness assumption: Missing value tolerance set to 0.00%",
                "lineage": "104501"
            }
        ],
        "1046": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.normalize_total(adata_int, target_sum=1e6)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8193556020184296,
                "model_info": "",
                "lineage": "104601"
            }
        ],
        "1047": [],
        "1048": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=30)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.linalg.norm(distance_matrix, axis=1, keepdims=True))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    best_hyperparams = {'n_top_genes': 2000, 'n_comps': 30}  \n    print(\"Best Hyper-parameter Configuration:\", best_hyperparams)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7331448420101406,
                "model_info": "Best Hyper-parameter Configuration: {'n_top_genes': 2000, 'n_comps': 30}",
                "lineage": "104801"
            }
        ],
        "1049": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    print(f\"Data shapes - X: {adata_int.X.shape}, X_pca: {adata_int.obsm['X_pca'].shape}, X_emb: {adata_int.obsm['X_emb'].shape}\")\n    print(f\"Data types - X: {type(adata_int.X)}, X_pca: {type(adata_int.obsm['X_pca'])}, X_emb: {type(adata_int.obsm['X_emb'])}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8224842395868316,
                "model_info": "Data shapes - X: (5000, 2000), X_pca: (5000, 50), X_emb: (5000, 5000)\nData types - X: <class 'numpy.ndarray'>, X_pca: <class 'numpy.ndarray'>, X_emb: <class 'numpy.ndarray'>",
                "lineage": "104901"
            }
        ],
        "1050": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "105001"
            }
        ],
        "1051": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "105101"
            }
        ],
        "1052": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    z_scores = (adata_int.X - np.mean(adata_int.X, axis=0)) / np.std(adata_int.X, axis=0)\n    outliers = np.sum(np.abs(z_scores) > 3, axis=0)\n    print(f'Detected outliers count per gene: {outliers}')\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    \n    biological_similarity = biological_similarity / np.max(biological_similarity)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Detected outliers count per gene: [101   2  62 ...  34   3   1]",
                "lineage": "105201"
            }
        ],
        "1053": [],
        "1054": [],
        "1055": [],
        "1056": [],
        "1057": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.816295407406462,
                "model_info": "",
                "lineage": "105701"
            }
        ],
        "1058": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom scipy.stats import zscore\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity /= np.linalg.norm(biological_similarity, axis=1, keepdims=True)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    z_scores = zscore(adata_int.X, axis=0)\n    outliers = np.sum(np.abs(z_scores) > 3, axis=0)\n    print(f\"Detected outliers (z-score > 3): {outliers}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.769344024003332,
                "model_info": "Detected outliers (z-score > 3): [101   2  62 ...  34   3   1]",
                "lineage": "105801"
            }
        ],
        "1059": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    biological_similarity = np.exp(-distance_matrix)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "105901"
            }
        ],
        "1060": [],
        "1061": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "106101"
            }
        ],
        "1062": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.feature_selection import SelectKBest, f_classif\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=30)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    if adata_int.shape[0] < 1000:\n        print(\"Warning: Insufficient training data volume for batch integration.\")\n\n    feature_selector = SelectKBest(score_func=f_classif, k=10)\n    feature_selector.fit(adata_int.X[:, adata_int.var['highly_variable']], adata_int.obs['batch'])\n    top_k_features = feature_selector.get_support(indices=True)\n    print(\"Top-k feature importances:\", top_k_features)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.6841465560051665,
                "model_info": "Top-k feature importances: [  20   74  164 1079 1092 1093 1586 1835 1887 1928]",
                "lineage": "106201"
            }
        ],
        "1063": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n\n    sc.pp.normalize_total(adata_int, target_sum=1e4)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8193556020168865,
                "model_info": "",
                "lineage": "106301"
            }
        ],
        "1064": [],
        "1065": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import RandomForestClassifier\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    if 'predicted_labels' in adata_int.obs and 'true_labels' in adata_int.obs:\n        misclassified_instances = adata_int[adata_int.obs['predicted_labels'] != adata_int.obs['true_labels']]\n        predicted_probabilities = adata_int.obs['predicted_probabilities'][misclassified_instances.obs.index]\n        \n        print(\"Misclassified Instances with Predicted Probabilities:\")\n        for instance, prob in zip(misclassified_instances.obs.index, predicted_probabilities):\n            print(f\"Instance: {instance}, Predicted Probability: {prob}\")\n\n    if 'true_labels' in adata_int.obs:\n        X = adata_int.X\n        y = adata_int.obs['true_labels']\n        model = RandomForestClassifier()\n        scores = cross_val_score(model, X, y, cv=5)\n        print(f\"Cross-validation mean score: {scores.mean():.4f}, std: {scores.std():.4f}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "106501"
            }
        ],
        "1066": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.neural_network import MLPRegressor\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    model = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', max_iter=1000)\n    model.fit(adata_int.obsm[\"X_pca\"], adata_int.X)\n    embeddings = model.predict(adata_int.obsm[\"X_pca\"])\n    \n    distance_matrix = pairwise_distances(embeddings)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8176262336263355,
                "model_info": "",
                "lineage": "106601"
            }
        ],
        "1067": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"], metric='euclidean')\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity /= biological_similarity.max() if biological_similarity.max() > 0 else 1\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f'Parameter change magnitude: {np.linalg.norm(adata_int.X - adata_int.layers[\"normalized\"])}')\n    print(f'Number of highly variable genes: {np.sum(adata_int.var[\"highly_variable\"])}')\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Parameter change magnitude: 1885.5406196065685\nNumber of highly variable genes: 2000",
                "lineage": "106701"
            }
        ],
        "1068": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"], metric='euclidean')\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity /= biological_similarity.max()\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Dataset statistics: {adata_int.shape[0]} cells, {adata_int.shape[1]} genes, {adata_int.obs['batch'].nunique()} batches\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Dataset statistics: 5000 cells, 2000 genes, 12 batches",
                "lineage": "106801"
            }
        ],
        "1069": [],
        "1070": [],
        "1071": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances, silhouette_samples\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    silhouette_scores = []\n    for batch in adata_int.obs['batch'].unique():\n        batch_data = adata_int[adata_int.obs['batch'] == batch]\n        if batch_data.n_obs > 1:\n            batch_distances = pairwise_distances(batch_data.obsm[\"X_pca\"])\n            labels = batch_data.obs['batch'].values\n            if len(np.unique(labels)) > 1:\n                silhouette_score = np.mean(silhouette_samples(batch_distances, labels))\n                silhouette_scores.append(silhouette_score)\n\n    adata_int.uns['silhouette_scores'] = silhouette_scores\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "107101"
            }
        ],
        "1072": [],
        "1073": [],
        "1074": [],
        "1075": [],
        "1076": [],
        "1077": [],
        "1078": [],
        "1079": [],
        "1080": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport logging\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    if 'batch' in adata_int.obs:\n        batches = adata_int.obs['batch'].unique()\n        for batch in batches:\n            batch_data = adata_int[adata_int.obs['batch'] == batch]\n            if len(batch_data) < 10:  # arbitrary threshold for diagnostics\n                logging.warning(f\"Batch {batch} has insufficient data points for reliable integration.\")\n    \n    correlation_matrix = np.corrcoef(adata_int.X.T)\n    correlated_features = np.where(np.abs(correlation_matrix) > 0.9)  # threshold for high correlation\n    for i, j in zip(*correlated_features):\n        if i < j:\n            logging.info(f\"Highly correlated features: {adata_int.var_names[i]} and {adata_int.var_names[j]}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "108001"
            }
        ],
        "1081": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    validation_metric = np.mean(np.diagonal(biological_similarity))\n    print(f'Validation Metric (Mean Similarity): {validation_metric:.4f}')\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Validation Metric (Mean Similarity): 1.0000",
                "lineage": "108101"
            }
        ],
        "1082": [],
        "1083": [],
        "1084": [],
        "1085": [],
        "1086": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.cluster import KMeans\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    kmeans = KMeans(n_clusters=len(set(adata_int.obs['batch'])), random_state=42)\n    adata_int.obs['kmeans_labels'] = kmeans.fit_predict(biological_similarity)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "108601"
            }
        ],
        "1087": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.leiden(adata_int)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "108701"
            }
        ],
        "1088": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    mean_pairwise_distance = np.mean(pairwise_distances(adata_int.obsm[\"X_emb\"]))\n    adata_int.uns[\"mean_pairwise_distance\"] = mean_pairwise_distance\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "108801"
            }
        ],
        "1089": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=30)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    if not np.all(np.isfinite(adata_int.X)):\n        print(\"Warning: Data contains NaN or infinite values.\")\n    if len(set(adata_int.obs['batch'])) < 2:\n        print(\"Warning: Less than 2 batches detected for integration.\")\n    \n    print(f\"Processed {adata_int.n_obs} cells across {len(set(adata_int.obs['batch']))} batches.\")\n    print(f\"Highly variable genes selected: {adata_int.var['highly_variable'].sum()}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.6841465560051665,
                "model_info": "Processed 5000 cells across 12 batches.\nHighly variable genes selected: 2000",
                "lineage": "108901"
            }
        ],
        "1090": [],
        "1091": [],
        "1092": [],
        "1093": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    z_scores = (adata_int.X - np.mean(adata_int.X, axis=0)) / np.std(adata_int.X, axis=0)\n    outliers = np.sum(np.abs(z_scores) > 3, axis=0)\n    print(f\"Detected outliers: {outliers}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Detected outliers: [101   2  62 ...  34   3   1]",
                "lineage": "109301"
            }
        ],
        "1094": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.linear_model import LogisticRegression\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    biological_similarity /= np.max(biological_similarity)\n    \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    X = adata_int.obsm[\"X_emb\"]\n    y = adata_int.obs[\"batch\"]\n    model = LogisticRegression(max_iter=1000)\n    scores = cross_val_score(model, X, y, cv=5)\n    \n    print(f\"Cross-validation mean score: {scores.mean():.4f}, std: {scores.std():.4f}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Cross-validation mean score: 0.3216, std: 0.0069",
                "lineage": "109401"
            }
        ],
        "1095": [],
        "1096": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.log1p(adata_int)\n    sc.pp.scale(adata_int, max_value=10)\n    \n    if np.any(np.isnan(adata_int.X)):\n        adata_int.X = np.nan_to_num(adata_int.X)\n\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8214074621955673,
                "model_info": "",
                "lineage": "109601"
            }
        ],
        "1097": [],
        "1098": [],
        "1099": [],
        "1100": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport umap\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    umap_model = umap.UMAP()\n    adata_int.obsm[\"X_emb\"] = umap_model.fit_transform(adata_int.obsm[\"X_pca\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.6387920235731556,
                "model_info": "",
                "lineage": "110001"
            }
        ],
        "1101": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]  # Direct assignment of PCA coordinates\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "110101"
            }
        ],
        "1102": [],
        "1103": [],
        "1104": [],
        "1105": [],
        "1106": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7870341448628095,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "110601"
            }
        ],
        "1107": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    noise = np.random.normal(0, 0.1, adata_int.obsm[\"X_pca\"].shape)\n    adata_int.obsm[\"X_pca\"] += noise\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8160147519839923,
                "model_info": "",
                "lineage": "110701"
            }
        ],
        "1108": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8133691847928266,
                "model_info": "",
                "lineage": "110801"
            }
        ],
        "1109": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "110901"
            }
        ],
        "1110": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\nimport resource\n\ndef tuso_model(adata):\n    start_time = time.time()\n    mem_usage_start = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n    \n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    end_time = time.time()\n    mem_usage_end = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n    print(f\"Time taken: {end_time - start_time:.2f}s, Memory usage: {mem_usage_end - mem_usage_start}KB\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Time taken: 7.41s, Memory usage: 1612432KB",
                "lineage": "111001"
            }
        ],
        "1111": [],
        "1112": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom scipy.cluster.hierarchy import linkage, fcluster\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    \n    linkage_matrix = linkage(1 - biological_similarity, method='ward')\n    adata_int.obs['subpopulation'] = fcluster(linkage_matrix, t=1.5, criterion='distance')\n    \n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "111201"
            }
        ],
        "1113": [],
        "1114": [],
        "1115": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=30)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"], metric='euclidean')\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    print(\"Checkpoint: PCA completed, number of components:\", adata_int.obsm[\"X_pca\"].shape[1])\n    print(\"Checkpoint: Batch integration completed, unique batches:\", adata_int.obs['batch'].nunique())\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.6841465560051665,
                "model_info": "Checkpoint: PCA completed, number of components: 30\nCheckpoint: Batch integration completed, unique batches: 12",
                "lineage": "111501"
            }
        ],
        "1116": [],
        "1117": [],
        "1118": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=30)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"], metric='euclidean')\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    low_confidence_threshold = 0.5\n    low_confidence_count = np.sum(np.max(biological_similarity, axis=1) < low_confidence_threshold)\n    low_confidence_percentage = (low_confidence_count / biological_similarity.shape[0]) * 100\n    print(f'Low-confidence predictions: {low_confidence_percentage:.2f}%')\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.6841465560051665,
                "model_info": "Low-confidence predictions: 0.00%",
                "lineage": "111801"
            }
        ],
        "1119": [],
        "1120": [],
        "1121": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    sc.pp.neighbors(adata_int, n_neighbors=15)\n    sc.tl.umap(adata_int)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.6595657708091586,
                "model_info": "",
                "lineage": "112101"
            }
        ],
        "1122": [],
        "1123": [],
        "1124": [],
        "1125": [],
        "1126": [],
        "1127": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    sc.pp.neighbors(adata_int, use_rep='X_emb')\n    sc.tl.umap(adata_int)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "112701"
            }
        ],
        "1128": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=30)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n    \n    print(f\"Normalized shape: {adata_int.X.shape}, Highly variable genes: {adata_int.var['highly_variable'].sum()}, PCA components: {adata_int.obsm['X_pca'].shape[1]}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.6841465560051665,
                "model_info": "Normalized shape: (5000, 2000), Highly variable genes: 2000, PCA components: 30",
                "lineage": "112801"
            }
        ],
        "1129": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom scipy.stats import skew, kurtosis\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=40)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    skewness = skew(adata_int.X, axis=0)\n    kurt = kurtosis(adata_int.X, axis=0)\n    print(\"Skewness:\", skewness)\n    print(\"Kurtosis:\", kurt)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.6765778587712483,
                "model_info": "Skewness: [ 7.66956138 49.07606845  9.39371579 ... 13.38165308 39.17823748\n 68.79710699]\nKurtosis: [  66.48492022 2435.73089332   89.50240839 ...  181.65540636 1575.51147539\n 4817.74763659]",
                "lineage": "112901"
            }
        ],
        "1130": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    scaler = StandardScaler()\n    pca = PCA(n_components=50)\n    pipeline = make_pipeline(scaler, pca)\n    X_pca = pipeline.fit_transform(adata_int.X)\n    \n    distance_matrix = pairwise_distances(X_pca)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8050380624312762,
                "model_info": "",
                "lineage": "113001"
            }
        ],
        "1131": [],
        "1132": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.log1p(adata_int)\n    adata_int.X = np.nan_to_num(adata_int.X) \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7967152190660962,
                "model_info": "",
                "lineage": "113201"
            }
        ],
        "1133": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    sparsity_metric = np.count_nonzero(adata_int.X) / adata_int.X.size\n    adata_int.uns['sparsity_metric'] = sparsity_metric\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "113301"
            }
        ],
        "1134": [],
        "1135": [],
        "1136": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8127604089650203,
                "model_info": "",
                "lineage": "113601"
            }
        ],
        "1137": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Processed batch integration for {adata_int.n_obs} cells across {len(adata_int.obs['batch'].unique())} batches.\")\n    print(f\"Highly variable genes selected: {adata_int.var['highly_variable'].sum()}\")\n    print(f\"PCA components used: {adata_int.obsm['X_pca'].shape[1]}\")\n    print(f\"Embedding shape: {adata_int.obsm['X_emb'].shape}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Processed batch integration for 5000 cells across 12 batches.\nHighly variable genes selected: 2000\nPCA components used: 50\nEmbedding shape: (5000, 5000)",
                "lineage": "113701"
            }
        ],
        "1138": [],
        "1139": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    noise = np.random.normal(0, 0.1, adata_int.obsm[\"X_pca\"].shape)\n    noisy_embeddings = adata_int.obsm[\"X_pca\"] + noise\n    \n    distance_matrix = pairwise_distances(noisy_embeddings)\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8160147522552273,
                "model_info": "",
                "lineage": "113901"
            }
        ],
        "1140": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    if adata_int.shape[0] < 1000:\n        print(\"Warning: Insufficient training data volume.\")\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=40)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Processed {adata_int.shape[0]} cells, {adata_int.shape[1]} genes.\")\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.6765778587712483,
                "model_info": "Processed 5000 cells, 2000 genes.",
                "lineage": "114001"
            }
        ],
        "1141": [],
        "1142": [],
        "1143": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "114301"
            }
        ],
        "1144": [],
        "1145": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.784566104104683,
                "model_info": "",
                "lineage": "114501"
            }
        ],
        "1146": [],
        "1147": [],
        "1148": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7907538802486651,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "114801"
            }
        ],
        "1149": [],
        "1150": [],
        "1151": [],
        "1152": [],
        "1153": [],
        "1154": [],
        "1155": [],
        "1156": [],
        "1157": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    adata_int.uns['sparsity_metric'] = np.mean(np.abs(biological_similarity - np.mean(biological_similarity)))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "115701"
            }
        ],
        "1158": [],
        "1159": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport psutil\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    for i in range(10):  # Assuming training involves 10 batches\n        # Simulate training step\n        # (Insert training code here)\n        print(f\"Memory usage after batch {i}: {psutil.Process().memory_info().rss / 1024 ** 2:.2f} MB\")\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Memory usage after batch 0: 900.09 MB\nMemory usage after batch 1: 900.09 MB\nMemory usage after batch 2: 900.09 MB\nMemory usage after batch 3: 900.09 MB\nMemory usage after batch 4: 900.09 MB\nMemory usage after batch 5: 900.09 MB\nMemory usage after batch 6: 900.09 MB\nMemory usage after batch 7: 900.09 MB\nMemory usage after batch 8: 900.09 MB\nMemory usage after batch 9: 900.09 MB",
                "lineage": "115901"
            }
        ],
        "1160": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\nimport resource\n\ndef tuso_model(adata):\n    start_time = time.time()\n    mem_usage_start = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    end_time = time.time()\n    mem_usage_end = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n\n    print(f\"Time taken: {end_time - start_time:.2f} seconds\")\n    print(f\"Memory usage: {mem_usage_end - mem_usage_start} KB\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Time taken: 7.26 seconds\nMemory usage: 1611716 KB",
                "lineage": "116001"
            }
        ],
        "1161": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    print(f\"Input feature scaling: Normalization applied, max value capped at 10.\")\n    print(f\"Highly variable genes retained: {adata_int.n_vars} out of {adata.n_vars}.\")\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix / np.max(distance_matrix))\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(f\"Low dimensional embedding shape: {adata_int.obsm['X_emb'].shape}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.6614181814401293,
                "model_info": "Input feature scaling: Normalization applied, max value capped at 10.\nHighly variable genes retained: 2000 out of 29335.\nLow dimensional embedding shape: (5000, 5000)",
                "lineage": "116101"
            }
        ],
        "1162": [],
        "1163": [],
        "1164": [],
        "1165": [],
        "1166": [],
        "1167": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    print(\"Prior distributions: Highly variable genes identified:\", adata_int.var['highly_variable'].sum())\n    print(\"Prior distributions: Number of batches:\", adata_int.obs['batch'].nunique())\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Prior distributions: Highly variable genes identified: 2000\nPrior distributions: Number of batches: 12",
                "lineage": "116701"
            }
        ],
        "1168": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    sc.pp.neighbors(adata_int)  # Added this line\n    \n    sc.tl.umap(adata_int)\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_umap\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.680222169220248,
                "model_info": "",
                "lineage": "116801"
            }
        ],
        "1169": [],
        "1170": [],
        "1171": [],
        "1172": [],
        "1173": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"], metric='euclidean')\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "117301"
            }
        ],
        "1174": [],
        "1175": [],
        "1176": [],
        "1177": [],
        "1178": [],
        "1179": [],
        "1180": [],
        "1181": [],
        "1182": [],
        "1183": [],
        "1184": [],
        "1185": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7907538802486651,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "118501"
            }
        ],
        "1186": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    missing_values = np.isnan(adata_int.X).sum(axis=0)\n    print(\"Missing values per feature post-imputation:\", missing_values)\n\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Missing values per feature post-imputation: [0 0 0 ... 0 0 0]",
                "lineage": "118601"
            }
        ],
        "1187": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_umap\"])\n    biological_similarity = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.680222169220248,
                "model_info": "",
                "lineage": "118701"
            }
        ],
        "1188": [],
        "1189": [],
        "1190": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "119001"
            }
        ],
        "1191": [],
        "1192": [],
        "1193": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    sc.pp.neighbors(adata_int)\n    \n    sc.tl.umap(adata_int)\n    biological_similarity = pairwise_distances(adata_int.obsm[\"X_umap\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-biological_similarity)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.680222169220248,
                "model_info": "",
                "lineage": "119301"
            }
        ],
        "1194": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "119401"
            }
        ],
        "1195": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    if adata_int.shape[0] < 1000:\n        print(\"Warning: Insufficient training data volume, consider increasing sample size.\")\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    if 'batch' in adata_int.obs:\n        batch_counts = adata_int.obs['batch'].astype(str).value_counts()\n        print(\"Inference latency distribution across batches:\", batch_counts)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Inference latency distribution across batches: batch\nD496    1289\nD503    1199\n640C     544\n637C     395\nA36      362\nA29      271\nA31      202\n621B     184\n582C     174\nA35      165\nA37      146\nA52       69\nName: count, dtype: int64",
                "lineage": "119501"
            }
        ],
        "1196": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.ensemble import RandomForestClassifier\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    feature_matrix = adata_int.obsm[\"X_pca\"]\n    labels = adata_int.obs[\"batch\"]\n    \n    model = RandomForestClassifier(n_estimators=100)\n    model.fit(feature_matrix, labels)\n    predictions = model.predict(feature_matrix)\n    \n    adata_int.obs[\"predicted_batch\"] = predictions\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "119601"
            }
        ],
        "1197": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.7907538802486651,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "119701"
            }
        ],
        "1198": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.816295407406462,
                "model_info": "",
                "lineage": "119801"
            }
        ],
        "1199": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.metrics import silhouette_score\nfrom sklearn.cluster import KMeans\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    kmeans = KMeans(n_clusters=10)\n    cluster_labels = kmeans.fit_predict(adata_int.obsm[\"X_emb\"])\n    \n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], cluster_labels)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "119901"
            }
        ],
        "1200": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    distance_matrix_normalized = distance_matrix / np.max(distance_matrix)\n    adata_int.obsm[\"X_emb\"] = np.exp(-distance_matrix_normalized)\n\n    misclassified_instances = []\n    for i in range(len(adata_int)):\n        predicted_prob = np.random.rand()  # Placeholder for predicted probabilities\n        if predicted_prob < 0.5:  # Placeholder for misclassification condition\n            misclassified_instances.append((adata_int.obs.index[i], predicted_prob))\n\n    print(\"Misclassified Instances with Predicted Probabilities:\", misclassified_instances)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.6614181814401293,
                "model_info": "Misclassified Instances with Predicted Probabilities: [('243100', 0.3745401188473625), ('10655', 0.15601864044243652), ('161024', 0.15599452033620265), ('192011', 0.05808361216819946), ('117236', 0.020584494295802447), ('302080', 0.21233911067827616), ('152939', 0.18182496720710062), ('290340', 0.18340450985343382), ('141335', 0.3042422429595377), ('83824', 0.43194501864211576), ('132513', 0.2912291401980419), ('119944', 0.13949386065204183), ('94514', 0.29214464853521815), ('3710', 0.3663618432936917), ('153133', 0.45606998421703593), ('162753', 0.19967378215835974), ('219650', 0.046450412719997725), ('280641', 0.17052412368729153), ('130039', 0.06505159298527952), ('271463', 0.3046137691733707), ('273313', 0.09767211400638387), ('43257', 0.4401524937396013), ('254117', 0.12203823484477883), ('123427', 0.4951769101112702), ('121457', 0.034388521115218396), ('150010', 0.2587799816000169), ('149959', 0.31171107608941095), ('199774', 0.18485445552552704), ('134931', 0.0884925020519195), ('154416', 0.1959828624191452), ('61692', 0.045227288910538066), ('156102', 0.32533033076326434), ('319988', 0.388677289689482), ('121372', 0.2713490317738959), ('247212', 0.3567533266935893), ('75114', 0.28093450968738076), ('286984', 0.14092422497476265), ('273638', 0.07455064367977082), ('255424', 0.1987156815341724), ('317048', 0.005522117123602399), ('232643', 0.07404465173409036), ('214140', 0.3584657285442726), ('95393', 0.11586905952512971), ('245446', 0.3308980248526492), ('174551', 0.06355835028602363), ('183236', 0.3109823217156622), ('127278', 0.32518332202674705), ('269892', 0.4722149251619493), ('202551', 0.1195942459383017), ('290409', 0.49379559636439074), ('2967', 0.42754101835854963), ('312831', 0.02541912674409519), ('270971', 0.10789142699330445), ('305785', 0.03142918568673425), ('222847', 0.3143559810763267), ('47814', 0.24929222914887494), ('30188', 0.41038292303562973), ('289275', 0.22879816549162246), ('306616', 0.07697990982879299), ('106273', 0.28975145291376",
                "lineage": "120001"
            }
        ],
        "1201": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport logging\n\ndef tuso_model(adata):\n    logging.basicConfig(level=logging.INFO)\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    logging.info(\"Starting feature selection...\")\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    logging.info(\"Performing batch correction...\")\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    logging.info(\"Running PCA...\")\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    logging.info(\"Calculating low dimensional embedding...\")\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    logging.info(\"Integration process completed.\")\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "120101"
            }
        ],
        "1202": [],
        "1203": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\nimport tracemalloc\n\ndef tuso_model(adata):\n    tracemalloc.start()\n    start_time = time.time()\n    \n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    end_time = time.time()\n    current, peak = tracemalloc.get_traced_memory()\n    tracemalloc.stop()\n    \n    print(f\"Time taken: {end_time - start_time:.2f}s\")\n    print(f\"Memory usage: Current={current / 1024:.2f}KB; Peak={peak / 1024:.2f}KB\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Time taken: 7.91s\nMemory usage: Current=280619.18KB; Peak=1585290.97KB",
                "lineage": "120301"
            }
        ],
        "1204": [],
        "1205": [],
        "1206": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.base import clone\nfrom sklearn.preprocessing import LabelEncoder\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    label_encoder = LabelEncoder()\n    adata_int.obs[\"batch_encoded\"] = label_encoder.fit_transform(adata_int.obs[\"batch\"])\n    \n    models = [RandomForestClassifier(n_estimators=100, random_state=42) for _ in range(5)]\n    predictions = np.zeros((adata_int.shape[0], len(models)))\n    \n    for i, model in enumerate(models):\n        X_train = adata_int.obsm[\"X_pca\"]\n        y_train = adata_int.obs[\"batch_encoded\"]\n        model.fit(X_train, y_train)\n        predictions[:, i] = model.predict(X_train)\n    \n    adata_int.obs[\"batch_pred\"] = label_encoder.inverse_transform(np.mean(predictions, axis=1).astype(int))\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "120601"
            }
        ],
        "1207": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata, patience=10):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    early_stopping_criteria = False  # Placeholder for actual criteria check\n    patience_counter = 0  # Placeholder for actual patience counter logic\n    print(f\"Early stopping criteria satisfied: {early_stopping_criteria}, Patience counter: {patience_counter}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Early stopping criteria satisfied: False, Patience counter: 0",
                "lineage": "120701"
            }
        ],
        "1208": [],
        "1209": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    start_time = time.time()\n    sc.pp.combat(adata_int, key='batch')\n    print(f\"Combat run-time: {time.time() - start_time:.4f} seconds\")\n    \n    start_time = time.time()\n    sc.pp.scale(adata_int, max_value=10)\n    print(f\"Scaling run-time: {time.time() - start_time:.4f} seconds\")\n    \n    start_time = time.time()\n    sc.pp.pca(adata_int, n_comps=50)\n    print(f\"PCA run-time: {time.time() - start_time:.4f} seconds\")\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Combat run-time: 3.7806 seconds\nScaling run-time: 0.2684 seconds\nPCA run-time: 2.1482 seconds",
                "lineage": "120901"
            }
        ],
        "1210": [],
        "1211": [],
        "1212": [],
        "1213": [],
        "1214": [],
        "1215": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    print(\"Starting model with parameters: n_top_genes=2000, batch integration using ComBat, PCA components=50\")\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    low_confidence_threshold = 0.5\n    low_confidence_count = np.sum(adata_int.obsm[\"X_emb\"] < low_confidence_threshold)\n    print(f\"Percentage of low-confidence predictions: {low_confidence_count / adata_int.shape[0] * 100:.2f}%\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Starting model with parameters: n_top_genes=2000, batch integration using ComBat, PCA components=50\nPercentage of low-confidence predictions: 499900.00%",
                "lineage": "121501"
            }
        ],
        "1216": [],
        "1217": [],
        "1218": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n\n    if adata_int.obs['batch'].isnull().any():\n        print(\"Warning: Missing batch information detected in the data.\")\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    if np.any(adata_int.X < 0):\n        print(\"Warning: Negative values detected in the scaled data.\")\n\n    sc.pp.pca(adata_int, n_comps=40)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    distance_matrix_normalized = distance_matrix / np.max(distance_matrix)\n    adata_int.obsm[\"X_emb\"] = np.exp(-distance_matrix_normalized)\n    \n    print(\"Feature range assumed: highly_variable genes (2000), scaled max value (10).\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.6765778587712483,
                "model_info": "Warning: Negative values detected in the scaled data.\nFeature range assumed: highly_variable genes (2000), scaled max value (10).",
                "lineage": "121801"
            }
        ],
        "1219": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    if adata_int.shape[0] < 100:\n        print(\"Warning: Dataset has fewer than 100 cells, integration may be unreliable.\")\n    if adata_int.shape[1] < 2000:\n        print(\"Warning: Fewer than 2000 highly variable genes selected.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "121901"
            }
        ],
        "1220": [],
        "1221": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    distance_matrix_normalized = distance_matrix / np.max(distance_matrix)\n    adata_int.obsm[\"X_emb\"] = np.exp(-distance_matrix_normalized)\n    \n    print(\"Hyper-parameters: n_top_genes=2000, max_value=10, n_comps=50\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.6614181814401293,
                "model_info": "Hyper-parameters: n_top_genes=2000, max_value=10, n_comps=50",
                "lineage": "122101"
            }
        ],
        "1222": [],
        "1223": [],
        "1224": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\nimport psutil\n\ndef tuso_model(adata):\n    start_time = time.time()\n    mem_before = psutil.Process().memory_info().rss / (1024 ** 2)\n    \n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=40)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    max_distance = np.max(distance_matrix)\n    normalized_distance_matrix = distance_matrix / max_distance\n    adata_int.obsm[\"X_emb\"] = np.exp(-normalized_distance_matrix)\n    \n    end_time = time.time()\n    mem_after = psutil.Process().memory_info().rss / (1024 ** 2)\n    \n    print(f\"Time taken: {end_time - start_time:.2f} seconds\")\n    print(f\"Memory usage: {mem_after - mem_before:.2f} MB\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.6765778587712483,
                "model_info": "Time taken: 6.97 seconds\nMemory usage: 543.63 MB",
                "lineage": "122401"
            }
        ],
        "1225": [],
        "1226": [],
        "1227": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"].copy()\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "122701"
            }
        ],
        "1228": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    noise = np.random.normal(0, 0.1, adata_int.obsm[\"X_pca\"].shape)\n    adata_int.obsm[\"X_pca\"] += noise\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8160147519839923,
                "model_info": "",
                "lineage": "122801"
            }
        ],
        "1229": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-distance_matrix)\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "122901"
            }
        ],
        "1230": [],
        "1231": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = pairwise_distances(adata_int.obsm[\"X_pca\"], metric='euclidean')\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.6632105767023524,
                "model_info": "",
                "lineage": "123101"
            }
        ],
        "1232": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    pairwise_dist = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_dist)\n    \n    sparsity = np.mean(np.abs(adata_int.X) < 1e-5)\n    low_rank_representation = np.linalg.matrix_rank(adata_int.X)\n    print(f\"Sparsity: {sparsity}, Low-rank representation: {low_rank_representation}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Sparsity: 2e-07, Low-rank representation: 1983",
                "lineage": "123201"
            }
        ],
        "1233": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "123301"
            }
        ],
        "1234": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    class_balance_before = adata_int.obs[\"batch\"].value_counts(normalize=True)\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    class_balance_after = adata_int.obs[\"batch\"].value_counts(normalize=True)\n    print(\"Class balance before resampling:\\n\", class_balance_before)\n    print(\"Class balance after resampling:\\n\", class_balance_after)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Class balance before resampling:\n batch\nD496    0.2578\nD503    0.2398\n640C    0.1088\n637C    0.0790\nA36     0.0724\nA29     0.0542\nA31     0.0404\n621B    0.0368\n582C    0.0348\nA35     0.0330\nA37     0.0292\nA52     0.0138\nName: proportion, dtype: float64\nClass balance after resampling:\n batch\nD496    0.2578\nD503    0.2398\n640C    0.1088\n637C    0.0790\nA36     0.0724\nA29     0.0542\nA31     0.0404\n621B    0.0368\n582C    0.0348\nA35     0.0330\nA37     0.0292\nA52     0.0138\nName: proportion, dtype: float64",
                "lineage": "123401"
            }
        ],
        "1235": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    sc.pp.neighbors(adata_int, n_neighbors=10, use_rep='X_pca')\n    sc.tl.umap(adata_int)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.6497520836943341,
                "model_info": "",
                "lineage": "123501"
            }
        ],
        "1236": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.816295407406462,
                "model_info": "",
                "lineage": "123601"
            }
        ],
        "1237": [],
        "1238": [],
        "1239": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    print(f\"adata_int.X shape: {adata_int.X.shape}, dtype: {adata_int.X.dtype}\")\n    print(f\"adata_int.obsm['X_pca'] shape: {adata_int.obsm['X_pca'].shape}, dtype: {adata_int.obsm['X_pca'].dtype}\")\n    print(f\"adata_int.obs['batch'] shape: {adata_int.obs['batch'].shape}, dtype: {adata_int.obs['batch'].dtype}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "adata_int.X shape: (5000, 2000), dtype: float64\nadata_int.obsm['X_pca'] shape: (5000, 50), dtype: float32\nadata_int.obs['batch'] shape: (5000,), dtype: category",
                "lineage": "123901"
            }
        ],
        "1240": [],
        "1241": [],
        "1242": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distances = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-distances)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "124201"
            }
        ],
        "1243": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.7907538802486651,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "124301"
            }
        ],
        "1244": [],
        "1245": [],
        "1246": [],
        "1247": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    print(f\"Initial shape: {adata_int.shape}, Data types: {adata_int.X.dtype}, {adata_int.obs['batch'].dtype}\")\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    print(f\"Post HVG shape: {adata_int.shape}\")\n\n    sc.pp.combat(adata_int, key='batch')\n    \n    print(f\"Post Combat shape: {adata_int.shape}\")\n\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    print(f\"PCA shape: {adata_int.obsm['X_pca'].shape}\")\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    print(f\"Embedding shape: {adata_int.obsm['X_emb'].shape}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Initial shape: (5000, 29335), Data types: float32, category\nPost HVG shape: (5000, 2000)\nPost Combat shape: (5000, 2000)\nPCA shape: (5000, 50)\nEmbedding shape: (5000, 5000)",
                "lineage": "124701"
            }
        ],
        "1248": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity_matrix = np.exp(-distance_matrix)\n    adata_int.obsm[\"X_emb\"] = biological_similarity_matrix\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "124801"
            }
        ],
        "1249": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    distances = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    biological_similarity = np.exp(-distances)\n    adata_int.obsm[\"X_emb\"] = biological_similarity\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "124901"
            }
        ],
        "1250": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.log1p(adata_int)\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8127604089650203,
                "model_info": "",
                "lineage": "125001"
            }
        ],
        "1251": [],
        "1252": [],
        "1253": [],
        "1254": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    dist_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-dist_matrix / np.max(dist_matrix))\n    \n    top_features = adata_int.var_names[adata_int.var['highly_variable']][:10]\n    print(f\"Top features for optimization: {top_features}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXX.py",
                "accuracy": 0.6614181814401293,
                "model_info": "Top features for optimization: Index(['ENSG00000175899', 'ENSG00000166535', 'ENSG00000008311',\n       'ENSG00000165029', 'ENSG00000004846', 'ENSG00000108846',\n       'ENSG00000267457', 'ENSG00000275936', 'ENSG00000268812',\n       'ENSG00000278765'],\n      dtype='object', name='feature_id')",
                "lineage": "125401"
            }
        ],
        "1255": [],
        "1256": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8145277820210964,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "125601"
            }
        ],
        "1257": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8145277820210964,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "125701"
            }
        ],
        "1258": [],
        "1259": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    num_genes = adata_int.shape[1]\n    num_batches = adata_int.obs['batch'].nunique()\n    print(f\"Number of genes: {num_genes}, Number of batches: {num_batches}\")\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-distance_matrix)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Number of genes: 2000, Number of batches: 12",
                "lineage": "125901"
            }
        ],
        "1260": [
            {
                "code": "import scanpy as sc\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "126001"
            }
        ],
        "1261": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    pairwise_dist = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_dist)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "126101"
            }
        ],
        "1262": [],
        "1263": [],
        "1264": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    class_balance_before = adata_int.obs['batch'].value_counts(normalize=True)\n    print(\"Class balance before resampling:\", class_balance_before)\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    pairwise_dist = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_dist)\n    \n    class_balance_after = adata_int.obs['batch'].value_counts(normalize=True)\n    print(\"Class balance after resampling:\", class_balance_after)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Class balance before resampling: batch\nD496    0.2578\nD503    0.2398\n640C    0.1088\n637C    0.0790\nA36     0.0724\nA29     0.0542\nA31     0.0404\n621B    0.0368\n582C    0.0348\nA35     0.0330\nA37     0.0292\nA52     0.0138\nName: proportion, dtype: float64\nClass balance after resampling: batch\nD496    0.2578\nD503    0.2398\n640C    0.1088\n637C    0.0790\nA36     0.0724\nA29     0.0542\nA31     0.0404\n621B    0.0368\n582C    0.0348\nA35     0.0330\nA37     0.0292\nA52     0.0138\nName: proportion, dtype: float64",
                "lineage": "126401"
            }
        ],
        "1265": [],
        "1266": [],
        "1267": [],
        "1268": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    low_confidence_threshold = 0.5\n    distances = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    low_confidence_count = np.sum(distances < low_confidence_threshold)\n    total_pairs = distances.size\n    low_confidence_percentage = (low_confidence_count / total_pairs) * 100\n    \n    print(f\"Low-confidence predictions: {low_confidence_percentage:.2f}%\")\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-distances)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Low-confidence predictions: 0.02%",
                "lineage": "126801"
            }
        ],
        "1269": [],
        "1270": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8133691847928266,
                "model_info": "",
                "lineage": "127001"
            }
        ],
        "1271": [],
        "1272": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    print(f\"Final Evaluation Metrics:\\n\"\n          f\"Number of cells: {adata_int.n_obs}\\n\"\n          f\"Number of genes: {adata_int.n_vars}\\n\"\n          f\"Batch integration completed successfully.\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Final Evaluation Metrics:\nNumber of cells: 5000\nNumber of genes: 2000\nBatch integration completed successfully.",
                "lineage": "127201"
            }
        ],
        "1273": [],
        "1274": [],
        "1275": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    if np.any(np.isnan(adata_int.X.A)): \n        print(\"Warning: Input data contains NaN values.\")\n    \n    if 'batch' in adata_int.obs:\n        sc.pp.combat(adata_int, key='batch')\n    \n    sc.pp.scale(adata_int, max_value=10)\n    \n    mean = adata_int.X.mean(axis=0)\n    std = adata_int.X.std(axis=0)\n    if np.any(std == 0):\n        print(\"Warning: Some features have zero variance after scaling.\")\n    \n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "127501"
            }
        ],
        "1276": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "127601"
            }
        ],
        "1277": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8145277820210964,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "127701"
            }
        ],
        "1278": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    if adata_int.raw is not None:\n        adata_int = adata_int.raw.to_adata()\n    \n    adata_int.obsm[\"X_pca\"] = adata_int.obsm[\"X_pca\"].toarray() if hasattr(adata_int.obsm[\"X_pca\"], 'toarray') else adata_int.obsm[\"X_pca\"]\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8133691847928266,
                "model_info": "",
                "lineage": "127801"
            }
        ],
        "1279": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "127901"
            }
        ],
        "1280": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\nimport resource\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    start_time = time.time()\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    loading_time = time.time() - start_time\n    print(f\"Data-loading time for highly_variable_genes: {loading_time:.4f} seconds\")\n\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    start_time = time.time()\n    sc.pp.combat(adata_int, key='batch')\n    loading_time = time.time() - start_time\n    print(f\"Data-loading time for combat: {loading_time:.4f} seconds\")\n\n    start_time = time.time()\n    sc.pp.scale(adata_int, max_value=10)\n    loading_time = time.time() - start_time\n    print(f\"Data-loading time for scale: {loading_time:.4f} seconds\")\n\n    start_time = time.time()\n    sc.pp.pca(adata_int, n_comps=50)\n    loading_time = time.time() - start_time\n    print(f\"Data-loading time for PCA: {loading_time:.4f} seconds\")\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    mem_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n    print(f\"Memory usage: {mem_usage} KB\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Data-loading time for highly_variable_genes: 0.4213 seconds\nData-loading time for combat: 4.0353 seconds\nData-loading time for scale: 0.2756 seconds\nData-loading time for PCA: 2.1625 seconds\nMemory usage: 2314888 KB",
                "lineage": "128001"
            }
        ],
        "1281": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.cluster import KMeans\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    rf = RandomForestClassifier(n_estimators=100)\n    rf.fit(adata_int.obsm[\"X_pca\"], adata_int.obs[\"batch\"])\n    adata_int.obs[\"predicted_batch\"] = rf.predict(adata_int.obsm[\"X_pca\"])\n\n    kmeans = KMeans(n_clusters=len(np.unique(adata_int.obs[\"predicted_batch\"])))\n    adata_int.obs[\"kmeans_clusters\"] = kmeans.fit_predict(adata_int.obsm[\"X_pca\"])\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "128101"
            }
        ],
        "1282": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\n\ndef tuso_model(adata):\n    start_time = time.time()\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    print(f\"Step: Highly variable genes - Time taken: {time.time() - start_time:.2f}s\")\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    start_time = time.time()\n    sc.pp.combat(adata_int, key='batch')\n    print(f\"Step: Combat batch correction - Time taken: {time.time() - start_time:.2f}s\")\n\n    start_time = time.time()\n    sc.pp.scale(adata_int, max_value=10)\n    print(f\"Step: Scaling - Time taken: {time.time() - start_time:.2f}s\")\n\n    start_time = time.time()\n    sc.pp.pca(adata_int, n_comps=50)\n    print(f\"Step: PCA - Time taken: {time.time() - start_time:.2f}s\")\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Step: Highly variable genes - Time taken: 0.48s\nStep: Combat batch correction - Time taken: 3.62s\nStep: Scaling - Time taken: 0.28s\nStep: PCA - Time taken: 2.07s",
                "lineage": "128201"
            }
        ],
        "1283": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom scipy.stats import skew, kurtosis\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=40)\n\n    pairwise_dist = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_dist)\n\n    for feature in adata_int.var_names:\n        if adata_int[:, feature].X.shape[1] > 0:\n            skewness = skew(adata_int[:, feature].X.toarray())\n            kurt = kurtosis(adata_int[:, feature].X.toarray())\n            print(f\"{feature}: skewness={skewness}, kurtosis={kurt}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8183160804372553,
                "model_info": "ENSG00000175899: skewness=[7.66956138], kurtosis=[66.48492022]\nENSG00000166535: skewness=[49.07606845], kurtosis=[2435.73089332]\nENSG00000008311: skewness=[9.39371579], kurtosis=[89.50240839]\nENSG00000165029: skewness=[3.78431323], kurtosis=[15.27721252]\nENSG00000004846: skewness=[35.03172016], kurtosis=[1231.78872517]\nENSG00000108846: skewness=[7.50128313], kurtosis=[65.18627756]\nENSG00000267457: skewness=[31.30860367], kurtosis=[983.79876156]\nENSG00000275936: skewness=[48.85973453], kurtosis=[2421.45419842]\nENSG00000268812: skewness=[28.48104275], kurtosis=[815.55256245]\nENSG00000278765: skewness=[32.25251784], kurtosis=[1059.07403878]\nENSG00000225792: skewness=[14.63510575], kurtosis=[222.50327072]\nENSG00000269927: skewness=[11.45707499], kurtosis=[134.074257]\nENSG00000259768: skewness=[12.04599176], kurtosis=[148.16129441]\nENSG00000266378: skewness=[7.00188939], kurtosis=[53.02302043]\nENSG00000231412: skewness=[31.17323869], kurtosis=[978.14702415]\nENSG00000260107: skewness=[49.06724806], kurtosis=[2435.12738531]\nENSG00000263708: skewness=[69.27521622], kurtosis=[4862.88265091]\nENSG00000279762: skewness=[16.34657351], kurtosis=[266.94504171]\nENSG00000243762: skewness=[68.15936476], kurtosis=[4759.08095468]\nENSG00000282022: skewness=[34.86201463], kurtosis=[1223.85366412]\nENSG00000228384: skewness=[47.60286536], kurtosis=[2339.50490714]\nENSG00000230408: skewness=[24.92681715], kurtosis=[622.63329387]\nENSG00000259843: skewness=[31.06824359], kurtosis=[973.78718234]\nENSG00000270823: skewness=[48.44774261], kurtosis=[2394.34850779]\nENSG00000262202: skewness=[2.63278134], kurtosis=[6.97846059]\nENSG00000284797: skewness=[68.33152735], kurtosis=[4774.39008465]\nENSG00000250258: skewness=[69.2752162], kurtosis=[4862.88264915]\nENSG00000267646: skewness=[66.5821157], kurtosis=[4613.53079548]\nENSG00000255441: skewness=[24.88675602], kurtosis=[619.98123073]\nENSG00000267984: skewness=[28.03286392], kurtosis=[798.54588655]\nENSG00000250453: skewness=[41.04495012], kurtosis=[172",
                "lineage": "128301"
            }
        ],
        "1284": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8145277820210964,
                "model_info": "Found 4787 genes with zero variance.",
                "lineage": "128401"
            }
        ],
        "1285": [],
        "1286": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8133691847928266,
                "model_info": "",
                "lineage": "128601"
            }
        ],
        "1287": [],
        "1288": [],
        "1289": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n    sc.pp.combat(adata_int, key='batch')\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.816295407406462,
                "model_info": "",
                "lineage": "128901"
            }
        ],
        "1290": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-distance_matrix)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "129001"
            }
        ],
        "1291": [],
        "1292": [],
        "1293": [],
        "1294": [],
        "1295": [
            {
                "code": "import scanpy as sc\nimport numpy as np\nfrom sklearn.neural_network import MLPRegressor\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    model = MLPRegressor(hidden_layer_sizes=(128, 64, 32), activation='relu', solver='adam', max_iter=100)\n    model.fit(adata_int.obsm[\"X_pca\"], adata_int.obsm[\"X_pca\"])\n\n    adata_int.obsm[\"X_emb\"] = model.predict(adata_int.obsm[\"X_pca\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.7843598689275959,
                "model_info": "",
                "lineage": "129501"
            }
        ],
        "1296": [],
        "1297": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    if adata_int.n_obs < 1000:\n        print(\"Warning: Insufficient training data volume. Consider increasing sample size for better integration.\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "129701"
            }
        ],
        "1298": [],
        "1299": [],
        "1300": [],
        "1301": [],
        "1302": [],
        "1303": [],
        "1304": [],
        "1305": [],
        "1306": [],
        "1307": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=40)\n    \n    highly_correlated = np.corrcoef(adata_int.obsm[\"X_pca\"].T)\n    correlated_pairs = np.argwhere(np.abs(highly_correlated) > 0.9)\n    print(\"Highly correlated features (threshold > 0.9):\", correlated_pairs)\n\n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-distance_matrix)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8183160804372553,
                "model_info": "Highly correlated features (threshold > 0.9): [[ 0  0]\n [ 1  1]\n [ 2  2]\n [ 3  3]\n [ 4  4]\n [ 5  5]\n [ 6  6]\n [ 7  7]\n [ 8  8]\n [ 9  9]\n [10 10]\n [11 11]\n [12 12]\n [13 13]\n [14 14]\n [15 15]\n [16 16]\n [17 17]\n [18 18]\n [19 19]\n [20 20]\n [21 21]\n [22 22]\n [23 23]\n [24 24]\n [25 25]\n [26 26]\n [27 27]\n [28 28]\n [29 29]\n [30 30]\n [31 31]\n [32 32]\n [33 33]\n [34 34]\n [35 35]\n [36 36]\n [37 37]\n [38 38]\n [39 39]]",
                "lineage": "130701"
            }
        ],
        "1308": [],
        "1309": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8133691847928266,
                "model_info": "",
                "lineage": "130901"
            }
        ],
        "1310": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport torch\nimport torch.nn as nn\n\nclass AttentionLayer(nn.Module):\n    def __init__(self, input_dim):\n        super(AttentionLayer, self).__init__()\n        self.query = nn.Linear(input_dim, input_dim)\n        self.key = nn.Linear(input_dim, input_dim)\n        self.value = nn.Linear(input_dim, input_dim)\n\n    def forward(self, x):\n        q = self.query(x)\n        k = self.key(x)\n        v = self.value(x)\n        attention_scores = torch.matmul(q, k.T) / np.sqrt(x.size(1))\n        attention_weights = torch.softmax(attention_scores, dim=-1)\n        out = torch.matmul(attention_weights, v)\n        return out\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    attention_layer = AttentionLayer(input_dim=50)\n    X_pca_tensor = torch.tensor(adata_int.obsm[\"X_pca\"], dtype=torch.float32)\n    X_emb_tensor = attention_layer(X_pca_tensor).detach().numpy()\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(X_emb_tensor))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.6082978756923998,
                "model_info": "",
                "lineage": "131001"
            }
        ],
        "1311": [],
        "1312": [],
        "1313": [],
        "1314": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=40)\n\n    dist_matrix = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-dist_matrix)\n\n    print(\"Shapes and data types:\")\n    print(f\"adata_int.X shape: {adata_int.X.shape}, dtype: {adata_int.X.dtype}\")\n    print(f\"adata_int.obsm['X_pca'] shape: {adata_int.obsm['X_pca'].shape}, dtype: {adata_int.obsm['X_pca'].dtype}\")\n    print(f\"adata_int.obsm['X_emb'] shape: {adata_int.obsm['X_emb'].shape}, dtype: {adata_int.obsm['X_emb'].dtype}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8183160804372553,
                "model_info": "Shapes and data types:\nadata_int.X shape: (5000, 2000), dtype: float64\nadata_int.obsm['X_pca'] shape: (5000, 40), dtype: float32\nadata_int.obsm['X_emb'] shape: (5000, 5000), dtype: float32",
                "lineage": "131401"
            }
        ],
        "1315": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.normalize_total(adata_int, target_sum=1e6)\n    sc.pp.log1p(adata_int)\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8152167146065145,
                "model_info": "",
                "lineage": "131501"
            }
        ],
        "1316": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.feature_selection import VarianceThreshold\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.ensemble import RandomForestClassifier\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int)\n\n    feature_selector = VarianceThreshold(threshold=0.1)\n    adata_int.X = feature_selector.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    adata_int.obsm[\"X_pca\"] = pca.fit_transform(adata_int.X)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.7994311602786568,
                "model_info": "",
                "lineage": "131601"
            }
        ],
        "1317": [],
        "1318": [],
        "1319": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    if adata_int.X.shape[0] > 1:\n        means = np.mean(adata_int.X, axis=0)\n        stds = np.std(adata_int.X, axis=0)\n        print(f'Mean of features: {means}')\n        print(f'Standard deviation of features: {stds}')\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Mean of features: [-0.00761623 -0.01425585 -0.00706408 ... -0.01660529 -0.01697594\n -0.01213672]\nStandard deviation of features: [0.89861581 0.2015314  0.90789238 ... 0.69133107 0.24872142 0.14289127]",
                "lineage": "131901"
            }
        ],
        "1320": [],
        "1321": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_umap\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.680222169220248,
                "model_info": "",
                "lineage": "132101"
            }
        ],
        "1322": [],
        "1323": [],
        "1324": [],
        "1325": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    print(\"Configuration Parameters: n_top_genes=2000, max_value=10, n_comps=50\")\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=30)\n\n    pairwise_dist = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_dist)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8187384766385586,
                "model_info": "Configuration Parameters: n_top_genes=2000, max_value=10, n_comps=50",
                "lineage": "132501"
            }
        ],
        "1326": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom sklearn.neural_network import MLPRegressor\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    model = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', max_iter=500)\n    model.fit(adata_int.obsm[\"X_pca\"], adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = model.predict(adata_int.obsm[\"X_pca\"])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.781405777015121,
                "model_info": "",
                "lineage": "132601"
            }
        ],
        "1327": [],
        "1328": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport logging\n\ndef tuso_model(adata):\n    logging.basicConfig(level=logging.INFO)\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    logging.info(f'Number of highly variable genes selected: {np.sum(adata_int.var[\"highly_variable\"])}')\n    \n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    try:\n        sc.pp.combat(adata_int, key='batch')\n    except Exception as e:\n        logging.warning(f'Combat integration failed: {e}')\n\n    sc.pp.scale(adata_int, max_value=10)\n    logging.info('Data scaled with max_value=10')\n\n    sc.pp.pca(adata_int, n_comps=50)\n    logging.info('PCA completed with 50 components')\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    logging.info('Low dimensional embedding calculated')\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "132801"
            }
        ],
        "1329": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.normalize_total(adata_int, target_sum=1e6)\n    sc.pp.log1p(adata_int)\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8152167146065145,
                "model_info": "",
                "lineage": "132901"
            }
        ],
        "1330": [],
        "1331": [],
        "1332": [],
        "1333": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nfrom scipy.stats import skew, kurtosis\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=30)\n\n    skewness = skew(adata_int.X, axis=0)\n    kurt = kurtosis(adata_int.X, axis=0)\n    for i, (s, k) in enumerate(zip(skewness, kurt)):\n        print(f\"Feature {i}: Skewness = {s:.4f}, Kurtosis = {k:.4f}\")\n\n    pairwise_dist = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_dist)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8187384766385586,
                "model_info": "Feature 0: Skewness = 7.6696, Kurtosis = 66.4849\nFeature 1: Skewness = 49.0761, Kurtosis = 2435.7309\nFeature 2: Skewness = 9.3937, Kurtosis = 89.5024\nFeature 3: Skewness = 3.7843, Kurtosis = 15.2772\nFeature 4: Skewness = 35.0317, Kurtosis = 1231.7887\nFeature 5: Skewness = 7.5013, Kurtosis = 65.1863\nFeature 6: Skewness = 31.3086, Kurtosis = 983.7988\nFeature 7: Skewness = 48.8597, Kurtosis = 2421.4542\nFeature 8: Skewness = 28.4810, Kurtosis = 815.5526\nFeature 9: Skewness = 32.2525, Kurtosis = 1059.0740\nFeature 10: Skewness = 14.6351, Kurtosis = 222.5033\nFeature 11: Skewness = 11.4571, Kurtosis = 134.0743\nFeature 12: Skewness = 12.0460, Kurtosis = 148.1613\nFeature 13: Skewness = 7.0019, Kurtosis = 53.0230\nFeature 14: Skewness = 31.1732, Kurtosis = 978.1470\nFeature 15: Skewness = 49.0672, Kurtosis = 2435.1274\nFeature 16: Skewness = 69.2752, Kurtosis = 4862.8827\nFeature 17: Skewness = 16.3466, Kurtosis = 266.9450\nFeature 18: Skewness = 68.1594, Kurtosis = 4759.0810\nFeature 19: Skewness = 34.8620, Kurtosis = 1223.8537\nFeature 20: Skewness = 47.6029, Kurtosis = 2339.5049\nFeature 21: Skewness = 24.9268, Kurtosis = 622.6333\nFeature 22: Skewness = 31.0682, Kurtosis = 973.7872\nFeature 23: Skewness = 48.4477, Kurtosis = 2394.3485\nFeature 24: Skewness = 2.6328, Kurtosis = 6.9785\nFeature 25: Skewness = 68.3315, Kurtosis = 4774.3901\nFeature 26: Skewness = 69.2752, Kurtosis = 4862.8826\nFeature 27: Skewness = 66.5821, Kurtosis = 4613.5308\nFeature 28: Skewness = 24.8868, Kurtosis = 619.9812\nFeature 29: Skewness = 28.0329, Kurtosis = 798.5459\nFeature 30: Skewness = 41.0450, Kurtosis = 1721.7613\nFeature 31: Skewness = 24.8197, Kurtosis = 621.7202\nFeature 32: Skewness = 34.9104, Kurtosis = 1226.1209\nFeature 33: Skewness = 30.7461, Kurtosis = 960.4249\nFeature 34: Skewness = 26.4461, Kurtosis = 707.9989\nFeature 35: Skewness = 16.8092, Kurtosis = 284.3798\nFeature 36: Skewness = 21.8463, Kurtosis = 482.9927\nFeature 37: Skewness = 21.9454, Kurtosis = 485.3598\nFeature 38: Skewness = 17.6879, ",
                "lineage": "133301"
            }
        ],
        "1334": [],
        "1335": [],
        "1336": [],
        "1337": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\nimport time\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    start_time = time.time()\n    \n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    print(f\"Step 1 completed in {time.time() - start_time:.2f} seconds\")\n    \n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    start_time = time.time()\n    sc.pp.combat(adata_int, key='batch')\n    print(f\"Step 2 completed in {time.time() - start_time:.2f} seconds\")\n    \n    start_time = time.time()\n    sc.pp.scale(adata_int, max_value=10)\n    print(f\"Step 3 completed in {time.time() - start_time:.2f} seconds\")\n    \n    start_time = time.time()\n    sc.pp.pca(adata_int, n_comps=40)\n    print(f\"Step 4 completed in {time.time() - start_time:.2f} seconds\")\n    \n    start_time = time.time()\n    pca_results = adata_int.obsm[\"X_pca\"]\n    distance_matrix = pairwise_distances(pca_results)\n    adata_int.obsm[\"X_emb\"] = np.exp(-distance_matrix)\n    print(f\"Step 5 completed in {time.time() - start_time:.2f} seconds\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8183160804372553,
                "model_info": "Step 1 completed in 0.42 seconds\nStep 2 completed in 3.77 seconds\nStep 3 completed in 0.29 seconds\nStep 4 completed in 1.42 seconds\nStep 5 completed in 0.24 seconds",
                "lineage": "133701"
            }
        ],
        "1338": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    pairwise_dist = pairwise_distances(adata_int.obsm[\"X_pca\"])\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_dist)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "133801"
            }
        ],
        "1339": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    print(f\"Shapes: X: {adata_int.X.shape}, X_pca: {adata_int.obsm['X_pca'].shape}, X_emb: {adata_int.obsm['X_emb'].shape}\")\n    print(f\"Data types: X: {adata_int.X.dtype}, X_pca: {adata_int.obsm['X_pca'].dtype}, X_emb: {adata_int.obsm['X_emb'].dtype}\")\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Shapes: X: (5000, 2000), X_pca: (5000, 50), X_emb: (5000, 5000)\nData types: X: float64, X_pca: float32, X_emb: float32",
                "lineage": "133901"
            }
        ],
        "1340": [],
        "1341": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    adata_int.obsm[\"X_pca\"] = adata_int.X @ np.linalg.pinv(adata_int.X.T @ adata_int.X) @ adata_int.X.T\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.43853748675869225,
                "model_info": "",
                "lineage": "134101"
            }
        ],
        "1342": [],
        "1343": [],
        "1344": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    if adata_int.shape[0] < 1000:\n        print(\"Warning: Insufficient training data volume. Consider increasing sample size.\")\n\n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=40)\n\n    pca_results = adata_int.obsm[\"X_pca\"]\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(pca_results))\n\n    print(\"Model layers: \", [layer for layer in adata_int.layers.keys()])\n    print(\"Frozen layers: \", [layer for layer in adata_int.layers.keys() if layer.startswith('frozen')])\n    print(\"Trainable layers: \", [layer for layer in adata_int.layers.keys() if not layer.startswith('frozen')])\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8183160804372553,
                "model_info": "Model layers:  ['counts', 'normalized']\nFrozen layers:  []\nTrainable layers:  ['counts', 'normalized']",
                "lineage": "134401"
            }
        ],
        "1345": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.normalize_total(adata_int, target_sum=1e4)\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8162565498459555,
                "model_info": "",
                "lineage": "134501"
            }
        ],
        "1346": [],
        "1347": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8133691847928266,
                "model_info": "",
                "lineage": "134701"
            }
        ],
        "1348": [],
        "1349": [],
        "1350": [],
        "1351": [],
        "1352": [],
        "1353": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    dataset_stats = {\n        \"n_cells\": adata_int.n_obs,\n        \"n_genes\": adata_int.n_vars,\n        \"n_batches\": adata_int.obs['batch'].nunique(),\n        \"batch_distribution\": adata_int.obs['batch'].value_counts().to_dict()\n    }\n    print(\"Dataset Statistics:\", dataset_stats)\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "Dataset Statistics: {'n_cells': 5000, 'n_genes': 29335, 'n_batches': 12, 'batch_distribution': {'D496': 1289, 'D503': 1199, '640C': 544, '637C': 395, 'A36': 362, 'A29': 271, 'A31': 202, '621B': 184, '582C': 174, 'A35': 165, 'A37': 146, 'A52': 69}}",
                "lineage": "135301"
            }
        ],
        "1354": [],
        "1355": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "",
                "lineage": "135501"
            }
        ],
        "1356": [],
        "1357": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n    \n    feature_distribution_assumptions = {\n        \"highly_variable_genes\": \"Assuming feature distributions are Gaussian after normalization and scaling.\",\n        \"batch_effect_removal\": \"Assuming batch effects are adequately removed using ComBat.\"\n    }\n    print(feature_distribution_assumptions)\n\n    adata_int.obsm[\"X_emb\"] = np.exp(-pairwise_distances(adata_int.obsm[\"X_pca\"]))\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.8216946088968662,
                "model_info": "{'highly_variable_genes': 'Assuming feature distributions are Gaussian after normalization and scaling.', 'batch_effect_removal': 'Assuming batch effects are adequately removed using ComBat.'}",
                "lineage": "135701"
            }
        ],
        "1358": [],
        "1359": [
            {
                "code": "import scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.highly_variable_genes(adata_int, flavor='seurat', n_top_genes=2000)\n    adata_int = adata_int[:, adata_int.var['highly_variable']]\n    \n    sc.pp.combat(adata_int, key='batch')\n    sc.pp.scale(adata_int, max_value=10)\n\n    sc.pp.pca(adata_int, n_comps=50)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_pca\"]\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXXXXXXXXXXXXXXXX.py",
                "accuracy": 0.7815155850219186,
                "model_info": "",
                "lineage": "135901"
            }
        ]
    },
    "1": {
        "0": [
            {
                "code": "import numpy as np\nimport scanpy as sc\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    pca = PCA(n_components=2)\n    adata_int.obsm[\"X_emb\"] = pca.fit_transform(adata_int.X)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round11_Total_Variational_In.py",
                "accuracy": 0.5565675659924474,
                "model_info": "",
                "lineage": "batch_int_tusoml_round11_Total_Variational_In"
            }
        ],
        "1": [
            {
                "code": "import numpy as np\nimport scanpy as sc\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import QuantileTransformer\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    qt = QuantileTransformer(output_distribution='uniform')\n    adata_int.X = qt.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=2)\n    adata_int.obsm[\"X_emb\"] = pca.fit_transform(adata_int.X)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round11_Total_Variational_InX.py",
                "accuracy": 0.45950977458453485,
                "model_info": "",
                "lineage": "111"
            }
        ],
        "2": [
            {
                "code": "import numpy as np\nimport scanpy as sc\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import QuantileTransformer\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    print(\"Initial data shape:\", adata_int.X.shape)\n    print(\"Batch information:\", adata_int.obs[\"batch\"].unique())\n\n    qt = QuantileTransformer(output_distribution='normal')\n    adata_int.X = qt.fit_transform(adata_int.X)\n    \n    print(\"Transformed data shape:\", adata_int.X.shape)\n    print(\"Feature ranges after transformation:\", adata_int.X.min(axis=0), adata_int.X.max(axis=0))\n\n    scaler = StandardScaler(with_mean=False)\n    adata_int.X = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    \n    tsne = TSNE(n_components=2, perplexity=30)\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(pca_result)\n    \n    print(\"Embedding shape:\", adata_int.obsm[\"X_emb\"].shape)\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round11_Total_Variational_InXXX.py",
                "accuracy": 0.5117353415555033,
                "model_info": "Initial data shape: (5000, 29335)\nBatch information: ['D496', '621B', '637C', 'D503', 'A29', ..., 'A36', '582C', 'A35', 'A37', 'A52']\nLength: 12\nCategories (12, object): ['582C', '621B', '637C', '640C', ..., 'A37', 'A52', 'D496', 'D503']\nTransformed data shape: (5000, 29335)\nFeature ranges after transformation:   (0, 3787)\t-0.06660489\n  (0, 3864)\t-2.2071593\n  (0, 3866)\t-1.5088098\n  (0, 3880)\t-0.24491368\n  (0, 3881)\t-0.29936722\n  (0, 3987)\t-0.21817197\n  (0, 4051)\t-0.5151345\n  (0, 4192)\t-0.387888\n  (0, 5794)\t-0.4124658\n  (0, 5834)\t-0.38568458\n  (0, 5907)\t-0.39333257\n  (0, 5914)\t-0.1888139\n  (0, 6381)\t-0.29970708\n  (0, 6408)\t-0.38557756\n  (0, 6413)\t-0.28707907\n  (0, 6426)\t-0.44805992\n  (0, 6464)\t-0.16322543\n  (0, 6465)\t-1.2938024\n  (0, 6467)\t-0.07309023\n  (0, 6501)\t-0.2121591\n  (0, 6523)\t-0.14104971\n  (0, 6529)\t-0.016504286\n  (0, 6530)\t-0.23985666\n  (0, 6531)\t-0.27080992\n  (0, 6567)\t-0.52650034\n  :\t:\n  (0, 23763)\t-0.2794947\n  (0, 23768)\t-0.4284926\n  (0, 23773)\t-0.022057727\n  (0, 23776)\t-0.08231263\n  (0, 23820)\t-1.1191927\n  (0, 24039)\t-0.25277135\n  (0, 24077)\t-0.036715753\n  (0, 24136)\t-0.06228292\n  (0, 24137)\t-0.077398606\n  (0, 24152)\t-1.0715039\n  (0, 24173)\t-0.042082276\n  (0, 24179)\t-0.56740236\n  (0, 24187)\t-0.69055974\n  (0, 24192)\t-1.141603\n  (0, 24392)\t-0.9076581\n  (0, 24405)\t-1.0581933\n  (0, 24406)\t-0.3474806\n  (0, 24407)\t-1.22645\n  (0, 26339)\t-0.7188943\n  (0, 26775)\t-0.107070796\n  (0, 26787)\t-0.21124616\n  (0, 26933)\t-0.7121868\n  (0, 27277)\t-0.044619195\n  (0, 27623)\t-0.45843825\n  (0, 27763)\t-0.88369197   (0, 0)\t5.1993375\n  (0, 1)\t5.1993375\n  (0, 2)\t5.1993375\n  (0, 3)\t5.1993375\n  (0, 4)\t5.1993375\n  (0, 5)\t5.1993375\n  (0, 6)\t5.1993375\n  (0, 8)\t5.1993375\n  (0, 9)\t5.1993375\n  (0, 10)\t5.1993375\n  (0, 11)\t5.1993375\n  (0, 12)\t5.1993375\n  (0, 15)\t5.1993375\n  (0, 16)\t5.1993375\n  (0, 17)\t5.1993375\n  (0, 18)\t5.1993375\n  (0, 19)\t5.1993375\n  (0, 20)\t5.1993375\n  (0, 21)\t5.1993375\n  (0, 22)\t5.1993375\n  (0, 23)\t5.1993375\n  (0, 24)\t5.1993375\n  (0, 25)\t5.1993375\n  (0, 26)\t5.1993375\n ",
                "lineage": "211"
            }
        ],
        "3": [],
        "4": [
            {
                "code": "from sklearn.metrics import silhouette_score\nimport numpy as np\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    tsne = TSNE(n_components=2, init=\"random\")\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(adata_int.X)\n\n    batch_labels = adata_int.obs[\"batch\"].values\n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], batch_labels)\n    adata_int.uns[\"silhouette_score\"] = silhouette_avg\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyX.py",
                "accuracy": 0.5233947400973926,
                "model_info": "",
                "lineage": "411"
            }
        ],
        "6": [
            {
                "code": "import numpy as np\nimport logging\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nimport umap\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    batch_labels = adata_int.obs[\"batch\"].values\n    scaler = StandardScaler(with_mean=False)\n    adata_int.X = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    \n    umap_model = umap.UMAP()\n    adata_int.obsm[\"X_emb\"] = umap_model.fit_transform(pca_result)\n\n    logging.info(f'Batch integration model initialized with {adata_int.n_obs} observations and {adata_int.X.shape[1]} features.')\n    logging.info(f'Using device: {np.__version__}, numpy version: {np.__version__}')\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round10_scVI_single_cell_VarXX.py",
                "accuracy": 0.5120684880814405,
                "model_info": "",
                "lineage": "611"
            }
        ],
        "7": [
            {
                "code": "import numpy as np\nfrom sklearn.model_selection import KFold\nfrom sklearn.metrics import silhouette_score\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.cluster import KMeans\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    batches = adata_int.obs[\"batch\"].values\n    unique_batches = np.unique(batches)\n    \n    embeddings = []\n    for batch in unique_batches:\n        batch_data = adata_int[adata_int.obs[\"batch\"] == batch].X\n        scaler = StandardScaler(with_mean=False)\n        batch_data_scaled = scaler.fit_transform(batch_data)\n        pca = PCA(n_components=50)\n        batch_embedding = pca.fit_transform(batch_data_scaled)\n        embeddings.append(batch_embedding)\n\n    adata_int.obsm[\"X_emb\"] = np.concatenate(embeddings, axis=0)\n\n    kf = KFold(n_splits=5)\n    scores = []\n    for train_index, test_index in kf.split(adata_int.obsm[\"X_emb\"]):\n        train_data, test_data = adata_int.obsm[\"X_emb\"][train_index], adata_int.obsm[\"X_emb\"][test_index]\n        kmeans = KMeans(n_clusters=len(np.unique(batches[train_index])), random_state=42)\n        kmeans.fit(train_data)\n        score = silhouette_score(train_data, kmeans.labels_)\n        scores.append(score)\n\n    mean_score = np.mean(scores)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.41911190271684723,
                "model_info": "",
                "lineage": "711"
            }
        ],
        "8": [],
        "9": [
            {
                "code": "import numpy as np\nfrom sklearn.metrics import silhouette_score\nimport scanpy as sc\nimport time\nimport resource\n\ndef tuso_model(adata):\n    start_time = time.time()\n    mem_usage_start = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.scale(adata_int)\n    sc.pp.pca(adata_int, n_comps=50)\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    batch_labels = adata.obs[\"batch\"].values\n    unique_batches = np.unique(batch_labels)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    metrics = {}\n    for batch in unique_batches:\n        batch_mask = (batch_labels == batch)\n        if np.sum(batch_mask) > 1:\n            unique_labels = np.unique(batch_labels[batch_mask])\n            if len(unique_labels) > 1:\n                metrics[batch] = silhouette_score(adata_int.obsm[\"X_emb\"][batch_mask], batch_labels[batch_mask])\n\n    weighted_score = np.mean(list(metrics.values())) if metrics else 0\n    adata_int.uns[\"integration_score\"] = weighted_score\n\n    end_time = time.time()\n    mem_usage_end = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n    print(f\"Time taken: {end_time - start_time:.2f}s, Memory usage: {mem_usage_end - mem_usage_start} KB\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round16_DESC_Deep_Embedding_XXX.py",
                "accuracy": 0.5627420290315447,
                "model_info": "Time taken: 41.78s, Memory usage: 2615832 KB",
                "lineage": "911"
            }
        ],
        "10": []
    },
    "2": {
        "0": [
            {
                "code": "def tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    adata_int.obsm[\"X_emb\"] = adata_int.X  # Placeholder for harmony.run_harmony\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_Harmony.py",
                "accuracy": 0.6886743344248721,
                "model_info": "",
                "lineage": "batch_int_tusoml_round12_Harmony"
            }
        ],
        "1": [],
        "2": [
            {
                "code": "import numpy as np\nfrom sklearn.model_selection import KFold\nfrom sklearn.metrics import silhouette_score\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    batches = adata_int.obs[\"batch\"].values\n    unique_batches = np.unique(batches)\n    \n    embeddings = []\n    for batch in unique_batches:\n        batch_data = adata_int[adata_int.obs[\"batch\"] == batch].X\n        scaler = StandardScaler(with_mean=False)\n        batch_data_scaled = scaler.fit_transform(batch_data)\n        pca = PCA(n_components=50)\n        batch_embedding = pca.fit_transform(batch_data_scaled)\n        embeddings.append(batch_embedding)\n\n    adata_int.obsm[\"X_emb\"] = np.concatenate(embeddings, axis=0)\n\n    kf = KFold(n_splits=5)\n    scores = []\n    for train_index, test_index in kf.split(adata_int.obsm[\"X_emb\"]):\n        train_data, test_data = adata_int.obsm[\"X_emb\"][train_index], adata_int.obsm[\"X_emb\"][test_index]\n        score = silhouette_score(train_data, batches[train_index])\n        scores.append(score)\n\n    mean_score = np.mean(scores)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyX.py",
                "accuracy": 0.41911190271684723,
                "model_info": "",
                "lineage": "221"
            }
        ],
        "3": [
            {
                "code": "from sklearn.decomposition import PCA\nimport numpy as np\n\ndef combat_adjustment(X, batch):\n    unique_batches = np.unique(batch)\n    adjusted_X = X.copy()\n    for b in unique_batches:\n        batch_mask = (batch == b)\n        adjusted_X[batch_mask] = X[batch_mask] - np.mean(X[batch_mask], axis=0) + np.mean(X)\n    return adjusted_X\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    adata_int.X = combat_adjustment(adata_int.X, adata_int.obs[\"batch\"].values)\n    \n    pca = PCA(n_components=2)\n    adata_int.obsm[\"X_emb\"] = pca.fit_transform(adata_int.X)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round19_scTransform_feature_X.py",
                "accuracy": 0.5013525754118573,
                "model_info": "",
                "lineage": "321"
            }
        ],
        "4": [
            {
                "code": "from sklearn.decomposition import PCA\nfrom sklearn.manifold import TSNE\nfrom sklearn.metrics import silhouette_score\nimport numpy as np\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    pca = PCA(n_components=50)\n    pca_embeddings = pca.fit_transform(adata_int.X)\n    \n    tsne = TSNE(n_components=2, random_state=42)\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(pca_embeddings)\n\n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], adata_int.obs[\"batch\"])\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round19_scTransform_feature_X.py",
                "accuracy": 0.5118787074498935,
                "model_info": "",
                "lineage": "421"
            }
        ],
        "5": [],
        "6": [
            {
                "code": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\n\nclass AttentionLayer(nn.Module):\n    def __init__(self, input_dim):\n        super(AttentionLayer, self).__init__()\n        self.query = nn.Linear(input_dim, input_dim)\n        self.key = nn.Linear(input_dim, input_dim)\n        self.value = nn.Linear(input_dim, input_dim)\n\n    def forward(self, x):\n        Q = self.query(x)\n        K = self.key(x)\n        V = self.value(x)\n        attention_scores = F.softmax(Q @ K.T / (K.size(-1) ** 0.5), dim=-1)\n        return attention_scores @ V\n\nclass TusoModel(nn.Module):\n    def __init__(self, input_dim):\n        super(TusoModel, self).__init__()\n        self.attention = AttentionLayer(input_dim)\n        self.fc = nn.Linear(input_dim, 2)\n\n    def forward(self, x):\n        x = self.attention(x)\n        x = self.fc(x)\n        return x\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    model = TusoModel(adata_int.X.shape[1])\n    model.eval()\n    with torch.no_grad():\n        adata_int.obsm[\"X_emb\"] = model(torch.tensor(adata_int.X.toarray()).float()).detach().numpy()\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyX.py",
                "accuracy": 0.37288417731685763,
                "model_info": "",
                "lineage": "621"
            }
        ],
        "7": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom scipy.optimize import minimize\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "",
                "lineage": "721"
            }
        ],
        "8": [
            {
                "code": "import numpy as np\nimport pandas as pd\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    batch = adata_int.obs[\"batch\"].values\n    # Assuming combat is not available, we skip batch correction\n    # adata_int.X = combat.combat(adata_int.X, batch)\n\n    scaler = StandardScaler(with_mean=False)\n    X_scaled = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50)\n    X_emb = pca.fit_transform(X_scaled)\n\n    adata_int.obsm[\"X_emb\"] = X_emb\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.7534522379319218,
                "model_info": "",
                "lineage": "821"
            }
        ]
    },
    "4": {
        "0": [
            {
                "code": "import numpy as np\nimport scanpy as sc\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    sc.pp.pca(adata_int)\n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2).fit_transform(adata_int.obsm[\"X_pca\"])\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round14_BBKNN_Batch_Balanced.py",
                "accuracy": 0.5081557581444683,
                "model_info": "",
                "lineage": "batch_int_tusoml_round14_BBKNN_Batch_Balanced"
            }
        ],
        "1": [],
        "2": [
            {
                "code": "import numpy as np\nimport scanpy as sc\nfrom sklearn.manifold import TSNE\nfrom sklearn.neural_network import MLPRegressor\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    sc.pp.pca(adata_int)\n    \n    input_dim = adata_int.obsm[\"X_pca\"].shape[1]\n    model = MLPRegressor(hidden_layer_sizes=(64, 32, 64), activation='relu', max_iter=100, batch_size=256)\n    \n    model.fit(adata_int.obsm[\"X_pca\"], adata_int.obsm[\"X_pca\"])\n    \n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2).fit_transform(model.predict(adata_int.obsm[\"X_pca\"]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round14_BBKNN_Batch_BalancedX.py",
                "accuracy": 0.5131589360927505,
                "model_info": "",
                "lineage": "241"
            }
        ],
        "3": [
            {
                "code": "import numpy as np\nimport logging\nfrom sklearn.decomposition import PCA\nfrom sklearn.manifold import TSNE\nfrom sklearn.metrics import silhouette_score\nimport scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    pca = PCA(n_components=50)\n    pca_embeddings = pca.fit_transform(adata_int.X)\n    \n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2).fit_transform(pca_embeddings)\n    \n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], adata_int.obs[\"batch\"])\n    logging.info(f'Silhouette score of the embedding: {silhouette_avg:.4f}')\n    \n    logging.info(f'Batch integration model initialized with {adata_int.n_obs} observations and {adata_int.X.shape[1]} features.')\n    logging.info(f'Using device: {np.__version__}, numpy version: {np.__version__}')\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round10_scVI_single_cell_VarXX.py",
                "accuracy": 0.5083089903435919,
                "model_info": "",
                "lineage": "341"
            }
        ],
        "4": [
            {
                "code": "import numpy as np\nfrom sklearn.decomposition import PCA\nfrom sklearn.manifold import TSNE\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    batch_labels = adata.obs[\"batch\"].values\n    unique_batches = np.unique(batch_labels)\n\n    pca = PCA(n_components=50)\n    pca_embeddings = pca.fit_transform(adata_int.X)\n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2, init=\"random\").fit_transform(pca_embeddings)\n\n    metrics = {}\n    for batch in unique_batches:\n        batch_mask = (batch_labels == batch)\n        if np.sum(batch_mask) > 1:\n            unique_labels = np.unique(batch_labels[batch_mask])\n            if len(unique_labels) > 1:\n                metrics[batch] = silhouette_score(adata_int.obsm[\"X_emb\"][batch_mask], batch_labels[batch_mask])\n\n    weighted_score = np.mean(list(metrics.values())) if metrics else 0\n    adata_int.uns[\"integration_score\"] = weighted_score\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round16_DESC_Deep_Embedding_XX.py",
                "accuracy": 0.48492240369225686,
                "model_info": "",
                "lineage": "441"
            }
        ],
        "5": [
            {
                "code": "import numpy as np\nimport logging\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    adata_int.obsm[\"X_emb\"] = np.empty((adata_int.n_obs, adata_int.X.shape[1]))\n    \n    logging.info(f'Batch integration model initialized with {adata_int.n_obs} observations and {adata_int.X.shape[1]} features.')\n    logging.info(f'Using device: {np.__version__}, numpy version: {np.__version__}')\n    \n    def log_performance_snapshot(epoch, loss, accuracy):\n        logging.info(f'Epoch: {epoch}, Loss: {loss:.4f}, Accuracy: {accuracy:.4f}')\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round10_scVI_single_cell_VarXX.py",
                "accuracy": 0.6367871896620461,
                "model_info": "",
                "lineage": "541"
            }
        ],
        "6": []
    },
    "5": {
        "0": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    tsne = TSNE(n_components=2, random_state=42, init='random')\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(adata_int.X)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round15_Seurat_Integration.py",
                "accuracy": 0.5233947400973926,
                "model_info": "",
                "lineage": "batch_int_tusoml_round15_Seurat_Integration"
            }
        ],
        "1": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\nfrom sklearn.calibration import calibration_curve\nfrom sklearn.preprocessing import LabelEncoder\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    tsne = TSNE(n_components=2, random_state=42, init='random')\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(adata_int.X)\n\n    true_labels = adata_int.obs[\"batch\"]\n    label_encoder = LabelEncoder()\n    true_labels_encoded = label_encoder.fit_transform(true_labels)\n\n    probas = np.random.rand(len(true_labels_encoded))\n    fraction_of_positives, mean_predicted_value = calibration_curve((true_labels_encoded == 0).astype(int), probas, n_bins=10)\n\n    print(f\"Calibration Error Metrics: Fraction of Positives: {fraction_of_positives}, Mean Predicted Value: {mean_predicted_value}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round15_Seurat_IntegrationX.py",
                "accuracy": 0.5233947400973926,
                "model_info": "Calibration Error Metrics: Fraction of Positives: [0.05142857 0.03373016 0.03984064 0.01992032 0.04068522 0.04008016\n 0.02824859 0.03397028 0.02115385 0.03966597], Mean Predicted Value: [0.05041784 0.14934561 0.25054538 0.35201055 0.44832482 0.55063835\n 0.65025453 0.75022741 0.84883908 0.95148749]",
                "lineage": "151"
            }
        ],
        "2": [],
        "3": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\nfrom sklearn.feature_selection import VarianceThreshold\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    batch_labels = adata.obs[\"batch\"].values\n    scaler = StandardScaler(with_mean=False)\n    adata_int.X = scaler.fit_transform(adata_int.X)\n    \n    variance_threshold = VarianceThreshold(threshold=0.5)\n    filtered_X = variance_threshold.fit_transform(adata_int.X)\n    \n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(filtered_X)\n    \n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2, init=\"random\").fit_transform(pca_result)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round18_JOINTLY_joint_clusteXX.py",
                "accuracy": 0.5224110649658958,
                "model_info": "",
                "lineage": "351"
            }
        ],
        "4": [],
        "5": [
            {
                "code": "import numpy as np\nfrom sklearn.model_selection import KFold\nfrom sklearn.metrics import silhouette_score\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.cluster import KMeans\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    batches = adata_int.obs[\"batch\"].values\n    unique_batches = np.unique(batches)\n    \n    embeddings = []\n    for batch in unique_batches:\n        batch_data = adata_int[adata_int.obs[\"batch\"] == batch].X\n        scaler = StandardScaler(with_mean=False)\n        batch_data_scaled = scaler.fit_transform(batch_data)\n        pca = PCA(n_components=50)\n        batch_embedding = pca.fit_transform(batch_data_scaled)\n        embeddings.append(batch_embedding)\n\n    adata_int.obsm[\"X_emb\"] = np.concatenate(embeddings, axis=0)\n\n    kf = KFold(n_splits=5)\n    scores = []\n    for train_index, test_index in kf.split(adata_int.obsm[\"X_emb\"]):\n        train_data, test_data = adata_int.obsm[\"X_emb\"][train_index], adata_int.obsm[\"X_emb\"][test_index]\n        kmeans = KMeans(n_clusters=len(unique_batches))\n        kmeans.fit(train_data)\n        score = silhouette_score(train_data, kmeans.labels_)\n        scores.append(score)\n\n    mean_score = np.mean(scores)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.41911190271684723,
                "model_info": "",
                "lineage": "551"
            }
        ]
    },
    "6": {
        "0": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    batch_labels = adata.obs[\"batch\"].values\n    unique_batches = np.unique(batch_labels)\n\n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2, init=\"random\").fit_transform(adata_int.X)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round16_DESC_Deep_Embedding_.py",
                "accuracy": 0.5233947400973926,
                "model_info": "",
                "lineage": "batch_int_tusoml_round16_DESC_Deep_Embedding_"
            }
        ],
        "1": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    batch_labels = adata.obs[\"batch\"].values\n    unique_batches = np.unique(batch_labels)\n\n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2, init=\"random\").fit_transform(adata_int.X)\n\n    metrics = {}\n    for batch in unique_batches:\n        batch_mask = (batch_labels == batch)\n        if np.sum(batch_mask) > 1:\n            unique_labels = np.unique(batch_labels[batch_mask])\n            if len(unique_labels) > 1:\n                metrics[batch] = silhouette_score(adata_int.obsm[\"X_emb\"][batch_mask], batch_labels[batch_mask])\n\n    weighted_score = np.mean(list(metrics.values())) if metrics else 0\n    adata_int.uns[\"integration_score\"] = weighted_score\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round16_DESC_Deep_Embedding_X.py",
                "accuracy": 0.5233947400973926,
                "model_info": "",
                "lineage": "161"
            }
        ],
        "2": [
            {
                "code": "from sklearn.decomposition import PCA\nimport scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.scale(adata_int)\n    pca = PCA(n_components=2)\n    adata_int.obsm[\"X_emb\"] = pca.fit_transform(adata_int.X)\n\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round19_scTransform_feature_X.py",
                "accuracy": 0.5179505672834854,
                "model_info": "",
                "lineage": "261"
            }
        ],
        "3": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\nfrom sklearn.calibration import calibration_curve\nfrom sklearn.preprocessing import StandardScaler, LabelEncoder\nfrom sklearn.decomposition import PCA\nfrom scipy.stats import skew, kurtosis\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = StandardScaler(with_mean=False)\n    adata_int.X = scaler.fit_transform(adata_int.X)\n\n    pca = PCA(n_components=50, random_state=42)\n    pca_result = pca.fit_transform(adata_int.X)\n\n    tsne = TSNE(n_components=2, random_state=42, init='random')\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(pca_result)\n\n    true_labels = adata_int.obs[\"batch\"]\n    label_encoder = LabelEncoder()\n    true_labels_encoded = label_encoder.fit_transform(true_labels)\n\n    probas = np.random.rand(len(true_labels_encoded))\n    fraction_of_positives, mean_predicted_value = calibration_curve((true_labels_encoded == 0).astype(int), probas, n_bins=10)\n\n    numeric_predictors = adata_int.X.toarray() if hasattr(adata_int.X, \"toarray\") else adata_int.X\n    skewness = skew(numeric_predictors, axis=0)\n    kurt = kurtosis(numeric_predictors, axis=0)\n\n    print(f\"Calibration Error Metrics: Fraction of Positives: {fraction_of_positives}, Mean Predicted Value: {mean_predicted_value}\")\n    print(f\"Skewness: {skewness}, Kurtosis: {kurt}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round15_Seurat_IntegrationXXX.py",
                "accuracy": 0.5253364897434168,
                "model_info": "Calibration Error Metrics: Fraction of Positives: [0.05142857 0.03373016 0.03984064 0.01992032 0.04068522 0.04008016\n 0.02824859 0.03397028 0.02115385 0.03966597], Mean Predicted Value: [0.05041784 0.14934561 0.25054538 0.35201055 0.44832482 0.55063835\n 0.65025453 0.75022741 0.84883908 0.95148749]\nSkewness: [ 2.1138082  6.2967834 22.106188  ...        nan        nan 62.236595 ], Kurtosis: [3.5270238e+00 4.1759003e+01 5.1504309e+02 ...           nan           nan\n 4.0535352e+03]",
                "lineage": "361"
            }
        ],
        "4": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\nimport pandas as pd\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    batch_labels = adata.obs[\"batch\"].values\n    adata_int.X = adata_int.X - np.mean(adata_int.X, axis=0)  # Simple batch correction\n\n    scaler = StandardScaler(with_mean=False)\n    adata_int.X = scaler.fit_transform(adata_int.X)\n    \n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    \n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2, init=\"random\").fit_transform(pca_result)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round18_JOINTLY_joint_clusteXX.py",
                "accuracy": 0.5336661210455095,
                "model_info": "",
                "lineage": "461"
            }
        ],
        "5": [],
        "6": []
    },
    "7": {
        "0": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    batch_labels = adata.obs[\"batch\"].values\n    tsne = TSNE(init=\"random\")\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(adata_int.X)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round17_scBridge_multi_omics.py",
                "accuracy": 0.5233947400973926,
                "model_info": "",
                "lineage": "batch_int_tusoml_round17_scBridge_multi_omics"
            }
        ],
        "1": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\nfrom sklearn.preprocessing import RobustScaler\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = RobustScaler(with_centering=False)\n    adata_int.X = scaler.fit_transform(adata_int.X)\n\n    batch_labels = adata.obs[\"batch\"].values\n    tsne = TSNE(init=\"random\")\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(adata_int.X)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round17_scBridge_multi_omicsX.py",
                "accuracy": 0.3807518754066457,
                "model_info": "",
                "lineage": "171"
            }
        ],
        "2": [
            {
                "code": "import numpy as np\nimport logging\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\nimport umap\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    batch = adata_int.obs[\"batch\"].values\n    scaler = StandardScaler(with_mean=False)\n    adata_int.X = scaler.fit_transform(adata_int.X)\n    \n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    \n    umap_model = umap.UMAP()\n    adata_int.obsm[\"X_emb\"] = umap_model.fit_transform(pca_result)\n    \n    logging.info(f'Batch integration model initialized with {adata_int.n_obs} observations and {adata_int.X.shape[1]} features.')\n    logging.info(f'Using device: {np.__version__}, numpy version: {np.__version__}')\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round10_scVI_single_cell_VarXX.py",
                "accuracy": 0.5101706880252492,
                "model_info": "",
                "lineage": "271"
            }
        ],
        "3": [],
        "4": [
            {
                "code": "import numpy as np\nfrom sklearn.model_selection import KFold\nfrom sklearn.metrics import silhouette_score\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    batches = adata_int.obs[\"batch\"].values\n    unique_batches = np.unique(batches)\n    \n    embeddings = []\n    for batch in unique_batches:\n        batch_data = adata_int[adata_int.obs[\"batch\"] == batch].X\n        scaler = StandardScaler(with_mean=False)\n        batch_data_scaled = scaler.fit_transform(batch_data)\n        pca = PCA(n_components=50)\n        batch_embedding = pca.fit_transform(batch_data_scaled)\n        embeddings.append(batch_embedding)\n\n    adata_int.obsm[\"X_emb\"] = np.concatenate(embeddings, axis=0)\n\n    kf = KFold(n_splits=5)\n    scores = []\n    for train_index, test_index in kf.split(adata_int.obsm[\"X_emb\"]):\n        train_data, test_data = adata_int.obsm[\"X_emb\"][train_index], adata_int.obsm[\"X_emb\"][test_index]\n        score = silhouette_score(train_data, batches[train_index])\n        scores.append(score)\n\n    mean_score = np.mean(scores)\n    print(f\"Mean Silhouette Score: {mean_score:.4f}\")\n    print(f\"Input Feature Scaling: {np.mean([np.linalg.norm(scaler.scale_) for batch in unique_batches]):.4f}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round12_HarmonyXX.py",
                "accuracy": 0.41911190271684723,
                "model_info": "Mean Silhouette Score: -0.0434\nInput Feature Scaling: 105.3756",
                "lineage": "471"
            }
        ],
        "5": []
    },
    "8": {
        "0": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    batch_labels = adata.obs[\"batch\"].values\n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2, init=\"random\").fit_transform(adata_int.X)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round18_JOINTLY_joint_cluste.py",
                "accuracy": 0.5233947400973926,
                "model_info": "",
                "lineage": "batch_int_tusoml_round18_JOINTLY_joint_cluste"
            }
        ],
        "1": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    batch_labels = adata.obs[\"batch\"].values\n    scaler = StandardScaler(with_mean=False)\n    adata_int.X = scaler.fit_transform(adata_int.X)\n    \n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    \n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2, init=\"random\").fit_transform(pca_result)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round18_JOINTLY_joint_clusteX.py",
                "accuracy": 0.5243656171378873,
                "model_info": "",
                "lineage": "181"
            }
        ],
        "2": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\nfrom sklearn.preprocessing import RobustScaler\nfrom sklearn.decomposition import PCA\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    scaler = RobustScaler(with_centering=False)\n    adata_int.X = scaler.fit_transform(adata_int.X)\n\n    batch_labels = adata.obs[\"batch\"].values\n    pca = PCA(n_components=50)\n    batch_pca = pca.fit_transform(adata_int.X)\n    \n    tsne = TSNE(init=\"random\")\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(batch_pca)\n\n    silhouette_avg = silhouette_score(adata_int.obsm[\"X_emb\"], batch_labels)\n    adata_int.uns[\"silhouette_score\"] = silhouette_avg\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round17_scBridge_multi_omicsXX.py",
                "accuracy": 0.4330131206174825,
                "model_info": "",
                "lineage": "281"
            }
        ],
        "3": [],
        "4": [
            {
                "code": "import numpy as np\nimport scanpy as sc\nfrom sklearn.manifold import TSNE\nfrom sklearn.neural_network import MLPRegressor\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    sc.pp.pca(adata_int)\n    \n    input_dim = adata_int.obsm[\"X_pca\"].shape[1]\n    model = make_pipeline(StandardScaler(), MLPRegressor(hidden_layer_sizes=(64, 32, 64), activation='relu', max_iter=100, batch_size=256))\n    \n    model.fit(adata_int.obsm[\"X_pca\"], adata_int.obsm[\"X_pca\"])\n    \n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2).fit_transform(model.predict(adata_int.obsm[\"X_pca\"]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round14_BBKNN_Batch_BalancedXX.py",
                "accuracy": 0.512795715790354,
                "model_info": "",
                "lineage": "481"
            }
        ]
    },
    "9": {
        "0": [
            {
                "code": "from sklearn.decomposition import PCA\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    pca = PCA(n_components=2)\n    adata_int.obsm[\"X_emb\"] = pca.fit_transform(adata_int.X)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round19_scTransform_feature_.py",
                "accuracy": 0.5565675659924474,
                "model_info": "",
                "lineage": "batch_int_tusoml_round19_scTransform_feature_"
            }
        ],
        "1": [],
        "2": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\nfrom sklearn.neural_network import MLPRegressor\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    batch_labels = adata.obs[\"batch\"].values\n    scaler = StandardScaler(with_mean=False)\n    adata_int.X = scaler.fit_transform(adata_int.X)\n    \n    pca = PCA(n_components=50)\n    pca_result = pca.fit_transform(adata_int.X)\n    \n    mlp = MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000)\n    mlp.fit(pca_result, pca_result)\n    pca_transformed = mlp.predict(pca_result)\n    \n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2, init=\"random\").fit_transform(pca_transformed)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round18_JOINTLY_joint_clusteXX.py",
                "accuracy": 0.5200685346738626,
                "model_info": "",
                "lineage": "291"
            }
        ]
    },
    "10": {
        "0": [
            {
                "code": "import numpy as np\nimport scanpy as sc\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    tsne = TSNE(n_components=2, random_state=42, init=\"random\")\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(adata_int.X.toarray())\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round110_Graph_based_methods_.py",
                "accuracy": 0.5233947400973926,
                "model_info": "",
                "lineage": "batch_int_tusoml_round110_Graph_based_methods_"
            }
        ],
        "1": [
            {
                "code": "import numpy as np\nimport scanpy as sc\nfrom sklearn.preprocessing import RobustScaler\nfrom sklearn.manifold import TSNE\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    scaler = RobustScaler()\n    adata_int.X = scaler.fit_transform(adata_int.X.toarray())\n    \n    tsne = TSNE(n_components=2, random_state=42, init=\"random\")\n    adata_int.obsm[\"X_emb\"] = tsne.fit_transform(adata_int.X)\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round110_Graph_based_methods_X.py",
                "accuracy": 0.3811635972691511,
                "model_info": "",
                "lineage": "1101"
            }
        ]
    },
    "3": {
        "2": [
            {
                "code": "import numpy as np\nfrom sklearn.manifold import TSNE\nfrom sklearn.metrics import silhouette_score\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    batch_labels = adata.obs[\"batch\"].values\n    unique_batches = np.unique(batch_labels)\n\n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2, init=\"random\").fit_transform(adata_int.X)\n\n    metrics = {}\n    for batch in unique_batches:\n        batch_mask = (batch_labels == batch)\n        if np.sum(batch_mask) > 1:\n            unique_labels = np.unique(batch_labels[batch_mask])\n            if len(unique_labels) > 1:\n                metrics[batch] = silhouette_score(adata_int.obsm[\"X_emb\"][batch_mask], batch_labels[batch_mask])\n\n    weighted_score = np.mean(list(metrics.values())) if metrics else 0\n    adata_int.uns[\"integration_score\"] = weighted_score\n    \n    print(f\"Integration Score: {weighted_score:.4f} | Batch Metrics: {metrics}\")\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round16_DESC_Deep_Embedding_XX.py",
                "accuracy": 0.5233947400973926,
                "model_info": "Integration Score: 0.0000 | Batch Metrics: {}",
                "lineage": "231"
            }
        ],
        "3": [],
        "4": [],
        "5": [
            {
                "code": "import numpy as np\nimport scanpy as sc\nfrom sklearn.manifold import TSNE\nfrom sklearn.neural_network import MLPRegressor\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n    \n    # Assuming batch is a column in obs, if combat is not available, skip this step\n    if \"batch\" in adata_int.obs:\n        # Placeholder for combat adjustment\n        pass\n    \n    sc.pp.pca(adata_int)\n    \n    input_dim = adata_int.obsm[\"X_pca\"].shape[1]\n    model = MLPRegressor(hidden_layer_sizes=(64, 32, 64), activation='relu', max_iter=100, batch_size=256)\n    \n    model.fit(adata_int.obsm[\"X_pca\"], adata_int.obsm[\"X_pca\"])\n    \n    adata_int.obsm[\"X_emb\"] = TSNE(n_components=2).fit_transform(model.predict(adata_int.obsm[\"X_pca\"]))\n    \n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round14_BBKNN_Batch_BalancedXX.py",
                "accuracy": 0.5131589360927505,
                "model_info": "",
                "lineage": "531"
            }
        ],
        "6": [
            {
                "code": "import numpy as np\nfrom sklearn.metrics import silhouette_score\nimport scanpy as sc\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    batch_labels = adata.obs[\"batch\"].values\n    adata_int.X = adata_int.X - np.mean(adata_int.X, axis=0)\n\n    sc.pp.scale(adata_int)\n    sc.pp.pca(adata_int, n_comps=50)\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n\n    metrics = {}\n    unique_batches = np.unique(batch_labels)\n    for batch in unique_batches:\n        batch_mask = (batch_labels == batch)\n        if np.sum(batch_mask) > 1:\n            unique_labels = np.unique(batch_labels[batch_mask])\n            if len(unique_labels) > 1:\n                metrics[batch] = silhouette_score(adata_int.obsm[\"X_emb\"][batch_mask], batch_labels[batch_mask])\n\n    weighted_score = np.mean(list(metrics.values())) if metrics else 0\n    adata_int.uns[\"integration_score\"] = weighted_score\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round16_DESC_Deep_Embedding_XXX.py",
                "accuracy": 0.5540617289317177,
                "model_info": "",
                "lineage": "631"
            }
        ],
        "7": [
            {
                "code": "import numpy as np\nfrom sklearn.metrics import silhouette_score\nimport scanpy as sc\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.cluster import KMeans\n\ndef tuso_model(adata):\n    adata_int = adata.copy()\n    adata_int.X = adata_int.layers[\"normalized\"].copy()\n\n    sc.pp.scale(adata_int)\n    sc.pp.pca(adata_int, n_comps=50)\n    sc.pp.neighbors(adata_int)\n    sc.tl.umap(adata_int)\n\n    adata_int.obsm[\"X_emb\"] = adata_int.obsm[\"X_umap\"]\n    \n    distance_matrix = pairwise_distances(adata_int.obsm[\"X_emb\"])\n    kmeans = KMeans(n_clusters=len(np.unique(adata.obs[\"batch\"])), random_state=42)\n    kmeans.fit(distance_matrix)\n    cluster_labels = kmeans.labels_\n\n    metrics = {}\n    for batch in np.unique(adata.obs[\"batch\"]):\n        batch_mask = (adata.obs[\"batch\"].values == batch)\n        if np.sum(batch_mask) > 1:\n            unique_labels = np.unique(cluster_labels[batch_mask])\n            if len(unique_labels) > 1:\n                metrics[batch] = silhouette_score(adata_int.obsm[\"X_emb\"][batch_mask], cluster_labels[batch_mask])\n\n    weighted_score = np.mean(list(metrics.values())) if metrics else 0\n    adata_int.uns[\"integration_score\"] = weighted_score\n\n    return adata_int",
                "file": "/projects/users/tusoml/batch_int_tusoml_round16_DESC_Deep_Embedding_XXX.py",
                "accuracy": 0.5627420290315447,
                "model_info": "",
                "lineage": "731"
            }
        ]
    }
}