{
    "1": {
        "0": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nimport numpy as np\nfrom sklearn.semi_supervised import SelfTrainingClassifier\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n\n    y_train = adata_train.obs[\"celltype_label\"].values\n    y_train = np.where(y_train == \"unknown\", -1, y_train)\n\n    model = SelfTrainingClassifier(RandomForestClassifier())\n    model.fit(X_train, y_train)\n\n    adata_test.obs[\"celltype_label\"] = model.predict(X_test)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round11_semi_supervised_lear.py",
                "accuracy": 0.5735,
                "model_info": "",
                "lineage": "label_proj_tusoml_round11_semi_supervised_lear"
            }
        ],
        "1": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nimport numpy as np\nfrom sklearn.semi_supervised import SelfTrainingClassifier\nfrom sklearn.preprocessing import RobustScaler\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n\n    scaler = RobustScaler()\n    X_train = scaler.fit_transform(X_train)\n    X_test = scaler.transform(X_test)\n\n    y_train = adata_train.obs[\"celltype_label\"].values\n    y_train = np.where(y_train == \"unknown\", -1, y_train)\n\n    model = SelfTrainingClassifier(RandomForestClassifier())\n    model.fit(X_train, y_train)\n\n    adata_test.obs[\"celltype_label\"] = model.predict(X_test)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round11_semi_supervised_learX.py",
                "accuracy": 0.5735,
                "model_info": "",
                "lineage": "111"
            }
        ],
        "2": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nimport numpy as np\nfrom sklearn.semi_supervised import SelfTrainingClassifier\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n\n    y_train = adata_train.obs[\"celltype_label\"].values\n    y_train = np.where(y_train == \"unknown\", -1, y_train)\n\n    model = SelfTrainingClassifier(make_pipeline(StandardScaler(), RandomForestClassifier()))\n    model.fit(X_train, y_train)\n\n    adata_test.obs[\"celltype_label\"] = model.predict(X_test)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round11_semi_supervised_learX.py",
                "accuracy": 0.5735,
                "model_info": "",
                "lineage": "211"
            }
        ],
        "3": [],
        "4": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.utils import resample\nfrom sklearn.feature_selection import SelectFromModel\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test = adata_test.layers[\"normalized\"].toarray()\n\n    le = LabelEncoder()\n    y_train = le.fit_transform(adata_train.obs[\"celltype_label\"])\n\n    model = RandomForestClassifier()\n    model.fit(X_train, y_train)\n\n    selector = SelectFromModel(model, prefit=True, threshold='mean')\n    X_train_selected = selector.transform(X_train)\n    X_test_selected = selector.transform(X_test)\n\n    unique_labels = np.unique(y_train)\n    augmented_X_train = []\n    augmented_y_train = []\n\n    for label in unique_labels:\n        X_label = X_train_selected[y_train == label]\n        X_resampled = resample(X_label, replace=True, n_samples=X_label.shape[0] * 2, random_state=42)\n        augmented_X_train.append(X_resampled)\n        augmented_y_train.extend([label] * X_resampled.shape[0])\n\n    X_train_augmented = np.vstack(augmented_X_train)\n    y_train_augmented = np.array(augmented_y_train)\n\n    model = RandomForestClassifier()\n    model.fit(X_train_augmented, y_train_augmented)\n\n    y_pred = model.predict(X_test_selected)\n    adata_test.obs[\"celltype_label\"] = le.inverse_transform(y_pred)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round17_deep_transfer_learniXX.py",
                "accuracy": 0.6142,
                "model_info": "",
                "lineage": "411"
            }
        ],
        "5": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.utils import resample\nfrom sklearn.feature_selection import SelectFromModel\nimport numpy as np\nfrom sklearn.decomposition import PCA\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test = adata_test.layers[\"normalized\"].toarray()\n\n    le = LabelEncoder()\n    y_train = le.fit_transform(adata_train.obs[\"celltype_label\"])\n\n    pca = PCA(n_components=0.95)\n    X_train_transformed = pca.fit_transform(X_train)\n    X_test_transformed = pca.transform(X_test)\n\n    model = RandomForestClassifier()\n    model.fit(X_train_transformed, y_train)\n\n    selector = SelectFromModel(model, prefit=True, threshold='mean')\n    X_train_selected = selector.transform(X_train_transformed)\n    X_test_selected = selector.transform(X_test_transformed)\n\n    unique_labels = np.unique(y_train)\n    augmented_X_train = []\n    augmented_y_train = []\n\n    for label in unique_labels:\n        X_label = X_train_selected[y_train == label]\n        X_resampled = resample(X_label, replace=True, n_samples=X_label.shape[0] * 2, random_state=42)\n        augmented_X_train.append(X_resampled)\n        augmented_y_train.extend([label] * X_resampled.shape[0])\n\n    X_train_augmented = np.vstack(augmented_X_train)\n    y_train_augmented = np.array(augmented_y_train)\n\n    model = RandomForestClassifier()\n    model.fit(X_train_augmented, y_train_augmented)\n\n    y_pred = model.predict(X_test_selected)\n    adata_test.obs[\"celltype_label\"] = le.inverse_transform(y_pred)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round17_deep_transfer_learniXXX.py",
                "accuracy": 0.6245,
                "model_info": "",
                "lineage": "511"
            }
        ],
        "6": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.utils import resample\nfrom sklearn.feature_selection import SelectFromModel\nimport numpy as np\nfrom sklearn.decomposition import PCA\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.model_selection import cross_val_score\nimport logging\n\ndef tuso_model(adata_train, adata_test):\n    logging.basicConfig(level=logging.INFO)\n    logging.info(f'Dataset statistics: Train samples: {adata_train.shape[0]}, Test samples: {adata_test.shape[0]}, Features: {adata_train.shape[1]}')\n    \n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test = adata_test.layers[\"normalized\"].toarray()\n\n    le = LabelEncoder()\n    y_train = le.fit_transform(adata_train.obs[\"celltype_label\"])\n\n    pca = PCA(n_components=0.95)\n    X_train_transformed = pca.fit_transform(X_train)\n    X_test_transformed = pca.transform(X_test)\n\n    model = RandomForestClassifier()\n    model.fit(X_train_transformed, y_train)\n\n    selector = SelectFromModel(model, prefit=True, threshold='mean')\n    X_train_selected = selector.transform(X_train_transformed)\n    X_test_selected = selector.transform(X_test_transformed)\n\n    unique_labels = np.unique(y_train)\n    augmented_X_train = []\n    augmented_y_train = []\n\n    for label in unique_labels:\n        X_label = X_train_selected[y_train == label]\n        X_resampled = resample(X_label, replace=True, n_samples=X_label.shape[0] * 2, random_state=42)\n        augmented_X_train.append(X_resampled)\n        augmented_y_train.extend([label] * X_resampled.shape[0])\n\n    X_train_augmented = np.vstack(augmented_X_train)\n    y_train_augmented = np.array(augmented_y_train)\n\n    model = RandomForestClassifier()\n    cv_scores = cross_val_score(model, X_train_augmented, y_train_augmented, cv=5)\n    logging.info(f'Cross-validation scores: {cv_scores}, Mean: {np.mean(cv_scores)}, Std: {np.std(cv_scores)}')\n\n    model.fit(X_train_augmented, y_train_augmented)\n\n    y_pred = model.predict(X_test_selected)\n    adata_test.obs[\"celltype_label\"] = le.inverse_transform(y_pred)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round17_deep_transfer_learniXXXXX.py",
                "accuracy": 0.6192,
                "model_info": "",
                "lineage": "611"
            }
        ]
    },
    "4": {
        "0": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values.reshape(-1, 1)\n\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000))\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_.py",
                "accuracy": 0.8027,
                "model_info": "",
                "lineage": "label_proj_tusoml_round14_multi_task_learning_"
            }
        ],
        "1": []
    },
    "6": {
        "0": [
            {
                "code": "import numpy as np\nimport anndata as ad\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.neural_network import MLPClassifier\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    label_encoder = LabelEncoder()\n    y_train = label_encoder.fit_transform(adata_train.obs[\"celltype_label\"])\n    \n    model = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500)\n    model.fit(X_train, y_train)\n    \n    y_pred = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = label_encoder.inverse_transform(y_pred)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round16_ZINB_model_based_aut.py",
                "accuracy": 0.7601,
                "model_info": "",
                "lineage": "label_proj_tusoml_round16_ZINB_model_based_aut"
            }
        ],
        "1": [
            {
                "code": "import numpy as np\nimport anndata as ad\nfrom sklearn.preprocessing import LabelEncoder, StandardScaler\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    scaler = StandardScaler()\n    X_train = scaler.fit_transform(X_train)\n    X_test = scaler.transform(X_test)\n    \n    pca = PCA(n_components=0.95)\n    X_train = pca.fit_transform(X_train)\n    X_test = pca.transform(X_test)\n    \n    label_encoder = LabelEncoder()\n    y_train = label_encoder.fit_transform(adata_train.obs[\"celltype_label\"])\n    \n    model = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500)\n    model.fit(X_train, y_train)\n    \n    y_pred = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = label_encoder.inverse_transform(y_pred)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round16_ZINB_model_based_autX.py",
                "accuracy": 0.574,
                "model_info": "",
                "lineage": "161"
            }
        ]
    },
    "7": {
        "0": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.preprocessing import LabelEncoder\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test = adata_test.layers[\"normalized\"].toarray()\n\n    le = LabelEncoder()\n    y_train = le.fit_transform(adata_train.obs[\"celltype_label\"])\n\n    model = RandomForestClassifier()\n    model.fit(X_train, y_train)\n\n    y_pred = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = le.inverse_transform(y_pred)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round17_deep_transfer_learni.py",
                "accuracy": 0.5735,
                "model_info": "",
                "lineage": "label_proj_tusoml_round17_deep_transfer_learni"
            }
        ],
        "1": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.utils import resample\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test = adata_test.layers[\"normalized\"].toarray()\n\n    le = LabelEncoder()\n    y_train = le.fit_transform(adata_train.obs[\"celltype_label\"])\n\n    unique_labels = np.unique(y_train)\n    augmented_X_train = []\n    augmented_y_train = []\n\n    for label in unique_labels:\n        X_label = X_train[y_train == label]\n        X_resampled = resample(X_label, replace=True, n_samples=X_label.shape[0] * 2, random_state=42)\n        augmented_X_train.append(X_resampled)\n        augmented_y_train.extend([label] * X_resampled.shape[0])\n\n    X_train_augmented = np.vstack(augmented_X_train)\n    y_train_augmented = np.array(augmented_y_train)\n\n    model = RandomForestClassifier()\n    model.fit(X_train_augmented, y_train_augmented)\n\n    y_pred = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = le.inverse_transform(y_pred)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round17_deep_transfer_learniX.py",
                "accuracy": 0.5761,
                "model_info": "",
                "lineage": "171"
            }
        ]
    },
    "0": {
        "2": [
            {
                "code": "import numpy as np\nimport anndata as ad\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.decomposition import PCA\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    label_encoder = LabelEncoder()\n    y_train = label_encoder.fit_transform(adata_train.obs[\"celltype_label\"])\n    \n    pca = PCA(n_components=0.95)\n    model = make_pipeline(StandardScaler(), pca, MLPClassifier(hidden_layer_sizes=(100,), max_iter=500))\n    model.fit(X_train, y_train)\n    \n    y_pred = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = label_encoder.inverse_transform(y_pred)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round16_ZINB_model_based_autX.py",
                "accuracy": 0.574,
                "model_info": "",
                "lineage": "201"
            }
        ],
        "3": [
            {
                "code": "import numpy as np\nimport anndata as ad\nfrom sklearn.preprocessing import LabelEncoder, StandardScaler\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.decomposition import PCA\nfrom sklearn.model_selection import train_test_split\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    label_encoder = LabelEncoder()\n    y_train = label_encoder.fit_transform(adata_train.obs[\"celltype_label\"])\n    \n    scaler = StandardScaler()\n    X_train = scaler.fit_transform(X_train)\n    X_test = scaler.transform(X_test)\n    \n    pca = PCA(n_components=0.95)\n    X_train = pca.fit_transform(X_train)\n    X_test = pca.transform(X_test)\n\n    model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1000, alpha=0.01, activation='relu')\n    model.fit(X_train, y_train)\n    \n    print(f\"Training data shape: {X_train.shape}, Data type: {X_train.dtype}\")\n    print(f\"Test data shape: {X_test.shape}, Data type: {X_test.dtype}\")\n    \n    y_pred = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = label_encoder.inverse_transform(y_pred)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round16_ZINB_model_based_autXX.py",
                "accuracy": 0.5837,
                "model_info": "Training data shape: (3885, 3336), Data type: float32\nTest data shape: (1115, 3336), Data type: float32",
                "lineage": "301"
            }
        ],
        "4": [],
        "5": [],
        "6": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.feature_selection import SelectFromModel\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values.reshape(-1, 1)\n\n    clf = LogisticRegression(max_iter=1000)\n    clf.fit(X_train, y_train)\n    \n    selector = SelectFromModel(clf, threshold=\"mean\", prefit=True)\n    X_train_selected = selector.transform(X_train)\n    X_test_selected = selector.transform(X_test)\n\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000))\n    model.fit(X_train_selected, y_train)\n\n    predictions = model.predict(X_test_selected)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_X.py",
                "accuracy": 0.7999,
                "model_info": "",
                "lineage": "601"
            }
        ],
        "7": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    noise = np.random.normal(0, 0.1, X_train.shape)\n    X_train_noisy = X_train + noise\n\n    y_train = adata_train.obs[\"celltype_label\"].values.reshape(-1, 1)\n\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000))\n    model.fit(X_train_noisy, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_X.py",
                "accuracy": 0.794,
                "model_info": "",
                "lineage": "701"
            }
        ],
        "8": [
            {
                "code": "from sklearn.feature_selection import SelectFromModel\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values.reshape(-1, 1)\n\n    selector = SelectFromModel(LogisticRegression(max_iter=1000))\n    selector.fit(X_train, y_train.ravel())\n    X_train_selected = selector.transform(X_train)\n    X_test_selected = selector.transform(X_test)\n\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000))\n    model.fit(X_train_selected, y_train)\n\n    predictions = model.predict(X_test_selected)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_X.py",
                "accuracy": 0.7999,
                "model_info": "",
                "lineage": "801"
            }
        ],
        "9": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.utils import resample\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n\n    unique_labels = np.unique(y_train)\n    augmented_X_train = []\n    augmented_y_train = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        y_class = y_train[y_train == label]\n        X_resampled, y_resampled = resample(X_class, y_class, n_samples=len(X_class) * 2, replace=True)\n        augmented_X_train.append(X_resampled)\n        augmented_y_train.append(y_resampled)\n\n    X_train_augmented = np.vstack(augmented_X_train)\n    y_train_augmented = np.hstack(augmented_y_train)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_train_augmented, y_train_augmented)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_X.py",
                "accuracy": 0.7935,
                "model_info": "",
                "lineage": "901"
            }
        ],
        "10": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values.reshape(-1, 1)\n\n    model = MultiOutputClassifier(RandomForestClassifier(n_estimators=100))\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_X.py",
                "accuracy": 0.5735,
                "model_info": "",
                "lineage": "1001"
            }
        ],
        "11": [],
        "12": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import StandardScaler\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom sklearn.metrics import accuracy_score\n\nclass GNNModel(nn.Module):\n    def __init__(self, input_dim, output_dim):\n        super(GNNModel, self).__init__()\n        self.fc1 = nn.Linear(input_dim, 128)\n        self.fc2 = nn.Linear(128, output_dim)\n\n    def forward(self, x):\n        x = torch.relu(self.fc1(x))\n        x = self.fc2(x)\n        return x\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].cat.codes.values\n\n    scaler = StandardScaler()\n    X_train = scaler.fit_transform(X_train)\n    X_test = scaler.transform(X_test)\n\n    input_dim = X_train.shape[1]\n    output_dim = len(np.unique(y_train))\n\n    model = GNNModel(input_dim, output_dim)\n    criterion = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    X_train_tensor = torch.FloatTensor(X_train)\n    y_train_tensor = torch.LongTensor(y_train)\n\n    for epoch in range(100):\n        model.train()\n        optimizer.zero_grad()\n        outputs = model(X_train_tensor)\n        loss = criterion(outputs, y_train_tensor)\n        loss.backward()\n        optimizer.step()\n\n    model.eval()\n    with torch.no_grad():\n        X_test_tensor = torch.FloatTensor(X_test)\n        predictions = model(X_test_tensor)\n        predicted_classes = torch.argmax(predictions, dim=1).numpy()\n\n    adata_test.obs[\"celltype_label\"] = predicted_classes\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_X.py",
                "accuracy": 0.0,
                "model_info": "",
                "lineage": "1201"
            }
        ],
        "13": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.cluster import DBSCAN\nfrom sklearn.preprocessing import StandardScaler\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values.reshape(-1, 1)\n\n    scaler = StandardScaler()\n    X_train_scaled = scaler.fit_transform(X_train)\n    X_test_scaled = scaler.transform(X_test)\n\n    clustering = DBSCAN(eps=0.5, min_samples=5).fit(X_train_scaled)\n    adata_train.obs['cluster'] = clustering.labels_\n\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000))\n    model.fit(X_train_scaled, y_train)\n\n    predictions = model.predict(X_test_scaled)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_X.py",
                "accuracy": 0.7495,
                "model_info": "",
                "lineage": "1301"
            }
        ],
        "14": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.decomposition import PCA\nfrom sklearn.preprocessing import StandardScaler\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    scaler = StandardScaler()\n    X_train_scaled = scaler.fit_transform(X_train)\n    X_test_scaled = scaler.transform(X_test)\n\n    pca = PCA(n_components=0.95)\n    X_train_pca = pca.fit_transform(X_train_scaled)\n    X_test_pca = pca.transform(X_test_scaled)\n\n    y_train = adata_train.obs[\"celltype_label\"].values.reshape(-1, 1)\n\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000))\n    model.fit(X_train_pca, y_train)\n\n    predictions = model.predict(X_test_pca)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_X.py",
                "accuracy": 0.7511,
                "model_info": "",
                "lineage": "1401"
            }
        ],
        "15": [],
        "16": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_X.py",
                "accuracy": 0.8151,
                "model_info": "",
                "lineage": "1601"
            }
        ],
        "17": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.cluster import KMeans\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.extend(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.array(y_resampled)\n\n    kmeans = KMeans(n_clusters=len(unique_labels), random_state=42)\n    cluster_labels = kmeans.fit_predict(X_resampled)\n    \n    model = RandomForestClassifier(n_estimators=100, random_state=42)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.6704,
                "model_info": "",
                "lineage": "1701"
            }
        ],
        "18": [],
        "19": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nimport umap\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    reducer = umap.UMAP()\n    X_resampled_umap = reducer.fit_transform(X_resampled)\n    X_test_umap = reducer.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled_umap, y_resampled)\n\n    predictions = model.predict(X_test_umap)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.1574,
                "model_info": "",
                "lineage": "1901"
            }
        ],
        "20": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    scaler = StandardScaler()\n    X_train_scaled = scaler.fit_transform(X_train)\n    X_test_scaled = scaler.transform(X_test)\n    \n    pca = PCA(n_components=0.95)\n    X_train_pca = pca.fit_transform(X_train_scaled)\n    X_test_pca = pca.transform(X_test_scaled)\n\n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train_pca[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test_pca)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.7262,
                "model_info": "",
                "lineage": "2001"
            }
        ],
        "21": [],
        "22": [],
        "23": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import QuantileTransformer\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    quantile_transformer = QuantileTransformer()\n    X_resampled = quantile_transformer.fit_transform(X_resampled)\n    X_test = quantile_transformer.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.7759,
                "model_info": "",
                "lineage": "2301"
            }
        ],
        "24": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.7628,
                "model_info": "",
                "lineage": "2401"
            }
        ],
        "25": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.utils import resample\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    print(f\"Class balance before resampling: {dict(zip(unique_labels, counts))}\")\n\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n    \n    unique_labels_resampled, counts_resampled = np.unique(y_resampled, return_counts=True)\n    print(f\"Class balance after resampling: {dict(zip(unique_labels_resampled, counts_resampled))}\")\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.8151,
                "model_info": "Class balance before resampling: {'CD16-negative, CD56-bright natural killer cell, human': 90, 'CD16-positive, CD56-dim natural killer cell, human': 265, 'CD4-positive helper T cell': 191, 'CD8-positive, alpha-beta memory T cell': 343, 'CD8-positive, alpha-beta memory T cell, CD45RO-positive': 139, 'T follicular helper cell': 171, 'alpha-beta T cell': 61, 'alveolar macrophage': 234, 'animal cell': 53, 'classical monocyte': 287, 'conventional dendritic cell': 15, 'dendritic cell, human': 5, 'effector memory CD4-positive, alpha-beta T cell': 252, 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated': 181, 'erythroid lineage cell': 6, 'gamma-delta T cell': 135, 'germinal center B cell': 7, 'group 3 innate lymphoid cell': 12, 'lymphocyte': 40, 'macrophage': 63, 'mast cell': 47, 'megakaryocyte': 4, 'memory B cell': 293, 'mucosal invariant T cell': 51, 'naive B cell': 125, 'naive thymus-derived CD4-positive, alpha-beta T cell': 424, 'naive thymus-derived CD8-positive, alpha-beta T cell': 75, 'non-classical monocyte': 35, 'plasma cell': 81, 'plasmablast': 29, 'plasmacytoid dendritic cell': 14, 'precursor B cell': 1, 'pro-B cell': 1, 'progenitor cell': 23, 'regulatory T cell': 132}\nClass balance after resampling: {'CD16-negative, CD56-bright natural killer cell, human': 424, 'CD16-positive, CD56-dim natural killer cell, human': 424, 'CD4-positive helper T cell': 424, 'CD8-positive, alpha-beta memory T cell': 424, 'CD8-positive, alpha-beta memory T cell, CD45RO-positive': 424, 'T follicular helper cell': 424, 'alpha-beta T cell': 424, 'alveolar macrophage': 424, 'animal cell': 424, 'classical monocyte': 424, 'conventional dendritic cell': 424, 'dendritic cell, human': 424, 'effector memory CD4-positive, alpha-beta T cell': 424, 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated': 424, 'erythroid lineage cell': 424, 'gamma-delta T cell': 424, 'germinal center B cell': 424, 'group 3 innate 
lymphoid cell': 424, 'lymphocyte': 424, ",
                "lineage": "2501"
            }
        ],
        "26": [],
        "27": [],
        "28": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    le = LabelEncoder()\n    y_train_encoded = le.fit_transform(y_train)\n\n    unique_labels, counts = np.unique(y_train_encoded, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train_encoded == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pca = PCA(n_components=50)\n    X_resampled_pca = pca.fit_transform(X_resampled)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled_pca, y_resampled)\n\n    X_test_pca = pca.transform(X_test)\n    predictions = model.predict(X_test_pca)\n    adata_test.obs[\"celltype_label\"] = le.inverse_transform(predictions)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.6874,
                "model_info": "",
                "lineage": "2801"
            }
        ],
        "29": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.metrics import accuracy_score\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.8151,
                "model_info": "",
                "lineage": "2901"
            }
        ],
        "30": [],
        "31": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nimport umap\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    umap_model = umap.UMAP(n_neighbors=15, min_dist=0.1, metric='euclidean')\n    X_resampled_umap = umap_model.fit_transform(X_resampled)\n    X_test_umap = umap_model.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled_umap, y_resampled)\n\n    predictions = model.predict(X_test_umap)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.1065,
                "model_info": "",
                "lineage": "3101"
            }
        ],
        "32": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    scaler = StandardScaler()\n    X_train_scaled = scaler.fit_transform(X_train)\n    X_test_scaled = scaler.transform(X_test)\n    \n    pca = PCA(n_components=0.95)\n    X_train_pca = pca.fit_transform(X_train_scaled)\n    X_test_pca = pca.transform(X_test_scaled)\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train_pca[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test_pca)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.7262,
                "model_info": "",
                "lineage": "3201"
            }
        ],
        "33": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled, y_resampled)\n    \n    print(f\"Trainable vs Frozen Parameters Ratio: {np.sum(np.array(model.coef_) != 0)}/{model.coef_.size}\")\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.8151,
                "model_info": "Trainable vs Frozen Parameters Ratio: 829010/1026725",
                "lineage": "3301"
            }
        ],
        "34": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled).reshape(-1, 1)\n\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.8151,
                "model_info": "",
                "lineage": "3401"
            }
        ],
        "35": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.feature_selection import SelectFromModel\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled, y_resampled)\n\n    selector = SelectFromModel(model, threshold='mean', prefit=True)\n    X_train_selected = selector.transform(X_resampled)\n    X_test_selected = selector.transform(X_test)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n    multi_model = MultiOutputClassifier(LogisticRegression(max_iter=1000))\n    multi_model.fit(X_train_selected, y_resampled)\n\n    predictions = multi_model.predict(X_test_selected)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.8098,
                "model_info": "",
                "lineage": "3501"
            }
        ],
        "36": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = make_pipeline(StandardScaler(), RandomForestClassifier())\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.6749,
                "model_info": "",
                "lineage": "3601"
            }
        ],
        "37": [],
        "38": [],
        "39": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    max_count = counts.max()\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        if len(X_class) < max_count:\n            X_class_resampled, y_class_resampled = resample(X_class, \n                                                             np.full(X_class.shape[0], label), \n                                                             n_samples=max_count, \n                                                             replace=True)\n        else:\n            X_class_resampled, y_class_resampled = X_class, np.full(X_class.shape[0], label)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.8148,
                "model_info": "",
                "lineage": "3901"
            }
        ],
        "40": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = StandardScaler()\n    X_resampled = scaler.fit_transform(X_resampled)\n    X_test = scaler.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.7628,
                "model_info": "",
                "lineage": "4001"
            }
        ],
        "41": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.feature_selection import SelectFromModel\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    selector = SelectFromModel(LogisticRegression(max_iter=1000))\n    selector.fit(X_resampled, y_resampled)\n    X_selected = selector.transform(X_resampled)\n    X_test_selected = selector.transform(X_test)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000))\n    model.fit(X_selected, y_resampled)\n\n    predictions = model.predict(X_test_selected)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.8098,
                "model_info": "",
                "lineage": "4101"
            }
        ],
        "42": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    scaler = StandardScaler()\n    X_train_scaled = scaler.fit_transform(X_train)\n    X_test_scaled = scaler.transform(X_test)\n\n    pca = PCA(n_components=0.95)\n    X_train_pca = pca.fit_transform(X_train_scaled)\n    X_test_pca = pca.transform(X_test_scaled)\n\n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train_pca[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test_pca)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.7262,
                "model_info": "",
                "lineage": "4201"
            }
        ],
        "43": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.1)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XX.py",
                "accuracy": 0.8167,
                "model_info": "",
                "lineage": "4301"
            }
        ],
        "44": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = StandardScaler()\n    X_resampled = scaler.fit_transform(X_resampled)\n    X_test = scaler.transform(X_test)\n\n    pca = PCA(n_components=50)\n    X_resampled = pca.fit_transform(X_resampled)\n    X_test = pca.transform(X_test)\n\n    y_resampled = np.expand_dims(y_resampled, axis=1)\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.1))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXX.py",
                "accuracy": 0.599,
                "model_info": "",
                "lineage": "4401"
            }
        ],
        "45": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled).reshape(-1, 1)\n\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.1))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXX.py",
                "accuracy": 0.8167,
                "model_info": "",
                "lineage": "4501"
            }
        ],
        "46": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    scaler = StandardScaler()\n    X_train_scaled = scaler.fit_transform(X_train)\n    X_test_scaled = scaler.transform(X_test)\n    \n    pca = PCA(n_components=0.95)\n    X_train_pca = pca.fit_transform(X_train_scaled)\n    X_test_pca = pca.transform(X_test_scaled)\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train_pca[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.1)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test_pca)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXX.py",
                "accuracy": 0.7539,
                "model_info": "",
                "lineage": "4601"
            }
        ],
        "47": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "4701"
            }
        ],
        "48": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled).reshape(-1, 1)\n\n    model = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=42)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6704,
                "model_info": "",
                "lineage": "4801"
            }
        ],
        "49": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        if counts.max() > counts.min():\n            X_class_resampled, y_class_resampled = resample(X_class, \n                                                             np.full(X_class.shape[0], label), \n                                                             n_samples=counts.max(), \n                                                             replace=True)\n            X_resampled.append(X_class_resampled)\n            y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "4901"
            }
        ],
        "50": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, C=0.05))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "5001"
            }
        ],
        "51": [],
        "52": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import Pipeline\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = Pipeline([\n        ('scaler', StandardScaler()),\n        ('classifier', LogisticRegression(max_iter=1000, C=0.05))\n    ])\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "5201"
            }
        ],
        "53": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.ensemble import RandomForestClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = RandomForestClassifier(n_estimators=100, random_state=42)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6704,
                "model_info": "",
                "lineage": "5301"
            }
        ],
        "54": [],
        "55": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.metrics import accuracy_score\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    validation_accuracy = accuracy_score(adata_test.obs[\"celltype_label\"], predictions)\n    print(f\"Validation Accuracy: {validation_accuracy:.4f}\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Validation Accuracy: 1.0000",
                "lineage": "5501"
            }
        ],
        "56": [],
        "57": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    low_confidence_threshold = 0.5\n    prediction_probabilities = model.predict_proba(X_test)\n    low_confidence_predictions = np.sum(np.max(prediction_probabilities, axis=1) < low_confidence_threshold) / len(predictions) * 100\n    print(f\"Low-confidence predictions: {low_confidence_predictions:.2f}%\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Low-confidence predictions: 12.56%",
                "lineage": "5701"
            }
        ],
        "58": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    print(f\"Unique labels: {len(unique_labels)}, Resampled data shape: {X_resampled.shape}, Model parameters: {model.get_params()}\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Unique labels: 35, Resampled data shape: (14840, 29335), Model parameters: {'C': 0.05, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 1000, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': None, 'solver': 'lbfgs', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",
                "lineage": "5801"
            }
        ],
        "59": [],
        "60": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.utils.class_weight import compute_class_weight\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)\n    class_weight_dict = dict(zip(np.unique(y_train), class_weights))\n\n    model = LogisticRegression(max_iter=1000, C=0.05, class_weight=class_weight_dict)\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.811,
                "model_info": "",
                "lineage": "6001"
            }
        ],
        "61": [],
        "62": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils.class_weight import compute_class_weight\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)\n    class_weight_dict = dict(zip(np.unique(y_train), class_weights))\n\n    model = LogisticRegression(max_iter=1000, C=0.05, class_weight=class_weight_dict)\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.811,
                "model_info": "",
                "lineage": "6201"
            }
        ],
        "63": [],
        "64": [],
        "65": [],
        "66": [],
        "67": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    print(\"Assumptions: Linearity, Homoscedasticity, No multicollinearity\")\n    print(\"Expectations: Input features should be scaled or normalized\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Assumptions: Linearity, Homoscedasticity, No multicollinearity\nExpectations: Input features should be scaled or normalized",
                "lineage": "6701"
            }
        ],
        "68": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.1, penalty='l2')\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8167,
                "model_info": "",
                "lineage": "6801"
            }
        ],
        "69": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7863,
                "model_info": "",
                "lineage": "6901"
            }
        ],
        "70": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.ensemble import RandomForestClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = make_pipeline(RandomForestClassifier())\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6749,
                "model_info": "",
                "lineage": "7001"
            }
        ],
        "71": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import QuantileTransformer\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    qt = QuantileTransformer()\n    X_resampled = qt.fit_transform(X_resampled)\n    X_test = qt.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7827,
                "model_info": "",
                "lineage": "7101"
            }
        ],
        "72": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled).reshape(-1, 1)\n\n    model = MultiOutputClassifier(RandomForestClassifier())\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6749,
                "model_info": "",
                "lineage": "7201"
            }
        ],
        "73": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\nfrom sklearn.utils import resample\nfrom sklearn.metrics import confusion_matrix\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pipeline = Pipeline([\n        ('scaler', StandardScaler()),\n        ('pca', PCA(n_components=50)),\n        ('classifier', RandomForestClassifier(n_estimators=100, class_weight='balanced'))\n    ])\n    \n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    conf_matrix = confusion_matrix(adata_test.obs[\"celltype_label\"], predictions)\n    print(\"Confusion Matrix:\\n\", conf_matrix)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXXX.py",
                "accuracy": 0.5713,
                "model_info": "Confusion Matrix:\n [[ 27   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0  49   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0  35   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0  59   0   0   0   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0  35   0   0   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0 119   0   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0  27   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0  22   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   5   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0  32   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   0  23   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   0   0  25   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   0   0   0  59   0   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   5   0   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  10   0   0\n    0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0\n    0   0   0   0   0   0   0   0 
  0]\n [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   ",
                "lineage": "7301"
            }
        ],
        "74": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import QuantileTransformer\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = QuantileTransformer()\n    X_resampled = scaler.fit_transform(X_resampled)\n    X_test = scaler.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7827,
                "model_info": "",
                "lineage": "7401"
            }
        ],
        "75": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import QuantileTransformer\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    qt = QuantileTransformer(output_distribution='uniform')\n    X_resampled = qt.fit_transform(X_resampled)\n    X_test = qt.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7827,
                "model_info": "",
                "lineage": "7501"
            }
        ],
        "76": [],
        "77": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.cluster import KMeans\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = StandardScaler()\n    X_resampled = scaler.fit_transform(X_resampled)\n\n    kmeans = KMeans(n_clusters=len(unique_labels), random_state=42)\n    kmeans.fit(X_resampled)\n    cluster_labels = kmeans.labels_\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.2918,
                "model_info": "",
                "lineage": "7701"
            }
        ],
        "78": [],
        "79": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import RobustScaler\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = RobustScaler()\n    X_resampled = scaler.fit_transform(X_resampled)\n    X_test = scaler.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7316,
                "model_info": "",
                "lineage": "7901"
            }
        ],
        "80": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import StandardScaler\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = StandardScaler()\n    X_resampled = scaler.fit_transform(X_resampled)\n    X_test = scaler.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "8001"
            }
        ],
        "81": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import StandardScaler\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = StandardScaler()\n    X_resampled = scaler.fit_transform(X_resampled)\n    X_test = scaler.transform(X_test)\n\n    pca = PCA(n_components=50)\n    X_resampled = pca.fit_transform(X_resampled)\n    X_test = pca.transform(X_test)\n\n    y_resampled = np.expand_dims(y_resampled, axis=1)\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6019,
                "model_info": "",
                "lineage": "8101"
            }
        ],
        "82": [],
        "83": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    max_count = counts.max()\n    X_resampled, y_resampled = [], []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        n_samples = max_count if counts[counts > 0].size > 0 else X_class.shape[0]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=n_samples, \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "8301"
            }
        ],
        "84": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    print(f\"Training samples: {len(y_train)}, Unique labels: {len(unique_labels)}, Resampled shape: {X_resampled.shape}\")\n    \n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Training samples: 3885, Unique labels: 35, Resampled shape: (14840, 29335)",
                "lineage": "8401"
            }
        ],
        "85": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import FunctionTransformer\n\ndef log_transform(X):\n    return np.log1p(X)\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    X_resampled = log_transform(X_resampled)\n    X_test = log_transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8085,
                "model_info": "",
                "lineage": "8501"
            }
        ],
        "86": [],
        "87": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n\n    model = MultiOutputClassifier(RandomForestClassifier(n_estimators=100, max_depth=None))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6749,
                "model_info": "",
                "lineage": "8701"
            }
        ],
        "88": [],
        "89": [],
        "90": [],
        "91": [],
        "92": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.feature_selection import SelectKBest, f_classif\nfrom sklearn.cluster import KMeans\nfrom sklearn.pipeline import Pipeline\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    kmeans = KMeans(n_clusters=len(unique_labels), random_state=42)\n    cluster_labels = kmeans.fit_predict(X_resampled)\n\n    feature_selector = SelectKBest(score_func=f_classif, k=100)\n    X_selected = feature_selector.fit_transform(X_resampled, cluster_labels)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_selected, y_resampled)\n\n    X_test_selected = feature_selector.transform(X_test)\n    predictions = model.predict(X_test_selected)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.0772,
                "model_info": "",
                "lineage": "9201"
            }
        ],
        "93": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    print(f\"Training complete: unique labels {unique_labels}, counts {counts}\")\n    \n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Training complete: unique labels ['CD16-negative, CD56-bright natural killer cell, human'\n 'CD16-positive, CD56-dim natural killer cell, human'\n 'CD4-positive helper T cell' 'CD8-positive, alpha-beta memory T cell'\n 'CD8-positive, alpha-beta memory T cell, CD45RO-positive'\n 'T follicular helper cell' 'alpha-beta T cell' 'alveolar macrophage'\n 'animal cell' 'classical monocyte' 'conventional dendritic cell'\n 'dendritic cell, human' 'effector memory CD4-positive, alpha-beta T cell'\n 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated'\n 'erythroid lineage cell' 'gamma-delta T cell' 'germinal center B cell'\n 'group 3 innate lymphoid cell' 'lymphocyte' 'macrophage' 'mast cell'\n 'megakaryocyte' 'memory B cell' 'mucosal invariant T cell' 'naive B cell'\n 'naive thymus-derived CD4-positive, alpha-beta T cell'\n 'naive thymus-derived CD8-positive, alpha-beta T cell'\n 'non-classical monocyte' 'plasma cell' 'plasmablast'\n 'plasmacytoid dendritic cell' 'precursor B cell' 'pro-B cell'\n 'progenitor cell' 'regulatory T cell'], counts [ 90 265 191 343 139 171  61 234  53 287  15   5 252 181   6 135   7  12\n  40  63  47   4 293  51 125 424  75  35  81  29  14   1   1  23 132]",
                "lineage": "9301"
            }
        ],
        "94": [],
        "95": [],
        "96": [],
        "97": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import QuantileTransformer\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    transformer = QuantileTransformer()\n    X_resampled = transformer.fit_transform(X_resampled)\n    X_test = transformer.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7827,
                "model_info": "",
                "lineage": "9701"
            }
        ],
        "98": [],
        "99": [],
        "100": [],
        "101": [],
        "102": [],
        "103": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7863,
                "model_info": "",
                "lineage": "10301"
            }
        ],
        "104": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.feature_selection import VarianceThreshold\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    selector = VarianceThreshold(threshold=0.01)\n    X_resampled = selector.fit_transform(X_resampled)\n    X_test = selector.transform(X_test)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8222,
                "model_info": "",
                "lineage": "10401"
            }
        ],
        "105": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.utils.class_weight import compute_class_weight\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)\n    class_weight_dict = dict(zip(np.unique(y_train), class_weights))\n\n    model = LogisticRegression(max_iter=1000, C=0.05, class_weight=class_weight_dict)\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.811,
                "model_info": "",
                "lineage": "10501"
            }
        ],
        "106": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import FunctionTransformer\nfrom sklearn.pipeline import Pipeline\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled).reshape(-1, 1)\n\n    transformer = FunctionTransformer(np.log1p, validate=True)\n    model = Pipeline(steps=[\n        ('transform', transformer),\n        ('classifier', MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05)))\n    ])\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8085,
                "model_info": "",
                "lineage": "10601"
            }
        ],
        "107": [],
        "108": [],
        "109": [],
        "110": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.feature_selection import VarianceThreshold\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    selector = VarianceThreshold(threshold=0.1)\n    X_resampled = selector.fit_transform(X_resampled)\n    X_test = selector.transform(X_test)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n    \n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8188,
                "model_info": "",
                "lineage": "11001"
            }
        ],
        "111": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7863,
                "model_info": "",
                "lineage": "11101"
            }
        ],
        "112": [],
        "113": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom scipy.stats import skew, kurtosis\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    skewness = skew(X_train, axis=0)\n    kurt = kurtosis(X_train, axis=0)\n    print(\"Skewness:\", skewness)\n    print(\"Kurtosis:\", kurt)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Skewness: [ 2.0688245  6.2624288 19.469856  ...        nan        nan 54.857197 ]\nKurtosis: [   3.2316732   41.57276    399.12875   ...          nan          nan\n 3148.755    ]",
                "lineage": "11301"
            }
        ],
        "114": [],
        "115": [],
        "116": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.cluster import KMeans\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    scaler = StandardScaler()\n    X_train_scaled = scaler.fit_transform(X_train)\n    X_test_scaled = scaler.transform(X_test)\n    \n    kmeans = KMeans(n_clusters=len(np.unique(y_train)), random_state=42)\n    kmeans.fit(X_train_scaled)\n    cluster_labels = kmeans.predict(X_train_scaled)\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train_scaled[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test_scaled)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.754,
                "model_info": "",
                "lineage": "11601"
            }
        ],
        "117": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nimport logging\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    if np.any(np.isnan(X_resampled)) or np.any(np.isinf(X_resampled)):\n        nan_count = np.sum(np.isnan(X_resampled))\n        inf_count = np.sum(np.isinf(X_resampled))\n        logging.info(f\"Counts of NaN: {nan_count}, Counts of Inf: {inf_count}\")\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "11701"
            }
        ],
        "118": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pca = PCA(n_components=50)\n    X_resampled = pca.fit_transform(X_resampled)\n    X_test = pca.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7001,
                "model_info": "",
                "lineage": "11801"
            }
        ],
        "119": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, C=0.05))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "11901"
            }
        ],
        "120": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    model = LogisticRegression(max_iter=1000, C=0.05, class_weight='balanced')\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.811,
                "model_info": "",
                "lineage": "12001"
            }
        ],
        "121": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.metrics import accuracy_score\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    val_accuracy = accuracy_score(adata_test.obs[\"celltype_label\"], predictions)\n    print(f'Validation Accuracy: {val_accuracy:.4f}')\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Validation Accuracy: 1.0000",
                "lineage": "12101"
            }
        ],
        "122": [],
        "123": [],
        "124": [],
        "125": [],
        "126": [],
        "127": [],
        "128": [],
        "129": [],
        "130": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import QuantileTransformer\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    transformer = QuantileTransformer(output_distribution='uniform')\n    X_resampled = transformer.fit_transform(X_resampled)\n    X_test = transformer.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7827,
                "model_info": "",
                "lineage": "13001"
            }
        ],
        "131": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.utils import resample\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled).reshape(-1, 1)\n\n    model = MultiOutputClassifier(RandomForestClassifier(n_estimators=100, random_state=42))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6704,
                "model_info": "",
                "lineage": "13101"
            }
        ],
        "132": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    print(\"Class balance before resampling:\", dict(zip(unique_labels, counts)))\n\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    unique_labels_resampled, counts_resampled = np.unique(y_resampled, return_counts=True)\n    print(\"Class balance after resampling:\", dict(zip(unique_labels_resampled, counts_resampled)))\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Class balance before resampling: {'CD16-negative, CD56-bright natural killer cell, human': 90, 'CD16-positive, CD56-dim natural killer cell, human': 265, 'CD4-positive helper T cell': 191, 'CD8-positive, alpha-beta memory T cell': 343, 'CD8-positive, alpha-beta memory T cell, CD45RO-positive': 139, 'T follicular helper cell': 171, 'alpha-beta T cell': 61, 'alveolar macrophage': 234, 'animal cell': 53, 'classical monocyte': 287, 'conventional dendritic cell': 15, 'dendritic cell, human': 5, 'effector memory CD4-positive, alpha-beta T cell': 252, 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated': 181, 'erythroid lineage cell': 6, 'gamma-delta T cell': 135, 'germinal center B cell': 7, 'group 3 innate lymphoid cell': 12, 'lymphocyte': 40, 'macrophage': 63, 'mast cell': 47, 'megakaryocyte': 4, 'memory B cell': 293, 'mucosal invariant T cell': 51, 'naive B cell': 125, 'naive thymus-derived CD4-positive, alpha-beta T cell': 424, 'naive thymus-derived CD8-positive, alpha-beta T cell': 75, 'non-classical monocyte': 35, 'plasma cell': 81, 'plasmablast': 29, 'plasmacytoid dendritic cell': 14, 'precursor B cell': 1, 'pro-B cell': 1, 'progenitor cell': 23, 'regulatory T cell': 132}\nClass balance after resampling: {'CD16-negative, CD56-bright natural killer cell, human': 424, 'CD16-positive, CD56-dim natural killer cell, human': 424, 'CD4-positive helper T cell': 424, 'CD8-positive, alpha-beta memory T cell': 424, 'CD8-positive, alpha-beta memory T cell, CD45RO-positive': 424, 'T follicular helper cell': 424, 'alpha-beta T cell': 424, 'alveolar macrophage': 424, 'animal cell': 424, 'classical monocyte': 424, 'conventional dendritic cell': 424, 'dendritic cell, human': 424, 'effector memory CD4-positive, alpha-beta T cell': 424, 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated': 424, 'erythroid lineage cell': 424, 'gamma-delta T cell': 424, 'germinal center B cell': 424, 'group 3 innate 
lymphoid cell': 424, 'lymphocyte': 424, ",
                "lineage": "13201"
            }
        ],
        "133": [],
        "134": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    model = make_pipeline(StandardScaler(), RandomForestClassifier(n_estimators=100))\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.5735,
                "model_info": "",
                "lineage": "13401"
            }
        ],
        "135": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\nfrom sklearn.pipeline import make_pipeline\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pipeline = make_pipeline(StandardScaler(), PCA(n_components=50), LogisticRegression(max_iter=1000, C=0.05))\n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6019,
                "model_info": "",
                "lineage": "13501"
            }
        ],
        "136": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import FunctionTransformer\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    log_transformer = FunctionTransformer(np.log1p, validate=True)\n    X_resampled = log_transformer.fit_transform(X_resampled)\n    X_test = log_transformer.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8085,
                "model_info": "",
                "lineage": "13601"
            }
        ],
        "137": [],
        "138": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = StandardScaler()\n    X_resampled = scaler.fit_transform(X_resampled)\n    X_test = scaler.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "13801"
            }
        ],
        "139": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.metrics import classification_report\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    scaler = StandardScaler()\n    X_train_scaled = scaler.fit_transform(X_train)\n    X_test_scaled = scaler.transform(X_test)\n\n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train_scaled[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test_scaled)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    report = classification_report(y_train, model.predict(X_train_scaled), output_dict=True)\n    print(\"Class distribution in training data:\", counts)\n    print(\"Classification report:\", report)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.754,
                "model_info": "Class distribution in training data: [ 90 265 191 343 139 171  61 234  53 287  15   5 252 181   6 135   7  12\n  40  63  47   4 293  51 125 424  75  35  81  29  14   1   1  23 132]\nClassification report: {'CD16-negative, CD56-bright natural killer cell, human': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 90.0}, 'CD16-positive, CD56-dim natural killer cell, human': {'precision': 0.9962264150943396, 'recall': 0.9962264150943396, 'f1-score': 0.9962264150943396, 'support': 265.0}, 'CD4-positive helper T cell': {'precision': 0.9788359788359788, 'recall': 0.9685863874345549, 'f1-score': 0.9736842105263158, 'support': 191.0}, 'CD8-positive, alpha-beta memory T cell': {'precision': 0.9912280701754386, 'recall': 0.9883381924198251, 'f1-score': 0.9897810218978103, 'support': 343.0}, 'CD8-positive, alpha-beta memory T cell, CD45RO-positive': {'precision': 0.9928057553956835, 'recall': 0.9928057553956835, 'f1-score': 0.9928057553956835, 'support': 139.0}, 'T follicular helper cell': {'precision': 0.9770114942528736, 'recall': 0.9941520467836257, 'f1-score': 0.9855072463768116, 'support': 171.0}, 'alpha-beta T cell': {'precision': 0.9242424242424242, 'recall': 1.0, 'f1-score': 0.9606299212598425, 'support': 61.0}, 'alveolar macrophage': {'precision': 1.0, 'recall': 0.9914529914529915, 'f1-score': 0.9957081545064378, 'support': 234.0}, 'animal cell': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 53.0}, 'classical monocyte': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 287.0}, 'conventional dendritic cell': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 15.0}, 'dendritic cell, human': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 5.0}, 'effector memory CD4-positive, alpha-beta T cell': {'precision': 0.9836065573770492, 'recall': 0.9523809523809523, 'f1-score': 0.967741935483871, 'support': 252.0}, 'effector memory CD8-positive, alpha-beta T cell, terminally 
differentiated': {'precision': 0.98895027624",
                "lineage": "13901"
            }
        ],
        "140": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import FunctionTransformer\nfrom sklearn.pipeline import Pipeline\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled).reshape(-1, 1)\n\n    model = Pipeline([\n        ('log_transform', FunctionTransformer(np.log1p, validate=True)),\n        ('classifier', MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05)))\n    ])\n    \n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8085,
                "model_info": "",
                "lineage": "14001"
            }
        ],
        "141": [],
        "142": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.datasets import make_classification\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        if len(X_class) > 0:\n            X_class_resampled, y_class_resampled = resample(X_class, \n                                                             np.full(X_class.shape[0], label), \n                                                             n_samples=counts.max(), \n                                                             replace=True)\n            X_resampled.append(X_class_resampled)\n            y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "14201"
            }
        ],
        "143": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import RobustScaler\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = RobustScaler()\n    X_resampled = scaler.fit_transform(X_resampled)\n    X_test = scaler.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7316,
                "model_info": "",
                "lineage": "14301"
            }
        ],
        "144": [],
        "145": [],
        "146": [],
        "147": [],
        "148": [],
        "149": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nimport pandas as pd\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n    \n    shap_values = pd.DataFrame(model.predict_proba(X_test), columns=model.classes_)\n    shap_summary = shap_values.describe()\n\n    print(\"SHAP Summary Statistics:\\n\", shap_summary)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "SHAP Summary Statistics:\n        CD16-negative, CD56-bright natural killer cell, human  ...  regulatory T cell\ncount                                       1.115000e+03      ...       1.115000e+03\nmean                                        3.555200e-02      ...       4.975831e-02\nstd                                         1.516780e-01      ...       1.697027e-01\nmin                                         3.416571e-08      ...       2.383014e-07\n25%                                         1.425403e-04      ...       4.060139e-04\n50%                                         4.946723e-04      ...       1.878753e-03\n75%                                         2.228157e-03      ...       1.288365e-02\nmax                                         9.856624e-01      ...       9.982722e-01\n\n[8 rows x 35 columns]",
                "lineage": "14901"
            }
        ],
        "150": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.semi_supervised import SelfTrainingClassifier\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    y_resampled = np.where(np.isin(y_resampled, unique_labels), y_resampled, -1)\n\n    base_model = LogisticRegression(max_iter=1000, C=0.05)\n    self_training_model = SelfTrainingClassifier(base_model)\n    self_training_model.fit(X_resampled, y_resampled.astype(object))\n\n    predictions = self_training_model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.astype(str)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "15001"
            }
        ],
        "151": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    scaler = StandardScaler()\n    X_train = scaler.fit_transform(X_train)\n    X_test = scaler.transform(X_test)\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.754,
                "model_info": "",
                "lineage": "15101"
            }
        ],
        "152": [],
        "153": [],
        "154": [],
        "155": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.metrics import f1_score\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05, class_weight='balanced')\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    f1 = f1_score(adata_test.obs[\"celltype_label\"], predictions, average='weighted')\n    \n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "15501"
            }
        ],
        "156": [],
        "157": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n    \n    misclassified_indices = np.where(predictions != adata_test.obs[\"celltype_label\"].values)[0]\n    misclassified_probs = model.predict_proba(X_test)[misclassified_indices]\n    misclassified_examples = [(adata_test.obs.index[i], predictions[i], misclassified_probs[j]) for j, i in enumerate(misclassified_indices)]\n\n    for example in misclassified_examples:\n        print(f\"Cell: {example[0]}, Predicted: {example[1]}, Probabilities: {example[2]}\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "15701"
            }
        ],
        "158": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom scipy.stats import zscore\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    z_scores = zscore(X_test, axis=0)\n    outliers = np.where(np.abs(z_scores) > 3)\n    print(f\"Detected outliers: {len(outliers[0])}\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Detected outliers: 365039",
                "lineage": "15801"
            }
        ],
        "159": [],
        "160": [],
        "161": [],
        "162": [],
        "163": [],
        "164": [],
        "165": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.feature_selection import SelectFromModel\nfrom sklearn.ensemble import RandomForestClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    feature_selector = SelectFromModel(RandomForestClassifier(n_estimators=100))\n    X_resampled_selected = feature_selector.fit_transform(X_resampled, y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled_selected, y_resampled)\n\n    X_test_selected = feature_selector.transform(X_test)\n    predictions = model.predict(X_test_selected)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8238,
                "model_info": "",
                "lineage": "16501"
            }
        ],
        "166": [],
        "167": [],
        "168": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    model = RandomForestClassifier(class_weight='balanced', n_estimators=100, max_depth=None, random_state=42)\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.5835,
                "model_info": "",
                "lineage": "16801"
            }
        ],
        "169": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, C=0.05))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "16901"
            }
        ],
        "170": [],
        "171": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nimport logging\n\ndef tuso_model(adata_train, adata_test):\n    logging.info(\"Input feature scaling: Normalized data assumed.\")\n    \n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "17101"
            }
        ],
        "172": [],
        "173": [],
        "174": [],
        "175": [],
        "176": [],
        "177": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "17701"
            }
        ],
        "178": [],
        "179": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.metrics import accuracy_score\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n    \n    validation_accuracy = accuracy_score(adata_test.obs[\"celltype_label\"], predictions)\n    print(f\"Validation Accuracy: {validation_accuracy:.4f}\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Validation Accuracy: 1.0000",
                "lineage": "17901"
            }
        ],
        "180": [],
        "181": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    print(\"Gradient norms:\", np.linalg.norm(model.coef_))\n    \n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Gradient norms: 5.160147437517697",
                "lineage": "18101"
            }
        ],
        "182": [],
        "183": [],
        "184": [],
        "185": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.metrics import accuracy_score\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n    \n    low_confidence_threshold = 0.7\n    probabilities = model.predict_proba(X_test)\n    low_confidence_predictions = np.sum(np.max(probabilities, axis=1) < low_confidence_threshold)\n    low_confidence_percentage = low_confidence_predictions / len(predictions) * 100\n    print(f\"Low-confidence predictions: {low_confidence_percentage:.2f}%\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Low-confidence predictions: 27.98%",
                "lineage": "18501"
            }
        ],
        "186": [],
        "187": [],
        "188": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.1, penalty='l2')\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8167,
                "model_info": "",
                "lineage": "18801"
            }
        ],
        "189": [],
        "190": [],
        "191": [],
        "192": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    print(\"Dataset statistics: Unique labels:\", unique_labels, \"Counts:\", counts)\n\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Dataset statistics: Unique labels: ['CD16-negative, CD56-bright natural killer cell, human'\n 'CD16-positive, CD56-dim natural killer cell, human'\n 'CD4-positive helper T cell' 'CD8-positive, alpha-beta memory T cell'\n 'CD8-positive, alpha-beta memory T cell, CD45RO-positive'\n 'T follicular helper cell' 'alpha-beta T cell' 'alveolar macrophage'\n 'animal cell' 'classical monocyte' 'conventional dendritic cell'\n 'dendritic cell, human' 'effector memory CD4-positive, alpha-beta T cell'\n 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated'\n 'erythroid lineage cell' 'gamma-delta T cell' 'germinal center B cell'\n 'group 3 innate lymphoid cell' 'lymphocyte' 'macrophage' 'mast cell'\n 'megakaryocyte' 'memory B cell' 'mucosal invariant T cell' 'naive B cell'\n 'naive thymus-derived CD4-positive, alpha-beta T cell'\n 'naive thymus-derived CD8-positive, alpha-beta T cell'\n 'non-classical monocyte' 'plasma cell' 'plasmablast'\n 'plasmacytoid dendritic cell' 'precursor B cell' 'pro-B cell'\n 'progenitor cell' 'regulatory T cell'] Counts: [ 90 265 191 343 139 171  61 234  53 287  15   5 252 181   6 135   7  12\n  40  63  47   4 293  51 125 424  75  35  81  29  14   1   1  23 132]",
                "lineage": "19201"
            }
        ],
        "193": [],
        "194": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05, solver='lbfgs', multi_class='multinomial')\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "19401"
            }
        ],
        "195": [],
        "196": [],
        "197": [],
        "198": [],
        "199": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        if len(X_class) > 0:\n            X_class_resampled, y_class_resampled = resample(X_class, \n                                                             np.full(X_class.shape[0], label), \n                                                             n_samples=counts.max(), \n                                                             replace=True)\n            X_resampled.append(X_class_resampled)\n            y_resampled.append(y_class_resampled)\n\n    if X_resampled:\n        X_resampled = np.vstack(X_resampled)\n        y_resampled = np.hstack(y_resampled)\n\n        model = LogisticRegression(max_iter=1000, C=0.05)\n        model.fit(X_resampled, y_resampled)\n\n        predictions = model.predict(X_test)\n        adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "19901"
            }
        ],
        "200": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.feature_selection import VarianceThreshold\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import FunctionTransformer\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled).reshape(-1, 1)\n\n    feature_selector = VarianceThreshold(threshold=0.1)\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05))\n\n    pipeline = Pipeline(steps=[\n        ('log_transform', FunctionTransformer(np.log1p, validate=True)),\n        ('feature_selection', feature_selector),\n        ('classifier', model)\n    ])\n\n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.796,
                "model_info": "",
                "lineage": "20001"
            }
        ],
        "201": [],
        "202": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    scaler = StandardScaler()\n    X_train_scaled = scaler.fit_transform(X_train)\n    X_test_scaled = scaler.transform(X_test)\n    \n    pca = PCA(n_components=0.95)\n    X_train_pca = pca.fit_transform(X_train_scaled)\n    X_test_pca = pca.transform(X_test_scaled)\n\n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train_pca[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test_pca)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7526,
                "model_info": "",
                "lineage": "20201"
            }
        ],
        "203": [],
        "204": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.utils import class_weight\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    class_weights = dict(zip(np.unique(y_train), class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)))\n\n    model = LogisticRegression(max_iter=1000, C=0.05, class_weight=class_weights)\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.811,
                "model_info": "",
                "lineage": "20401"
            }
        ],
        "205": [],
        "206": [],
        "207": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    print(f\"Model parameters: {model.coef_}\")\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Model parameters: [[ 2.01545397e-04 -2.23286757e-03  3.05350003e-03 ...  0.00000000e+00\n   0.00000000e+00 -1.19969990e-05]\n [-3.51830547e-03  2.17469620e-03 -2.17737168e-03 ...  0.00000000e+00\n   0.00000000e+00 -6.60845862e-06]\n [ 8.60945356e-03 -5.03266831e-04 -1.11017294e-03 ...  0.00000000e+00\n   0.00000000e+00 -1.88195970e-04]\n ...\n [ 9.68856522e-03 -2.00956374e-04 -2.43521539e-05 ...  0.00000000e+00\n   0.00000000e+00 -2.97919972e-06]\n [ 9.80186399e-04 -4.53047116e-04 -5.11296666e-05 ...  0.00000000e+00\n   0.00000000e+00 -1.00082406e-05]\n [ 1.61046224e-03 -5.75045945e-03 -4.59500453e-04 ...  0.00000000e+00\n   0.00000000e+00 -8.38356336e-06]]",
                "lineage": "20701"
            }
        ],
        "208": [],
        "209": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import Pipeline\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pipeline = Pipeline([\n        ('scaler', StandardScaler()),\n        ('model', LogisticRegression(max_iter=1000, C=0.05))\n    ])\n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "20901"
            }
        ],
        "210": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import QuantileTransformer\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    quantile_transformer = QuantileTransformer()\n    X_resampled = quantile_transformer.fit_transform(X_resampled)\n    X_test = quantile_transformer.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7827,
                "model_info": "",
                "lineage": "21001"
            }
        ],
        "211": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    max_count = counts.max()\n    \n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        if X_class.shape[0] > 0:\n            X_class_resampled, y_class_resampled = resample(X_class, \n                                                             np.full(X_class.shape[0], label), \n                                                             n_samples=max_count, \n                                                             replace=True)\n            X_resampled.append(X_class_resampled)\n            y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "21101"
            }
        ],
        "212": [],
        "213": [],
        "214": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import LabelEncoder\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    label_encoder = LabelEncoder()\n    y_train_encoded = label_encoder.fit_transform(y_train)\n\n    unique_labels, counts = np.unique(y_train_encoded, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train_encoded == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05, penalty='l2')\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = label_encoder.inverse_transform(predictions)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "21401"
            }
        ],
        "215": [],
        "216": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.metrics import confusion_matrix\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    conf_matrix = confusion_matrix(adata_test.obs[\"celltype_label\"], predictions)\n    print(\"Confusion Matrix:\\n\", conf_matrix)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Confusion Matrix:\n [[ 42   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0  60   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0  20   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0  30   0   0   0   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0  46   0   0   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0  66   0   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0  30   0   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0  25   0   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0  31   0   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0  78   0   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   0  26   0   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   0   0  35   0   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   0   0   0   6   0   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   2   0   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   7   0   0\n    0   0   0   0   0   0   0   0   0   0]\n [  0   0   0   0   0   0   0   0   0   0   
0   0   0   0   0   0   1   0\n    0   0   0   0   0   0   0   0  ",
                "lineage": "21601"
            }
        ],
        "217": [],
        "218": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    print(f\"Dataset statistics: Unique labels - {unique_labels}, Counts - {counts}\")\n    \n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Dataset statistics: Unique labels - ['CD16-negative, CD56-bright natural killer cell, human'\n 'CD16-positive, CD56-dim natural killer cell, human'\n 'CD4-positive helper T cell' 'CD8-positive, alpha-beta memory T cell'\n 'CD8-positive, alpha-beta memory T cell, CD45RO-positive'\n 'T follicular helper cell' 'alpha-beta T cell' 'alveolar macrophage'\n 'animal cell' 'classical monocyte' 'conventional dendritic cell'\n 'dendritic cell, human' 'effector memory CD4-positive, alpha-beta T cell'\n 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated'\n 'erythroid lineage cell' 'gamma-delta T cell' 'germinal center B cell'\n 'group 3 innate lymphoid cell' 'lymphocyte' 'macrophage' 'mast cell'\n 'megakaryocyte' 'memory B cell' 'mucosal invariant T cell' 'naive B cell'\n 'naive thymus-derived CD4-positive, alpha-beta T cell'\n 'naive thymus-derived CD8-positive, alpha-beta T cell'\n 'non-classical monocyte' 'plasma cell' 'plasmablast'\n 'plasmacytoid dendritic cell' 'precursor B cell' 'pro-B cell'\n 'progenitor cell' 'regulatory T cell'], Counts - [ 90 265 191 343 139 171  61 234  53 287  15   5 252 181   6 135   7  12\n  40  63  47   4 293  51 125 424  75  35  81  29  14   1   1  23 132]",
                "lineage": "21801"
            }
        ],
        "219": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import FunctionTransformer\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = make_pipeline(FunctionTransformer(lambda x: x), LogisticRegression(max_iter=1000, C=0.05))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "21901"
            }
        ],
        "220": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.utils import resample\nimport numpy as np\nimport logging\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    trainable_params = model.coef_.size + model.intercept_.size\n    logging.info(f\"Trainable Parameters: {trainable_params}\")\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "22001"
            }
        ],
        "221": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = StandardScaler()\n    X_resampled = scaler.fit_transform(X_resampled)\n    X_test = scaler.transform(X_test)\n\n    pca = PCA(n_components=50)\n    X_resampled = pca.fit_transform(X_resampled)\n    X_test = pca.transform(X_test)\n\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05))\n    y_resampled = y_resampled.reshape(-1, 1)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6019,
                "model_info": "",
                "lineage": "22101"
            }
        ],
        "222": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        if len(X_class) > 0:\n            X_class_resampled, y_class_resampled = resample(X_class, \n                                                             np.full(X_class.shape[0], label), \n                                                             n_samples=counts.max(), \n                                                             replace=True)\n            X_resampled.append(X_class_resampled)\n            y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "22201"
            }
        ],
        "223": [],
        "224": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        if X_class.shape[0] > 0:\n            X_class_resampled, y_class_resampled = resample(X_class, \n                                                             np.full(X_class.shape[0], label), \n                                                             n_samples=counts.max(), \n                                                             replace=True)\n            X_resampled.append(X_class_resampled)\n            y_resampled.append(y_class_resampled)\n\n    if X_resampled:\n        X_resampled = np.vstack(X_resampled)\n        y_resampled = np.hstack(y_resampled)\n\n        model = LogisticRegression(max_iter=1000, C=0.05)\n        model.fit(X_resampled, y_resampled)\n\n        predictions = model.predict(X_test)\n        adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "22401"
            }
        ],
        "225": [],
        "226": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import Pipeline\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pipeline = Pipeline([\n        ('scaler', StandardScaler()),\n        ('classifier', LogisticRegression(max_iter=1000, C=0.05))\n    ])\n    \n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "22601"
            }
        ],
        "227": [],
        "228": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = StandardScaler()\n    X_resampled = scaler.fit_transform(X_resampled)\n    X_test = scaler.transform(X_test)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "22801"
            }
        ],
        "229": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.metrics import f1_score\nfrom sklearn.model_selection import train_test_split\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05, class_weight='balanced')\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    f1 = f1_score(adata_test.obs[\"celltype_label\"], predictions, average='weighted')\n    if f1 < 0.5:\n        model.set_params(C=0.1)\n        model.fit(X_resampled, y_resampled)\n        predictions = model.predict(X_test)\n        adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "22901"
            }
        ],
        "230": [],
        "231": [],
        "232": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import FunctionTransformer\nfrom sklearn.feature_selection import VarianceThreshold\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef log_transform(X):\n    return np.log1p(X)\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    y_resampled = np.expand_dims(y_resampled, axis=1)\n\n    model = Pipeline(steps=[\n        ('log_transform', FunctionTransformer(log_transform)),\n        ('feature_selection', VarianceThreshold(threshold=0.1)),\n        ('classifier', MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05)))\n    ])\n    \n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.796,
                "model_info": "",
                "lineage": "23201"
            }
        ],
        "233": [],
        "234": [],
        "235": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n    \n    print(f\"Model hyperparameters: max_iter={model.max_iter}, C={model.C}\")\n    print(f\"Training labels distribution: {dict(zip(unique_labels, counts))}\")\n    \n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Model hyperparameters: max_iter=1000, C=0.05\nTraining labels distribution: {'CD16-negative, CD56-bright natural killer cell, human': 90, 'CD16-positive, CD56-dim natural killer cell, human': 265, 'CD4-positive helper T cell': 191, 'CD8-positive, alpha-beta memory T cell': 343, 'CD8-positive, alpha-beta memory T cell, CD45RO-positive': 139, 'T follicular helper cell': 171, 'alpha-beta T cell': 61, 'alveolar macrophage': 234, 'animal cell': 53, 'classical monocyte': 287, 'conventional dendritic cell': 15, 'dendritic cell, human': 5, 'effector memory CD4-positive, alpha-beta T cell': 252, 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated': 181, 'erythroid lineage cell': 6, 'gamma-delta T cell': 135, 'germinal center B cell': 7, 'group 3 innate lymphoid cell': 12, 'lymphocyte': 40, 'macrophage': 63, 'mast cell': 47, 'megakaryocyte': 4, 'memory B cell': 293, 'mucosal invariant T cell': 51, 'naive B cell': 125, 'naive thymus-derived CD4-positive, alpha-beta T cell': 424, 'naive thymus-derived CD8-positive, alpha-beta T cell': 75, 'non-classical monocyte': 35, 'plasma cell': 81, 'plasmablast': 29, 'plasmacytoid dendritic cell': 14, 'precursor B cell': 1, 'pro-B cell': 1, 'progenitor cell': 23, 'regulatory T cell': 132}",
                "lineage": "23501"
            }
        ],
        "236": [],
        "237": [],
        "238": [],
        "239": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import QuantileTransformer\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = QuantileTransformer()\n    X_resampled = scaler.fit_transform(X_resampled)\n    X_test = scaler.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7827,
                "model_info": "",
                "lineage": "23901"
            }
        ],
        "240": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom scipy.stats import skew, kurtosis\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    skewness = skew(X_train, axis=0)\n    kurt = kurtosis(X_train, axis=0)\n    for i, label in enumerate(unique_labels):\n        print(f\"Label: {label}, Skewness: {skewness[i]}, Kurtosis: {kurt[i]}\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Label: CD16-negative, CD56-bright natural killer cell, human, Skewness: 2.068824529647827, Kurtosis: 3.231673240661621\nLabel: CD16-positive, CD56-dim natural killer cell, human, Skewness: 6.2624287605285645, Kurtosis: 41.57276153564453\nLabel: CD4-positive helper T cell, Skewness: 19.46985626220703, Kurtosis: 399.1287536621094\nLabel: CD8-positive, alpha-beta memory T cell, Skewness: 6.082261562347412, Kurtosis: 41.11890411376953\nLabel: CD8-positive, alpha-beta memory T cell, CD45RO-positive, Skewness: 5.572328567504883, Kurtosis: 32.21586608886719\nLabel: T follicular helper cell, Skewness: 62.30521774291992, Kurtosis: 3879.959716796875\nLabel: alpha-beta T cell, Skewness: 46.5509033203125, Kurtosis: 2229.202392578125\nLabel: alveolar macrophage, Skewness: nan, Kurtosis: nan\nLabel: animal cell, Skewness: 15.749039649963379, Kurtosis: 293.3151550292969\nLabel: classical monocyte, Skewness: 31.753942489624023, Kurtosis: 1018.6270141601562\nLabel: conventional dendritic cell, Skewness: 4.20035982131958, Kurtosis: 18.23314094543457\nLabel: dendritic cell, human, Skewness: 6.1721415519714355, Kurtosis: 39.144588470458984\nLabel: effector memory CD4-positive, alpha-beta T cell, Skewness: 62.303836822509766, Kurtosis: 3879.844970703125\nLabel: effector memory CD8-positive, alpha-beta T cell, terminally differentiated, Skewness: nan, Kurtosis: nan\nLabel: erythroid lineage cell, Skewness: nan, Kurtosis: nan\nLabel: gamma-delta T cell, Skewness: 44.04881286621094, Kurtosis: 1938.40478515625\nLabel: germinal center B cell, Skewness: 1.858576774597168, Kurtosis: 2.432321548461914\nLabel: group 3 innate lymphoid cell, Skewness: 3.2819268703460693, Kurtosis: 10.81342601776123\nLabel: lymphocyte, Skewness: 0.748845636844635, Kurtosis: -0.6977105140686035\nLabel: macrophage, Skewness: 5.026778697967529, Kurtosis: 26.062211990356445\nLabel: mast cell, Skewness: 2.267198324203491, Kurtosis: 4.161759376525879\nLabel: megakaryocyte, Skewness: 
42.047428131103516, Kurtosis: 1811.1431884765625\nLabel: mem",
                "lineage": "24001"
            }
        ],
        "241": [],
        "242": [],
        "243": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.feature_selection import SelectFromModel\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.ensemble import RandomForestClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.extend([label] * X_class_resampled.shape[0])\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.array(y_resampled)\n\n    feature_selector = SelectFromModel(RandomForestClassifier(n_estimators=100))\n    model = Pipeline([\n        ('feature_selection', feature_selector),\n        ('classifier', LogisticRegression(max_iter=1000, C=0.05))\n    ])\n    \n    model.fit(X_resampled, y_resampled)\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8238,
                "model_info": "",
                "lineage": "24301"
            }
        ],
        "244": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import FunctionTransformer\nfrom sklearn.feature_selection import VarianceThreshold\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n\n    model = Pipeline(steps=[\n        ('log_transform', FunctionTransformer(np.log1p, validate=True)),\n        ('feature_selection', VarianceThreshold(threshold=0.1)),\n        ('classifier', MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05)))\n    ])\n    \n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.796,
                "model_info": "",
                "lineage": "24401"
            }
        ],
        "245": [],
        "246": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import class_weight\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)\n    class_weight_dict = dict(zip(np.unique(y_train), class_weights))\n\n    model = LogisticRegression(max_iter=1000, C=0.05, class_weight=class_weight_dict)\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.811,
                "model_info": "",
                "lineage": "24601"
            }
        ],
        "247": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    print(f\"Unique labels in training data: {unique_labels}\")\n    print(f\"Counts of each label: {counts}\")\n    print(f\"Shape of resampled training data: {X_resampled.shape}\")\n    print(f\"Shape of test data: {X_test.shape}\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Unique labels in training data: ['CD16-negative, CD56-bright natural killer cell, human'\n 'CD16-positive, CD56-dim natural killer cell, human'\n 'CD4-positive helper T cell' 'CD8-positive, alpha-beta memory T cell'\n 'CD8-positive, alpha-beta memory T cell, CD45RO-positive'\n 'T follicular helper cell' 'alpha-beta T cell' 'alveolar macrophage'\n 'animal cell' 'classical monocyte' 'conventional dendritic cell'\n 'dendritic cell, human' 'effector memory CD4-positive, alpha-beta T cell'\n 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated'\n 'erythroid lineage cell' 'gamma-delta T cell' 'germinal center B cell'\n 'group 3 innate lymphoid cell' 'lymphocyte' 'macrophage' 'mast cell'\n 'megakaryocyte' 'memory B cell' 'mucosal invariant T cell' 'naive B cell'\n 'naive thymus-derived CD4-positive, alpha-beta T cell'\n 'naive thymus-derived CD8-positive, alpha-beta T cell'\n 'non-classical monocyte' 'plasma cell' 'plasmablast'\n 'plasmacytoid dendritic cell' 'precursor B cell' 'pro-B cell'\n 'progenitor cell' 'regulatory T cell']\nCounts of each label: [ 90 265 191 343 139 171  61 234  53 287  15   5 252 181   6 135   7  12\n  40  63  47   4 293  51 125 424  75  35  81  29  14   1   1  23 132]\nShape of resampled training data: (14840, 29335)\nShape of test data: (1115, 29335)",
                "lineage": "24701"
            }
        ],
        "248": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.cluster import KMeans\nfrom sklearn.utils import resample\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    kmeans = KMeans(n_clusters=len(unique_labels), random_state=42)\n    cluster_labels = kmeans.fit_predict(X_resampled)\n    \n    model = RandomForestClassifier(n_estimators=100, random_state=42)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6704,
                "model_info": "",
                "lineage": "24801"
            }
        ],
        "249": [],
        "250": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.decomposition import PCA\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pca = PCA(n_components=50)\n    X_resampled = pca.fit_transform(X_resampled)\n    X_test = pca.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7001,
                "model_info": "",
                "lineage": "25001"
            }
        ],
        "251": [],
        "252": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        if X_class.shape[0] > 0:\n            X_class_resampled, y_class_resampled = resample(X_class, \n                                                             np.full(X_class.shape[0], label), \n                                                             n_samples=counts.max(), \n                                                             replace=True)\n            X_resampled.append(X_class_resampled)\n            y_resampled.append(y_class_resampled)\n\n    if X_resampled and y_resampled:\n        X_resampled = np.vstack(X_resampled)\n        y_resampled = np.hstack(y_resampled)\n\n        model = LogisticRegression(max_iter=1000, C=0.05)\n        model.fit(X_resampled, y_resampled)\n\n        predictions = model.predict(X_test)\n        adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "25201"
            }
        ],
        "253": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    scaler = StandardScaler()\n    X_train = scaler.fit_transform(X_train)\n    X_test = scaler.transform(X_test)\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    misclassified_indices = np.where(predictions != adata_test.obs[\"celltype_label\"].values)[0]\n    misclassified_examples = [(i, predictions[i], adata_test.obs[\"celltype_label\"].values[i]) for i in misclassified_indices]\n    \n    print(\"Misclassified Instances with Predicted Probabilities:\")\n    for idx, pred, true in misclassified_examples:\n        prob = model.predict_proba(X_test[idx].reshape(1, -1))\n        print(f\"Index: {idx}, Predicted: {pred}, True: {true}, Probabilities: {prob.flatten()}\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.754,
                "model_info": "Misclassified Instances with Predicted Probabilities:",
                "lineage": "25301"
            }
        ],
        "254": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.utils import resample\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = Pipeline([\n        ('scaler', StandardScaler()),\n        ('classifier', LogisticRegression(max_iter=1000, C=0.05))\n    ])\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "25401"
            }
        ],
        "255": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.feature_selection import SelectFromModel\nfrom sklearn.ensemble import RandomForestClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    feature_selector = SelectFromModel(RandomForestClassifier(n_estimators=100))\n    X_selected = feature_selector.fit_transform(X_resampled, y_resampled)\n\n    y_resampled_multi = np.expand_dims(y_resampled, axis=1)\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05))\n    model.fit(X_selected, y_resampled_multi)\n\n    X_test_selected = feature_selector.transform(X_test)\n    predictions = model.predict(X_test_selected)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8238,
                "model_info": "",
                "lineage": "25501"
            }
        ],
        "256": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    print(\"Model assumptions: Linearity, Homoscedasticity, No multicollinearity\")\n    \n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Model assumptions: Linearity, Homoscedasticity, No multicollinearity",
                "lineage": "25601"
            }
        ],
        "257": [],
        "258": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.base import clone\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pipeline = Pipeline([\n        ('scaler', StandardScaler()),\n        ('pca', PCA(n_components=min(X_resampled.shape[1], 50))),\n        ('classifier', LogisticRegression(max_iter=1000, C=0.05))\n    ])\n\n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6019,
                "model_info": "",
                "lineage": "25801"
            }
        ],
        "259": [],
        "260": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom scipy.stats import zscore\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    if len(y_train) < 100:\n        print(\"Warning: Insufficient training data volume.\")\n    \n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    z_scores = np.abs(zscore(X_resampled, axis=0))\n    outliers = np.sum(z_scores > 3)\n    print(f\"Detected outliers based on z-score analysis: {outliers}\")\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXXX.py",
                "accuracy": 0.8234,
                "model_info": "Detected outliers based on z-score analysis: 5112841",
                "lineage": "26001"
            }
        ],
        "261": [],
        "262": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nimport matplotlib.pyplot as plt\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    feature_importances = np.abs(model.coef_).flatten()\n    top_features = np.argsort(feature_importances)[-10:]\n\n    plt.bar(range(len(top_features)), feature_importances[top_features])\n    plt.xticks(range(len(top_features)), top_features)\n    plt.title(\"Feature Importances of Top Features\")\n    plt.show()\n\n    validation_score = model.score(X_test, adata_test.obs[\"celltype_label\"])\n    print(f\"Validation Score: {validation_score:.4f}\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXXX.py",
                "accuracy": 0.8234,
                "model_info": "Validation Score: 1.0000",
                "lineage": "26201"
            }
        ],
        "263": [],
        "264": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    print(f'Unique labels: {unique_labels}, Counts: {counts}')\n    \n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Unique labels: ['CD16-negative, CD56-bright natural killer cell, human'\n 'CD16-positive, CD56-dim natural killer cell, human'\n 'CD4-positive helper T cell' 'CD8-positive, alpha-beta memory T cell'\n 'CD8-positive, alpha-beta memory T cell, CD45RO-positive'\n 'T follicular helper cell' 'alpha-beta T cell' 'alveolar macrophage'\n 'animal cell' 'classical monocyte' 'conventional dendritic cell'\n 'dendritic cell, human' 'effector memory CD4-positive, alpha-beta T cell'\n 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated'\n 'erythroid lineage cell' 'gamma-delta T cell' 'germinal center B cell'\n 'group 3 innate lymphoid cell' 'lymphocyte' 'macrophage' 'mast cell'\n 'megakaryocyte' 'memory B cell' 'mucosal invariant T cell' 'naive B cell'\n 'naive thymus-derived CD4-positive, alpha-beta T cell'\n 'naive thymus-derived CD8-positive, alpha-beta T cell'\n 'non-classical monocyte' 'plasma cell' 'plasmablast'\n 'plasmacytoid dendritic cell' 'precursor B cell' 'pro-B cell'\n 'progenitor cell' 'regulatory T cell'], Counts: [ 90 265 191 343 139 171  61 234  53 287  15   5 252 181   6 135   7  12\n  40  63  47   4 293  51 125 424  75  35  81  29  14   1   1  23 132]",
                "lineage": "26401"
            }
        ],
        "265": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pipeline = Pipeline(steps=[\n        ('imputer', SimpleImputer(strategy='mean')),\n        ('scaler', StandardScaler()),\n        ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))\n    ])\n    \n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6704,
                "model_info": "",
                "lineage": "26501"
            }
        ],
        "266": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.metrics import accuracy_score, f1_score\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n    \n    val_accuracy = accuracy_score(adata_test.obs[\"celltype_label\"], predictions)\n    val_f1 = f1_score(adata_test.obs[\"celltype_label\"], predictions, average='weighted')\n    \n    print(f\"Validation Accuracy: {val_accuracy:.4f}, Validation F1 Score: {val_f1:.4f}\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Validation Accuracy: 1.0000, Validation F1 Score: 1.0000",
                "lineage": "26601"
            }
        ],
        "267": [],
        "268": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.metrics import log_loss\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    predicted_probabilities = model.predict_proba(X_test)\n    misclassified_indices = np.where(predictions != adata_test.obs[\"celltype_label\"].values)[0]\n    misclassified_probs = predicted_probabilities[misclassified_indices]\n\n    for idx in misclassified_indices:\n        print(f\"Misclassified instance index: {idx}, Predicted probabilities: {misclassified_probs[idx]}\")\n    \n    if np.any(np.isnan(X_train)) or np.any(np.isnan(X_test)):\n        print(\"Warning: Missing values detected in input features.\")\n    \n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "26801"
            }
        ],
        "269": [],
        "270": [],
        "271": [],
        "272": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nimport time\nimport resource\n\ndef tuso_model(adata_train, adata_test):\n    start_time = time.time()\n    mem_before = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n    \n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    end_time = time.time()\n    mem_after = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n    \n    print(f\"Time taken: {end_time - start_time:.2f} seconds\")\n    print(f\"Memory usage: Before - {mem_before} KB, After - {mem_after} KB\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Time taken: 107.23 seconds\nMemory usage: Before - 646172 KB, After - 6877056 KB",
                "lineage": "27201"
            }
        ],
        "273": [],
        "274": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pipeline = Pipeline(steps=[\n        ('imputer', SimpleImputer(strategy='mean')),\n        ('scaler', StandardScaler()),\n        ('classifier', LogisticRegression(max_iter=1000, C=0.05))\n    ])\n    \n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "27401"
            }
        ],
        "275": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.neighbors import KNeighborsClassifier\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = Pipeline([\n        ('scaler', StandardScaler()),\n        ('classifier', KNeighborsClassifier(n_neighbors=5))\n    ])\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.1207,
                "model_info": "",
                "lineage": "27501"
            }
        ],
        "276": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.utils.class_weight import compute_class_weight\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)\n    class_weight_dict = dict(zip(np.unique(y_train), class_weights))\n\n    model = LogisticRegression(max_iter=1000, C=0.05, class_weight=class_weight_dict)\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.811,
                "model_info": "",
                "lineage": "27601"
            }
        ],
        "277": [],
        "278": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import FunctionTransformer\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.decomposition import PCA\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef feature_extraction(X):\n    # Implement feature extraction from pre-trained embeddings here\n    return X  # Placeholder for actual feature extraction logic\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pipeline = Pipeline([\n        ('feature_extraction', FunctionTransformer(feature_extraction)),\n        ('imputer', SimpleImputer(strategy='mean')),\n        ('classifier', LogisticRegression(max_iter=1000, C=0.05))\n    ])\n\n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "27801"
            }
        ],
        "279": [],
        "280": [],
        "281": [],
        "282": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.cluster import AgglomerativeClustering\nfrom sklearn.metrics import pairwise_distances\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    scaler = StandardScaler()\n    X_train_scaled = scaler.fit_transform(X_train)\n    X_test_scaled = scaler.transform(X_test)\n\n    distance_matrix = pairwise_distances(X_train_scaled)\n    clustering = AgglomerativeClustering(n_clusters=len(np.unique(y_train)), linkage='ward')\n    cluster_labels = clustering.fit_predict(distance_matrix)\n\n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train_scaled[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test_scaled)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.754,
                "model_info": "",
                "lineage": "28201"
            }
        ],
        "283": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n    \n    model = MultiOutputClassifier(RandomForestClassifier(n_estimators=100, max_depth=10))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6705,
                "model_info": "",
                "lineage": "28301"
            }
        ],
        "284": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.feature_selection import VarianceThreshold\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.ensemble import RandomForestClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n\n    pipeline = Pipeline([\n        ('variance_threshold', VarianceThreshold(threshold=0.01)),\n        ('multi_output', MultiOutputClassifier(RandomForestClassifier(n_estimators=100)))\n    ])\n\n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7141,
                "model_info": "",
                "lineage": "28401"
            }
        ],
        "285": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.feature_selection import VarianceThreshold\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    selector = VarianceThreshold(threshold=0.1)\n    X_resampled = selector.fit_transform(X_resampled)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n    \n    model = MultiOutputClassifier(RandomForestClassifier(max_depth=10, random_state=0))\n    model.fit(X_resampled, y_resampled)\n\n    X_test = selector.transform(X_test)\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6693,
                "model_info": "",
                "lineage": "28501"
            }
        ],
        "286": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pipeline = Pipeline(steps=[\n        ('imputer', SimpleImputer(strategy='mean')),\n        ('scaler', StandardScaler()),\n        ('classifier', LogisticRegression(max_iter=1000, C=0.05))\n    ])\n    \n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "28601"
            }
        ],
        "287": [],
        "288": [],
        "289": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.metrics import f1_score\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    \n    f1 = f1_score(adata_train.obs[\"celltype_label\"], model.predict(X_train), average='weighted')\n    if f1 < 0.5:\n        model.set_params(C=model.C * 1.1)\n        model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "28901"
            }
        ],
        "290": [],
        "291": [],
        "292": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import FunctionTransformer\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n\n    transformer = FunctionTransformer(np.log1p, validate=True)\n    X_train_transformed = transformer.fit_transform(X_train)\n    X_test_transformed = transformer.transform(X_test)\n\n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train_transformed[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test_transformed)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8085,
                "model_info": "",
                "lineage": "29201"
            }
        ],
        "293": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import FunctionTransformer\nfrom sklearn.preprocessing import StandardScaler\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = Pipeline([\n        ('scaler', StandardScaler()),\n        ('classifier', LogisticRegression(max_iter=1000, C=0.05))\n    ])\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "29301"
            }
        ],
        "294": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import StandardScaler\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        if counts.max() > counts.min():\n            X_class_resampled, y_class_resampled = resample(X_class, \n                                                             np.full(X_class.shape[0], label), \n                                                             n_samples=counts.max(), \n                                                             replace=True)\n            X_resampled.append(X_class_resampled)\n            y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    scaler = StandardScaler()\n    X_resampled = scaler.fit_transform(X_resampled)\n    X_test = scaler.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7636,
                "model_info": "",
                "lineage": "29401"
            }
        ],
        "295": [],
        "296": [],
        "297": [],
        "298": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n    \n    model = MultiOutputClassifier(RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42))\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6377,
                "model_info": "",
                "lineage": "29801"
            }
        ],
        "299": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.decomposition import PCA\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    pca = PCA(n_components=50)\n    X_resampled_pca = pca.fit_transform(X_resampled)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n    model = MultiOutputClassifier(LogisticRegression(max_iter=1000, C=0.05))\n    model.fit(X_resampled_pca, y_resampled)\n\n    X_test_pca = pca.transform(X_test)\n    predictions = model.predict(X_test_pca)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7001,
                "model_info": "",
                "lineage": "29901"
            }
        ],
        "300": [],
        "301": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.feature_selection import VarianceThreshold\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n\n    pipeline = Pipeline([\n        ('variance_threshold', VarianceThreshold(threshold=0.1)),\n        ('classifier', MultiOutputClassifier(RandomForestClassifier(n_estimators=100, max_depth=None)))\n    ])\n\n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7006,
                "model_info": "",
                "lineage": "30101"
            }
        ],
        "302": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.utils import resample\nfrom sklearn.metrics import f1_score\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    \n    if \"celltype_label\" in adata_test.obs and f1_score(adata_test.obs[\"celltype_label\"], predictions, average='weighted') < 0.5:\n        model.C *= 1.1\n        model.fit(X_resampled, y_resampled)\n        predictions = model.predict(X_test)\n\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "30201"
            }
        ],
        "303": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nimport tracemalloc\n\ndef tuso_model(adata_train, adata_test):\n    tracemalloc.start()\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    current, peak = tracemalloc.get_traced_memory()\n    print(f\"Memory consumption: Current={current / 10**6}MB; Peak={peak / 10**6}MB\")\n    tracemalloc.stop()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Memory consumption: Current=5914.027568MB; Peak=6308.186287MB",
                "lineage": "30301"
            }
        ],
        "304": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import class_weight\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)\n    class_weights_dict = dict(zip(np.unique(y_train), class_weights))\n\n    model = LogisticRegression(max_iter=1000, C=0.05, class_weight=class_weights_dict)\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.811,
                "model_info": "",
                "lineage": "30401"
            }
        ],
        "305": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.feature_selection import SelectKBest, f_classif\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    selector = SelectKBest(score_func=f_classif, k=100)\n    X_train_selected = selector.fit_transform(X_train, y_train)\n    X_test_selected = selector.transform(X_test)\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train_selected[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test_selected)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6021,
                "model_info": "",
                "lineage": "30501"
            }
        ],
        "306": [],
        "307": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.utils import resample\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=42)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6704,
                "model_info": "",
                "lineage": "30701"
            }
        ],
        "308": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.feature_selection import VarianceThreshold\nfrom sklearn.pipeline import Pipeline\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled).reshape(-1, 1)\n\n    pipeline = Pipeline([\n        ('variance_threshold', VarianceThreshold(threshold=0.1)),\n        ('classifier', MultiOutputClassifier(RandomForestClassifier(n_estimators=100)))\n    ])\n    pipeline.fit(X_resampled, y_resampled)\n\n    predictions = pipeline.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7006,
                "model_info": "",
                "lineage": "30801"
            }
        ],
        "309": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nimport matplotlib.pyplot as plt\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    low_confidence_threshold = 0.5\n    low_confidence_predictions = np.sum(np.max(model.predict_proba(X_test), axis=1) < low_confidence_threshold)\n    total_predictions = len(predictions)\n    low_confidence_percentage = (low_confidence_predictions / total_predictions) * 100\n    print(f'Low-confidence predictions: {low_confidence_percentage:.2f}%')\n\n    plt.figure(figsize=(10, 5))\n    plt.title('Training Loss Curve Not Available')\n    plt.xlabel('Epochs')\n    plt.ylabel('Loss')\n    plt.legend()\n    plt.grid()\n    plt.show()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXXX.py",
                "accuracy": 0.8234,
                "model_info": "Low-confidence predictions: 12.56%",
                "lineage": "30901"
            }
        ],
        "310": [],
        "311": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import FunctionTransformer\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = Pipeline(steps=[\n        ('transformer', FunctionTransformer(lambda x: x)), \n        ('classifier', LogisticRegression(max_iter=1000, C=0.05))\n    ])\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "31101"
            }
        ],
        "312": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        if X_class.size > 0:\n            X_class_resampled, y_class_resampled = resample(X_class, \n                                                             np.full(X_class.shape[0], label), \n                                                             n_samples=counts.max(), \n                                                             replace=True)\n            X_resampled.append(X_class_resampled)\n            y_resampled.append(y_class_resampled)\n\n    if X_resampled and y_resampled:\n        X_resampled = np.vstack(X_resampled)\n        y_resampled = np.hstack(y_resampled)\n\n        model = LogisticRegression(max_iter=1000, C=0.05)\n        model.fit(X_resampled, y_resampled)\n\n        predictions = model.predict(X_test)\n        adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "31201"
            }
        ],
        "313": [],
        "314": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=42)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6704,
                "model_info": "",
                "lineage": "31401"
            }
        ],
        "315": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import QuantileTransformer\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    quantile_transformer = QuantileTransformer()\n    X_resampled = quantile_transformer.fit_transform(X_resampled)\n    X_test = quantile_transformer.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7827,
                "model_info": "",
                "lineage": "31501"
            }
        ],
        "316": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.utils import resample\nfrom sklearn.metrics import f1_score\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    if \"celltype_label\" in adata_test.obs:\n        initial_f1 = f1_score(adata_test.obs[\"celltype_label\"], model.predict(X_test), average='weighted')\n\n        if initial_f1 < 0.5:\n            model.C *= 1.5\n            model.fit(X_resampled, y_resampled)\n            predictions = model.predict(X_test)\n    else:\n        predictions = model.predict(X_test)\n\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "31601"
            }
        ],
        "317": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nfrom sklearn.preprocessing import QuantileTransformer\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    quantile_transformer = QuantileTransformer(output_distribution='uniform')\n    X_resampled = quantile_transformer.fit_transform(X_resampled)\n    X_test = quantile_transformer.transform(X_test)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7827,
                "model_info": "",
                "lineage": "31701"
            }
        ],
        "318": [],
        "319": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nimport warnings\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    if len(unique_labels) < 2:\n        warnings.warn(\"Insufficient unique labels for classification.\")\n    \n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    print(f\"Model coefficients: {model.coef_}\")\n    print(f\"Model intercept: {model.intercept_}\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "Model coefficients: [[ 2.01545397e-04 -2.23286757e-03  3.05350003e-03 ...  0.00000000e+00\n   0.00000000e+00 -1.19969990e-05]\n [-3.51830547e-03  2.17469620e-03 -2.17737168e-03 ...  0.00000000e+00\n   0.00000000e+00 -6.60845862e-06]\n [ 8.60945356e-03 -5.03266831e-04 -1.11017294e-03 ...  0.00000000e+00\n   0.00000000e+00 -1.88195970e-04]\n ...\n [ 9.68856522e-03 -2.00956374e-04 -2.43521539e-05 ...  0.00000000e+00\n   0.00000000e+00 -2.97919972e-06]\n [ 9.80186399e-04 -4.53047116e-04 -5.11296666e-05 ...  0.00000000e+00\n   0.00000000e+00 -1.00082406e-05]\n [ 1.61046224e-03 -5.75045945e-03 -4.59500453e-04 ...  0.00000000e+00\n   0.00000000e+00 -8.38356336e-06]]\nModel intercept: [-0.00389716  0.00218331 -0.00050674 -0.0107982  -0.00344683  0.00086441\n  0.01652418  0.01183748 -0.00892378  0.00173563 -0.00384434 -0.00182651\n -0.01249946  0.00301471 -0.00249962 -0.00905665  0.00253398 -0.00246094\n  0.01082547  0.00930569  0.00064633 -0.00164401  0.00117991 -0.00292977\n  0.00690013  0.00060779 -0.007801   -0.00489295  0.03427973 -0.01186999\n -0.00575484 -0.00109592 -0.0040794  -0.00193015 -0.0006805 ]",
                "lineage": "31901"
            }
        ],
        "320": [],
        "321": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\nimport matplotlib.pyplot as plt\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    feature_importances = np.abs(model.coef_[0])\n    top_features = np.argsort(feature_importances)[-5:]\n\n    plt.bar(range(len(top_features)), feature_importances[top_features])\n    plt.xticks(range(len(top_features)), top_features)\n    plt.savefig(\"feature_importance_plot.png\")\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.8234,
                "model_info": "",
                "lineage": "32101"
            }
        ],
        "322": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.utils import resample\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled).reshape(-1, 1)\n\n    model = MultiOutputClassifier(RandomForestClassifier())\n    model.fit(X_resampled, y_resampled)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6749,
                "model_info": "",
                "lineage": "32201"
            }
        ],
        "323": [],
        "324": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    \n    model = LogisticRegression(max_iter=1000, C=0.05)\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.7863,
                "model_info": "",
                "lineage": "32401"
            }
        ],
        "325": [],
        "326": [],
        "327": [],
        "328": [],
        "329": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.feature_selection import SelectFromModel\nimport numpy as np\nfrom sklearn.utils import resample\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    \n    unique_labels, counts = np.unique(y_train, return_counts=True)\n    X_resampled = []\n    y_resampled = []\n\n    for label in unique_labels:\n        X_class = X_train[y_train == label]\n        X_class_resampled, y_class_resampled = resample(X_class, \n                                                         np.full(X_class.shape[0], label), \n                                                         n_samples=counts.max(), \n                                                         replace=True)\n        X_resampled.append(X_class_resampled)\n        y_resampled.append(y_class_resampled)\n\n    X_resampled = np.vstack(X_resampled)\n    y_resampled = np.hstack(y_resampled)\n\n    feature_selector = SelectFromModel(RandomForestClassifier(n_estimators=100))\n    feature_selector.fit(X_resampled, y_resampled)\n    X_resampled_selected = feature_selector.transform(X_resampled)\n    X_test_selected = feature_selector.transform(X_test)\n\n    y_resampled = y_resampled.reshape(-1, 1)\n    model = MultiOutputClassifier(RandomForestClassifier(n_estimators=100, max_depth=10))\n    model.fit(X_resampled_selected, y_resampled)\n\n    predictions = model.predict(X_test_selected)\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_XXXX.py",
                "accuracy": 0.6845,
                "model_info": "",
                "lineage": "32901"
            }
        ]
    },
    "2": {
        "2": [
            {
                "code": "from sklearn.linear_model import LogisticRegression\nfrom sklearn.multioutput import MultiOutputClassifier\nfrom sklearn.preprocessing import LabelEncoder\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values\n    label_encoder = LabelEncoder()\n    y_train_encoded = label_encoder.fit_transform(y_train)\n\n    model = LogisticRegression(max_iter=1000)\n    model.fit(X_train, y_train_encoded)\n\n    predictions_encoded = model.predict(X_test)\n    predictions = label_encoder.inverse_transform(predictions_encoded)\n\n    adata_test.obs[\"celltype_label\"] = predictions\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_X.py",
                "accuracy": 0.8027,
                "model_info": "",
                "lineage": "221"
            }
        ],
        "3": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.multioutput import MultiOutputClassifier\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test  = adata_test.layers[\"normalized\"].toarray()\n    \n    y_train = adata_train.obs[\"celltype_label\"].values.reshape(-1, 1)\n\n    model = MultiOutputClassifier(RandomForestClassifier(n_estimators=100, max_depth=10))\n    model.fit(X_train, y_train)\n\n    predictions = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = predictions.flatten()\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round14_multi_task_learning_X.py",
                "accuracy": 0.485,
                "model_info": "",
                "lineage": "321"
            }
        ],
        "4": []
    },
    "3": {
        "2": [
            {
                "code": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.utils import resample\nimport numpy as np\n\ndef tuso_model(adata_train, adata_test):\n    X_train = adata_train.layers[\"normalized\"].toarray()\n    X_test = adata_test.layers[\"normalized\"].toarray()\n\n    le = LabelEncoder()\n    y_train = le.fit_transform(adata_train.obs[\"celltype_label\"])\n\n    unique_labels = np.unique(y_train)\n    augmented_X_train = []\n    augmented_y_train = []\n\n    for label in unique_labels:\n        X_label = X_train[y_train == label]\n        if len(X_label) < 2:\n            continue\n        X_resampled = resample(X_label, replace=True, n_samples=X_label.shape[0] * 2, random_state=42)\n        augmented_X_train.append(X_resampled)\n        augmented_y_train.extend([label] * X_resampled.shape[0])\n\n    X_train_augmented = np.vstack(augmented_X_train)\n    y_train_augmented = np.array(augmented_y_train)\n\n    model = RandomForestClassifier()\n    model.fit(X_train_augmented, y_train_augmented)\n\n    y_pred = model.predict(X_test)\n    adata_test.obs[\"celltype_label\"] = le.inverse_transform(y_pred)\n\n    return adata_test",
                "file": "/projects/users/tusoml/label_proj_tusoml_round17_deep_transfer_learniXX.py",
                "accuracy": 0.5608,
                "model_info": "",
                "lineage": "231"
            }
        ]
    }
}