{
    "sub_tasks": [
        {
            "bullet_points": [
                "Load the 1000 merger trees from the provided file using PyTorch Geometric.",
                "Normalize node features (log10(mass), log10(concentration), log10(Vmax), scale factor) across all nodes.",
                "Split the dataset into training, validation, and test sets, ensuring all trees from the same simulation are in the same split.",
                "For each tree, identify substructures using adaptive thresholds on mass accretion rates and significant changes in halo properties.",
                "Extract physical features (mass ratio, merger time, concentration/Vmax difference, etc.) for each substructure.",
                "Save the processed and split data for downstream analysis."
            ],
            "sub_task": "Data Preprocessing and Multi-Scale Substructure Extraction",
            "sub_task_agent": "engineer"
        },
        {
            "bullet_points": [
                "Implement and train a GraphSAGE GNN on substructure graphs to learn topological embeddings using the normalized node features.",
                "Confirm that the features used in the GNN model are consistent with those used for tensor construction in the \"Tensor Construction and QITT Decomposition\" sub-task, ensuring no discrepancies in data dimensions during concatenation and padding.",
                "For each substructure, generate a fixed-size topological embedding (e.g., 64 dimensions) using the trained GNN and global mean pooling.",
                "Concatenate physical features and topological embeddings for each substructure.",
                "Pad substructure lists per tree to a fixed length using embeddings from a canonical \"null\" substructure.",
                "Save the resulting tensors for each tree."
            ],
            "sub_task": "Topological Embedding Generation via GNN",
            "sub_task_agent": "engineer"
        },
        {
            "bullet_points": [
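                "For each tree, construct a tensor of shape (max_N_sub, D_feat) from the concatenated features, where max_N_sub is the fixed padded number of substructures per tree and D_feat is the length of the concatenated physical-plus-embedding feature vector.",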
                "Flatten and reshape each tensor into a high-order tensor suitable for QITT decomposition.",
                "Confirm and document the external library used for tensor train decomposition (e.g., TensorLy), and ensure its compatibility with PyTorch. Validate that the dependency is properly integrated.",
                "Apply QITT decomposition to each tree tensor, selecting TT-rank via cross-validation on the validation set.",
                "Flatten and concatenate the QITT cores to form feature vectors for regression.",
                "Document the chosen tensor shapes, TT-ranks, and any regularization applied."
            ],
            "sub_task": "Tensor Construction and QITT Decomposition",
            "sub_task_agent": "engineer"
        },
        {
            "bullet_points": [
                "Train regression models (Linear Regression, Random Forest, Gradient Boosting) on the QITT-derived features to predict Omega_m and sigma_8.",
                "Use explicit cross-validation procedures for tuning both regression model hyperparameters and TT-rank selection. Document the TT-rank sweep, specify the criteria for optimal TT-rank selection, and explain its impact on model performance.",
                "Train baseline models using aggregate graph-level features, raw substructure features, graphlet counts, and topology embeddings without QITT.",
                "Evaluate all models on the test set using RMSE and R-squared.",
                "Perform statistical significance tests (e.g., paired t-tests) on prediction errors.",
                "Generate plots: model performance comparisons, feature importances, and error distributions."
            ],
            "sub_task": "Regression Modeling and Baseline Comparisons",
            "sub_task_agent": "engineer"
        },
        {
            "bullet_points": [
                "Visualize representative substructure graphs, especially those corresponding to high-magnitude QITT core elements.",
                "Use dimensionality reduction (t-SNE/UMAP) to visualize the distribution of learned topological embeddings.",
                "Plot the distribution of TT-core magnitudes and their relation to physical/topological features.",
                "Clearly separate plots used for diagnostic/internal purposes from those intended for final publication.",
                "Label and reference all plots intended for the results section with figure numbers and captions, so they can be unambiguously cited in the results report.",
                "Summarize key statistics and generate all plots needed for the results section."
            ],
            "sub_task": "Visualization and Interpretation of QITT and Embeddings",
            "sub_task_agent": "engineer"
        },
        {
            "bullet_points": [
                "Write a comprehensive Results section summarizing all quantitative and qualitative findings.",
                "Reference all relevant plots by figure number or caption, and clearly communicate quantitative results (RMSE, R-squared, p-values) and qualitative analysis of topological insights.",
                "Interpret the performance of QITT-based models versus baselines, referencing all relevant plots and statistics.",
                "Discuss the physical and topological insights gained from the QITT decomposition and GNN embeddings.",
                "Highlight the significance of the findings for cosmological parameter estimation and potential future directions.",
                "Ensure all results, interpretations, and references to plots are included for the paper writer agents."
            ],
            "sub_task": "Results Synthesis and Interpretation",
            "sub_task_agent": "researcher"
        }
    ]
}