# Datasets requested for the kauri and ktree runs in `merge_results`, and
# forwarded to the analysis script through --all_datasets.
all_datasets = [
    "mice_protein",
    "haberman_survival",
    "iris",
    "breast_cancer",
]

# Kernels requested for the kauri runs, and forwarded through --all_kernels.
all_kernels = [
    "linear",
]

# Number of bootstrap replicates requested per (dataset, n_clusters) pair;
# replicate indices are range(n_bootstrap).
n_bootstrap = 20

# Global regular-expression constraints applied to the wildcards used in the
# rule output patterns below.
#   n_clusters / bootstrap : digits only. Raw strings are used because "\d" is
#                            an invalid escape sequence in a plain Python
#                            string literal and triggers a warning on recent
#                            interpreters.
#   dataset                : one of the four dataset names listed explicitly.
#   kernel                 : accepts more kernel names than `all_kernels`
#                            currently requests.
wildcard_constraints:
    n_clusters=r"\d+",
    bootstrap=r"\d+",
    dataset="(mice_protein|haberman_survival|iris|breast_cancer)",
    kernel="(laplacian|linear|sigmoid|rbf|poly|polynomial|cosine)"

# Target rule: asks for the merged model-selection table, which is the output
# of `merge_results`.
rule all:
    input: "model_selection.csv"

# Collects every per-run CSV and invokes the analysis script to produce
# model_selection.csv.
#
# Requested inputs, all for n_clusters in 2..15:
#   * kauri  : every kernel in `all_kernels` x every dataset, plain run plus
#              `n_bootstrap` bootstrap replicates;
#   * douglas: euclidean only, for three datasets (mice_protein is not
#              requested), plain run only;
#   * ktree  : linear only, every dataset, plain run plus bootstrap replicates.
#
# The input paths are not passed to the script; it is pointed at the current
# directory through --result_folder instead.
rule merge_results:
    input:
        expand(
            "{kernel}/{dataset}_kauri_k{n_clusters}.csv",
            kernel=all_kernels,
            dataset=all_datasets,
            n_clusters=range(2, 16),
        ),
        expand(
            "{kernel}/{dataset}_kauri_k{n_clusters}_bootstrap{bootstrap}.csv",
            kernel=all_kernels,
            dataset=all_datasets,
            n_clusters=range(2, 16),
            bootstrap=range(n_bootstrap),
        ),
        expand(
            "euclidean/{dataset}_douglas_k{n_clusters}.csv",
            dataset=["haberman_survival", "iris", "breast_cancer"],
            n_clusters=range(2, 16),
        ),
        expand(
            "linear/{dataset}_ktree_k{n_clusters}.csv",
            dataset=all_datasets,
            n_clusters=range(2, 16),
        ),
        expand(
            "linear/{dataset}_ktree_k{n_clusters}_bootstrap{bootstrap}.csv",
            dataset=all_datasets,
            n_clusters=range(2, 16),
            bootstrap=range(n_bootstrap),
        ),
    output:
        "model_selection.csv"
    shell:
        "python ../scripts/analyse_model_selection.py"
        " --result_folder ."
        " --path_to_data ../data/datasets"
        " --all_kernels {all_kernels}"
        " --all_distances euclidean"
        " --methods kauri douglas ktree"
        " --all_datasets {all_datasets}"
        " --output_csv {output}"

# Single kauri run for one (kernel, dataset, n_clusters) combination.
# The dataset, cluster count and kernel are taken from the wildcards of the
# requested output path, and the path itself is handed to the script through
# --output_file.
# Indentation uses spaces only, matching the other rules in this file (the
# previous body mixed spaces and tabs).
rule make_kauri_run:
    output:
        "{kernel}/{dataset}_kauri_k{n_clusters}.csv"
    shell:
        "python ../scripts/predictions_main.py kauri"
        " --dataset {wildcards.dataset}"
        " --output_file {output}"
        " --n_clusters {wildcards.n_clusters}"
        " --subset_size 1"
        " --kernel {wildcards.kernel}"
    	
# Bootstrap replicate of a kauri run: same command as `make_kauri_run` with
# the extra --gap flag, written to a path that carries the replicate index.
# NOTE(review): the {bootstrap} wildcard only distinguishes output files; it
# is not forwarded to the script, so replicates presumably differ through
# randomness inside predictions_main.py -- confirm.
# Indentation uses spaces only, matching the other rules in this file (the
# previous body mixed spaces and tabs).
rule make_kauri_bootstrap:
    output:
        "{kernel}/{dataset}_kauri_k{n_clusters}_bootstrap{bootstrap}.csv"
    shell:
        "python ../scripts/predictions_main.py kauri"
        " --dataset {wildcards.dataset}"
        " --output_file {output}"
        " --n_clusters {wildcards.n_clusters}"
        " --subset_size 1"
        " --kernel {wildcards.kernel}"
        " --gap"

# Single ktree run for one (dataset, n_clusters) pair. The output directory is
# fixed to "linear/" and no --kernel option is passed to the script.
rule make_ktree_run:
    output:
        "linear/{dataset}_ktree_k{n_clusters}.csv"
    shell:
        "python ../scripts/predictions_main.py ktree"
        " --dataset {wildcards.dataset}"
        " --output_file {output}"
        " --n_clusters {wildcards.n_clusters}"
        " --subset_size 1"
        
# Bootstrap replicate of a ktree run: the `make_ktree_run` command plus the
# --gap flag, written to a path that carries the replicate index.
# NOTE(review): {bootstrap} is only part of the file name and is not passed to
# the script -- confirm replicates are meant to differ by internal randomness.
rule make_ktree_bootstrap:
    output:
        "linear/{dataset}_ktree_k{n_clusters}_bootstrap{bootstrap}.csv"
    shell:
        "python ../scripts/predictions_main.py ktree"
        " --dataset {wildcards.dataset}"
        " --output_file {output}"
        " --n_clusters {wildcards.n_clusters}"
        " --subset_size 1"
        " --gap"


# Single douglas run for one (dataset, n_clusters) pair, written under
# "euclidean/". This file defines no bootstrap variant for this method.
rule make_douglas_run:
    output:
        "euclidean/{dataset}_douglas_k{n_clusters}.csv"
    shell:
        "python ../scripts/predictions_main.py douglas"
        " --dataset {wildcards.dataset}"
        " --output_file {output}"
        " --n_clusters {wildcards.n_clusters}"
        " --subset_size 1"
