import csv
import io
import itertools
import json
import os
import shutil
from typing import Dict, List, Tuple

import pandas as pd
import requests
from PIL import Image, UnidentifiedImageError
from tqdm import tqdm

from src.bongard_problems.classes import ClassificationAttempt

# Problem numbers contributed by each author.
# Keys look like the per-author directory names used on foundalis.com (see the
# URL in the TODO below) — presumably they are passed as ``author`` to the
# image downloader; verify against the caller.
# Every value is a re-iterable sequence (a ``range`` or a ``list``).
# Non-contiguous ranges are materialized into lists on purpose: a bare
# ``itertools.chain`` object is a one-shot iterator and would be silently empty
# the second time the entry is iterated.
PROBLEMS_BY_AUTHOR = {
    "bongard": range(1, 101),
    "doughof": range(101, 157),
    # TODO: https://www.foundalis.com/res/bps/foundal/p300.htm is empty
    # "foundal": list(itertools.chain(range(157, 201), range(238, 240), range(300, 301))),
    "foundal": list(itertools.chain(range(157, 201), range(238, 240))),
    "insana": range(201, 233),
    "shanahan": range(233, 238),
    "howells": range(240, 245),
    "rispoli": list(itertools.chain(range(245, 247), range(260, 261))),
    "gunnarsson": range(247, 260),
    "ihde": range(261, 262),
    "barenbaum": range(262, 264),
    "merse": range(264, 281),
    # TODO: Some files below don't have the .gif page
    "joon": range(281, 284),
    "lewis": list(itertools.chain(range(284, 300), range(301, 311))),
    "stepo": range(311, 317),
    "fairbanks": range(317, 384),
    "collins": range(384, 395),
}

# fmt: off
# Index of problem numbers grouped by the concept (category) they involve.
# Each key is a concept name; each value has one of two shapes:
#   * a list of problem numbers belonging to that concept, or
#   * a "see: <other concept>" string that redirects to another entry.
# Code that iterates the values must therefore check the value type first.
# NOTE(review): some "see:" targets do not match any key verbatim (e.g.
# "Counting" points at "Iteration (discrete, within box)" while the key is
# "Iteration (discrete, within the box)"), so redirects cannot be resolved
# with a plain dictionary lookup.
PROBLEMS_BY_CATEGORY = {
    "Above / Below": [36, 37, 48, 67, 68, 337],
    "Absence seen as presence": [121, 193, 297],
    "Absolute location in the box": [8, 109, 207, 298],
    "Acceleration / Deceleration": [300, 357, 392],
    "Acute angle": [17, 32, 33, 292],
    "Addition (arithmetic)": [230, 233, 381],
    "Adjacent": [142],
    "All ( > 2, within the box) / Not all": [22, 39, 56, 78, 92, 108, 120, 133, 134, 161, 177, 189, 190, 208, 219, 244, 260, 296, 368, 376, 388],
    "Analogy (a : b :: c : d)": [361, 362],
    "Angle": [17, 32, 33, 77, 262, 328, 329],
    "Approximately": "see: Imperfection (small)",
    "Arrow": [52, 102, 118, 358, 363],
    "Average (of values of some feature)": [22, 48],
    "Background of box": [157],
    "Background of shape": [194],
    "Barycenter of cluster": [310],
    "Base of object": [116, 132, 199, 273],
    "Becoming larger / Becoming smaller": [43, 74, 108, 179, 180, 278, 286, 300, 318, 341, 350, 351, 352, 357, 365],
    "Belongs": "see: Categorization",
    "Between": [44, 94, 102, 111, 117],
    "Bongard Problem": [200],
    "Box (of Bongard Problem)": [8, 157, 209, 225, 231, 234, 257, 321],
    "Branching point on linear structure": [67, 68, 86, 92, 93, 113],
    "Categorization": [346, 349, 372, 393],
    "Center of box": [209],
    "Center of circle": [104, 117, 133, 134, 178],
    "Center of mass": [366, 367],
    "Chess-like board and concepts": [376],
    "Clockwise / Counterclockwise turn": [16, 54, 120, 208, 226, 259, 313, 336, 363],
    "Close to / Far from (spatially)": "see: Near / Far (spatially)",
    "Closedness / Openness": [15, 264, 313],
    "Clustering (within the box) based on feature value(s)": [25, 26, 27, 41, 48, 49, 58, 81, 89, 90, 141, 142, 143, 144, 145, 147, 149, 156, 166, 167, 169, 189, 205, 220, 255, 261, 307, 308, 349, 356, 364, 372],
    "Clustering (within the box) based on number": [349],
    "Clustering (within the box) based on relation": [29, 42, 49, 61, 66, 128, 143, 144, 190, 310, 347, 349, 356],
    "Clustering (within the box) based on shape": [28, 58, 65, 83, 84, 99, 141, 142, 143, 144, 145, 169, 186, 187, 205, 253, 349],
    "Clustering (within the box) needs cluster of 1": [81, 83, 89, 90, 149, 156, 166, 167, 169, 220, 347, 349],
    "Coincidence of 3+ lines at a point": [78, 86, 162, 287],
    "Collinearity (straight line or curve)": [40, 41, 42, 64, 66, 92, 93, 94, 96, 99, 133, 134, 147, 161, 212, 254, 285, 302],
    "Color (as in: outlined, filled)": "see: Outlined / Filled",
    "Completeness": [379],
    "Concave / Convex region of a curve": [75, 215, 274],
    "Concavity on object": "see: Indentation",
    "Concavity / Convexity": [4, 136, 174, 177, 182],
    "Conjunction (logical)": [26, 28, 140, 141, 142, 143, 144, 145, 325],
    "Continuous change": "see: Discrete / Continuous quantity",
    "Contrast high / low": [225],
    "Convex hull": [12, 65, 81, 82, 83, 84, 97, 138, 192, 242, 268, 272, 278, 280, 348],
    "Comparison of numbers": "see: Number (of feature or object); Sameness based on number",
    "Completed out of the box": [193, 210, 312],
    "Coordinate (X/Y) projected on box": [8, 112],
    "Corner of box": [243],
    "Correspondence": [121, 151, 295],
    "Counting": "see: Iteration (discrete, within box)",
    "Crossings of lines (X-points)": [30, 99, 113, 114, 199, 213, 240, 261, 266, 273],
    "Curves / Straight lines": [5, 130, 154, 155, 246, 247, 263],
    "Cusp": [32, 44],
    "Cyclic path": [118, 316],
    "Decreasing / Increasing": "see: Becoming larger / Becoming smaller",
    "Dent": "see: Indentation",
    "Diametrically opposite": [224],
    "Difference (arithmetic)": [164, 187, 244, 311],
    "Dimensionality": [330, 331],
    "Direction (absolute, w.r.t. the box) of straight line": [199, 215, 234, 238, 273, 274],
    "Direction (absolute, w.r.t. the box) of tangent to curve": [206],
    "Direction (absolute, w.r.t. the box) of rotation": [16, 54, 55, 208, 226],
    "Discrete / Continuous quantity": [351],
    "Disjunction (logical)": [13, 266, 270, 341],
    "Distractor": "see: Noise",
    "Division of integers / Fractions": [387],
    "Downward / Upward": [179, 199, 206, 215, 234, 273, 274, 286, 337],
    "Elongatedness / Roundness": [11, 12, 65, 168, 192, 268],
    "Empty / Nonempty": [1, 260],
    "Ending of curve (tangent at its end)": [72, 105, 206, 263],
    "Endpoints of a line or curve": [62, 68, 69, 94, 96, 103, 153, 154, 198, 206, 207, 216, 230, 239, 240, 276],
    "Entrance / Exit": [175, 191],
    "Entropy high / low": "see: Pattern / Randomness",
    "Equality": "see: Sameness (within the box)",
    "Equivalence relation": "see: If and only if",
    "Even / Odd": [150, 202, 267, 334],
    "Every other": [162, 336],
    "Exclusive or operator (logical)": [325],
    "Existence": [1, 24, 26, 92, 118, 119, 131, 138, 152, 209, 210, 221, 266, 276, 290, 296, 298, 343, 347, 349, 368, 391, 394],
    "Exists exactly one": [127, 342],
    "Exponential / Linear": [392],
    "Filled / Outlined": "see: Outlined / Filled",
    "Finite / Infinite": [380],
    "Fractal": [355, 356],
    "Fraction": "see: Ratio",
    "Front / Back (in 3-D)": [45, 46, 195],
    "Gear": [336],
    "Ghost figure": [193, 297],
    "Half": [284],
    "Hole": [34, 35],
    "Horizontal / Vertical": [7, 13, 19, 65, 66, 95, 116, 132, 147, 192, 206, 268, 270, 280, 281],
    "Identical objects (within the box)": [57, 58],
    "If and only if": [146, 227, 290],
    "Imaginary entity point": [52, 78, 102, 104, 117, 133, 134, 161, 162, 178, 183, 184, 249, 250, 257, 287, 310, 312, 368],
    "Imaginary entity line or curve": [64, 73, 74, 78, 81, 103, 106, 112, 117, 131, 154, 162, 163, 183, 199, 235, 238, 239, 240, 245, 250, 257, 261, 273, 275, 280, 287, 292, 312, 342, 358, 368, 383, 388, 394],
    "Imaginary entity shape": [96, 97, 101, 138, 193, 194, 344, 345, 362, 390, 391],
    "Imperfection (small)": [119, 130, 148, 160, 168, 222, 223, 224, 299, 338],
    "Increasing / Decreasing": "see: Becoming larger / Becoming smaller",
    "Indentation": [17, 55, 76, 91, 101, 125, 129, 175, 181, 231],
    "Infinite 2-D plane": [335],
    "Infinity": "see: Finite / Infinite",
    "Innermost / Outermost": [115],
    "Inside": [29, 47, 49, 53, 71, 83, 111, 128, 146, 198, 248, 249, 291, 306, 310, 317, 367],
    "Interior / Exterior (of a closed area)": [29, 42, 49, 84, 122, 128, 291, 306, 310],
    "Intersection": "see: Overlapping; also: Crossings of lines (X-points)",
    "Intersection of sets": "see: Union / Intersection (of sets)",
    "Irrelevant objects or features": "see: Noise",
    "Iteration (discrete, within the box)": [318, 350, 351, 352, 353, 355, 356],
    "Jigsaw puzzle": [323],
    "Knot": [382, 389],
    "Large / Small": "see: Size of objects or features",
    "Left / Right": [8, 63, 67, 68, 191, 234, 279, 280, 339],
    "Leftward / Rightward": [43, 206, 278],
    "Length of lines or curves": [14, 79, 80, 103, 112, 131, 151, 155, 176, 235, 237, 239, 241, 243, 256, 257, 279, 280, 314, 328],
    "Light / Dark": [196, 236],
    "Loop / no loop": [15, 353, 354],
    "Lying on (touching) a line or curve": [44, 55, 66, 69, 91, 104, 117, 190, 336],
    "Maximum / Minimum (local)": [309],
    "Maze": [294, 314, 315, 316],
    "Meeting at a point after tracing path": [52, 102, 358],
    "Meta-solution": [1, 200],
    "Middle point of two points": [112, 117, 161, 257],
    "Missing part / Extra part": [148],
    "Motion needing to be imagined": [175, 201, 234, 239, 323, 336, 358, 369, 370, 376, 389],
    "Motion real": [300],
    "Multiple (arithmetic)": [319],
    "Near / Far (spatially)": [20, 49, 51, 62, 79, 149, 151, 182, 212, 227, 243, 256, 310],
    "Neck, narrowing in area": [18, 19, 20],
    "Noise": [10, 25, 37, 40, 42, 58, 60, 64, 65, 66, 73, 82, 98, 105, 106, 109, 116, 123, 127, 128, 130, 131, 132, 135, 136, 162, 165, 174, 181, 183, 192, 201, 203, 204, 206, 207, 210, 222, 223, 231, 237, 242, 261, 298, 303],
    "Notch": [17, 55, 76, 91, 101, 125, 129, 175, 181, 231],
    "Number (countable quantity of feature or object)": [6, 10, 23, 27, 28, 29, 31, 53, 85, 86, 87, 88, 89, 90, 91, 96, 98, 107, 110, 114, 122, 126, 127, 141, 143, 144, 145, 151, 156, 159, 160, 164, 166, 167, 169, 181, 185, 187, 193, 194, 204, 209, 220, 231, 232, 233, 246, 247, 250, 262, 272, 277, 281, 296, 311, 318, 319],
    "Numerosity (sense of num. quantity)": [320],
    "Odd": [150, 202, 267, 334],
    "Order / Disorder": [173, 359, 360],
    "Ordinal position": [302, 318],
    "Outer outline of shape or region": [322, 341],
    "Outlined / Filled": [3, 25, 26, 27, 28, 34, 35, 41, 45, 48, 56, 58, 79, 90, 93, 94, 128, 157, 180, 189, 190, 211, 244, 261, 282, 283, 284, 288, 301, 337, 341],
    "Overlapping of two objects": [45, 46, 81, 259, 301],
    "Overlapping of line and object": [163, 275],
    "Outside": [84, 138],
    "Parallel / Perpendicular": [35, 39, 72, 73, 91, 101, 105, 131, 165, 178, 219, 238, 239, 255],
    "Path": [115, 118, 176, 314, 316, 321],
    "Pattern / Randomness": [173, 359, 360],
    "Percent": [288, 306],
    "Point (dot, no discernible shape)": [166, 167, 230, 317, 318, 319],
    "Pointing to": [52, 117],
    "Prime number": [203, 253],
    "Proportional magnitudes": [317, 319],
    "Protrusion": [32, 91, 108, 125, 129, 226],
    "Ratio": [288, 306],
    "Reachable / Unreachable": [115, 235, 294, 315, 321],
    "Recursion": [70, 71, 167, 186],
    "Regions on two sides of a curve": [67, 75, 135],
    "Regular change / Irregular change": [43, 139, 179, 218, 278, 286],
    "Remove (mentally) some object(s)": [151],
    "Rotation required": [175, 201, 228, 229, 259, 305],
    "Sameness (i.e., equality) (within the box) of feature": [56, 77, 80, 103, 112, 124, 189, 190, 238, 239, 257, 328, 329],
    "Sameness (i.e., equality) (within the box) of number": [61, 128, 137, 169, 244, 281],
    "Sameness (i.e., equality) (within the box) of shape": [57, 58, 125, 140, 141, 142, 143, 145, 195, 259, 293, 303, 305, 324, 338, 343],
    "Sameness (i.e., equality) (within the box) of concept": [378],
    "Self-reference": [390],
    "Semiplanes of a straight line": [61, 81, 121, 183],
    "Separable / Entangled": [81, 144],
    "Separation of objects joined together": [20, 44, 55, 88, 90, 91, 94, 126, 135, 202, 309],
    "Sequence (arithmetic)": [318],
    "Shape": [6, 10, 13, 24, 25, 26, 28, 29, 36, 37, 38, 46, 47, 54, 55, 57, 58, 61, 64, 73, 75, 79, 80, 82, 83, 84, 97, 98, 109, 110, 111, 119, 121, 123, 125, 127, 131, 139, 146, 149, 150, 151, 153, 154, 159, 170, 187, 188, 193, 194, 242, 245, 248, 249, 272, 283, 289, 297, 298, 323],
    "Similarity (but not equality) (of feature)": [65],
    "Similarity (but not equality) (of number)": [320],
    "Similarity (but not equality) (of shape)": [59, 60, 123, 139, 145, 188, 205, 248, 249, 282, 338],
    "Similarity (but not equality) (of representation)": [377],
    "Simple / Complex representation": [374, 375],
    "Size of objects or features": [2, 21, 22, 34, 38, 76, 126, 140, 211, 248, 256, 286, 295, 301],
    "Slope of straight line": [7, 13, 39, 106, 158],
    "Slope positive / negative": [106],
    "Specificity (of shape (topological))": [100, 170, 175],
    "Specificity (of style)": [153, 154, 155, 197, 214, 228],
    "Specificity (of value of a feature)": [158, 229],
    "Speed (or velocity)": [300, 358],
    "Square (as opposed to rectangle)": [242],
    "Square of a number": [384],
    "Sum (arithmetic)": "see: Addition (arithmetic)",
    "Symmetry (axis of)": [50, 152, 258, 265, 342],
    "Symmetry (radial)": [172, 269],
    "Tangent at point of curve": [183, 238, 326, 327],
    "Tessellation": [335],
    "Texture (as in: outlined / filled)": "see: Outlined / Filled",
    "Texture (of line or curve)": [9, 63, 107, 184, 185, 212, 216, 217, 221, 248, 249],
    "Texture (of area)": [95, 196, 236],
    "Tiling": [122, 201, 229, 283, 289, 323, 344, 386],
    "Together": "see: Near / Far (spatially)",
    "Topological transformation": [369, 370, 382, 389, 390],
    "Touchings of lines (T-points)": [113, 266],
    "Tracing along a line or curve": [16, 43, 52, 55, 67, 68, 69, 70, 72, 74, 102, 105, 115, 120, 179, 180, 198, 208, 278, 336, 340, 341, 350, 351, 352, 353, 354, 355, 356, 357, 363, 365],
    "Triangle (equilateral)": [82, 151],
    "Triangle (incenter)": [171],
    "Triangle (isosceles)": [80, 103],
    "Triangle (orthocenter)": [171],
    "Triangle (used as arrow to point to)": [117, 287],
    "True, correct / False, incorrect": [393],
    "Trunk of a tree-like structure": [69, 70],
    "Tumbles/ Stays put": [199, 273],
    "Turning direction": [120],
    "Union / Intersection (of sets)": [339, 345, 373],
    "Uppermost / Lowermost": [116, 179],
    "Variance (statistical notion)": [173],
    "Vertex of two lines that meet": [116, 132, 230, 254, 307, 312, 340],
    "Visual disparity": "see: 3-D Depth",
    "Visual illusion": [237],
    "Wave": [381],
    "Wrapping around the box": [244],
    "Written in single-stroke": [271, 340],
    "3-D Depth": [195],
    "3-D Impossible solid / Possible solid": [252],
    "3-D Net": [371, 385],
    "3-D Solid": [250, 251, 370, 371]
}

def get_problem_labels_csv(output_file: str):
    """Scrape the problem-solutions table from foundalis.com and save it as CSV.

    The solutions page is parsed with ``pandas.read_html`` (first row as the
    header, first column as the index); the second table found on the page is
    the one written out.

    Args:
        output_file: Path of the CSV file to write.
    """
    solutions_page = "https://www.foundalis.com/res/bps/bongard_problems_solutions.htm"
    tables = pd.read_html(solutions_page, header=0, index_col=0)
    # Index 1: the second table parsed from the page is the one that is kept.
    solutions_table = tables[1]
    solutions_table.to_csv(output_file)


def get_problem_images(
    output_directory_path: str,
    indexes: List[int],
    author: str = "bongard",
    timeout: float = 30.0,
):
    """Download the problem images of one author and save them locally.

    For every index ``i`` the image is fetched from
    ``http://www.foundalis.com/res/bps/{author}/p{i:03d}.gif`` and saved as
    ``{output_directory_path}/{i}.png``. Indexes whose output file already
    exists are skipped, so an interrupted run can simply be restarted.

    Args:
        output_directory_path: Directory for the images; created if missing.
        indexes: Problem numbers to download.
        author: Author directory name used in the download URL.
        timeout: Seconds to wait for the server before giving up on a
            request. Without a timeout a stalled connection would block the
            whole download loop indefinitely.

    Raises:
        UnidentifiedImageError: If the response body is not a readable image
            (for example when the server answers with an error page). The
            offending URL is printed before the exception propagates.
        requests.exceptions.RequestException: On connection errors or when
            ``timeout`` expires.
    """
    os.makedirs(output_directory_path, exist_ok=True)

    for i in tqdm(indexes, desc=author):
        output_path = f"{output_directory_path}/{i}.png"
        if os.path.exists(output_path):
            # Already downloaded by an earlier run.
            continue

        url = f"http://www.foundalis.com/res/bps/{author}/p{i:03d}.gif"
        response = requests.get(url, timeout=timeout)

        try:
            Image.open(io.BytesIO(response.content)).save(output_path)
        except UnidentifiedImageError:
            print(f"Couldn't open image. author={author}, i={i}, url={url}")
            raise


def get_bongard_open_world_labels(
    output_file_path: str,
    url: str = "https://raw.githubusercontent.com/joyjayng/Bongard-OpenWorld/main/assets/data/bongard-ow/bongard_ow.json",
    timeout: float = 30.0,
):
    """Download the Bongard-OpenWorld annotations and write a labels CSV.

    The JSON document at ``url`` is iterated as a sequence of problems, each
    providing a ``"uid"`` and a ``"caption"``. One CSV row is written per
    problem: the uid, the caption as the left-side rule, and
    ``"NOT <caption>"`` as the right-side rule.

    Args:
        output_file_path: Path of the CSV file to write.
        url: Location of the annotations JSON.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.RequestException: On connection errors, when
            ``timeout`` expires, or when the body is not valid JSON (with
            recent ``requests`` versions).
        KeyError: If a problem lacks ``"uid"`` or ``"caption"``.
    """
    response = requests.get(url, timeout=timeout)
    labels_json = response.json()

    # newline="" is what the csv module expects: it stops the text layer from
    # rewriting the explicit "\n" line terminator (e.g. to "\r\n" on Windows).
    # UTF-8 is set explicitly so non-ASCII captions do not depend on the
    # platform default encoding.
    with open(output_file_path, "w", newline="", encoding="utf-8") as output_file:
        csv_writer = csv.writer(output_file, lineterminator="\n")
        csv_writer.writerow(["uid", "Left-side Rule", "Right-side Rule"])

        for problem in labels_json:
            caption = problem["caption"]
            csv_writer.writerow([problem["uid"], caption, f"NOT {caption}"])


def copy_bongard_hoi_side_images(
    problem_id,
    side: str,
    dataset_path: str,
    output_directory_path: str,
    images_data: List[dict],
):
    """Copy the images of one side of a problem into its output directory.

    Each entry of ``images_data`` must provide an ``"im_path"`` relative to
    ``dataset_path``. Files are copied to
    ``{output_directory_path}/{problem_id}/{side}/`` and renamed to their
    zero-based position in ``images_data``, keeping the original extension
    (``0.jpg``, ``1.jpg``, ...).

    Args:
        problem_id: Identifier used as the problem's directory name.
        side: Name of the side sub-directory (e.g. ``"left"``).
        dataset_path: Root directory the ``"im_path"`` values are relative to.
        output_directory_path: Root directory for the copied images.
        images_data: Image descriptors, in the order they should be numbered.

    Raises:
        KeyError: If a descriptor has no ``"im_path"``.
        OSError: If a source file is missing or the copy fails.
    """
    destination_directory = f"{output_directory_path}/{problem_id}/{side}"

    for file_counter, image_data in enumerate(images_data):
        if file_counter == 0:
            # Created lazily so that an empty ``images_data`` does not leave
            # an empty side directory behind.
            os.makedirs(destination_directory, exist_ok=True)

        image_path = image_data["im_path"]
        _, ext = os.path.splitext(image_path)

        shutil.copy(
            f"{dataset_path}/{image_path}",
            f"{destination_directory}/{file_counter}{ext}",
        )


def get_bongard_hoi_images(
    number_of_problems: int,
    dataset_path: str,
    annonations_path: str,
    output_images_directory_path: str,
    output_labels_path: str,
):
    """Build a Bongard-HOI subset with one problem per distinct label.

    The annotations file is read as a JSON sequence of problems, where each
    problem is indexed as ``[left_images, right_images, label, ...]``. The
    first problem seen for every label is kept: its images are copied under
    ``{output_images_directory_path}/{problem_id}/left|right`` and a row is
    added to the labels CSV. Problem ids are assigned sequentially starting
    at 1. Processing stops once ``number_of_problems`` distinct labels have
    been collected, or when the annotations run out.

    Args:
        number_of_problems: Number of distinct labels to collect. The limit is
            checked after each addition, so a value below 1 never triggers it
            and every distinct label is collected.
        dataset_path: Root directory the image paths are relative to.
        annonations_path: Path of the annotations JSON file.
        output_images_directory_path: Root directory for the copied images;
            created if missing.
        output_labels_path: Path of the labels CSV to write, with columns
            ``uid``, ``Left-side Rule`` and ``Right-side Rule``.
    """
    os.makedirs(output_images_directory_path, exist_ok=True)

    # Context manager so the annotations file handle is closed deterministically.
    with open(annonations_path) as annotations_file:
        annonations = json.load(annotations_file)

    label_rows = []
    collected_labels = set()

    for problem in annonations:
        label: str = problem[2]

        if label in collected_labels:
            continue

        problem_id = len(collected_labels) + 1

        for side, images_data in (("left", problem[0]), ("right", problem[1])):
            copy_bongard_hoi_side_images(
                problem_id,
                side,
                dataset_path,
                output_images_directory_path,
                images_data,
            )

        # The raw label joins its words with "++"; use the readable form for
        # BOTH rules so the right-side rule is exactly the negated left one.
        rule = label.replace("++", " ")
        label_rows.append([problem_id, rule, f"NOT {rule}"])

        collected_labels.add(label)
        if len(collected_labels) == number_of_problems:
            break

    # Build the frame once instead of growing it row by row.
    labels = pd.DataFrame(
        label_rows, columns=["uid", "Left-side Rule", "Right-side Rule"]
    )
    labels.to_csv(output_labels_path, index=False)


def get_problems_with_all_correct_answers(attempt: ClassificationAttempt):
    """Return the ids of problems whose solutions were all evaluated as correct.

    A solution counts as correct when its ``evaluation`` contains ``"OK"``.
    A problem is reported only if every one of its solutions is correct, no
    matter in which order the solutions appear. Any evaluation without
    ``"OK"`` (``"WRONG"``, empty, or anything else) disqualifies the problem.

    Args:
        attempt: Attempt whose ``get_solutions()`` mapping is inspected.

    Returns:
        Problem ids in order of first appearance among the solutions.
    """
    all_correct_by_problem: Dict[int, bool] = {}

    for solution in attempt.get_solutions().values():
        is_correct = "OK" in solution.evaluation
        previously_correct = all_correct_by_problem.get(solution.problem_id, True)
        all_correct_by_problem[solution.problem_id] = previously_correct and is_correct

    return [
        problem_id
        for problem_id, all_correct in all_correct_by_problem.items()
        if all_correct
    ]


def get_all_problems(attempt: ClassificationAttempt):
    """Return the problem id of every solution stored in *attempt*.

    The list has one entry per solution, in the iteration order of
    ``attempt.get_solutions()``; an id repeats when several solutions belong
    to the same problem.
    """
    problem_ids = []
    for solution in attempt.get_solutions().values():
        problem_ids.append(solution.problem_id)
    return problem_ids


def print_classification_summary(
    answers_file_path: str,
):
    """Load a classification attempt and print summary statistics for it.

    Printed, in order:
      * the number of correct solutions,
      * the problems for which every solution is correct, and their count,
      * how many answers contain ``"LEFT"`` and how many contain ``"RIGHT"``,
      * explanation statistics, only if any solution has a non-empty
        ``explanation_evaluation``,
      * concept statistics, only if any solution has a non-empty
        ``concept_evaluation``.

    Every check is a substring test (``"OK" in ...``). The correct-solutions
    count uses it too, so it agrees with the per-problem figure computed by
    ``get_problems_with_all_correct_answers``.

    Args:
        answers_file_path: Path passed to ``ClassificationAttempt.load``.
    """
    attempt = ClassificationAttempt()
    attempt.load(answers_file_path)
    solutions = list(attempt.get_solutions().values())
    solutions_count = len(solutions)

    correct_solutions_count = sum(
        1 for solution in solutions if "OK" in solution.evaluation
    )
    print(f"Correct solutions: {correct_solutions_count}/{solutions_count}")

    all_problems_count = len(set(get_all_problems(attempt)))
    problems_with_all_correct_answers = get_problems_with_all_correct_answers(attempt)
    print(f"Problems with all correct answers: {problems_with_all_correct_answers}")
    print(
        f"Number of problems with all correct answers: {len(problems_with_all_correct_answers)}/{all_problems_count}"
    )

    left_answers_count = sum(1 for solution in solutions if "LEFT" in solution.answer)
    print(f"'LEFT' answers: {left_answers_count}/{solutions_count}")

    right_answers_count = sum(
        1 for solution in solutions if "RIGHT" in solution.answer
    )
    print(f"'RIGHT' answers: {right_answers_count}/{solutions_count}")

    # Explanation statistics are shown only when explanations were evaluated.
    if any(solution.explanation_evaluation != "" for solution in solutions):
        problems_with_good_explanations = [
            solution.problem_id
            for solution in solutions
            if "OK" in solution.explanation_evaluation
        ]
        # One list entry per good explanation, so its length is the count.
        good_explanations_count = len(problems_with_good_explanations)

        print(
            f"Problems with good explanations: {set(problems_with_good_explanations)}"
        )
        print(f"Count of good explanations: {good_explanations_count}/{solutions_count}")

    # Concept statistics are shown only when concepts were evaluated.
    if any(solution.concept_evaluation != "" for solution in solutions):
        problems_with_good_concepts = [
            solution.problem_id
            for solution in solutions
            if "OK" in solution.concept_evaluation
        ]
        good_concepts_count = len(problems_with_good_concepts)

        print(f"Problems with good concepts: {set(problems_with_good_concepts)}")
        print(f"Count of good concepts: {good_concepts_count}/{solutions_count}")


def compare_answers(
    first_answers_file_path: str,
    second_answers_file_path: str,
):
    """Print the percentage of solutions on which two attempts agree.

    Solutions are paired by position in each attempt's ``get_solutions()``
    mapping. A pair agrees when both answers contain ``"LEFT"`` or both
    contain ``"RIGHT"``. The percentage is relative to the number of solutions
    in the first attempt, so solutions without a counterpart in a shorter
    second attempt count as disagreements.

    NOTE(review): pairing is positional, not by solution key — this assumes
    both files list the same solutions in the same order; confirm with the
    callers.

    Args:
        first_answers_file_path: Path of the first answers file.
        second_answers_file_path: Path of the second answers file.
    """
    first_attempt = ClassificationAttempt()
    first_attempt.load(first_answers_file_path)

    second_attempt = ClassificationAttempt()
    second_attempt.load(second_answers_file_path)

    first_attempt_solutions = list(first_attempt.get_solutions().values())
    second_attempt_solutions = list(second_attempt.get_solutions().values())

    if not first_attempt_solutions:
        # Nothing to compare; avoid dividing by zero below.
        print("Same answers: n/a (no solutions to compare)")
        return

    # zip() stops at the shorter list, so a shorter second attempt no longer
    # raises IndexError.
    same_answers = sum(
        1
        for first, second in zip(first_attempt_solutions, second_attempt_solutions)
        if ("LEFT" in first.answer and "LEFT" in second.answer)
        or ("RIGHT" in first.answer and "RIGHT" in second.answer)
    )

    print(f"Same answers: {same_answers/len(first_attempt_solutions) * 100}%")


def get_data_paths(data_dir: str, dataset: str) -> Tuple[str, str]:
    """Return the images directory and labels file for a named dataset.

    Both paths live under ``{data_dir}/raw``.

    Args:
        data_dir: Root data directory.
        dataset: One of ``"synthetic"``, ``"hoi"``, ``"openworld"``, ``"rwr"``.

    Returns:
        A ``(splitted_data_path, labels_file)`` tuple.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    # dataset name -> (images directory name, labels file name) under raw/.
    # NOTE(review): "rwr" uses the same labels.csv as "synthetic" — confirm
    # this is intended.
    raw_entries = {
        "synthetic": ("bongard_splitted", "labels.csv"),
        "hoi": ("bongard_hoi_splitted_mix", "bongard_hoi_mix_labels.csv"),
        "openworld": ("bongard_open_world_splitted", "bongard_open_world_labels.csv"),
        "rwr": ("bongard_rwr", "labels.csv"),
    }

    if dataset not in raw_entries:
        raise ValueError(f"Unsupported dataset: {dataset}")

    images_directory_name, labels_file_name = raw_entries[dataset]
    return (
        f"{data_dir}/raw/{images_directory_name}",
        f"{data_dir}/raw/{labels_file_name}",
    )
