import os
import stat
import subprocess
import sys
import tempfile
import urllib
import gymnasium as gym

from logging import info, warning
from platform import machine
from typing import Union, List, Optional

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR

from grabbo.benchmarks import SyntheticBenchmark, EffectiveDimBenchmark
from typing import Union, List

from copy import deepcopy
class MujocoBenchmarkBase(SyntheticBenchmark):
    """
    Base class for benchmarks that are evaluated by an external executable.

    The path of the executable is read from an environment variable. Each evaluation runs
    ``<executable> --benchmark_name <task_id> --x <x_1> ... <x_dim>`` and parses the
    standard output of the process as a single float.

    Args:
        dim: the input dimensionality
        lb: lower bounds of the search space
        ub: upper bounds of the search space
        task_id: value passed to the executable via ``--benchmark_name``
        env_var_name: name of the environment variable holding the path to the executable
        noise_std: standard deviation of the Gaussian noise added to the function values
        negate: if True, the value printed by the executable is negated
        **kwargs: accepted but not used
    """

    def __init__(self, dim: int, lb: np.ndarray, ub: np.ndarray, task_id: str, env_var_name: str, noise_std: float = 0, negate: bool = False, **kwargs):
        super().__init__(dim=dim, lb=lb, ub=ub, noise_std=noise_std)
        self.task_id = task_id
        self._exec_cmd = os.environ.get(env_var_name)
        if self._exec_cmd is None:
            raise RuntimeError(f"Environment variable {env_var_name} not set. Please point it to the MuJoCo executable/command.")
        if not os.path.exists(self._exec_cmd) or not os.access(self._exec_cmd, os.X_OK):
            raise FileNotFoundError(f"Executable '{self._exec_cmd}' not found or not executable.")
        self._negate_output = negate
        # fixed part of the command line; the formatted coordinates are appended per call
        self.ARG_LIST_BASE = [self._exec_cmd, '--benchmark_name', self.task_id, '--x']

    def _call(self, x_single: np.ndarray) -> float:
        """
        Evaluate the external executable for one point.

        Args:
            x_single: one input configuration (1D array of length ``dim``)

        Returns: the (optionally negated) value printed by the executable, or ``-np.inf`` if the
            process fails, times out, or prints something that cannot be parsed as a float

        """
        assert x_single.ndim == 1, "Input to _call must be 1D"
        assert x_single.shape[0] == self.dim, f"Input dimension mismatch: expected {self.dim}, got {x_single.shape[0]}"
        # NOTE(review): a literal '0' is appended to every formatted coordinate; the reason is
        # not evident from this file, the formatting is kept unchanged.
        x_str = [np.format_float_positional(xi, unique=False, precision=6) + '0' for xi in x_single]

        # initialised up front so that the ValueError handler can always report it
        stdout = None
        try:
            result = subprocess.run(
                self.ARG_LIST_BASE + x_str,
                capture_output=True,
                text=True,
                check=True,
                timeout=120
            )
            stdout = result.stdout
            output_val = float(stdout.strip())
            return -output_val if self._negate_output else output_val
        except subprocess.CalledProcessError as e:
            warning(f"Subprocess for task '{self.task_id}' failed with command '{e.cmd}'. Return code: {e.returncode}")
            warning(f"Stderr: {e.stderr}")
            warning(f"Stdout: {e.stdout}")
            return -np.inf
        except subprocess.TimeoutExpired:
            warning(f"Subprocess for task '{self.task_id}' timed out.")
            return -np.inf
        except ValueError as e:
            warning(f"Could not convert subprocess output to float for task '{self.task_id}'. Output: '{stdout}'. Error: {e}")
            return -np.inf
        except Exception as e:
            # deliberate best effort: any other failure is logged and reported as -inf
            warning(f"An unexpected error occurred during subprocess call for task '{self.task_id}': {e}")
            return -np.inf

    def __call__(self, x: Union[np.ndarray, List[float], List[List[float]]]):
        """
        Evaluate the benchmark for one point (1D input) or several points (2D input, one per row).

        Args:
            x: a single configuration or a 2D collection of configurations

        Returns: a scalar for 1D input, a 1D array with one value per row for 2D input;
            Gaussian noise is added if ``noise_std`` is greater than 0

        Raises:
            ValueError: if the input is a scalar, has more than two dimensions, or its
                dimensionality does not match ``dim``

        """
        x = np.array(x, dtype=np.double)

        if x.ndim == 0:
            raise ValueError("Input cannot be a scalar.")
        elif x.ndim == 1:
            if x.shape[0] != self.dim:
                raise ValueError(f"Input dimension mismatch: expected {self.dim}, got {x.shape[0]}")
            val = self._call(x)
        elif x.ndim == 2:
            if x.shape[1] != self.dim:
                raise ValueError(f"Input dimension mismatch: expected {self.dim} columns, got {x.shape[1]}")
            val = np.array([self._call(row) for row in x])
        else:
            raise ValueError("Input must be a 1D or 2D array.")

        noise = 0.0
        if self.noise_std > 0:
            # one draw per returned value: a scalar draw for a single point, an array otherwise
            size = val.shape if isinstance(val, np.ndarray) else None
            noise = np.random.normal(0, self.noise_std, size)

        return val + noise

class AntBenchmark(MujocoBenchmarkBase):
    """
    Policy search benchmark for the 888-dimensional Ant task.
    The executable is looked up through the environment variable 'ANT'.
    """
    _DIM = 888
    _BOUNDS = (-1.0, 1.0)

    def __init__(self, noise_std: Optional[float] = 0, **kwargs):
        # **kwargs is accepted here but not forwarded to the base class
        lower, upper = self._BOUNDS
        lower_bounds = np.full(self._DIM, lower)
        upper_bounds = np.full(self._DIM, upper)
        super().__init__(
            dim=self._DIM,
            lb=lower_bounds,
            ub=upper_bounds,
            task_id='ant',
            env_var_name='ANT',
            noise_std=noise_std,
            negate=False,
        )

    @property
    def optimal_value(self) -> Optional[float]:
        """ No optimal value is defined for this benchmark; always None. """
        return None

class HumanoidBenchmark(MujocoBenchmarkBase):
    """
    Policy search benchmark for the 6392-dimensional Humanoid task.
    The executable is looked up through the environment variable 'HUMANOID'.
    """
    _DIM = 6392
    _BOUNDS = (-1.0, 1.0)

    def __init__(self, noise_std: Optional[float] = 0, **kwargs):
        # additional keyword arguments are handed on to the base class unchanged
        lower, upper = self._BOUNDS
        lower_bounds = np.full(self._DIM, lower)
        upper_bounds = np.full(self._DIM, upper)
        super().__init__(
            dim=self._DIM,
            lb=lower_bounds,
            ub=upper_bounds,
            task_id='humanoid',
            env_var_name='HUMANOID',
            noise_std=noise_std,
            negate=False,
            **kwargs,
        )

    @property
    def optimal_value(self) -> Optional[float]:
        """ No optimal value is defined for this benchmark; always None. """
        return None

# NOTE(review): this module defines a second class named ``MoptaSoftConstraints`` further down;
# that later definition rebinds the module-level name, so this one is shadowed at import time.
class MoptaSoftConstraints(SyntheticBenchmark):
    """
    Mopta08 benchmark with soft constraints as described in https://arxiv.org/pdf/2103.00349.pdf
    Supports i386, x86_64, armv7l, amd64

    Args:
        temp_dir: Optional[str]: directory to which to write the input and output files (if not specified, a temporary directory will be created automatically)
        binary_path: Optional[str]: path to the binary, if not specified, the default path will be used
        noise_std: standard deviation of the Gaussian noise added to the function values
        negate: if True, the penalised objective is negated (and failures are reported as -inf instead of +inf)
        **kwargs: accepted but not used
    """
    _DIM = 124

    def __init__(
            self,
            temp_dir: Optional[str] = None,
            binary_path: Optional[str] = None,
            noise_std: Optional[float] = 0,
            negate: bool = True,
            **kwargs,
    ):
        # NOTE(review): the bounds are passed positionally as (ones, zeros); presumably the base
        # class signature is (dim, ub, lb) — confirm against SyntheticBenchmark.
        super().__init__(self._DIM, np.ones(self._DIM), np.zeros(self._DIM), noise_std=noise_std)
        self._negate_output = negate
        if binary_path is None:
            self.sysarch = 64 if sys.maxsize > 2 ** 32 else 32
            self.machine = machine().lower()
            mopta_dir = os.path.join(os.getcwd(), "grabbo", "benchmarks", "mopta08")
            os.makedirs(mopta_dir, exist_ok=True)

            # machine name -> (required pointer width, binary file name)
            supported = {
                "armv7l": (32, "mopta08_armhf.bin"),
                "x86_64": (64, "mopta08_elf64.bin"),
                "i386": (32, "mopta08_elf32.bin"),
                "amd64": (64, "mopta08_amd64.exe"),  # For Windows potentially
            }
            if self.machine not in supported:
                raise RuntimeError(f"Machine with architecture '{self.machine}' is not supported for Mopta")
            required_bits, binary_name = supported[self.machine]
            assert self.sysarch == required_bits, "Not supported"
            self._mopta_exectutable_name = binary_name

            self._mopta_exectutable = os.path.join(mopta_dir, self._mopta_exectutable_name)

            if not os.path.exists(self._mopta_exectutable):
                # the module only has ``import urllib`` in scope, which does not guarantee
                # that the ``urllib.request`` submodule is loaded
                import urllib.request

                basename = os.path.basename(self._mopta_exectutable)
                info(f"Mopta08 executable '{basename}' not locally available. Downloading...")
                download_url = f"https://mopta.papenmeier.io/{basename}"
                try:
                    urllib.request.urlretrieve(download_url, self._mopta_exectutable)
                    # read/write/execute permission for the owner
                    os.chmod(self._mopta_exectutable, stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR)
                except Exception as e:
                    raise RuntimeError(f"Failed to download or set permissions for Mopta binary from {download_url}. Error: {e}") from e
        else:
            self._mopta_exectutable = binary_path
            if not os.path.exists(self._mopta_exectutable) or not os.access(self._mopta_exectutable, os.X_OK):
                raise FileNotFoundError(f"Provided Mopta binary path '{self._mopta_exectutable}' not found or not executable.")

        if temp_dir is None:
            # keep a reference to the TemporaryDirectory object so the directory lives as long
            # as this instance
            self.directory_file_descriptor = tempfile.TemporaryDirectory()
            self.directory_name = self.directory_file_descriptor.name
        else:
            if not os.path.exists(temp_dir):
                warning(f"Given directory '{temp_dir}' does not exist. Creating...")
                os.makedirs(temp_dir, exist_ok=True)
            self.directory_name = temp_dir
            self.directory_file_descriptor = None  # Not owned by this instance

    def _call(self, x: np.ndarray):
        """
        Evaluate Mopta08 benchmark for one point

        Args:
            x: one input configuration

        Returns: objective value plus ten times the sum of the positive constraint values, negated
            if ``negate`` was set; +inf (-inf when negating) if the evaluation fails

        """
        assert x.ndim == 1
        input_file = os.path.join(self.directory_name, "input.txt")
        output_file = os.path.join(self.directory_name, "output.txt")
        failure_value = -np.inf if self._negate_output else np.inf

        # initialised up front so that the ValueError handler can always report it
        output_lines = None
        try:
            with open(input_file, "w+") as tmp_file:
                for _x in x:
                    tmp_file.write(f"{_x}\n")

            # subprocess.run kills the child when the timeout expires, so a hanging binary
            # does not stay behind as a stray process
            completed = subprocess.run(
                self._mopta_exectutable,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=self.directory_name,
                text=True,
                timeout=60,
            )

            if completed.returncode != 0:
                warning(f"Mopta binary exited with error code {completed.returncode}. Stderr: {completed.stderr}")
                return failure_value

            with open(output_file, "r") as out_f:
                output_lines = out_f.read().split("\n")

            output = [line.strip() for line in output_lines if len(line.strip()) > 0]
            output_vals = np.array([float(val) for val in output])

            if len(output_vals) == 0:
                warning(f"Mopta output file '{output_file}' was empty or contained no valid numbers.")
                return failure_value

            # first number is the objective, the remaining ones are the constraint values
            value = output_vals[0]
            constraints = output_vals[1:]
            penalty = 10 * np.sum(np.clip(constraints, a_min=0, a_max=None))
            result = value + penalty

            return -result if self._negate_output else result

        except subprocess.TimeoutExpired:
            warning(f"Mopta binary timed out for input.")
            return failure_value
        except FileNotFoundError:
            warning(f"Mopta output file '{output_file}' not found.")
            return failure_value
        except ValueError as e:
            warning(f"Error parsing Mopta output: {e}. Output lines: {output_lines}")
            return failure_value
        except Exception as e:
            # deliberate best effort: any other failure is logged and reported as the failure value
            warning(f"An unexpected error occurred during Mopta evaluation: {e}")
            return failure_value
        finally:
            # remove the per-evaluation files; the directory itself is left in place
            if os.path.exists(input_file):
                os.remove(input_file)
            if os.path.exists(output_file):
                os.remove(output_file)

    def __call__(self, x):
        """
        Evaluate the benchmark for one or several points.

        Args:
            x: a single configuration or a 2D collection with one configuration per row

        Returns: the squeezed array of function values, with Gaussian noise added if ``noise_std`` is greater than 0

        """
        x = np.array(x, dtype=np.double)
        if x.ndim == 0:
            x = np.expand_dims(x, 0)
        if x.ndim == 1:
            x = np.expand_dims(x, 0)
        assert x.ndim == 2, "Input must be convertible to a 2D array"
        assert x.shape[1] == self.dim, f"Input dimension mismatch: expected {self.dim}, got {x.shape[1]}"

        vals = np.array([self._call(y) for y in x]).squeeze()

        noise = 0.0
        if self.noise_std > 0:
            noise = np.random.normal(0, self.noise_std, vals.shape)

        return vals + noise

    @property
    def optimal_value(self) -> Optional[float]:
        """ Return the value used as optimum: 200.0 if the output is negated, -200.0 otherwise. """
        return 200.0 if self._negate_output else -200.0
    # __del__ is not needed if using TemporaryDirectory correctly
# NOTE(review): ``MoptaSoftConstraints`` is also defined earlier in this module; being the later
# definition, this class is the one bound to the module-level name.
class MoptaSoftConstraints(SyntheticBenchmark):
    """
    Mopta08 benchmark with soft constraints as described in https://arxiv.org/pdf/2103.00349.pdf
    Supports i386, x86_64, armv7l, amd64

    Args:
        temp_dir: Optional[str]: directory to which to write the input and output files (if not specified, a temporary directory will be created automatically)
        binary_path: Optional[str]: path to the binary, if not specified, the default path will be used
        noise_std: standard deviation of the Gaussian noise added to the function values
        **kwargs: accepted but not used
    """

    def __init__(
            self,
            temp_dir: Optional[str] = None,
            binary_path: Optional[str] = None,
            noise_std: Optional[float] = 0,
            **kwargs,
    ):
        # NOTE(review): the bounds are passed positionally as (ones, zeros); presumably the base
        # class signature is (dim, ub, lb) — confirm against SyntheticBenchmark.
        super().__init__(124, np.ones(124), np.zeros(124), noise_std=noise_std)
        if binary_path is None:
            self.sysarch = 64 if sys.maxsize > 2 ** 32 else 32
            self.machine = machine().lower()

            # machine name -> (required pointer width, binary file name)
            supported = {
                "armv7l": (32, "mopta08_armhf.bin"),
                "x86_64": (64, "mopta08_elf64.bin"),
                "i386": (32, "mopta08_elf32.bin"),
                "amd64": (64, "mopta08_amd64.exe"),
            }
            if self.machine not in supported:
                raise RuntimeError("Machine with this architecture is not supported")
            required_bits, binary_name = supported[self.machine]
            assert self.sysarch == required_bits, "Not supported"

            mopta_dir = os.path.join(os.getcwd(), "grabbo", "benchmarks", "mopta08")
            self._mopta_exectutable = os.path.join(mopta_dir, binary_name)

            if not os.path.exists(self._mopta_exectutable):
                # the module only has ``import urllib`` in scope, which does not guarantee
                # that the ``urllib.request`` submodule is loaded
                import urllib.request

                basename = os.path.basename(self._mopta_exectutable)
                info(f"Mopta08 executable for this architecture not locally available. Downloading '{basename}'...")
                # urlretrieve cannot write into a directory that does not exist yet
                os.makedirs(mopta_dir, exist_ok=True)
                urllib.request.urlretrieve(
                    f"https://mopta.papenmeier.io/{basename}",
                    self._mopta_exectutable)
                # keep the file readable and writable for the owner in addition to executable
                os.chmod(self._mopta_exectutable, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)

        else:
            self._mopta_exectutable = binary_path
        if temp_dir is None:
            # keep a reference to the TemporaryDirectory object so the directory lives as long
            # as this instance
            self.directory_file_descriptor = tempfile.TemporaryDirectory()
            self.directory_name = self.directory_file_descriptor.name
        else:
            if not os.path.exists(temp_dir):
                warning(f"Given directory '{temp_dir}' does not exist. Creating...")
                # makedirs also handles nested paths whose parents are missing
                os.makedirs(temp_dir, exist_ok=True)
            self.directory_name = temp_dir

    def __call__(self, x):
        """
        Evaluate the benchmark for one or several points.

        Args:
            x: a single configuration or a 2D collection with one configuration per row

        Returns: the squeezed array of function values plus Gaussian noise with standard deviation ``noise_std``

        """
        # the base-class call is made for its side effects only; its return value is not used
        super().__call__(x)
        x = np.array(x)
        if x.ndim == 0:
            x = np.expand_dims(x, 0)
        if x.ndim == 1:
            x = np.expand_dims(x, 0)
        assert x.ndim == 2

        vals = np.array([self._call(y) for y in x]).squeeze()
        return vals + np.random.normal(
            np.zeros_like(vals), np.ones_like(vals) * self.noise_std, vals.shape
        )

    def _call(self, x: np.ndarray):
        """
        Evaluate Mopta08 benchmark for one point

        Args:
            x: one input configuration

        Returns:value with soft constraints

        """
        assert x.ndim == 1
        # write input to file in dir
        with open(os.path.join(self.directory_name, "input.txt"), "w+") as tmp_file:
            for _x in x:
                tmp_file.write(f"{_x}\n")
        # pass directory as working directory to process; run() drains the stdout pipe while
        # waiting, so a binary that prints a lot cannot block on a full pipe
        subprocess.run(
            self._mopta_exectutable,
            stdout=subprocess.PIPE,
            cwd=self.directory_name,
        )
        # read and parse output file
        with open(os.path.join(self.directory_name, "output.txt"), "r") as out_f:
            lines = out_f.read().split("\n")
        stripped = [line.strip() for line in lines]
        output = np.array([float(line) for line in stripped if len(line) > 0])
        # first number is the objective, the remaining ones are the constraint values
        value = output[0]
        constraints = output[1:]
        # see https://arxiv.org/pdf/2103.00349.pdf E.7
        return value + 10 * np.sum(np.clip(constraints, a_min=0, a_max=None))

    @property
    def optimal_value(self) -> Optional[np.ndarray]:
        """
        Return the "optimal" value.

        Returns:
            np.ndarray: -200, some guessed optimal value we never beat

        """
        return np.array(-200.0)


class LassoLeukemiaBenchmark(EffectiveDimBenchmark):
    """
    Leukemia benchmark (7129-D) taken from https://github.com/ksehic/LassoBench

    Args:
        noise_std: standard deviation of the zero-mean Gaussian noise added to every returned value
        **kwargs: not used
    """

    def __init__(self, noise_std: Optional[float] = 0, **kwargs):
        # imported inside the constructor, so LassoBench is only needed when this class is instantiated
        from LassoBench import LassoBench

        self._b: LassoBench.RealBenchmark = LassoBench.RealBenchmark(
            mf_opt="discrete_fidelity", pick_data="leukemia"
        )
        n_features = self._b.n_features
        upper = np.full(n_features, fill_value=1.0)
        lower = np.full(n_features, fill_value=-1.0)

        super().__init__(
            dim=n_features,
            ub=upper,
            lb=lower,
            effective_dim=22,
            noise_std=noise_std,
        )

    def __call__(self, x: Union[np.ndarray, List[float], List[List[float]]]):
        """
        Evaluate the benchmark for one or several points.

        Args:
            x: a single point or a 2D collection with one point per row

        Returns: the squeezed array of LassoBench values, one per row, plus Gaussian noise

        """
        # scalars and single points are promoted to one row of a 2D array
        points = np.atleast_2d(np.array(x, dtype=np.double))
        assert points.ndim == 2
        values = np.array([self._b.evaluate(point) for point in points]).squeeze()
        noise_mean = np.zeros_like(values)
        noise_scale = np.ones_like(values) * self.noise_std
        return values + np.random.normal(noise_mean, noise_scale, values.shape)


class LassoBreastCancerBenchmark(EffectiveDimBenchmark):
    """
    Breast cancer benchmark (10-D) taken from https://github.com/ksehic/LassoBench

    Args:
        noise_std: standard deviation of the zero-mean Gaussian noise added to every returned value
        **kwargs: not used
    """

    def __init__(self, noise_std: Optional[float] = 0, **kwargs):
        # imported inside the constructor, so LassoBench is only needed when this class is instantiated
        from LassoBench import LassoBench

        self._b: LassoBench.RealBenchmark = LassoBench.RealBenchmark(
            mf_opt="discrete_fidelity", pick_data="breast_cancer"
        )
        n_features = self._b.n_features
        upper = np.full(n_features, fill_value=1.0)
        lower = np.full(n_features, fill_value=-1.0)

        super().__init__(
            dim=n_features,
            ub=upper,
            lb=lower,
            effective_dim=3,
            noise_std=noise_std,
        )

    def __call__(self, x: Union[np.ndarray, List[float], List[List[float]]]):
        """
        Evaluate the benchmark for one or several points.

        Args:
            x: a single point or a 2D collection with one point per row

        Returns: the squeezed array of LassoBench values, one per row, plus Gaussian noise

        """
        # scalars and single points are promoted to one row of a 2D array
        points = np.atleast_2d(np.array(x, dtype=np.double))
        assert points.ndim == 2
        values = np.array([self._b.evaluate(point) for point in points]).squeeze()
        noise_mean = np.zeros_like(values)
        noise_scale = np.ones_like(values) * self.noise_std
        return values + np.random.normal(noise_mean, noise_scale, values.shape)


class LassoDiabetesBenchmark(EffectiveDimBenchmark):
    """
    Diabetes benchmark (8-D) taken from https://github.com/ksehic/LassoBench

    Args:
        noise_std: standard deviation of the zero-mean Gaussian noise added to every returned value
        **kwargs: not used
    """

    def __init__(self, noise_std: Optional[float] = 0, **kwargs):
        # imported inside the constructor, so LassoBench is only needed when this class is instantiated
        from LassoBench import LassoBench

        self._b: LassoBench.RealBenchmark = LassoBench.RealBenchmark(
            mf_opt="discrete_fidelity", pick_data="diabetes"
        )
        n_features = self._b.n_features
        upper = np.full(n_features, fill_value=1.0)
        lower = np.full(n_features, fill_value=-1.0)

        super().__init__(
            dim=n_features,
            ub=upper,
            lb=lower,
            effective_dim=5,
            noise_std=noise_std,
        )

    def __call__(self, x: Union[np.ndarray, List[float], List[List[float]]]):
        """
        Evaluate the benchmark for one or several points.

        Args:
            x: a single point or a 2D collection with one point per row

        Returns: the squeezed array of LassoBench values, one per row, plus Gaussian noise

        """
        # scalars and single points are promoted to one row of a 2D array
        points = np.atleast_2d(np.array(x, dtype=np.double))
        assert points.ndim == 2
        values = np.array([self._b.evaluate(point) for point in points]).squeeze()
        noise_mean = np.zeros_like(values)
        noise_scale = np.ones_like(values) * self.noise_std
        return values + np.random.normal(noise_mean, noise_scale, values.shape)


class LassoDNABenchmark(EffectiveDimBenchmark):
    """
    DNA benchmark (180-D) taken from https://github.com/ksehic/LassoBench

    Args:
        noise_std: standard deviation of the zero-mean Gaussian noise added to every returned value
        **kwargs: not used
    """

    def __init__(self, noise_std: Optional[float] = 0, **kwargs):
        # imported inside the constructor, so LassoBench is only needed when this class is instantiated
        from LassoBench import LassoBench

        self._b: LassoBench.RealBenchmark = LassoBench.RealBenchmark(
            mf_opt="discrete_fidelity", pick_data="dna"
        )
        n_features = self._b.n_features
        upper = np.full(n_features, fill_value=1.0)
        lower = np.full(n_features, fill_value=-1.0)

        super().__init__(
            dim=n_features,
            ub=upper,
            lb=lower,
            effective_dim=43,
            noise_std=noise_std,
        )

    def __call__(self, x: Union[np.ndarray, List[float], List[List[float]]]):
        """
        Evaluate the benchmark for one or several points.

        Args:
            x: a single point or a 2D collection with one point per row

        Returns: the squeezed array of LassoBench values, one per row, plus Gaussian noise

        """
        # scalars and single points are promoted to one row of a 2D array
        points = np.atleast_2d(np.array(x, dtype=np.double))
        assert points.ndim == 2
        values = np.array([self._b.evaluate(point) for point in points]).squeeze()
        noise_mean = np.zeros_like(values)
        noise_scale = np.ones_like(values) * self.noise_std
        return values + np.random.normal(noise_mean, noise_scale, values.shape)


class LassoRCV1Benchmark(EffectiveDimBenchmark):
    """
    RCV1 benchmark (19 959-D) taken from https://github.com/ksehic/LassoBench

    Args:
        noise_std: standard deviation of the zero-mean Gaussian noise added to every returned value
        **kwargs: not used
    """

    def __init__(self, noise_std: Optional[float] = 0, **kwargs):
        # imported inside the constructor, so LassoBench is only needed when this class is instantiated
        from LassoBench import LassoBench

        self._b: LassoBench.RealBenchmark = LassoBench.RealBenchmark(
            mf_opt="discrete_fidelity", pick_data="rcv1"
        )
        n_features = self._b.n_features
        upper = np.full(n_features, fill_value=1.0)
        lower = np.full(n_features, fill_value=-1.0)

        super().__init__(
            dim=n_features,
            ub=upper,
            lb=lower,
            effective_dim=75,
            noise_std=noise_std,
        )

    def __call__(self, x: Union[np.ndarray, List[float], List[List[float]]]):
        """
        Evaluate the benchmark for one or several points.

        Args:
            x: a single point or a 2D collection with one point per row

        Returns: the squeezed array of LassoBench values, one per row, plus Gaussian noise

        """
        # scalars and single points are promoted to one row of a 2D array
        points = np.atleast_2d(np.array(x, dtype=np.double))
        assert points.ndim == 2
        values = np.array([self._b.evaluate(point) for point in points]).squeeze()
        noise_mean = np.zeros_like(values)
        noise_scale = np.ones_like(values) * self.noise_std
        return values + np.random.normal(noise_mean, noise_scale, values.shape)


class LassoSimpleBenchmark(EffectiveDimBenchmark):
    """
    Synthetic Lasso "simple" benchmark (60-D) taken from https://github.com/ksehic/LassoBench .
    Effective dimensionality: 5% of input dimensionality.

    Args:
        noise_std: only its sign matters: a value > 0 selects the noisy LassoBench variant, otherwise the noiseless one is used
        **kwargs: not used
    """

    def __init__(self, noise_std: Optional[float] = 0, **kwargs):
        # imported inside the constructor, so LassoBench is only needed when this class is instantiated
        from LassoBench import LassoBench

        use_noise = noise_std > 0
        if use_noise:
            warning(
                f"LassoBenchmark with noise_std {noise_std} chosen. Will use noisy version with snr ratio 10. The exact value of noise_std will be ignored."
            )
        self._b: LassoBench.SyntheticBenchmark = LassoBench.SyntheticBenchmark(
            pick_bench="synt_simple", noise=use_noise
        )
        n_features = self._b.n_features

        # indices of the coordinates whose true weight is non-zero
        active_mask = self._b.w_true != 0
        self.effective_dims = np.arange(n_features)[active_mask]
        info(f"function effective dimensions: {self.effective_dims.tolist()}")

        upper = np.full(n_features, fill_value=1.0)
        lower = np.full(n_features, fill_value=-1.0)
        super().__init__(
            dim=n_features,
            ub=upper,
            lb=lower,
            effective_dim=len(self.effective_dims),
            noise_std=noise_std,
        )

    def __call__(self, x: Union[np.ndarray, List[float], List[List[float]]]):
        """
        Evaluate the benchmark for one or several points.

        Args:
            x: a single point or a 2D collection with one point per row

        Returns: the squeezed array of LassoBench values, one per row (no noise is added here)

        """
        # scalars and single points are promoted to one row of a 2D array
        points = np.atleast_2d(np.array(x, dtype=np.double))
        assert points.ndim == 2
        return np.array([self._b.evaluate(point) for point in points]).squeeze()


class LassoMediumBenchmark(EffectiveDimBenchmark):
    """
    Synthetic Lasso "medium" benchmark (100-D) taken from https://github.com/ksehic/LassoBench .
    Effective dimensionality: 5% of input dimensionality.

    Args:
        noise_std: only its sign matters: a value > 0 selects the noisy LassoBench variant, otherwise the noiseless one is used
        **kwargs: not used
    """

    def __init__(self, noise_std: Optional[float] = 0, **kwargs):
        # imported inside the constructor, so LassoBench is only needed when this class is instantiated
        from LassoBench import LassoBench

        use_noise = noise_std > 0
        if use_noise:
            warning(
                f"LassoBenchmark with noise_std {noise_std} chosen. Will use noisy version with snr ratio 10. The exact value of noise_std will be ignored."
            )
        self._b: LassoBench.SyntheticBenchmark = LassoBench.SyntheticBenchmark(
            pick_bench="synt_medium", noise=use_noise
        )
        n_features = self._b.n_features

        # indices of the coordinates whose true weight is non-zero
        active_mask = self._b.w_true != 0
        self.effective_dims = np.arange(n_features)[active_mask]
        info(f"function effective dimensions: {self.effective_dims.tolist()}")

        upper = np.full(n_features, fill_value=1.0)
        lower = np.full(n_features, fill_value=-1.0)
        super().__init__(
            dim=n_features,
            ub=upper,
            lb=lower,
            effective_dim=len(self.effective_dims),
            noise_std=noise_std,
        )

    def __call__(self, x: Union[np.ndarray, List[float], List[List[float]]]):
        """
        Evaluate the benchmark for one or several points.

        Args:
            x: a single point or a 2D collection with one point per row

        Returns: the squeezed array of LassoBench values, one per row (no noise is added here)

        """
        # scalars and single points are promoted to one row of a 2D array
        points = np.atleast_2d(np.array(x, dtype=np.double))
        assert points.ndim == 2
        return np.array([self._b.evaluate(point) for point in points]).squeeze()


class LassoHighBenchmark(EffectiveDimBenchmark):
    """
    Synthetic Lasso "high" benchmark (300-D) taken from https://github.com/ksehic/LassoBench .
    Effective dimensionality: 5% of input dimensionality.

    Args:
        noise_std: only its sign matters: a value > 0 selects the noisy LassoBench variant, otherwise the noiseless one is used
        **kwargs: not used
    """

    def __init__(self, noise_std: Optional[float] = 0, **kwargs):
        # imported inside the constructor, so LassoBench is only needed when this class is instantiated
        from LassoBench import LassoBench

        use_noise = noise_std > 0
        if use_noise:
            warning(
                f"LassoBenchmark with noise_std {noise_std} chosen. Will use noisy version with snr ratio 10. The exact value of noise_std will be ignored."
            )
        self._b: LassoBench.SyntheticBenchmark = LassoBench.SyntheticBenchmark(
            pick_bench="synt_high", noise=use_noise
        )
        n_features = self._b.n_features

        # indices of the coordinates whose true weight is non-zero
        active_mask = self._b.w_true != 0
        self.effective_dims = np.arange(n_features)[active_mask]
        info(f"function effective dimensions: {self.effective_dims.tolist()}")

        upper = np.full(n_features, fill_value=1.0)
        lower = np.full(n_features, fill_value=-1.0)
        super().__init__(
            dim=n_features,
            ub=upper,
            lb=lower,
            effective_dim=len(self.effective_dims),
            noise_std=noise_std,
        )

    def __call__(self, x: Union[np.ndarray, List[float], List[List[float]]]):
        """
        Evaluate the benchmark for one or several points.

        Args:
            x: a single point or a 2D collection with one point per row

        Returns: the squeezed array of LassoBench values, one per row (no noise is added here)

        """
        # scalars and single points are promoted to one row of a 2D array
        points = np.atleast_2d(np.array(x, dtype=np.double))
        assert points.ndim == 2
        return np.array([self._b.evaluate(point) for point in points]).squeeze()


class LassoHardBenchmark(EffectiveDimBenchmark):
    """
    Synthetic Lasso "hard" benchmark (1000-D) taken from https://github.com/ksehic/LassoBench .
    Effective dimensionality: 5% of input dimensionality.

    Args:
        noise_std: only its sign matters: a value > 0 selects the noisy LassoBench variant, otherwise the noiseless one is used
        **kwargs: not used
    """

    def __init__(self, noise_std: Optional[float] = 0, **kwargs):
        # imported inside the constructor, so LassoBench is only needed when this class is instantiated
        from LassoBench import LassoBench

        use_noise = noise_std > 0
        if use_noise:
            warning(
                f"LassoBenchmark with noise_std {noise_std} chosen. Will use noisy version with snr ratio 10. The exact value of noise_std will be ignored."
            )
        self._b: LassoBench.SyntheticBenchmark = LassoBench.SyntheticBenchmark(
            pick_bench="synt_hard", noise=use_noise
        )
        n_features = self._b.n_features

        # indices of the coordinates whose true weight is non-zero
        active_mask = self._b.w_true != 0
        self.effective_dims = np.arange(n_features)[active_mask]
        info(f"function effective dimensions: {self.effective_dims.tolist()}")

        upper = np.full(n_features, fill_value=1.0)
        lower = np.full(n_features, fill_value=-1.0)
        super().__init__(
            dim=n_features,
            ub=upper,
            lb=lower,
            effective_dim=len(self.effective_dims),
            noise_std=noise_std,
        )

    def __call__(self, x: Union[np.ndarray, List[float], List[List[float]]]):
        """
        Evaluate the benchmark for one or several points.

        Args:
            x: a single point or a 2D collection with one point per row

        Returns: the squeezed array of LassoBench values, one per row (no noise is added here)

        """
        # scalars and single points are promoted to one row of a 2D array
        points = np.atleast_2d(np.array(x, dtype=np.double))
        assert points.ndim == 2
        return np.array([self._b.evaluate(point) for point in points]).squeeze()


class SVMBenchmark(SyntheticBenchmark):
    def __init__(
            self,
            data_folder: Optional[str] = None,
            noise_std: Optional[float] = 0,
            **kwargs,
    ):
        """
        SVM Benchmark from https://arxiv.org/abs/2103.00349

        Support also a noisy version where the model is trained on random subset of 250 points
        which is used whenever noise_std is greater than 0.

        Args:
            data_folder: the folder where the slice_localization_data.csv is located
            noise_std: noise standard deviation. Anything greater than 0 will lead to a noisy benchmark
            **kwargs:
        """
        # best (lowest) error seen so far and the configuration that produced it
        self.value = np.inf
        self.best_config = None
        self.noisy = noise_std > 0
        if self.noisy:
            warning("Using a noisy version of SVMBenchmark where training happens on a random subset of 250 points."
                    "However, the exact value of noise_std is ignored.")
        super(SVMBenchmark, self).__init__(
            388, lb=np.zeros(388), ub=np.ones(388), noise_std=noise_std
        )
        self.X, self.y = self._load_data(data_folder)
        if not self.noisy:
            # fixed train/test split for the noiseless version; note that this reseeds the
            # global NumPy random state
            np.random.seed(388)
            idxs = np.random.choice(np.arange(len(self.X)), min(10000, len(self.X)), replace=False)
            half = len(idxs) // 2
            self._X_train = self.X[idxs[:half]]
            self._X_test = self.X[idxs[half:]]
            self._y_train = self.y[idxs[:half]]
            self._y_test = self.y[idxs[half:]]

    def _load_data(self, data_folder: Optional[str] = None):
        """
        Load the slice localization data, downloading and caching it as .npy files on first use.

        Args:
            data_folder: folder holding the cached files; defaults to ``<cwd>/data``

        Returns: tuple (X, y) with features and targets, both min-max scaled

        """
        if data_folder is None:
            data_folder = os.path.join(os.getcwd(), "data")
        x_cache = os.path.join(data_folder, "CT_slice_X.npy")
        y_cache = os.path.join(data_folder, "CT_slice_y.npy")
        if not os.path.exists(x_cache):
            # the module only has ``import urllib`` in scope, which does not guarantee
            # that the ``urllib.request`` submodule is loaded
            import urllib.request

            # urlretrieve and np.save cannot write into a folder that does not exist yet
            os.makedirs(data_folder, exist_ok=True)
            sld_dir = os.path.join(data_folder, "slice_localization_data.csv.xz")
            sld_bn = os.path.basename(sld_dir)
            info(f"Slice localization data not locally available. Downloading '{sld_bn}'...")
            urllib.request.urlretrieve(
                f"http://mopta-executables.s3-website.eu-north-1.amazonaws.com/{sld_bn}",
                sld_dir)
            data = pd.read_csv(sld_dir).to_numpy()
            # first 385 columns are used as features, the last column as target
            X = data[:, :385]
            y = data[:, -1]
            np.save(x_cache, X)
            np.save(y_cache, y)
        X = np.load(x_cache)
        y = np.load(y_cache)
        X = MinMaxScaler().fit_transform(X)
        y = MinMaxScaler().fit_transform(y.reshape(-1, 1)).squeeze()
        return X, y

    def __call__(self, x: Union[np.ndarray, List[float], List[List[float]]]):
        """
        Train and evaluate an SVR for one or several configurations.

        Args:
            x: a single configuration or a 2D collection with one configuration per row

        Returns: the squeezed array of test RMSE values, one per configuration

        """
        # the base-class call is made for its side effects only; its return value is not used
        super(SVMBenchmark, self).__call__(x)
        x = np.array(x)
        if x.ndim == 0:
            x = np.expand_dims(x, 0)
        if x.ndim == 1:
            x = np.expand_dims(x, 0)
        assert x.ndim == 2
        x = x ** 2

        errors = []
        for y in x:
            # the last three entries parameterise C, gamma and epsilon; the first 385 the
            # per-feature length scales
            C = 0.01 * (500 ** y[387])
            gamma = 0.1 * (30 ** y[386])
            epsilon = 0.01 * (100 ** y[385])
            length_scales = np.exp(4 * y[:385] - 2)

            svr = SVR(gamma=gamma, epsilon=epsilon, C=C, cache_size=1500, tol=0.001)
            if self.noisy:
                # fresh random subset of 500 points per evaluation: 250 for training, 250 for testing
                np.random.seed(None)
                idxs = np.random.choice(np.arange(len(self.X)), min(500, len(self.X)), replace=False)
                half = len(idxs) // 2
                X_train = self.X[idxs[:half]]
                X_test = self.X[idxs[half:]]
                y_train = self.y[idxs[:half]]
                y_test = self.y[idxs[half:]]
                svr.fit(X_train / length_scales, y_train)
                pred = svr.predict(X_test / length_scales)
                error = np.sqrt(np.mean(np.square(pred - y_test)))
            else:
                svr.fit(self._X_train / length_scales, self._y_train)
                pred = svr.predict(self._X_test / length_scales)
                error = np.sqrt(np.mean(np.square(pred - self._y_test)))

            errors.append(error)
            if errors[-1] < self.value:
                # NOTE(review): stores the log of the squared input, not the raw input — confirm intent
                self.best_config = np.log(y)
                self.value = errors[-1]
        return np.array(errors).squeeze()
