import numpy as np
from typing import Union, Tuple, Dict, List, Callable
from copy import deepcopy
import types

from envs.env import Env
from utils import check_for_allowed_dist, sample_from_dist, get_numpy_distribution_mean, adjust_distribution_kwargs_to_bound_mean_inplace, adjust_mean_lambda_and_args

class GridWorld(Env):
    def __init__(self,
                 grid_size: Tuple[int,int] = (4,4), # Default grid size is 4x4
                 state_type_loc: Union[Dict[str,Tuple[List[Tuple[int,int]],bool]],None] = None, # The dictionary mapping state names to their locations and if they are terminal
                 rewards: Union[Dict[str,Union[int,float,List[Union[str,Dict]]]],None] = None, # Rewards for various state types
                 hovering: bool = True, # Whether the player is allowed to choose to bump into the wall and by doing so hover in place
                 windy: bool = False, # Whether there is wind affecting movement
                 wind_prob: float = 0.25, # Probability of wind occurring
                 wind_dir: str = "down", # Direction of the wind (up, right, down, left)
                 slippery: bool = False, # Whether the grid is slippery
                 slip_prob: float = 0.1, # Probability of slipping
                 random_actions: bool = False, # Whether random actions occur
                 random_prob: float = 0.1, # Probability of taking a random action
                 random_vec: Union[List[float],None] = None, # Probability vector for random actions
                 rng_seed: int = 42, # Random number generator seed
                 randomization_seed: int = 42, # Randomization random number generator seed
                 checks: str = "all_checks", # Should all checks, only initial checks, or no checks be performed
                 ) -> None:

        """
        Initializes the Gridworld environment with the provided parameters. The agent moves in a grid. In each
        step it can move up, right, down, or left (encoded as the actions 0, 1, 2, and 3), but without exiting
        the perimeter of the grid. If hovering is off the agent can not bump into the wall, meaning near a wall
        the allowed action space is limited. It starts from a specified state and when it reaches a goal state
        or a losing state the game terminates and begins again from the starting state. There may be wind
        pushing the agent in a direction instead of the intended one, the grid may be frozen and slippery, in
        which case the agent may accidentally do an adjacent action instead of the intended one, or there may
        be a probability of choosing a random action instead of the intended one. Only one of these
        modifications is applied when building the transition probabilities: wind takes precedence over
        slippery, which takes precedence over random actions.
        In the case of windy or slippery and hovering deactivated, the agent may still hover in a place if the
        wind or the slippery action would push it over the edge of the grid. In the case of random action
        activated but hovering deactivated the environment chooses a random action based off the passed random
        vector, renormalized over the actions allowed in the current state.

        Parameters:
        - grid_size (tuple): Size of the grid, given as (rows, columns). Default is (4, 4).
        - state_type_loc (dict): A dictionary mapping state types to locations and terminal status.
          Each entry is a tuple where the first element is a list of coordinates for that state type,
          and the second element is a boolean indicating whether the state is terminal. Needs to contain
          locations of goal and start. Goal must be terminal, while start can not be terminal.
          Commonly used state types are:
          - goal: Highest reward, we want to find this state. Must be specified and is terminal.
          - start: Where the game starts. Must be specified.
          - hole: Hole in the grid with high negative reward. Typically terminal.
          - false_goal: Low positive reward, seems like a goal but is not. Typically terminal.
          - stoch_goal: Stochastic goal state with high reward. Alternative to goal. Typically terminal.
          - stoch: Stochastic region.
        - rewards (dict): A dictionary mapping state types to rewards. The special key "default" is used for
          all states not specified in the dictionary. Some states may have stochastic rewards represented
          by a distribution (e.g., normal). In this case a list containing the distribution name and a
          dictionary of keyword arguments compatible with the numpy random generator need to be passed.
          Additionally, the mean of the distribution may be added as a third element of the list.
        - hovering (bool): Whether the player is allowed to bump into the wall and thus hover in the same place.
        - windy (bool): Whether wind is applied to the environment. Default is False.
        - wind_prob (float): The probability that wind will affect the environment in each step. Default is 0.25.
        - wind_dir (str): Direction of the wind as one of the following: "up", "right", "down", "left". Default is "down"
        - slippery (bool): Whether the environment is slippery, causing random movement adjacent to action. Default is False.
        - slip_prob (float): Probability that a random slip occurs in a slippery environment. Default is 0.1.
        - random_actions (bool): Whether random actions can be taken instead of the chosen action. Default is False.
        - random_prob (float): Probability of taking a random action. Default is 0.1.
        - random_vec (list): A list of probabilities for each action when taking random actions. Default is evenly distributed.
        - rng_seed (int): Seed for the random number generator. Default is 42.
        - randomization_seed (int): Seed for the randomization random number generator. Default is 42.
        - checks (str): Checking mode. "all_checks", "only_initial_checks", and "no_checks" is available.

        Raises:
        - TypeError: If checks is not a string.
        - ValueError: If checks is a string but not one of the three supported modes.
        """

        # Default arguments if arguments are None
        if state_type_loc is None:
            state_type_loc = {
                     "goal": ([(3,3)],True),
                     "start": ([(0,0)],False),
                     "hole": ([(2,2),(2,1)],True),
                     "stoch": ([(1,1)],False)}
        if rewards is None:
            rewards = {
                     "goal": 8.5,
                     "hole": -10,
                     "stoch": ["normal",{"loc":0, "scale":1}],
                     "default": -1}
        if random_vec is None:
            random_vec = [1/4,1/4,1/4,1/4]

        # Standard initialization of arguments from the input
        self.grid_size = grid_size
        self.state_type_loc = deepcopy(state_type_loc) # Modified if env is randomized
        self.rewards = deepcopy(rewards) # Modified if env is randomized
        self.hovering = hovering
        self.windy = windy
        self.wind_prob = wind_prob
        self.slippery = slippery
        self.slip_prob = slip_prob
        self.rng_seed = rng_seed
        self.randomization_seed = randomization_seed
        self.wind_dir = wind_dir
        self.random_actions = random_actions
        self.random_prob = random_prob
        self.random_vec = deepcopy(random_vec) #  Modified if env is randomized

        # Validate the checking mode before it is used to decide which checks run
        if not isinstance(checks,str):
            raise TypeError("Flag for type and value checking must be a string!")
        if checks not in ("all_checks","no_checks","only_initial_checks"):
            raise ValueError("Checks needs to be either all_checks, no_checks, or only_initial_checks")
        self.checks = checks

        # Bookkeeping flags: which one-time checks already ran and which warnings were already printed
        self.init_check1_done = False
        self.init_check2_done = False
        self.init_check3_done = False
        self.init_check4_done = False
        self.original_positions_fixed_done = False
        self.printed_impossibility_warning = False
        self.printed_max_tries_exhausted_warning = False

        # Basic initialization for input checks
        self.rng = np.random.default_rng(seed = self.rng_seed) # Random number generator for stochastic draws
        self.randenv_rng = np.random.default_rng(seed = self.randomization_seed) # Random number generator for Environment randomization

        # Checking input constraints
        if self.checks != "no_checks":
            self.inputcheck()

        # Advanced initializations
        self.num_states = self.grid_size[0]*self.grid_size[1] # Total number of states (grid size)
        self.num_actions = 4 # Number of actions (up, right, down, left)
        self.wind_dir_num = self.action_words_to_nums(self.wind_dir) # Convert wind direction to numerical representation

        # State numbers of all terminal states; reaching one of them resets the game
        self.reset_states_num = [
            self.coord_to_state(coord)
            for locations_and_terminal in self.state_type_loc.values()
            if locations_and_terminal[1] # Only include terminal states
            for coord in locations_and_terminal[0]
        ]
        self.start_state_num = self.coord_to_state(self.state_type_loc["start"][0][0]) # Convert start state coordinates to state number

        # Rewards of the special states, keyed by (state, action); every other pair uses default_reward
        self.state_action_rewards_dict = {}
        for act in range(self.num_actions):
            for state_type, reward in self.rewards.items():
                if state_type == "default" or state_type not in self.state_type_loc:
                    continue
                for coord in self.state_type_loc[state_type][0]:
                    self.state_action_rewards_dict[(self.coord_to_state(coord),act)] = reward
        self.default_reward = self.rewards["default"] # Default reward for non-special states

        self.allowed_actions = self._compute_allowed_actions()
        self.game_probabilities = self._compute_game_probabilities()

    def _compute_allowed_actions(self) -> Dict[int,List[int]]:
        """
        Builds the mapping from each state number to the list of actions that may be chosen in it.

        Terminal states always get the single placeholder action 0. With hovering on, every other state
        gets all four actions. With hovering off, only moves that stay inside the grid are offered; they
        are derived from the row and column of the state (states are numbered row by row), so single-row
        and single-column grids are handled like any other grid.
        """
        num_rows, num_cols = self.grid_size[0], self.grid_size[1]
        terminal_states = set(self.reset_states_num) # Set for constant-time membership tests
        allowed_actions = {}
        for state in range(self.num_states):
            if state in terminal_states:
                # Checked first so that a terminal state in a corner is still treated as terminal
                allowed_actions[state] = [0]
            elif self.hovering:
                allowed_actions[state] = [0,1,2,3]
            else:
                row, col = divmod(state,num_cols)
                candidate_moves = (
                    (0, row > 0),            # up
                    (1, col < num_cols - 1), # right
                    (2, row < num_rows - 1), # down
                    (3, col > 0),            # left
                )
                allowed_actions[state] = [act for act, stays_inside in candidate_moves if stays_inside]
        return allowed_actions

    def _compute_game_probabilities(self) -> Dict[Tuple[int,int,int],Union[int,float]]:
        """
        Builds the non-zero transition probabilities keyed by (next_state, state, action).

        Only non-terminal states and their allowed actions are considered. Entries are created only for
        transitions that can actually occur and the result is ordered by key, i.e. by next_state, then
        state, then action.
        """
        terminal_states = set(self.reset_states_num) # Set for constant-time membership tests
        game_probs = {}

        def add_mass(next_state, state, action, mass):
            # Accumulate probability mass; several outcomes of one action may land in the same state
            key = (int(next_state),state,action)
            game_probs[key] = game_probs.get(key,0) + mass

        def forced_move_target(state, forced_action):
            # A move imposed by wind or slipping that is not available in this state leaves the agent in place
            if forced_action in self.allowed_actions[state]:
                return self.get_next_state_det(state,forced_action)
            return state

        for state in range(self.num_states):
            if state in terminal_states:
                continue
            for action in self.allowed_actions[state]:
                next_state = self.get_next_state_det(state,action)
                if not (self.windy or self.slippery or self.random_actions):
                    add_mass(next_state,state,action,1)
                elif self.windy:
                    add_mass(next_state,state,action,1 - self.wind_prob)
                    add_mass(forced_move_target(state,self.wind_dir_num),state,action,self.wind_prob)
                elif self.slippery:
                    # The slip mass is split evenly over the intended action and its two neighbouring actions
                    add_mass(next_state,state,action,1 - 2/3 * self.slip_prob)
                    add_mass(forced_move_target(state,(action + 1) % self.num_actions),state,action,1/3 * self.slip_prob)
                    add_mass(forced_move_target(state,(action - 1) % self.num_actions),state,action,1/3 * self.slip_prob)
                elif self.random_actions:
                    add_mass(next_state,state,action,1 - self.random_prob)
                    # Renormalize the random vector over the actions available in this state.
                    # NOTE: raises ZeroDivisionError if all allowed actions have weight 0 in random_vec.
                    allowed_weights = [self.random_vec[act] for act in self.allowed_actions[state]]
                    factor = sum(allowed_weights)
                    probs = [val * self.random_prob/factor for val in allowed_weights]
                    for random_act, prob in zip(self.allowed_actions[state],probs):
                        add_mass(self.get_next_state_det(state,random_act),state,action,prob)

        # Drop zero-probability entries (e.g. wind_prob == 1) and fix the key order
        return {key: game_probs[key] for key in sorted(game_probs) if game_probs[key] != 0}
    
    def randomize_environment(self,check_goal_is_goal: bool = True, discounted_reward_goal_limit: float = 0.95, gamma: float = 0.99, reward_normalization: bool = True,reward_normalization_factor_for_negatives: Union[int,float,Callable,str] = "standard",reward_normalization_num_tries: int = -1,**randomization_kwargs: Dict) -> Dict: 
        '''
        Randomizes the parameters of the GridWorld environment according to the passed parameters.
        No comprehensive sanity checks for a sensible environment generation are in place, so use this 
        feature with caution! Returns the new game parameters. 

        Parameters:
        - check_goal_is_goal (bool): If True, a check is performed that the state labeled "goal" indeed
          has the highest (discounted) average reward. States' rewards, which have higher means after being
          drawn will be shifted downwards.
        - discounted_reward_goal_limit (float): The maximum ratio of reward to discounted goal value that
          states can get assigned. This value will be used to determine the downwards shift of reward means.
        - gamma (float): The discounting factor.
        - reward_normalization (bool): If True, the scores on the optimum path to the goal are normalized
          to give a discounted sum of 1. If this is not possible, warnings will be displayed.
        - reward_normalization_factor_for_negatives (float,int,callable,str): If reward_normalization is
          on, but the discounted sum of rewards on the best path is negative, the negative values need to 
          be scaled first such that a positive reward is achieved. This parameter decides how they get scaled.
          If it is numerical, they will be scaled by the amount presented. If it is a lambda function, the
          sum of discounted rewards as an input will determine the scaling factor. If it is the string
          'default', the absolute value of the sum of discounted rewards plus one will be the scaling factor.
        - reward_normalization_num_tries (int): The number of maximum tries for reward normalization if the
          discounted sum of rewards on the best path is negative. If it is -1, no maximum number of tries 
          is specified.
        - randomization_kwargs (dict): A dictionary that may contain the following objects:
          - randomize_rewards_kwargs (dict): A dictionary containing state names present in the original
            game as keys mapping to a tuple. Its first element specifies if it should be a random reward 
            after drawing (True) or not (False). In case not random was selected the second element of the 
            tuple should specify how to draw the deterministic reward in the format as described in the 
            init-function. In case random was assigned, you should specify how to draw the reward in the 
            format as described in the init-function, but instead of any values you may write strings, in which
            case you also need to pass the "codenames" dict, which maps theses strings to a method of how to draw 
            the value they represent. In this case, additionally the mean should be added to the tuple for faster 
            computing. If it is dependent on some drawn constant, you can use the alias from the "codenames" dict. 
          - codenames (dict): A dictionary specifying random drawings for the randomly drawn numbers in 
            randomize_rewards_kwargs. It should use the same codewords as contained in the randomize_rewards_kwargs 
            dictionary.
          - randomize_locations_kwargs (dict): A dictionary containing valid state names as keys mapping 
            to a tuple. The first entry specifies the number of states of this type to be drawn and the second
            the locations from which it can be drawn. The number of states can be either a fixed number or a
            random drawing specification in the format described in the init-function. The locations to be
            drawn from need to be passed as either a list of state numbers (where in case the grid does not allow 
            for this many states, the rest of the states are ejected), a tuple of tuples of row and column, delimiting 
            an area (upper left to lower right point of the Grid, starting with state 0 in the upper left corner), or
            the string "all" (meaning randomly from all states). "Goal" and "Start" will be chosen first. Then any state
            that does not undergo randomization is set if the new grid size allows it and then all other random state
            locations will be drawn in the order of their appearance in the dictionary.
          - randomize_gridsize_kwargs (tuple): A tuple containing instructions on how to randomly draw the components of 
            the grid_size parameter (first and second entry) in the format as described in the init-function. If this is
            activated, randomize_locations_kwargs needs to be specified as well.
          - randomize_game_modifications_kwargs (dict): A dictionary containing information regarding the arguments 
            hovering, windy, wind_prob, wind_dir, slippery, slip_prob, random_actions, random_prob, and random_vec. It 
            maps these to lists following the same conventions as in randomize_rewards_kwargs, with random_prob being a
            list of 4 of those.
        
        Returns:
        - new_parameter_dict (dict): A dictionary containing the new parameters that were generated.
        '''

        # Default arguments if arguments are None
        if randomization_kwargs is None:
            randomization_kwargs = {}

        # Checking input constraints
        if self.checks != "no_checks" and not self.init_check4_done:
            self.randomization_inputcheck(check_goal_is_goal=check_goal_is_goal,discounted_reward_goal_limit=discounted_reward_goal_limit,gamma=gamma,reward_normalization=reward_normalization,reward_normalization_factor_for_negatives=reward_normalization_factor_for_negatives,reward_normalization_num_tries=reward_normalization_num_tries,**randomization_kwargs)
            if self.checks == "only_initial_check":
                self.init_check4_done = True

        # If grid_size should be randomized, draw new grid_size 
        if "randomize_gridsize_kwargs" in randomization_kwargs.keys():
            if isinstance(randomization_kwargs["randomize_gridsize_kwargs"][0],list):
                new_grid_size_comp1 = int(np.ceil(np.abs(sample_from_dist(self.randenv_rng,randomization_kwargs["randomize_gridsize_kwargs"][0][0],1,**randomization_kwargs["randomize_gridsize_kwargs"][0][1])[0])))
            else:
                new_grid_size_comp1 = randomization_kwargs["randomize_gridsize_kwargs"][0]
            if new_grid_size_comp1 == 0:
                new_grid_size_comp1 = 1
            if isinstance(randomization_kwargs["randomize_gridsize_kwargs"][1],list):
                new_grid_size_comp2 = int(np.ceil(np.abs(sample_from_dist(self.randenv_rng,randomization_kwargs["randomize_gridsize_kwargs"][1][0],1,**randomization_kwargs["randomize_gridsize_kwargs"][1][1])[0])))
            else:
                new_grid_size_comp2 = randomization_kwargs["randomize_gridsize_kwargs"][1]
            if new_grid_size_comp1 == 1:
                if new_grid_size_comp2 < 2:
                    new_grid_size_comp2 = 2
            if new_grid_size_comp2 == 0:
                new_grid_size_comp2 = 1
            new_grid_size = (new_grid_size_comp1,new_grid_size_comp2)
        else:
            new_grid_size = self.grid_size

        # If state_type_loc should be randomized draw new locations for state types.
        if "randomize_locations_kwargs" in randomization_kwargs.keys():

            # Initialize dictionary with relevant original positions at the beginning, new_state_type_loc, and the occupied coordinates
            if not self.original_positions_fixed_done:
                self.relevant_original_positions = {"goal":[(deepcopy(self.state_type_loc["goal"][0][0][0]),deepcopy(self.state_type_loc["goal"][0][0][1]))],
                                                    "start":[(deepcopy(self.state_type_loc["start"][0][0][0]),deepcopy(self.state_type_loc["start"][0][0][1]))]}
            new_state_type_loc = {}
            occupied_coordinates = []

            # Get the new goal coordinates. If outside of Grid, put it at bottom right.
            if "goal" in randomization_kwargs["randomize_locations_kwargs"].keys():
                if randomization_kwargs["randomize_locations_kwargs"]["goal"][1] == "all":
                    new_0_coord_goal = int(self.randenv_rng.choice(range(new_grid_size[0])))
                    new_1_coord_goal = int(self.randenv_rng.choice(range(new_grid_size[1])))
                elif isinstance(randomization_kwargs["randomize_locations_kwargs"]["goal"][1],list):
                    list_to_choose_from = [item for item in randomization_kwargs["randomize_locations_kwargs"]["goal"][1] if item < new_grid_size[0]*new_grid_size[1]]
                    if len(list_to_choose_from) > 0:
                        chosen_state = int(self.randenv_rng.choice(list_to_choose_from))
                        new_0_coord_goal = int(np.floor(chosen_state / new_grid_size[1]))
                        new_1_coord_goal = chosen_state % new_grid_size[1]
                    else:
                        new_0_coord_goal = new_grid_size[0] - 1
                        new_1_coord_goal = new_grid_size[1] - 1
                else:
                    list_to_choose_from = []
                    for coord0 in range(randomization_kwargs["randomize_locations_kwargs"]["goal"][1][0][0],randomization_kwargs["randomize_locations_kwargs"]["goal"][1][1][0]+1):
                        if coord0 < new_grid_size[0]:
                            for coord1 in range(randomization_kwargs["randomize_locations_kwargs"]["goal"][1][0][1],randomization_kwargs["randomize_locations_kwargs"]["goal"][1][1][1]+1):
                                if coord1 < new_grid_size[1]:
                                    list_to_choose_from.append((coord0,coord1))
                    if len(list_to_choose_from) > 0:
                        chosen_coords = self.randenv_rng.choice(list_to_choose_from)
                        new_0_coord_goal = int(chosen_coords[0])
                        new_1_coord_goal = int(chosen_coords[1])
                    else:
                        new_0_coord_goal = new_grid_size[0] - 1
                        new_1_coord_goal = new_grid_size[1] - 1
                new_state_type_loc["goal"] = ([(new_0_coord_goal,new_1_coord_goal)],True)
                occupied_coordinates.append((new_0_coord_goal,new_1_coord_goal))
            elif "randomize_gridsize_kwargs" in randomization_kwargs.keys():
                if self.relevant_original_positions["goal"][0][0] >= new_grid_size[0]:
                    new_0_coord_goal = new_grid_size[0] - 1
                    if self.relevant_original_positions["goal"][0][1] >= new_grid_size[1]:
                        new_1_coord_goal = new_grid_size[1] - 1
                    else:
                        new_1_coord_goal = self.relevant_original_positions["goal"][0][1]
                else:
                    new_0_coord_goal = self.relevant_original_positions["goal"][0][0]
                    if self.relevant_original_positions["goal"][0][1] >= new_grid_size[1]:
                        new_1_coord_goal = new_grid_size[1] - 1
                    else:
                        new_1_coord_goal = self.relevant_original_positions["goal"][0][1]
                new_state_type_loc["goal"] = ([(new_0_coord_goal,new_1_coord_goal)],True)
                occupied_coordinates.append((new_0_coord_goal,new_1_coord_goal))
            else: 
                new_state_type_loc["goal"] = self.state_type_loc["goal"]
                occupied_coordinates.append((new_state_type_loc["goal"][0][0][0],new_state_type_loc["goal"][0][0][1]))

            # Get the new start coordinates. If outside of Grid, or coincides with Goal, put it at top left (or right of top left if Goal is top left).
            if "start" in randomization_kwargs["randomize_locations_kwargs"].keys():
                if randomization_kwargs["randomize_locations_kwargs"]["start"][1] == "all":
                    new_0_coord_start = int(self.randenv_rng.choice(range(new_grid_size[0])))
                    new_1_coord_start = int(self.randenv_rng.choice(range(new_grid_size[1])))
                elif isinstance(randomization_kwargs["randomize_locations_kwargs"]["start"][1],list):
                    list_to_choose_from = [item for item in randomization_kwargs["randomize_locations_kwargs"]["start"][1] if item < new_grid_size[0]*new_grid_size[1]]
                    if len(list_to_choose_from) > 0:
                        chosen_state = int(self.randenv_rng.choice(list_to_choose_from))
                        new_0_coord_start = int(np.floor(chosen_state / new_grid_size[1]))
                        new_1_coord_start = chosen_state % new_grid_size[1]
                    else:
                        new_0_coord_goal = 0
                        new_1_coord_goal = 0
                else:
                    list_to_choose_from = []
                    for coord0 in range(randomization_kwargs["randomize_locations_kwargs"]["start"][1][0][0],randomization_kwargs["randomize_locations_kwargs"]["start"][1][1][0]+1):
                        if coord0 < new_grid_size[0]:
                            for coord1 in range(randomization_kwargs["randomize_locations_kwargs"]["start"][1][0][1],randomization_kwargs["randomize_locations_kwargs"]["start"][1][1][1]+1):
                                if coord1 < new_grid_size[1]:
                                    list_to_choose_from.append((coord0,coord1))
                    if len(list_to_choose_from) > 0:
                        chosen_coords = self.randenv_rng.choice(list_to_choose_from)
                        new_0_coord_start = int(chosen_coords[0])
                        new_1_coord_start = int(chosen_coords[1])
                    else:
                        new_0_coord_start = 0
                        new_1_coord_start = 0
                if (new_0_coord_start,new_1_coord_start) == new_state_type_loc["goal"][0][0]:
                    if new_state_type_loc["goal"][0][0] != (0,0):
                        new_0_coord_start = 0
                        new_1_coord_start = 0
                    else:
                        new_0_coord_start = int(np.min([1,new_grid_size[0]-1]))
                        if new_0_coord_start == 1:
                            new_1_coord_start = 0
                        else:
                            new_1_coord_start = 1
                new_state_type_loc["start"] = ([(new_0_coord_start,new_1_coord_start)],False)
                occupied_coordinates.append((new_0_coord_start,new_1_coord_start))
            elif "randomize_gridsize_kwargs" in randomization_kwargs.keys():
                if self.relevant_original_positions["start"][0][0] >= new_grid_size[0]:
                    new_0_coord_start = 0
                    if self.relevant_original_positions["start"][0][1] >= new_grid_size[1]:
                        new_1_coord_start = 0
                    else:
                        new_1_coord_start = self.relevant_original_positions["start"][0][1]
                else:
                    new_0_coord_start = self.relevant_original_positions["start"][0][0]
                    if self.relevant_original_positions["start"][0][1] >= new_grid_size[1]:
                        new_1_coord_start = 0
                    else:
                        new_1_coord_start = self.relevant_original_positions["start"][0][1]
                if (new_0_coord_start,new_1_coord_start) == new_state_type_loc["goal"][0][0]:
                    if new_state_type_loc["goal"][0][0] != (0,0):
                        new_0_coord_start = 0
                        new_1_coord_start = 0
                    else:
                        new_0_coord_start = int(np.min([1,new_grid_size[0]-1]))
                        if new_0_coord_start == 1:
                            new_1_coord_start = 0
                        else:
                            new_1_coord_start = 1
                new_state_type_loc["start"] = ([(new_0_coord_start,new_1_coord_start)],False)
                occupied_coordinates.append((new_0_coord_start,new_1_coord_start))
            else:
                new_state_type_loc["start"] = self.state_type_loc["start"]
                occupied_coordinates.append((new_state_type_loc["start"][0][0][0],new_state_type_loc["start"][0][0][1]))

            # If not logged, get original positions of non-randomized states
            if not self.original_positions_fixed_done:
                for key in self.state_type_loc.keys():
                    if key == "goal" or key == "start":
                        pass
                    elif key in randomization_kwargs["randomize_locations_kwargs"].keys():
                        self.relevant_original_positions[key] = ["True" if self.state_type_loc[key][1] else "False"]
                    else:
                        self.relevant_original_positions[key] = deepcopy(self.state_type_loc[key][0])
                        self.relevant_original_positions[key].append("True" if self.state_type_loc[key][1] else "False")
                self.original_positions_fixed_done = True
            
            # Set positions of non-randomized states via original states except for if they fall on Goal or Start or are outside the newly drawn Grid
            for key in self.relevant_original_positions.keys():
                if key == "goal" or key == "start":
                    pass
                elif len(self.relevant_original_positions[key]) == 1:
                    pass
                else:
                    if "randomize_gridsize_kwargs" in randomization_kwargs.keys():
                        new_positions_list = []
                        for coord_position in self.relevant_original_positions[key]:
                            if not isinstance(coord_position,str):
                                if coord_position[0] < new_grid_size[0] and coord_position[1] < new_grid_size[1]: 
                                    if not(coord_position in occupied_coordinates):
                                        new_positions_list.append(coord_position)
                                        occupied_coordinates.append(coord_position)
                        if len(new_positions_list) > 0:
                            new_state_type_loc[key] = (new_positions_list,True if self.relevant_original_positions[key][-1]=="True" else False)
                    else:
                        new_positions_list = []
                        for coord_position in self.relevant_original_positions[key]:
                            if not isinstance(coord_position,str): 
                                if not(coord_position in occupied_coordinates):
                                    new_positions_list.append(coord_position)
                                    occupied_coordinates.append(coord_position)
                        if len(new_positions_list) > 0:
                            new_state_type_loc[key] = (new_positions_list,True if self.relevant_original_positions[key][-1]=="True" else False)

            # Set positions of randomized states, except if they fall on positions that are already taken
            for key in self.relevant_original_positions.keys():
                if key == "goal" or key == "start":
                    pass
                elif len(self.relevant_original_positions[key]) == 1:
                    if isinstance(randomization_kwargs["randomize_locations_kwargs"][key][0],int):
                        num_states_to_draw = randomization_kwargs["randomize_locations_kwargs"][key][0]
                    else:
                        num_states_to_draw = int(np.floor(np.abs(sample_from_dist(self.randenv_rng,randomization_kwargs["randomize_locations_kwargs"][key][0][0],1,**randomization_kwargs["randomize_locations_kwargs"][key][0][1])[0])))
                    new_positions_list = []
                    for _ in range(num_states_to_draw):
                        set_coord = False
                        if randomization_kwargs["randomize_locations_kwargs"][key][1] == "all":
                            new_0_coord = int(self.randenv_rng.choice(range(new_grid_size[0])))
                            new_1_coord = int(self.randenv_rng.choice(range(new_grid_size[1])))
                            set_coord = True
                        elif isinstance(randomization_kwargs["randomize_locations_kwargs"][key][1],list):
                            list_to_choose_from = [item for item in randomization_kwargs["randomize_locations_kwargs"][key][1] if item < new_grid_size[0]*new_grid_size[1]]
                            if len(list_to_choose_from) > 0:
                                chosen_state = int(self.randenv_rng.choice(list_to_choose_from))
                                new_0_coord = int(np.floor(chosen_state / new_grid_size[1]))
                                new_1_coord = chosen_state % new_grid_size[1]
                                set_coord = True
                        else:
                            list_to_choose_from = []
                            for coord0 in range(randomization_kwargs["randomize_locations_kwargs"][key][1][0][0],randomization_kwargs["randomize_locations_kwargs"][key][1][1][0]+1):
                                if coord0 < new_grid_size[0]:
                                    for coord1 in range(randomization_kwargs["randomize_locations_kwargs"][key][1][0][1],randomization_kwargs["randomize_locations_kwargs"][key][1][1][1]+1):
                                        if coord1 < new_grid_size[1]:
                                            list_to_choose_from.append((coord0,coord1))
                            if len(list_to_choose_from) > 0:
                                chosen_coords = self.randenv_rng.choice(list_to_choose_from)
                                new_0_coord = int(chosen_coords[0])
                                new_1_coord = int(chosen_coords[1])
                                set_coord = True
                        if set_coord:
                            if not((new_0_coord,new_1_coord) in occupied_coordinates):
                                new_positions_list.append((new_0_coord,new_1_coord))
                                occupied_coordinates.append((new_0_coord,new_1_coord))
                    if len(new_positions_list) > 0:
                        new_state_type_loc[key] = (new_positions_list,True if self.relevant_original_positions[key][-1]=="True" else False)

        else:
            new_state_type_loc = self.state_type_loc

        # Initialize the dictionary carrying all evaluated codenames and the one carrying the means if we need them
        evaluated_codenames = {}
        if reward_normalization or check_goal_is_goal:
            self.means = {}

        # If rewards should be randomized draw new rewards
        if "randomize_rewards_kwargs" in randomization_kwargs.keys():

            # Initialize new rewards dictionary and dictionary where to save original_parameters
            new_rewards = {}

            # Handle goal rewards separately
            if "goal" in randomization_kwargs["randomize_rewards_kwargs"].keys():
                self.original_rewards = deepcopy(self.rewards)
                if randomization_kwargs["randomize_rewards_kwargs"]["goal"][0]:
                    randomization_params_goal = deepcopy(randomization_kwargs["randomize_rewards_kwargs"]["goal"][1])
                    for key_to_check_for_codes_goal in randomization_params_goal[1].keys():
                        if isinstance(randomization_params_goal[1][key_to_check_for_codes_goal],str):
                            codeword_goal = randomization_params_goal[1][key_to_check_for_codes_goal]
                            if codeword_goal in evaluated_codenames.keys():
                                evaluated_codename_goal = evaluated_codenames[codeword_goal]
                            else:
                                evaluated_codename_goal = float((sample_from_dist(self.randenv_rng,randomization_kwargs["codenames"][codeword_goal][0],1,**randomization_kwargs["codenames"][codeword_goal][1])[0]))   
                                evaluated_codenames[codeword_goal] = evaluated_codename_goal
                            randomization_params_goal[1][key_to_check_for_codes_goal] = evaluated_codename_goal
                        elif isinstance(randomization_params_goal[1][key_to_check_for_codes_goal],list):
                            for list_item_index, list_item in enumerate(randomization_params_goal[1][key_to_check_for_codes_goal]):
                                if isinstance(list_item,str):
                                    codeword_goal = list_item
                                    if codeword_goal in evaluated_codenames.keys():
                                        evaluated_codename_goal = evaluated_codenames[codeword_goal]
                                    else:
                                        evaluated_codename_goal = float((sample_from_dist(self.randenv_rng,randomization_kwargs["codenames"][codeword_goal][0],1,**randomization_kwargs["codenames"][codeword_goal][1])[0])) 
                                        evaluated_codenames[codeword_goal] = evaluated_codename_goal
                                    randomization_params_goal[1][key_to_check_for_codes_goal][list_item_index] = evaluated_codename_goal
                    new_goal_reward = deepcopy(randomization_params_goal)
                    if reward_normalization or check_goal_is_goal:
                        if len(new_goal_reward) == 3:
                            if isinstance(new_goal_reward[2],str):
                                self.means["goal"] = evaluated_codenames[new_goal_reward[2]]
                            elif isinstance(new_goal_reward[2],list):
                                for i, val in enumerate(new_goal_reward[2]):
                                    if i != 1:
                                        if isinstance(val,str):
                                            new_goal_reward[2][i] = evaluated_codenames[val]
                                self.means["goal"] = new_goal_reward[2][0](*new_goal_reward[2][1:])
                            else:
                                self.means["goal"] = new_goal_reward[2]
                        else:
                            self.means["goal"] = get_numpy_distribution_mean(dist_name=new_goal_reward[0],**new_goal_reward[1])
                else:
                    if isinstance(randomization_kwargs["randomize_rewards_kwargs"]["goal"][1],list):
                        new_goal_reward = float(np.abs((sample_from_dist(self.randenv_rng,randomization_kwargs["randomize_rewards_kwargs"]["goal"][1][0],1,**randomization_kwargs["randomize_rewards_kwargs"]["goal"][1][1])[0])))
                    else:
                        new_goal_reward = randomization_kwargs["randomize_rewards_kwargs"]["goal"][1]
                    if reward_normalization or check_goal_is_goal:
                        self.means["goal"] = new_goal_reward
                new_rewards["goal"] = new_goal_reward
            else:
                new_rewards["goal"] = self.rewards["goal"]
                if reward_normalization or check_goal_is_goal: 
                    if "goal" not in self.means.keys():
                        if isinstance(new_rewards["goal"],list):
                            if len(new_rewards["goal"]) == 3:
                                self.means["goal"] = new_rewards["goal"][2]
                            else:
                                self.means["goal"] = get_numpy_distribution_mean(dist_name=new_rewards["goal"][0],**new_rewards["goal"][1])
                        else:
                            self.means["goal"] = new_rewards["goal"]
            if self.means["goal"] == 0:
                new_rewards["goal"] = 1
                self.means["goal"] = 1

            # Handle rewards for all other states
            for state_type in self.rewards.keys():
                if state_type == "goal":
                    pass
                elif (state_type not in new_state_type_loc.keys()) and state_type != "default":
                    new_rewards[state_type] = self.rewards[state_type]
                elif state_type not in randomization_kwargs["randomize_rewards_kwargs"].keys():
                    if ("goal" in randomization_kwargs["randomize_rewards_kwargs"].keys()):
                        if check_goal_is_goal and state_type != "default":
                            min_distance = int(np.max([np.abs(coord[0]-new_state_type_loc["goal"][0][0][0]) + np.abs(coord[1] - new_state_type_loc["goal"][0][0][1]) for coord in new_state_type_loc[state_type][0]]))
                            max_mean_reward_for_state_type = self.means["goal"] * (gamma ** min_distance)
                            if isinstance(self.original_rewards[state_type],list):
                                new_rewards[state_type] = deepcopy(self.original_rewards[state_type])
                                try:
                                    adjust_distribution_kwargs_to_bound_mean_inplace(dist_name=new_rewards[state_type][0],kwargs=new_rewards[state_type][1],max_mean=max_mean_reward_for_state_type)
                                except:
                                    new_rewards[state_type] = max_mean_reward_for_state_type
                            else:
                                new_rewards[state_type] = float(np.min([max_mean_reward_for_state_type,self.original_rewards[state_type]]))
                            if reward_normalization:
                                if isinstance(new_rewards[state_type],list):
                                    if len(new_rewards[state_type]) == 3:
                                        self.means[state_type] = new_rewards[state_type][2]
                                    else:
                                        self.means[state_type] = get_numpy_distribution_mean(dist_name=new_rewards[state_type][0],**new_rewards[state_type][1])
                                else:
                                    self.means[state_type] = new_rewards[state_type]
                        else:
                            new_rewards[state_type] = self.original_rewards[state_type]
                            if reward_normalization:
                                if state_type not in self.means.keys():
                                    if isinstance(new_rewards[state_type],list):
                                        if len(new_rewards[state_type]) == 3:
                                            self.means[state_type] = new_rewards[state_type][2]
                                        else:
                                            self.means[state_type] = get_numpy_distribution_mean(dist_name=new_rewards[state_type][0],**new_rewards[state_type][1])
                                    else:
                                        self.means[state_type] = new_rewards[state_type]
                    else:
                        new_rewards[state_type] = self.rewards[state_type]
                        if reward_normalization:
                            if state_type not in self.means.keys():
                                if isinstance(new_rewards[state_type],list):
                                    if len(new_rewards[state_type]) == 3:
                                        self.means[state_type] = new_rewards[state_type][2]
                                    else:
                                        self.means[state_type] = get_numpy_distribution_mean(dist_name=new_rewards[state_type][0],**new_rewards[state_type][1])
                                else:
                                    self.means[state_type] = new_rewards[state_type]
                else: 
                    if check_goal_is_goal and state_type != "default":
                        min_distance = float(np.max([np.abs(coord[0] - new_state_type_loc["goal"][0][0][0]) + np.abs(coord[1] - new_state_type_loc["goal"][0][0][1]) for coord in new_state_type_loc[state_type][0]]))
                        max_mean_reward_for_state_type = self.means["goal"] * (gamma ** min_distance)
                        if randomization_kwargs["randomize_rewards_kwargs"][state_type][0]:
                            randomization_params = deepcopy(randomization_kwargs["randomize_rewards_kwargs"][state_type][1])
                            for key_to_check_for_codes in randomization_params[1].keys():
                                if isinstance(randomization_params[1][key_to_check_for_codes],str):
                                    codeword = randomization_params[1][key_to_check_for_codes]
                                    if codeword in evaluated_codenames.keys():
                                        evaluated_codename = evaluated_codenames[codeword]
                                    else:
                                        evaluated_codename = float((sample_from_dist(self.randenv_rng,randomization_kwargs["codenames"][codeword][0],1,**randomization_kwargs["codenames"][codeword][1])[0]))   
                                        evaluated_codenames[codeword] = evaluated_codename
                                    randomization_params[1][key_to_check_for_codes] = evaluated_codename
                                elif isinstance(randomization_params[1][key_to_check_for_codes],list):
                                    for list_item_index, list_item in enumerate(randomization_params[1][key_to_check_for_codes]):
                                        if isinstance(list_item,str):
                                            codeword = list_item
                                            if codeword in evaluated_codenames.keys():
                                                evaluated_codename = evaluated_codenames[codeword]
                                            else:
                                                evaluated_codename = float((sample_from_dist(self.randenv_rng,randomization_kwargs["codenames"][codeword][0],1,**randomization_kwargs["codenames"][codeword][1])[0])) 
                                                evaluated_codenames[codeword] = evaluated_codename
                                            randomization_params[1][key_to_check_for_codes][list_item_index] = evaluated_codename
                            new_reward = deepcopy(randomization_params)
                            try:
                                adjust_distribution_kwargs_to_bound_mean_inplace(dist_name=new_reward[0],kwargs=new_reward[1],max_mean=max_mean_reward_for_state_type)
                            except:
                                new_reward = max_mean_reward_for_state_type
                            if reward_normalization:
                                if len(new_reward) == 3:
                                    if isinstance(new_reward[2],str):
                                        self.means[state_type] = evaluated_codenames[new_reward[2]]
                                    elif isinstance(new_reward[2],list):
                                        for i, val in enumerate(new_reward[2]):
                                            if i != 1:
                                                if isinstance(val,str):
                                                    new_reward[2][i] = evaluated_codenames[val]
                                        self.means[state_type] = new_reward[2][0](*new_reward[2][1:])
                                    else:
                                        self.means[state_type] = new_reward[2]
                                else:
                                    self.means[state_type] = get_numpy_distribution_mean(dist_name=new_reward[0],**new_reward[1])
                        else:
                            if isinstance(randomization_kwargs["randomize_rewards_kwargs"][state_type][1],list):
                                new_reward = float(np.max([max_mean_reward_for_state_type,np.abs((sample_from_dist(self.randenv_rng,randomization_kwargs["randomize_rewards_kwargs"][state_type][1][0],1,**randomization_kwargs["randomize_rewards_kwargs"][state_type][1][1])[0]))]))
                            else:
                                new_reward = randomization_kwargs["randomize_rewards_kwargs"][state_type][1]
                        new_rewards[state_type] = new_reward
                    elif check_goal_is_goal and state_type == "default":
                        min_distance = 1
                        max_mean_reward_for_state_type = self.means["goal"] * (gamma ** min_distance)
                        if randomization_kwargs["randomize_rewards_kwargs"][state_type][0]:
                            randomization_params = deepcopy(randomization_kwargs["randomize_rewards_kwargs"][state_type][1])
                            for key_to_check_for_codes in randomization_params[1].keys():
                                if isinstance(randomization_params[1][key_to_check_for_codes],str):
                                    codeword = randomization_params[1][key_to_check_for_codes]
                                    if codeword in evaluated_codenames.keys():
                                        evaluated_codename = evaluated_codenames[codeword]
                                    else:
                                        evaluated_codename = float((sample_from_dist(self.randenv_rng,randomization_kwargs["codenames"][codeword][0],1,**randomization_kwargs["codenames"][codeword][1])[0]))   
                                        evaluated_codenames[codeword] = evaluated_codename
                                    randomization_params[1][key_to_check_for_codes] = evaluated_codename
                                elif isinstance(randomization_params[1][key_to_check_for_codes],list):
                                    for list_item_index, list_item in enumerate(randomization_params[1][key_to_check_for_codes]):
                                        if isinstance(list_item,str):
                                            codeword = list_item
                                            if codeword in evaluated_codenames.keys():
                                                evaluated_codename = evaluated_codenames[codeword]
                                            else:
                                                evaluated_codename = float((sample_from_dist(self.randenv_rng,randomization_kwargs["codenames"][codeword][0],1,**randomization_kwargs["codenames"][codeword][1])[0])) 
                                                evaluated_codenames[codeword] = evaluated_codename
                                            randomization_params[1][key_to_check_for_codes][list_item_index] = evaluated_codename
                            new_reward = deepcopy(randomization_params)
                            try:
                                adjust_distribution_kwargs_to_bound_mean_inplace(dist_name=new_reward[0],kwargs=new_reward[1],max_mean=max_mean_reward_for_state_type)
                            except:
                                new_reward = max_mean_reward_for_state_type
                            if reward_normalization:
                                if len(new_reward) == 3:
                                    if isinstance(new_reward[2],str):
                                        self.means[state_type] = evaluated_codenames[new_reward[2]]
                                    elif isinstance(new_reward[2],list):
                                        for i, val in enumerate(new_reward[2]):
                                            if i != 1:
                                                if isinstance(val,str):
                                                    new_reward[2][i] = evaluated_codenames[val]
                                        self.means[state_type] = new_reward[2][0](*new_reward[2][1:])
                                    else:
                                        self.means[state_type] = new_reward[2]
                                else:
                                    self.means[state_type] = get_numpy_distribution_mean(dist_name=new_reward[0],**new_reward[1])
                        else:
                            if isinstance(randomization_kwargs["randomize_rewards_kwargs"][state_type][1],list):
                                new_reward = float(np.max([max_mean_reward_for_state_type,np.abs((sample_from_dist(self.randenv_rng,randomization_kwargs["randomize_rewards_kwargs"][state_type][1][0],1,**randomization_kwargs["randomize_rewards_kwargs"][state_type][1][1])[0]))]))
                            else:
                                new_reward = randomization_kwargs["randomize_rewards_kwargs"][state_type][1]
                        new_rewards[state_type] = new_reward
                    else:
                        if randomization_kwargs["randomize_rewards_kwargs"][state_type][0]:
                            randomization_params = deepcopy(randomization_kwargs["randomize_rewards_kwargs"][state_type][1])
                            for key_to_check_for_codes in randomization_params[1].keys():
                                if isinstance(randomization_params[1][key_to_check_for_codes],str):
                                    codeword = randomization_params[1][key_to_check_for_codes]
                                    if codeword in evaluated_codenames.keys():
                                        evaluated_codename = evaluated_codenames[codeword]
                                    else:
                                        evaluated_codename = float((sample_from_dist(self.randenv_rng,randomization_kwargs["codenames"][codeword][0],1,**randomization_kwargs["codenames"][codeword][1])[0]))   
                                        evaluated_codenames[codeword] = evaluated_codename
                                    randomization_params[1][key_to_check_for_codes] = evaluated_codename
                            new_reward = deepcopy(randomization_params)
                            if reward_normalization:
                                if len(new_reward) == 3:
                                    if isinstance(new_reward[2],str):
                                        self.means[state_type] = evaluated_codenames[new_reward[2]]
                                    elif isinstance(new_reward[2],list):
                                        for i, val in enumerate(new_reward[2]):
                                            if i != 1:
                                                if isinstance(val,str):
                                                    new_reward[2][i] = evaluated_codenames[val]
                                        self.means[state_type] = new_reward[2][0](*new_reward[2][1:])
                                    else:
                                        self.means[state_type] = new_reward[2]
                                else:
                                    self.means[state_type] = get_numpy_distribution_mean(dist_name=new_reward[0],**new_reward[1])
                        else:
                            if isinstance(randomization_kwargs["randomize_rewards_kwargs"][state_type][1],list):
                                new_reward = float(np.abs((sample_from_dist(self.randenv_rng,randomization_kwargs["randomize_rewards_kwargs"][state_type][1][0],1,**randomization_kwargs["randomize_rewards_kwargs"][state_type][1][1])[0])))
                            else:
                                new_reward = randomization_kwargs["randomize_rewards_kwargs"][state_type][1]
                        new_rewards[state_type] = new_reward
                    if reward_normalization:
                        if isinstance(new_rewards[state_type],list):
                            if len(new_rewards[state_type]) == 3:
                                self.means[state_type] = new_rewards[state_type][2]
                            else:
                                self.means[state_type] = get_numpy_distribution_mean(dist_name=new_rewards[state_type][0],**new_rewards[state_type][1])
                        else:
                            self.means[state_type] = new_rewards[state_type]

            # Handle normalization of scores if necessary
            if reward_normalization:
                maximum_reward = -1
                num_tries = 0
                while maximum_reward <= 0:
                    num_tries += 1
                    list_for_checking = [((new_state_type_loc["goal"][0][0][0] * new_grid_size[1]) + new_state_type_loc["goal"][0][0][1],self.means["goal"],(new_state_type_loc["goal"][0][0][0] * new_grid_size[1]) + new_state_type_loc["goal"][0][0][1])]
                    finished_rewards = []
                    new_start_num = (new_state_type_loc["start"][0][0][0] * new_grid_size[1]) + new_state_type_loc["start"][0][0][1]
                    new_terminal_state_nums = [(coord[0] * new_grid_size[1]) + coord[1] for st in new_state_type_loc.keys() if new_state_type_loc[st][1] for coord in new_state_type_loc[st][0]]
                    for _ in range(int(np.max([(new_grid_size[0]*new_grid_size[1])//2,new_grid_size[0]+new_grid_size[1]-2]))):
                        updated_list_for_checking = []
                        for item in list_for_checking:
                            next_state_nums = []
                            if not (item[0] - new_grid_size[1] < 0):
                                next_state_nums.append(item[0] - new_grid_size[1])
                            if not (((item[0] + 1) % new_grid_size[1]) == 0):
                                next_state_nums.append(item[0] + 1)
                            if not (item[0] + new_grid_size[1] >= new_grid_size[0]*new_grid_size[1]):
                                next_state_nums.append(item[0] + new_grid_size[1])
                            if not (item[0] % new_grid_size[1] == 0):
                                next_state_nums.append(item[0] - 1)
                            for next_state_num in next_state_nums:
                                if next_state_num not in new_terminal_state_nums:
                                    if next_state_num not in item[2:]:
                                        next_state_coord = ( int(np.floor(next_state_num / new_grid_size[1])) , next_state_num % new_grid_size[1]) 
                                        state_name_found = False
                                        for special_state in new_state_type_loc.keys():
                                            if next_state_coord in new_state_type_loc[special_state][0] and special_state != "start":
                                                state_name_found = True
                                                next_state_name = special_state
                                        if next_state_num == new_start_num:
                                            finished_rewards.append(item[1])
                                        else:
                                            next_reward = gamma * item[1] + (self.means[next_state_name] if state_name_found else self.means["default"])
                                            updated_list_for_checking.append((next_state_num,next_reward,*item[2:],item[0]))       
                        list_for_checking = deepcopy(updated_list_for_checking)
                    if len(finished_rewards) == 0:
                        if not self.printed_impossibility_warning:
                            print("Warning: At least one of the randomly generated games is invalid. Most likely, terminal states are blocking the goal from being reachable. Random environment normalization could not be performed!")
                            self.printed_impossibility_warning = True
                        break
                    elif num_tries == reward_normalization_num_tries and float(np.max([finished_rewards])) <= 0:
                        if not self.printed_max_tries_exhausted_warning:
                            print("Warning: Exceeded the maximum number of tries and could not properly normalize at least one environment!")
                            self.printed_max_tries_exhausted_warning = True
                        maximum_reward = float(np.abs(np.max([finished_rewards])))
                        break
                    elif float(np.max([finished_rewards])) <= 0:
                        maximum_reward = float(np.max([finished_rewards]))
                        if isinstance(reward_normalization_factor_for_negatives,(int,float)):
                            norm_factor = reward_normalization_factor_for_negatives
                        elif reward_normalization_factor_for_negatives == "standard":
                            norm_factor = float(np.abs(maximum_reward)) + 1
                        else:
                            norm_factor = reward_normalization_factor_for_negatives(maximum_reward)
                        for state in new_rewards.keys():
                            if isinstance(new_rewards[state],list):
                                mean_of_dist = get_numpy_distribution_mean(new_rewards[state][0],**new_rewards[state][1])
                                if mean_of_dist < 0:
                                    adjust_distribution_kwargs_to_bound_mean_inplace(dist_name=new_rewards[state][0],kwargs=new_rewards[state][1],min_mean = (mean_of_dist / norm_factor))
                                    self.means[state] = mean_of_dist / norm_factor
                            elif new_rewards[state] < 0:
                                new_rewards[state] = new_rewards[state] / norm_factor
                                self.means[state] = new_rewards[state] / norm_factor
                    else:
                        maximum_reward = float(np.max([finished_rewards]))
                if maximum_reward != -1:
                    for state in new_rewards.keys():
                        if isinstance(new_rewards[state],List):
                            mean_of_dist = get_numpy_distribution_mean(new_rewards[state][0],**new_rewards[state][1])
                            adjust_mean_lambda_and_args(target_mean=(mean_of_dist/maximum_reward),dist_name=new_rewards[state][0],kwargs=new_rewards[state][1])
                            self.means[state] = mean_of_dist/maximum_reward
                        else:
                            new_rewards[state] = new_rewards[state] / maximum_reward
                            self.means[state] = new_rewards[state] / maximum_reward
        else:
            new_rewards = self.rewards

        # If rest of the game modifications should be randomized, draw them
        if "randomize_game_modification_kwargs" in randomization_kwargs.keys():
            if "hovering" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                new_hovering = bool(sample_from_dist(rng=self.randenv_rng,dist=randomization_kwargs["randomize_game_modification_kwargs"]["hovering"][0],size=1,**randomization_kwargs["randomize_game_modification_kwargs"]["hovering"][1])[0])
            else:
                new_hovering = self.hovering
            if "windy" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                new_windy = bool(sample_from_dist(rng=self.randenv_rng,dist=randomization_kwargs["randomize_game_modification_kwargs"]["windy"][0],size=1,**randomization_kwargs["randomize_game_modification_kwargs"]["windy"][1])[0])
            else:
                new_windy = self.windy
            if "wind_prob" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                new_wind_prob = float(np.max(np.min(sample_from_dist(rng=self.randenv_rng,dist=randomization_kwargs["randomize_game_modification_kwargs"]["wind_prob"][0],size=1,**randomization_kwargs["randomize_game_modification_kwargs"]["wind_prob"][1])[0],1),0))
            else:
                new_wind_prob = self.wind_prob
            if "wind_dir" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                new_wind_dir = sample_from_dist(rng=self.randenv_rng,dist=randomization_kwargs["randomize_game_modification_kwargs"]["wind_prob"][0],size=1,**randomization_kwargs["randomize_game_modification_kwargs"]["wind_prob"][1])[0]
                if not isinstance(new_wind_dir,str):
                    new_wind_dir = int(new_wind_dir)
                    if new_wind_dir <= 0:
                        new_wind_dir = "up"
                    elif new_wind_dir == 1:
                        new_wind_dir = "right"
                    elif new_wind_dir == 2:
                        new_wind_dir = "down"
                    else:
                        new_wind_dir = "left"
            else:
                new_wind_dir = self.wind_dir
            if "slippery" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                new_slippery = bool(sample_from_dist(rng=self.randenv_rng,dist=randomization_kwargs["randomize_game_modification_kwargs"]["slippery"][0],size=1,**randomization_kwargs["randomize_game_modification_kwargs"]["slippery"][1])[0])
            else:
                new_slippery = self.slippery
            if "slip_prob" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                new_slip_prob = float(np.max(np.min(sample_from_dist(rng=self.randenv_rng,dist=randomization_kwargs["randomize_game_modification_kwargs"]["slip_prob"][0],size=1,**randomization_kwargs["randomize_game_modification_kwargs"]["slip_prob"][1])[0],1),0))
            else:
                new_slip_prob = self.slip_prob
            if "random_actions" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                new_random_actions = bool(sample_from_dist(rng=self.randenv_rng,dist=randomization_kwargs["randomize_game_modification_kwargs"]["random_actions"][0],size=1,**randomization_kwargs["randomize_game_modification_kwargs"]["random_actions"][1])[0])
            else:
                new_random_actions = self.random_actions
            if "random_prob" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                new_random_prob = float(np.max(np.min(sample_from_dist(rng=self.randenv_rng,dist=randomization_kwargs["randomize_game_modification_kwargs"]["random_prob"][0],size=1,**randomization_kwargs["randomize_game_modification_kwargs"]["random_prob"][1])[0],1),0))
            else:
                new_random_prob = self.random_prob
            if "random_vec" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                n_random_vec = []
                for item in randomization_kwargs["randomize_game_modification_kwargs"]["random_vec"]:
                    if isinstance(item,list):
                        n_random_vec.append(float(sample_from_dist(rng=self.randenv_rng,dist=item[0],size=1,**item[1])[0]))
                    else:
                        n_random_vec.append(item)
                factor = float(np.sum(new_random_vec))
                new_random_vec = [item / factor for item in n_random_vec]
            else:
                new_random_vec = self.random_vec
        else:
            new_hovering = self.hovering
            new_windy = self.windy
            new_wind_prob = self.wind_prob
            new_wind_dir = self.wind_dir
            new_slippery = self.slippery
            new_slip_prob = self.slip_prob
            new_random_actions = self.random_actions
            new_random_prob = self.random_prob
            new_random_vec = self.random_vec

        # Initialize game with new parameters
        self.__init__(grid_size=new_grid_size,state_type_loc=new_state_type_loc,rewards=new_rewards,hovering=new_hovering,windy=new_windy,wind_prob=new_wind_prob,wind_dir=new_wind_dir,slippery=new_slippery,slip_prob=new_slip_prob,random_actions=new_random_actions,random_prob=new_random_prob,random_vec=new_random_vec,rng_seed=self.rng_seed,randomization_seed=self.randomization_seed,checks=self.checks)

        return {"grid_size": new_grid_size,"state_type_loc":new_state_type_loc,"rewards":new_rewards,"hovering":new_hovering,"windy":new_windy,"wind_prob":new_wind_prob,"wind_dir":new_wind_dir,"slippery":new_slippery,"slip_prob":new_slip_prob,"random_actions":new_random_actions,"random_prob":new_random_prob,"random_vec":new_random_vec,"checks":self.checks}

    def __str__(self):
        return "GridWorld"

    def get_next_state(self, state: int, action: int) -> Tuple[int,bool]:

        """
        Returns the next state after taking an action from a given state.
        Takes into account wind, slippery surfaces, and random actions.

        Parameters:
        - state (int): The current state, represented as a state number.
        - action (int): The action taken, where 0 = up, 1 = right, 2 = down, 3 = left.

        Returns:
        - int: The state number resulting from the action taken.
        - bool: Signals, if the game is restarted due to being in a terminal state
        """

        # Terminal and restarting?
        t = False

        # Check if state is valid integer and action is allowed
        if self.checks != "no_checks" and not self.init_check1_done:
            if isinstance(state,int) and isinstance(action,int):
                if not ((state in range(self.num_states)) and (action in range(self.num_actions))):
                    raise ValueError("State and action pair outside of state action space!")
                else:
                    if not (action in self.allowed_actions[state]):
                        raise ValueError("Action is not allowed in this state!")
            else:
                raise TypeError("State and action need to be integers!")
            if self.checks == "only_initial_check":
                self.init_check1_done = True
        
        # Reset needed? If so, next state is always start
        if state in self.reset_states_num:
            next_state = self.start_state_num
            t = True
            return next_state, t
        
        # Apply wind effect, if applicable
        if self.windy:
            if self.rng.uniform(0,1) < self.wind_prob:
                if self.wind_dir_num in self.allowed_actions[state]:
                    action = self.wind_dir_num
                else:
                    next_state = state
                    return next_state, t

        # Apply slippery effect, if applicable
        if self.slippery:
            if self.rng.uniform(0,1) < self.slip_prob:
                neighbor = self.rng.choice([-1, 0, 1], p=[1/3, 1/3, 1/3])
                if (action + neighbor) % self.num_actions in self.allowed_actions[state]:
                    action = (action + neighbor) % self.num_actions
                else:
                    next_state = state
                    return next_state, t

        # Apply random action, if applicable
        if self.random_actions:
            if self.rng.uniform(0,1) < self.random_prob:
                allowed_indices_prob = [self.random_vec[act] for act in self.allowed_actions[state]]
                factor = sum(allowed_indices_prob)
                probs = [val/factor for val in allowed_indices_prob]
                action = self.rng.choice(self.allowed_actions[state], p=probs)

        # Determine next state based on action and return it
        if action == 0: # Action "up"
            if not (state - self.grid_size[1] < 0):
                return state - self.grid_size[1], t
            else:
                return state, t  
        elif action ==1: # Action "right"
            if not (((state + 1) % self.grid_size[1]) == 0):
                return state + 1, t
            else:
                return state, t
        elif action ==2: # Action "down"
            if not (state + self.grid_size[1]) >= self.num_states:
                return state + self.grid_size[1], t
            else:
                return state, t
        elif action ==3: # Action "left"
            if not (state % self.grid_size[1] == 0):
                return state - 1, t
            else:
                return state, t
        else:
            raise ValueError("Something went wrong and the action was somehow chosen outside of the action space while sampling the next state!")
    
    def get_reward(self,state: int,action: int) -> Union[int,float]:

        """
        Returns the reward for a given state-action pair.

        Parameters:
        - state (int): The current state, represented as a state number.
        - action (int): The action taken, where 0 = up, 1 = right, 2 = down, 3 = left.

        Returns:
        - float: The reward for the state-action pair.
        """

        # Check if state is valid integer and action is allowed
        if self.checks != "no_checks" and not self.init_check2_done:
            if isinstance(state,int) and isinstance(action,int):
                if not ((state in range(self.num_states)) and (action in range(self.num_actions))):
                    raise ValueError("State and action pair outside of state action space!")
                else:
                    if not (action in self.allowed_actions[state]):
                        raise ValueError("Action is not allowed in this state!")
            else:
                raise TypeError("State and action need to be integers!")
            if self.checks == "only_initial_check":
                self.init_check2_done = True
        
        # Assign the reward for the state, or sample from the distribution specified in case of stochastic rewards
        if (state,action) in self.state_action_rewards_dict.keys():
            if isinstance(self.state_action_rewards_dict[(state,action)], (int,float)):
                return self.state_action_rewards_dict[(state,action)]
            else:
                return float(sample_from_dist(self.rng,self.state_action_rewards_dict[(state,action)][0],1,**self.state_action_rewards_dict[(state,action)][1])[0])
        else:
            if isinstance(self.default_reward,(int,float)):
                return self.default_reward
            else:
                return float(sample_from_dist(self.rng,self.default_reward[0],1,**self.default_reward[1])[0])
        
    def get_next_state_and_reward(self, state: int,action: int) -> Tuple[int,bool,Union[int,float]]:

        """
        Returns the next state and reward for a given state-action pair.

        Parameters:
        - state (int): The current state, represented as a state number.
        - action (int): The action taken, where 0 = up, 1 = right, 2 = down, 3 = left.

        Returns:
        - int: The state number resulting from the action taken.
        - bool: Signals, if the game is restarted due to being in a terminal state.
        - float: The reward for the state-action pair.
        """

        next_state, t = self.get_next_state(state,action)
        reward = self.get_reward(state,action)
        return next_state, t, reward
    
    def get_next_state_det(self, state: int, action: int) -> Tuple[int,bool]:

        """
        Returns the next state after taking an action from a given state.
        Does not take into account wind, slippery surfaces, and random actions.

        Parameters:
        - state (int): The current state, represented as a state number.
        - action (int): The action taken, where 0 = up, 1 = right, 2 = down, 3 = left.

        Returns:
        - int: The state number resulting from the action taken.
        """
        
        # Reset needed? If so, next state is always start
        if state in self.reset_states_num:
            next_state = self.start_state_num
            return next_state

        # Determine next state based on action and return it
        if action == 0: # Action "up"
            if not (state - self.grid_size[1] < 0):
                return state - self.grid_size[1]
            else:
                return state 
        elif action ==1: # Action "right"
            if not (((state + 1) % self.grid_size[1]) == 0):
                return state + 1
            else:
                return state
        elif action ==2: # Action "down"
            if not (state + self.grid_size[1]) >= self.num_states:
                return state + self.grid_size[1]
            else:
                return state
        elif action ==3: # Action "left"
            if not (state % self.grid_size[1] == 0):
                return state - 1
            else:
                return state
        
    def mean_rewards_to_state_action(self, mean_rewards: Dict = None, mc_runs: int = 100000) -> Dict[Tuple[int,int],Union[int,float]]:

        """
        Returns a dictionary containing mean rewards. Can be based on a dictionary shaped like the rewards dictionary containing 
        the means for the state action pairs or alternatively uses MC runs to determine a proxy for the mean.

        Parameters:
        - mean_rewards (Dict): Eiter an empty dictionary if mc runs should be performed to get the values or a dictionary
          of the same shape as rewards
        - mc_runs (int): The number of MC-Runs to be performed.

        Returns:
        - dict: The dictionary containing the mean reward for each state action pair.
        """

        if mean_rewards is None:
            mean_rewards = {}

        # Check if state is valid integer and action is allowed
        if self.checks != "no_checks" and not self.init_check3_done:
            if isinstance(mc_runs,int):
                if mc_runs <= 0:
                    raise ValueError("Number of Monte Carlo runs needs to be bigger than 0!")
            else:
                raise TypeError("Number of Monte Carlo runs needs to be numerical!")
            if isinstance(mean_rewards,dict):
                if mean_rewards == {}:
                    pass
                elif set(mean_rewards.keys()) == set(self.rewards.keys()) and len(mean_rewards.keys()) == len(self.rewards.keys()):
                    for key in mean_rewards.keys():
                        if not isinstance(mean_rewards[key],(int,float)):
                            raise ValueError("Mean rewards need to take numerical values!")
                else:
                    raise ValueError("The mean rewards dictionary needs to either be empty or have the same shape as the rewards dictionary!")
            else:
                raise TypeError("Mean rewards need to be passed in a dictionary!")
            if self.checks == "only_initial_checks":
                self.init_check3_done = True
        
        # Fill dictionary with MC-Runs if it is empty or self.means if it exists
        if mean_rewards == {}:
            m_rewards = deepcopy(self.rewards)
            for key in m_rewards.keys():
                if not isinstance(m_rewards[key],(int,float)):
                    if hasattr(self,"means"):
                        m_rewards[key] = self.means[key]
                    else:
                        mc_runs_values = []
                        for _ in range(mc_runs):
                            reward = float(sample_from_dist(self.rng,m_rewards[key][0],1,**m_rewards[key][1])[0])
                            mc_runs_values.append(reward)
                        mc_estimator = sum(mc_runs_values) / mc_runs
                        m_rewards[key] = mc_estimator
        
        # Initialization if dictionary is non-empty
        else:
            m_rewards = deepcopy(mean_rewards)

        # Fill the state action rewards_dict to return
        s_a_rewards_dict = {(state,action): m_rewards["default"] for state in range(self.num_states) for action in self.allowed_actions[state]}
        for key in m_rewards.keys():
            if key == "default":
                pass
            elif key not in self.state_type_loc.keys():
                pass
            else:
                for coord in self.state_type_loc[key][0]:
                    for act in self.allowed_actions[self.coord_to_state(coord)]:
                        s_a_rewards_dict[(self.coord_to_state(coord),act)] = m_rewards[key] # Set mean reward for each state that is not considered a default state
        
        return s_a_rewards_dict

    def coord_to_state(self, coordinate_tuple: Tuple[int,int]) -> int:

        """
        Converts a coordinate tuple (row, col) to a state number.

        Parameters:
        - coordinate_tuple (tuple): A tuple representing the (row, col) position in the grid.

        Returns:
        - int: The state number corresponding to the given coordinates.
        """

        return (coordinate_tuple[0] * self.grid_size[1]) + coordinate_tuple[1]    
    
    def state_to_coord(self, state: int) -> Tuple[int,int]:

        """
        Converts a state number to a coordinate tuple (row, col).

        Parameters:
        - state (int): The state number to convert.

        Returns:
        - tuple: The (row, col) coordinates corresponding to the given state.
        """

        return ( int(np.floor(state / self.grid_size[1])) , state % self.grid_size[1]) 
    
    def action_words_to_nums(self,action: str) -> int:

        """
        Translates an action or direction word into its action number.

        Parameters:
        - action (str): The action word, one of "up", "right", "down", or "left".

        Returns:
        - int: The corresponding action number (0 = up, 1 = right, 2 = down, 3 = left).

        Raises:
        - ValueError: If the word is none of the four directions.
        """

        # The position of a word in this tuple is its action number
        direction_words = ("up", "right", "down", "left")
        for number, word in enumerate(direction_words):
            if action == word:
                return number
        raise ValueError("Only up, right, down, or left are valid as actions or directions!")

    def inputcheck(self) -> int:

        """
        Validates the input parameters to ensure they follow the expected formats and constraints.

        Raises:
        - ValueError: If any of the input parameters are invalid.
        - TypeError: If any of the input types are invalid.
        """

        # Grid size is tuple of length 2 containing positive integers
        if isinstance(self.grid_size,tuple):
            if len(self.grid_size) != 2:
                raise ValueError("Grid size must be tuple of length two!")
            else: 
                for value in self.grid_size:
                    if not isinstance(value,int):
                        raise ValueError("Grid size components must be integers!")
                    else:
                        if value <= 0:
                            raise ValueError("Grid size components must be strictly positive")
        else:
            raise TypeError("Grid size needs to be a tuple!") 
        
        # Hovering needs to be boolean
        if not isinstance(self.hovering, bool):
            raise TypeError("Variable hovering needs to be a boolean value!")
        
        # Windy, slippery, and random are boolean values
        if not isinstance(self.windy,bool):
            raise TypeError("Variable windy needs to be a boolean value!")
        if not isinstance(self.slippery,bool):
            raise TypeError("Variable slippery needs to be a boolean value!")
        if not isinstance(self.random_actions,bool):
            raise TypeError("Variable random actions needs to be a boolean value!")
        
        # Only one of windy, slippery, and random
        if self.windy and self.slippery or self.windy and self.random_actions or self.slippery and self.random_actions:
            raise ValueError("Only one of the modes windy, slippery, and random actions can not be on at the same time!")
        
        # Wind_prob, slip_prob, and random_prob are values between 0 and 1
        if isinstance(self.wind_prob, (int, float)):
            if not (0 <= self.wind_prob <= 1):
                raise ValueError("The wind probability needs to be between 0 and 1!")
        else:
            raise TypeError("Variable wind_prob needs to be numeric")  
        if isinstance(self.slip_prob, (int, float)):
            if not (0 <= self.slip_prob <= 1):
                raise ValueError("The wind probability needs to be between 0 and 1!")
        else:
            raise TypeError("Variable wind_prob needs to be numeric")
        if isinstance(self.random_prob, (int, float)):
            if not (0 <= self.random_prob <= 1):
                raise ValueError("The wind probability needs to be between 0 and 1!")
        else:
            raise TypeError("Variable random_prob needs to be numeric")  
        
        # Seed is in valid range:
        if isinstance(self.rng_seed, int):
            if not (0 <= self.rng_seed < 2**32):
                raise ValueError("The provided seed is not in the range of acceptable integer seeds!")
        else:
            raise TypeError("The seed needs to be an integer!")
        
        # Environment randomization seed is in valid range:
        if isinstance(self.randomization_seed, int):
            if not (0 <= self.randomization_seed < 2**32):
                raise ValueError("The provided seed is not in the range of acceptable integer seeds!")
        else:
            raise TypeError("The seed needs to be an integer!")
        
        # Wind direction is a valid action
        if isinstance(self.wind_dir, str):
            if not (self.wind_dir in ["up","right","left","down"]):
                raise ValueError("Wind direction is not contained in the action space!")
        else:
            raise TypeError("Wind direction must be a string!") 
        
        # Random_vec needs to be a probability vector of length 4 as a list
        if isinstance(self.random_vec, list):
            if all(isinstance(prob,(int,float)) for prob in self.random_vec):
                if not np.sum(self.random_vec) == 1:
                    raise ValueError("Probability values in random_vec need to add up to 1!")
            else:
                raise TypeError("Random_vec needs to be a list containing numerical values!")
        else:
            raise TypeError("Random_vec needs to be a list!")
        
        # State_type_loc needs to be dict containing at least goal and start and map to 2-tuples of lists of tuples of valid coordinates and boolean values representing if the state terminates
        if isinstance(self.state_type_loc,dict):
            if "goal" in self.state_type_loc.keys() and "start" in self.state_type_loc.keys():
                for key in self.state_type_loc.keys():
                    if isinstance(self.state_type_loc[key],tuple):
                        if len(self.state_type_loc[key]) == 2:
                            if isinstance(self.state_type_loc[key][0],list):
                                if key == "goal" or key == "start":
                                    if len(self.state_type_loc[key][0]) != 1:
                                        raise ValueError(f"There can only be one location for {key}!")
                                for coord_tuple in self.state_type_loc[key][0]:
                                    if isinstance(coord_tuple,tuple):
                                        if len(coord_tuple) != 2:
                                            raise ValueError(f"Locations of {key} must be tuples of length two!")
                                        else: 
                                            for value in coord_tuple:
                                                if not isinstance(value,int):
                                                    raise ValueError(f"Location components of {key} must be integers!")
                                                else:
                                                    if (coord_tuple[0] >= self.grid_size[0]) or (coord_tuple[1] >= self.grid_size[1]):
                                                        raise ValueError(f"The location {coord_tuple} for the state type {key} is out of bounds!")
                                    else:
                                        raise TypeError(f"Locations of {key} need to be tuples!")
                                if not isinstance(self.state_type_loc[key][1],bool):
                                    raise TypeError(f"The value for if the state type {key} terminates or not needs to be a boolean one!")
                            else:
                                raise TypeError("Dictionary keys of state_type_loc need to map to lists of coordinates!")
                        else:
                            raise TypeError("For each keyword, the dictionary of state types and locations needs to contain a tuple of length 2 containing a list of locations and a boolean indicating if the state is terminating or not!")
                    else:
                        raise ValueError(f"State types must map to a tuple containing a list of coordinates and if the state terminates!")
            else:
                raise ValueError("The location of the goal and the start state need to be specified!")
        else:
            raise TypeError("Variable state_type_loc needs to be a dictionary!") 
        
        # No double assignments in state type locations dictionary
        coordinates = []
        for key in self.state_type_loc.keys():
            for coord in self.state_type_loc[key][0]:
                coordinates.append(coord)
        if len(coordinates) != len(set(coordinates)):
            raise ValueError("There are coordinates used for multiple state types!")
        
        # Goal needs to be terminal and Start can not be terminal
        if not self.state_type_loc["goal"][1] or self.state_type_loc["start"][1]:
            raise ValueError("Goal must be terminal and start can not be terminal!")
        
        # Rewards needs to be a dictionary mapping all appearing state types and default to their rewards, which can be either stochastic or fixed
        if isinstance(self.rewards,dict):
            if all([key in self.rewards.keys() or key == "start" for key in self.state_type_loc.keys()]):
                if "goal" in self.rewards.keys() and "default" in self.rewards.keys():
                    for key in self.rewards.keys():
                        if isinstance(self.rewards[key],list):
                            if len(self.rewards[key]) == 2:
                                if (isinstance(self.rewards[key][0],str) and isinstance(self.rewards[key][1],dict)):
                                    check_for_allowed_dist(self.rng,self.rewards[key][0],**self.rewards[key][1])
                                else:
                                    raise ValueError("For stochastic rewards a list containing the distribution and a dictionary of keyword arguments need to be passed!")
                            elif len(self.rewards[key]) == 3:
                                if (isinstance(self.rewards[key][0],str) and isinstance(self.rewards[key][1],dict) and isinstance(self.rewards[key][2],(int,float,str))):
                                    check_for_allowed_dist(self.rng,self.rewards[key][0],**self.rewards[key][1])
                                else:
                                    raise ValueError("For stochastic rewards a list containing the distribution and a dictionary of keyword arguments need to be passed!")
                            else:
                                raise ValueError("For stochastic rewards a list containing the distribution and a dictionary of keyword arguments need to be passed!")
                        elif (isinstance(self.rewards[key],(int,float))):
                            pass
                        else:
                            raise TypeError("The rewards need to be either numeric or a list containing a sample distribution and a dictionary of keyword arguments!")
                else:
                    raise ValueError("Rewards for goal and default value need to be provided!")
            else:
                raise ValueError("A state type with a specified location was provided that has no specified reward!")
        else:
            raise TypeError("Variable rewards needs to be a dictionary!")
        
        return 1
    
    def randomization_inputcheck(self,check_goal_is_goal: bool = True, discounted_reward_goal_limit: float = 0.95, gamma: float = 0.99, reward_normalization: bool = True,reward_normalization_factor_for_negatives: Union[int,float,Callable,str] = "standard",reward_normalization_num_tries: int = -1,**randomization_kwargs: Dict) -> int:
        
        """
        Validates the parameters passed to the randomization function to ensure they follow the expected formats and constraints.

        Raises:
        - ValueError: If any of the parameters are invalid.
        - TypeError: If any of the parameter types are invalid.
        """

        # check_goal_is_goal needs to be boolean
        if not isinstance(check_goal_is_goal,bool):
            raise TypeError("Parameter check_goal_is_goal needs to be boolean!")

        # discounted_reward_goal_limit needs to be numeric between 0 and 1
        if isinstance(discounted_reward_goal_limit,(int,float)):
            if not (0 <= discounted_reward_goal_limit <= 1):
                raise ValueError("Parameter discounted_reward_goal_limit needs to be numeric and between 0 and 1!")
        else:
            raise TypeError("Parameter discounted_reward_goal_limit needs to be numeric!")

        # gamma needs to be numeric between 0 and 1
        if isinstance(gamma,(int,float)):
            if not (0 <= gamma <= 1):
                raise ValueError("Parameter gamma needs to be numeric and between 0 and 1!")
        else:
            raise TypeError("Parameter gamma needs to be numeric!") 
        
        # reward_normalization needs to be boolean
        if not isinstance(reward_normalization,bool):
            raise TypeError("Parameter reward_normalization needs to be boolean!")
        
        # reward_normalization_factor_for_negatives numeric, callable, or "standard"
        if isinstance(reward_normalization_factor_for_negatives,(int,float)):
            if not (reward_normalization_factor_for_negatives > 1):
                raise ValueError("If parameter reward_normalization_factor_for_negatives is numerical, it needs to be bigger than one!")
        elif reward_normalization_factor_for_negatives == "standard":
            pass
        elif isinstance(reward_normalization_factor_for_negatives, types.LambdaType) and reward_normalization_factor_for_negatives.__name__ == "<lambda>":
            pass
        else:
            raise ValueError("Parameter reward_normalization_factor_for_negatives needs to be numerical, lambda function, or 'standard'!")

        # reward_normalization_num_tries needs to be positive integer or -1
        if isinstance(reward_normalization_num_tries,int):
            if not (reward_normalization_num_tries >= 1 or reward_normalization_num_tries == -1):
                raise ValueError("Paramter reward_normalization_num_tries needs to be positive integer or -1!")
        else:
            raise TypeError("Parameter reward_normalization_num_tries needs to be integer!")
        
        # randomization_kwargs needs to be dictionary
        if not isinstance(randomization_kwargs,dict):
            raise TypeError("Parameter randomization_kwargs needs to be a dictionary!")
        
        # if codenames in randomization_kwargs, needs to be dict with str and randomization args
        if "codenames" in randomization_kwargs.keys():
            if isinstance(randomization_kwargs["codenames"],dict):
                for key in randomization_kwargs["codenames"].keys():
                    if isinstance(key,str):
                        if isinstance(randomization_kwargs["codenames"][key],list):
                            if len(randomization_kwargs["codenames"][key]) == 2:
                                if (isinstance(randomization_kwargs["codenames"][key][0],str) and isinstance(randomization_kwargs["codenames"][key][1],dict)):
                                    check_for_allowed_dist(self.randenv_rng,randomization_kwargs["codenames"][key][0],**randomization_kwargs["codenames"][key][1])
                                else:
                                    raise ValueError("Keys in codenames dictionary need to map to lists that have allowed dist name and dist parameters in positions 0 and 1!")
                            elif len(randomization_kwargs["codenames"][key]) == 3:
                                if (isinstance(randomization_kwargs["codenames"][key][0],str) and isinstance(randomization_kwargs["codenames"][key][1],dict)):
                                    check_for_allowed_dist(self.randenv_rng,randomization_kwargs["codenames"][key][0],**randomization_kwargs["codenames"][key][1])
                                else:
                                    raise ValueError("Keys in codenames dictionary need to map to lists that have allowed dist name and dist parameters in positions 0 and 1!")
                            else:
                                raise TypeError("Keys in codenames dictionary need to map to lists of length 2 or three!")
                        else:
                            raise TypeError("Keys in codenames dictionary need to map to lists!")
                    else:
                        raise TypeError("Keys in codenames dictionary need to be strings!")
            else:
                raise TypeError("If codenames is present, it should be a dictionary!")
        
        # if randomize_gridsize_kwargs, needs to be tuple of numbers or random list format and randomize_locations_kwargs needs to be active
        if "randomize_gridsize_kwargs" in randomization_kwargs.keys():
            if "randomize_locations_kwargs" in randomization_kwargs.keys():
                if isinstance(randomization_kwargs["randomize_gridsize_kwargs"],tuple):
                    if len(randomization_kwargs["randomize_gridsize_kwargs"]) == 2:
                        for i in range(2):
                            if isinstance(randomization_kwargs["randomize_gridsize_kwargs"][i],int):
                                if randomization_kwargs["randomize_gridsize_kwargs"][i] < 1:
                                    raise ValueError("If random gridsize is integer, it needs to be positive")
                                elif i == 1:
                                    if randomization_kwargs["randomize_gridsize_kwargs"][0] == 1 and randomization_kwargs["randomize_gridsize_kwargs"][i] == 1:
                                        raise ValueError("Random gridsizes cannot be 1 and 1")
                            elif isinstance(randomization_kwargs["randomize_gridsize_kwargs"][i],list):
                                if len(randomization_kwargs["randomize_gridsize_kwargs"][i]) == 2:
                                    if (isinstance(randomization_kwargs["randomize_gridsize_kwargs"][i][0],str) and isinstance(randomization_kwargs["randomize_gridsize_kwargs"][i][1],dict)):
                                        check_for_allowed_dist(self.randenv_rng,randomization_kwargs["randomize_gridsize_kwargs"][i][0],**randomization_kwargs["randomize_gridsize_kwargs"][i][1])
                                    else:
                                        raise ValueError("Random gridsizes need to have allowed dist name and dist parameters in positions 0 and 1!")
                                elif len(randomization_kwargs["randomize_gridsize_kwargs"][i]) == 3:
                                    if (isinstance(randomization_kwargs["randomize_gridsize_kwargs"][i][0],str) and isinstance(randomization_kwargs["randomize_gridsize_kwargs"][i][1],dict)):
                                        check_for_allowed_dist(self.randenv_rng,randomization_kwargs["randomize_gridsize_kwargs"][i][0],**randomization_kwargs["randomize_gridsize_kwargs"][i][1])
                                    else:
                                        raise ValueError("Random gridsizes need to have allowed dist name and dist parameters in positions 0 and 1!")
                                else:
                                    raise TypeError("Random gridsizes need to be lists of length 2 or three!")
                            else:
                                raise TypeError("If randomize_gridsize_kwargs is present, it should be a tuple containing positive integers or lists!")
                    else:
                        raise ValueError("If randomize_gridsize_kwargs is present, it should be a tuple of length 2!")
                else:
                    raise TypeError("If randomize_gridsize_kwargs is present, it should be a tuple!")
            else:
                raise ValueError("If randomize_gridsize_kwargs is on, randomize_locations_kwargs needs to be on as well!")

        # If randomize_locations_kwargs, see if parameters fit
        if "randomize_locations_kwargs" in randomization_kwargs.keys():
            if "randomize_gridsize_kwargs" in randomization_kwargs.keys() and ("goal" not in randomization_kwargs["randomize_locations_kwargs"] or "start" not in randomization_kwargs["randomize_locations_kwargs"]):
                raise ValueError("If randomization of the gridsize is on, then randomization of start and goal location should be on as well!")
            if isinstance(randomization_kwargs["randomize_locations_kwargs"],dict):
                for key in randomization_kwargs["randomize_locations_kwargs"].keys():
                    if isinstance(key,str):
                        if key in self.state_type_loc:
                            if isinstance(randomization_kwargs["randomize_locations_kwargs"][key],tuple):
                                if len(randomization_kwargs["randomize_locations_kwargs"][key]) == 2:
                                    if isinstance(randomization_kwargs["randomize_locations_kwargs"][key][0],int):
                                        if not randomization_kwargs["randomize_locations_kwargs"][key][0] > 0:
                                            raise ValueError("The number of states to be chosen for the randomization of game locations needs to be either a positive integer or a lists of length 2 or 3 containing the distribution name and parameter dictionary in positions 0 and 1!")
                                    elif isinstance(randomization_kwargs["randomize_locations_kwargs"][key][0],list):
                                        if len(randomization_kwargs["randomize_locations_kwargs"][key][0]) == 2 or len(randomization_kwargs["randomize_locations_kwargs"][key][0]) == 3:
                                            if (isinstance(randomization_kwargs["randomize_locations_kwargs"][key][0][0],str) and isinstance(randomization_kwargs["randomize_locations_kwargs"][key][0][1],dict)):
                                                check_for_allowed_dist(self.randenv_rng,randomization_kwargs["randomize_locations_kwargs"][key][0][0],**randomization_kwargs["randomize_locations_kwargs"][key][0][1])
                                            else:
                                                raise TypeError("The number of states to be chosen for the randomization of game locations needs to be either a positive integer or a lists of length 2 or 3 containing the distribution name and parameter dictionary in positions 0 and 1!")
                                        else:
                                            raise ValueError("The number of states to be chosen for the randomization of game locations needs to be either a positive integer or a lists of length 2 or 3 containing the distribution name and parameter dictionary in positions 0 and 1!")
                                    else:
                                        raise TypeError("The number of states to be chosen for the randomization of game locations needs to be either a positive integer or a lists of length 2 or 3 containing the distribution name and parameter dictionary in positions 0 and 1!")
                                    if isinstance(randomization_kwargs["randomize_locations_kwargs"][key][1],list):
                                        for state in randomization_kwargs["randomize_locations_kwargs"][key][1]:
                                            if isinstance(state,int):
                                                if not state >= 0:
                                                    raise ValueError("The locations to be chosen from for the randomization of game locations need to be 'all', a tuple containing tuples delimiting the upper left and lower right area of the states to choose from, or a list of state numbers!")
                                            else:
                                                raise TypeError("The locations to be chosen from for the randomization of game locations need to be 'all', a tuple containing tuples delimiting the upper left and lower right area of the states to choose from, or a list of state numbers!")
                                    elif isinstance(randomization_kwargs["randomize_locations_kwargs"][key][1],tuple):
                                        if len(randomization_kwargs["randomize_locations_kwargs"][key][1]) == 2:
                                            if isinstance(randomization_kwargs["randomize_locations_kwargs"][key][1][0],tuple) and isinstance(randomization_kwargs["randomize_locations_kwargs"][key][1][1],tuple):
                                                if len(randomization_kwargs["randomize_locations_kwargs"][key][1][0]) == 2 and len(randomization_kwargs["randomize_locations_kwargs"][key][1][1]) == 2:
                                                    if isinstance(randomization_kwargs["randomize_locations_kwargs"][key][1][0][0],int) and isinstance(randomization_kwargs["randomize_locations_kwargs"][key][1][0][1],int) and isinstance(randomization_kwargs["randomize_locations_kwargs"][key][1][1][0],int) and isinstance(randomization_kwargs["randomize_locations_kwargs"][key][1][1][1],int):
                                                        if not (randomization_kwargs["randomize_locations_kwargs"][key][1][0][0] <= randomization_kwargs["randomize_locations_kwargs"][key][1][1][0] and randomization_kwargs["randomize_locations_kwargs"][key][1][0][1] <= randomization_kwargs["randomize_locations_kwargs"][key][1][1][1]):
                                                            raise ValueError("The locations to be chosen from for the randomization of game locations need to be 'all', a tuple containing tuples delimiting the upper left and lower right area of the states to choose from, or a list of state numbers!")
                                                    else:
                                                        raise TypeError("The locations to be chosen from for the randomization of game locations need to be 'all', a tuple containing tuples delimiting the upper left and lower right area of the states to choose from, or a list of state numbers!")
                                                else:
                                                    raise TypeError("The locations to be chosen from for the randomization of game locations need to be 'all', a tuple containing tuples delimiting the upper left and lower right area of the states to choose from, or a list of state numbers!")
                                            else:
                                                raise TypeError("The locations to be chosen from for the randomization of game locations need to be 'all', a tuple containing tuples delimiting the upper left and lower right area of the states to choose from, or a list of state numbers!")
                                        else:
                                            raise ValueError("The locations to be chosen from for the randomization of game locations need to be 'all', a tuple containing tuples delimiting the upper left and lower right area of the states to choose from, or a list of state numbers!")
                                    elif randomization_kwargs["randomize_locations_kwargs"][key][1] != "all":
                                        raise TypeError("The locations to be chosen from for the randomization of game locations need to be 'all', a tuple containing tuples delimiting the upper left and lower right area of the states to choose from, or a list of state numbers!")
                                else:
                                    raise ValueError("The number of and locations to be chosen from for the randomization of game locations need to be provided as a tuple of length two")
                            else:
                                raise TypeError("The number of and locations to be chosen from for the randomization of game locations need to be provided as a tuple of length two")
                        else:
                            raise ValueError("If randomize_locations_kwargs is provided, its keys should match the keys of the original game!")
                    else:
                        raise TypeError("If randomize_locations_kwargs is provided, its keys should match the keys of the original game!")
            else:
                raise TypeError("If randomize_locations_kwargs is provided, it should be a dictionary!")

        # If randomize_rewards_kwargs, see if parameters fit
        if "randomize_rewards_kwargs" in randomization_kwargs.keys():
            if isinstance(randomization_kwargs["randomize_rewards_kwargs"],dict):
                for rand_tuple in randomization_kwargs["randomize_rewards_kwargs"].values():
                    if isinstance(rand_tuple,tuple):
                        if len(rand_tuple) == 2:
                            if not isinstance(rand_tuple[0],bool):
                                raise TypeError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                            if rand_tuple[0]:
                                if isinstance(rand_tuple[1],list):
                                    if len(rand_tuple[1]) == 2:
                                        check_for_allowed_dist(self.randenv_rng,rand_tuple[1][0],**rand_tuple[1][1])
                                        for param_value in rand_tuple[1][1].values():
                                            if isinstance(param_value,str):
                                                if "codenames" in randomization_kwargs.keys():
                                                    if not param_value in randomization_kwargs["codenames"]:
                                                        raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                                else:
                                                    raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                            elif isinstance(param_value,list):
                                                for par_val in param_value:
                                                    if isinstance(par_val,str):
                                                        if "codenames" in randomization_kwargs.keys():
                                                            if not par_val in randomization_kwargs["codenames"]:
                                                                raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                                        else:
                                                            raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                    elif len(rand_tuple[1]) == 3:
                                        check_for_allowed_dist(self.randenv_rng,rand_tuple[1][0],**rand_tuple[1][1])
                                        for param_value in rand_tuple[1][1].values():
                                            if isinstance(param_value,str):
                                                if "codenames" in randomization_kwargs.keys():
                                                    if not param_value in randomization_kwargs["codenames"]:
                                                        raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                                else:
                                                    raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                            elif isinstance(param_value,list):
                                                for par_val in param_value:
                                                    if isinstance(par_val,str):
                                                        if "codenames" in randomization_kwargs.keys():
                                                            if not par_val in randomization_kwargs["codenames"]:
                                                                raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                                        else:
                                                            raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                        if isinstance(rand_tuple[1][2],str):
                                            if "codenames" in randomization_kwargs.keys():
                                                if not rand_tuple[1][2] in randomization_kwargs["codenames"]:
                                                    raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                            else:
                                                raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                        elif not isinstance(rand_tuple[1][2],(int,float)):
                                            raise TypeError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                    else:
                                        raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                else:
                                    raise TypeError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                            else:
                                if isinstance(rand_tuple[1],list):
                                    if len(rand_tuple[1]) == 2 or len(rand_tuple[1]) == 3:
                                        pass
                                        check_for_allowed_dist(self.randenv_rng,rand_tuple[1][0],**rand_tuple[1][1])
                                    else:
                                        raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                elif isinstance(rand_tuple[1],(int,float)):
                                    pass
                                elif isinstance(rand_tuple[1],str):
                                    if "codenames" in randomization_kwargs.keys():
                                        if not rand_tuple[1] in randomization_kwargs["codenames"]:
                                            raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                                else:
                                    raise TypeError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                        else:
                            raise ValueError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
                    else:
                        raise TypeError("The keys of randomize_rewards_kwargs should map to a tuple containing a boolean value and a way of randomizing the reward, containing codewords if necessary and providing means if possible!")
            else:
                raise TypeError("If randomize_rewards_kwargs is provided, it should be a dictionary!")

        # If randomize_game_modifications_kwargs, see if parameters fit
        if "randomize_game_modifications" in randomization_kwargs.keys():
            if isinstance(randomization_kwargs["randomize_game_modification_kwargs"],dict):
                if "hovering" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                    if isinstance(randomization_kwargs["randomize_game_modification_kwargs"]["hovering"],list):
                        if len(randomization_kwargs["randomize_game_modification_kwargs"]["hovering"]) == 2 or len(randomization_kwargs["randomize_game_modification_kwargs"]["hovering"]) == 3:
                            check_for_allowed_dist(self.randenv_rng,randomization_kwargs["randomize_game_modification_kwargs"]["hovering"][0],**randomization_kwargs["randomize_game_modification_kwargs"]["hovering"][1])
                        else:
                            raise ValueError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                    else:
                        raise TypeError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                if "windy" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                    if isinstance(randomization_kwargs["randomize_game_modification_kwargs"]["windy"],list):
                        if len(randomization_kwargs["randomize_game_modification_kwargs"]["windy"]) == 2 or len(randomization_kwargs["randomize_game_modification_kwargs"]["windy"]) == 3:
                            check_for_allowed_dist(self.randenv_rng,randomization_kwargs["randomize_game_modification_kwargs"]["windy"][0],**randomization_kwargs["randomize_game_modification_kwargs"]["windy"][1])                                  
                        else:
                            raise ValueError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                    else:
                        raise TypeError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                if "wind_prob" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                    if isinstance(randomization_kwargs["randomize_game_modification_kwargs"]["wind_prob"],list):
                        if len(randomization_kwargs["randomize_game_modification_kwargs"]["wind_prob"]) == 2 or len(randomization_kwargs["randomize_game_modification_kwargs"]["wind_prob"]) == 3:
                            check_for_allowed_dist(self.randenv_rng,randomization_kwargs["randomize_game_modification_kwargs"]["wind_prob"][0],**randomization_kwargs["randomize_game_modification_kwargs"]["wind_prob"][1])                 
                        else:
                            raise ValueError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                    else:
                        raise TypeError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")                 
                if "wind_dir" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                    if isinstance(randomization_kwargs["randomize_game_modification_kwargs"]["wind_dir"],list):
                        if len(randomization_kwargs["randomize_game_modification_kwargs"]["wind_dir"]) == 2 or len(randomization_kwargs["randomize_game_modification_kwargs"]["wind_dir"]) == 3:
                            check_for_allowed_dist(self.randenv_rng,randomization_kwargs["randomize_game_modification_kwargs"]["wind_dir"][0],**randomization_kwargs["randomize_game_modification_kwargs"]["wind_dir"][1])   
                        else:
                            raise ValueError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                    else:
                        raise TypeError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")                               
                if "slippery" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                    if isinstance(randomization_kwargs["randomize_game_modification_kwargs"]["slippery"],list):
                        if len(randomization_kwargs["randomize_game_modification_kwargs"]["slippery"]) == 2 or len(randomization_kwargs["randomize_game_modification_kwargs"]["slippery"]) == 3:
                            check_for_allowed_dist(self.randenv_rng,randomization_kwargs["randomize_game_modification_kwargs"]["slippery"][0],**randomization_kwargs["randomize_game_modification_kwargs"]["slippery"][1])                                  
                        else:
                            raise ValueError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                    else:
                        raise TypeError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                if "slip_prob" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                    if isinstance(randomization_kwargs["randomize_game_modification_kwargs"]["slip_prob"],list):
                        if len(randomization_kwargs["randomize_game_modification_kwargs"]["slip_prob"]) == 2 or len(randomization_kwargs["randomize_game_modification_kwargs"]["slip_prob"]) == 3:
                            check_for_allowed_dist(self.randenv_rng,randomization_kwargs["randomize_game_modification_kwargs"]["slip_prob"][0],**randomization_kwargs["randomize_game_modification_kwargs"]["slip_prob"][1])                                  
                        else:
                            raise ValueError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                    else:
                        raise TypeError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                if "random_actions" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                    if isinstance(randomization_kwargs["randomize_game_modification_kwargs"]["random_actions"],list):
                        if len(randomization_kwargs["randomize_game_modification_kwargs"]["random_actions"]) == 2 or len(randomization_kwargs["randomize_game_modification_kwargs"]["random_actions"]) == 3:
                            check_for_allowed_dist(self.randenv_rng,randomization_kwargs["randomize_game_modification_kwargs"]["random_actions"][0],**randomization_kwargs["randomize_game_modification_kwargs"]["random_actions"][1])                                  
                        else:
                            raise ValueError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                    else:
                        raise TypeError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                if "random_prob" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                    if isinstance(randomization_kwargs["randomize_game_modification_kwargs"]["random_prob"],list):
                        if len(randomization_kwargs["randomize_game_modification_kwargs"]["random_prob"]) == 2 or len(randomization_kwargs["randomize_game_modification_kwargs"]["random_prob"]) == 3:
                            check_for_allowed_dist(self.randenv_rng,randomization_kwargs["randomize_game_modification_kwargs"]["random_prob"][0],**randomization_kwargs["randomize_game_modification_kwargs"]["random_prob"][1])                                  
                        else:
                            raise ValueError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                    else:
                        raise TypeError("Random game modifications need to be provided as a list of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                # Validate the randomization spec for "random_vec". The value must be a list of
                # exactly 4 entries; each entry is either a non-negative number (int/float) or a
                # distribution spec, i.e. a list of length 2 or 3 with the distribution name in
                # position 0 and its keyword arguments (a dict) in position 1.
                # NOTE: this block must read the "random_vec" entry. It previously read
                # "random_prob", which raised a KeyError when only "random_vec" was supplied and
                # rejected every otherwise valid configuration when both keys were supplied.
                if "random_vec" in randomization_kwargs["randomize_game_modification_kwargs"].keys():
                    if isinstance(randomization_kwargs["randomize_game_modification_kwargs"]["random_vec"],list):
                        if len(randomization_kwargs["randomize_game_modification_kwargs"]["random_vec"]) == 4:
                            for prob_value in randomization_kwargs["randomize_game_modification_kwargs"]["random_vec"]:
                                if isinstance(prob_value,list):
                                    # Distribution spec: delegate validation of the name/kwargs to the shared helper
                                    if len(prob_value) == 2 or len(prob_value) == 3:
                                        check_for_allowed_dist(self.randenv_rng,prob_value[0],**prob_value[1])
                                    else:
                                        raise ValueError("Random game modifications for random vec need to be provided as a list of length 4 of either positive numerical values or lists of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                                elif isinstance(prob_value,(int,float)):
                                    # Fixed numerical entry: only negative values are rejected (zero is accepted)
                                    if 0 > prob_value:
                                        raise ValueError("Random game modifications for random vec need to be provided as a list of length 4 of either positive numerical values or lists of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                                else:
                                    # Neither a distribution spec nor a number
                                    raise ValueError("Random game modifications for random vec need to be provided as a list of length 4 of either positive numerical values or lists of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                        else:
                            # Wrong number of entries in the vector
                            raise ValueError("Random game modifications for random vec need to be provided as a list of length 4 of either positive numerical values or lists of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
                    else:
                        # Not a list at all (ValueError is kept here for backward compatibility with existing callers)
                        raise ValueError("Random game modifications for random vec need to be provided as a list of length 4 of either positive numerical values or lists of length 2 or 3 containing the distribution name and parameters in positions 0 and 1!")
            else:
                raise TypeError("If randomize_game_modification_kwargs is provided, it should be a dictionary!")