{
    "title": "Parameters",
    "description": "PrBO parameters used to set up a PrBO before running",
    "type": "object",
    "properties": {
      "application_name": {
        "description": "Name of the application, this is for printing and file naming purposes.",
        "type": "string",
        "default": "application"
      },
      "log_file": {
        "description": "Change the name of the log file to this name. Log files are very handy in client-server mode where the prints are not displayed.",
        "type": "string",
        "default": "prbo_logfile.log"
      },
      "number_of_cpus": {
        "description": "This is the number of cpus to use. If 0 it means to let PrBO decide (querying the system), otherwise if forces the number of cpus to this number. ",
        "type": "integer",
        "default": 0
      },
      "max_number_of_predictions": {
        "description": "A number greater than 10k. Max number of predictions that the PrBO internal model can perform. We set a max number to limit the execution time of PrBO. Usually a bigger number will give a better accuracy but slower results.",
        "type": "integer",
        "minimum": 10000,
        "default": 1000000,
        "comment": "Is 100K or 1M the right number for one node? Improvement specific to a machine capability: instead of a fixed number we should estimate the memory consumption and the computation time in order to take as much sample as possible."
      },
      "optimization_iterations": {
        "description": "Usually a number between 1 and 10. Max number of optimization iterations that PrBO can internally perform. We set a max number to limit the execution time of PrBO. Usually a bigger number will give a better accuracy but slower results.",
        "type": "integer",
        "minimum": 0,
        "maximum": 5000,
        "default": 50
      },
      "number_of_repetitions": {
        "description": "Usually a number between 1 and 5. Number of times PrBO runs a single sample of the search space. For statistical significance it may be useful to run a sample several time. The mean or the median of the multiple runs is taken by PrBO. Execution time is negatively affected from a high number of repetitions.",
        "type": "integer",
        "minimum": 1,
        "default": 1
      },
      "prbo_mode": {
        "type": "object",
        "description": "One of the ways of using PrBO: default, exhaustive and client-server.",
        "oneOf": [
          {
            "$ref": "#/prbo_modes/default"
          },
          {
            "$ref": "#/prbo_modes/exhaustive"
          },
          {
            "$ref": "#/prbo_modes/client-server"
          }
        ],
        "default": { "mode": "default" }
      },
      "models": {
        "type": "object",
        "description": "PrBO is meant to use multiple models: Random Forest, Gaussian Processes, Parzen Estimators, etc.",
        "oneOf": [
          {
            "$ref": "#/models/randomForest"
          },
          {
            "$ref": "#/models/gaussianProcess"
          }
        ],
        "default": {
          "model": "random_forest"
        }
      },
      "output_image": {
        "type": "object",
        "description": "Info used by the plot script to plot the results of the PrBO search.",
        "$ref": "#/output_image/image",
        "default": {
                    "output_image_pdf_file": "output_pareto.pdf",
                    "image_xlog": false,
                    "image_ylog": false
                  }
      },
      "optimization_objectives": {
        "type": "array",
        "items": { "type": "string" },
        "description": "The names of the objectives PrBO will optimize. PrBO will automatically infer if this application is a mono or multi-objective optimization problem."
      },
      "feasible_output": {
        "type": "object",
        "description": "This the feasible/non feasible output flag, which is the validity or feasibility bit (true, false) of one sample of the space. This is an output of the code being optimized.",
        "$ref": "#/feasible_output/feasible",
        "default": {
                    "name": "Valid",
                    "true_value": "True",
                    "false_value": "False"
                  }
      },
      "timestamp": {
        "type": "string",
        "description": "Name of timestamp variable, this is a float that represents seconds from the linux epoch. This is useful to track the progress of the new samples over time and for comparison with other approaches than PrBO.",
        "default": "Timestamp"
      },
      "evaluations_per_optimization_iteration": {
        "description": "Defines the cap to how many evaluations are done in one optimization iteration.",
        "type": "integer",
        "minimum": 1,
        "default": 1
      },
      "run_directory": {
        "description": "Relative path from where PrBO is launched. The result files will be saved here.",
        "type": "string",
        "default": "."
      },
      "output_data_file": {
        "description": "Output file containing all the points explored by PrBO.",
        "type": "string",
        "default": "output_samples.csv"
      },
      "output_pareto_file": {
        "description": "Output file containing the Pareto points explored by PrBO.",
        "type": "string",
        "default": "output_pareto.csv"
      },
      "design_of_experiment": {
        "type": "object",
        "description": "Before starting the active learning phase, PrBO samples the space: the design of experiment (doe) phase. How many times it samples the space and how is declared here. The doe sampling methods are: random sampling, standard latin hypercube and k latin hypercube. Later on the sliced latin hypercube sampling will be introduced.",
        "$ref": "#/doe_sampling/doe",
        "default": {
                    "doe_type": "random sampling",
                    "number_of_samples": 10
                  }
      },
      "input_parameters" : {
        "type": "object",
        "description": "The input variables that define the search space to explore.",
        "patternProperties": {
            "^[0-9a-zA-Z_-]+$": {
                 "properties": {
                    "prior" : {
                       "oneOf": [
                          {
                            "type" : "string",
                            "enum": ["gaussian", "uniform", "exponential", "decay", "estimate", "tpe"]
                          },
                          {
                            "type" : "array"
                          }
                        ],
                      "default" : "uniform"
                    },
                    "prior_bad" : {
                      "oneOf": [
                         {
                           "type" : "string",
                           "enum": ["gaussian", "uniform", "exponential", "decay", "estimate", "tpe"]
                         },
                         {
                           "type" : "array"
                         }
                       ],
                     "default" : "uniform"
                    },
                    "parameter_type": {
                        "description": "The type of the parameter that is being defined.",
                        "type": "string",
                        "enum": ["ordinal", "categorical", "real", "integer"]
                    },
                    "values": {
                        "type": "array",
                        "items": {
                            "oneOf": [
                                  { "type": "string" },
                                  { "type": "number" }
                                  ]
                        },
                        "description": "The values of this parameter. For real parameters it requires a min and a max. For integer a min and max. For ordinals a list of numbers. For categoricals a list of strings."
                    },
                    "parameter_default" : {
                            "oneOf": [
                                { "type": "string" },
                                { "type": "number" }
                                ]
                    }
                 },
                 "additionalProperties": false,
                 "required": ["parameter_type", "values"]
          }
        },
        "additionalProperties": false,
        "minProperties": 1
      },
      "acquisition_function":{
        "type": "string",
        "description": "which acquisition function to be used in the random scalarizations method: ucb or thompson sampling. Used in random scalarizations only.",
        "enum":["UCB", "TS", "EI"],
        "default": "EI"
      },
      "scalarization_method":{
        "type": "string",
        "description": "which method to use for scalarization. Linear and modified_tchebyshev are implemented as presented in https://arxiv.org/pdf/1805.12168.pdf, while tchebyshev is implemented as presented in https://www.cs.bham.ac.uk/~jdk/parego/ParEGO-TR3.pdf. Used in random scalarizations only.",
        "enum": ["linear", "tchebyshev", "modified_tchebyshev"],
        "default": "tchebyshev"
      },
      "weight_sampling":{
        "type": "string",
        "description": "which method to use to sample the scalarization weights: bounding box or flat. Flat means weights are sampled from a uniform distribution. Bounding box means weights will be sampled so that PrBO prioritizes objective values within the limits specified in 'bounding_box_limits'. Both sampling methods are described in: https://arxiv.org/abs/1805.12168.  Used in random scalarizations only.",
        "enum": ["bounding_box", "flat"],
        "default": "flat"
      },
      "bounding_box_limits":{
        "type": "array",
        "description": "an array of integer with the limits of the bounding boxes, either two elements or two elements per objective. Limits should be given in the same order as optimization_objectives. If only two elements are provided, the same bounds will be used for all objectives. Used in random scalarizations only.",
        "minItems": 2,
        "items": {
          "type": "number"
        },
        "default": [0, 1]
      },
      "optimization_method":{
        "type": "string",
        "description": "Method to use for the multi-objective optimization in PrBO.",
        "enum": ["local_search", "prior_optimization"],
        "default": "prior_optimization"
      },
      "local_search_starting_points":{
        "type": "integer",
        "description": "number of starting points for the multi-start local search used to optimize the acquisition functions.",
        "default": 10
      },
      "local_search_random_points":{
        "type": "integer",
        "description": "number of random points for the multi-start local search used to optimize the acquisition functions.",
        "default": 10000
      },
      "local_search_evaluation_limit":{
        "type": "integer",
        "description": "the maximum number of function evaluations the local search can perform. If -1, the number of function evaluations will not be limited.",
        "default": -1,
        "min": -1
      },
      "scalarization_key":{
        "type": "string",
        "description": "name used by PrBO to refer to the scalarization of the optimization objectives.",
        "default": "scalarization"
      },
      "local_search_scalarization_weights":{
        "type": "array",
        "description": "weights to use in the scalarization of the optimization objectives. Must match the number of objectives. The sum of the weights should be 1, if it is not, PrBO will normalize them to 1.",
        "minItems": 1,
        "items": {
          "type": "number"
        },
        "default": [1]
      },
      "print_parameter_importance":{
        "type": "boolean",
        "description": "whether to print the importance of each input parameter according to the surrogate model.",
        "default": false
      },
      "normalize_inputs":{
        "type": "boolean",
        "description": "whether to normalize inputs to have zero mean and unit variance before fitting model.",
        "default": false
      },
      "normalize_outputs":{
        "type": "boolean",
        "description": "whether to normalize outputs to have zero mean and unit variance before fitting model.",
        "default": true
      },
      "epsilon_greedy_threshold":{
        "type": "number",
        "description": "value for the epsilon in the epsilon greedy component of PrBO.",
        "default": 0.1,
        "minimum": 0,
        "maximum": 1
      },
      "model_posterior_weight":{
        "type": "number",
        "description": "weight given to the model posterior in PrBO's posterior computation. Only used with prior_optimization",
        "default": 10
      },
      "model_good_quantile":{
        "type": "number",
        "description": "defines the quantile for the model's 'good' posterior. A function value will be considered 'good' according to the model if it is lesser or equal to this quantile. Only used with prior_optimization",
        "default": 0.03
      },
      "prior_estimation_file":{
          "type": "string",
          "description": "A csv file containing a set of points to be used for density estimation.",
          "default": "samples.csv"
      },
      "prior_estimation_quantile": {
          "type": "number",
          "description": "A quantile of the best points to be used as 'good points'",
          "default": 1
      },
      "estimate_priors_good": {
        "type": "boolean",
        "description": "Whether to estimate priors for input parameters.",
        "default": false
      },
      "estimate_priors_bad": {
        "type": "boolean",
        "description": "Whether to estimate priors for input parameters.",
        "default": false
      },
      "resume_optimization": {
        "type": "boolean",
        "description": "Whether to resume optimization from a previous state or start a new optimization.",
        "default": false
      },
      "resume_optimization_data": {
        "type": "string",
        "description": "csv data of a previous optimization run to use to resume optimization.",
        "default": "output_samples.csv"
      },
      "bandwidth_parameter": {
        "type": "integer",
        "description": "Parameter used in the bandwidth selection. We use a scott selection, but replace the hardcoded 4 by this parameter",
        "default": 0
      },
      "bandwidth_n_factor": {
        "type": "integer",
        "description": "Parameter used in the bandwidth selection. We use a scott selection, but multiply n by this factor.",
        "default": 100
      },
      "standardize_priors": {
        "type": "boolean",
        "description": "This name is wrong, should be normalize. Whether to standardize priors during optimization. Prior normalization is important for the beta parameter, unnormalized priors may impact the behavior of beta.",
        "default": true
      },
      "prior_limit_estimation_points":{
        "type": "integer",
        "description": "number of random points to estimate the prior limits. The prior limits will be used to standardize the prior during optimization. The limits are updated as new prior values are found, but a initial estimation of the limits prevents the local search from using widely incorrect values in the first iteration.",
        "default": 10000
      },
      "posterior_computation_lower_limit":{
        "type": "number",
        "description": "The minimum allowed for model_bad and both prior_good and model_bad. PrBO will normalize the priors using this value as the minimum. model_bad will be set to this value if lower.",
        "default": 0.00000001
      },
      "tpe_prior_means":{
        "type": "array",
        "description":	"Means for the tpe prior gaussians. Array must have size 1 or match the number of input parameters. If only one element is passed, the same mean will be used for all points. TPE prior can only be used with real parameters for now.",
        "default": [0]
      },
      "tpe_prior_stds":{
        "type": "array",
        "description": "Standard deviations for the tpe gaussians. Array must have size 1 or match the number of input parameters. If only one element is passed, the same std will be used for all points. If -1, the std will be half of the input parameter's range. TPE prior can only be used with real parameters for now.",
        "default": [-1]
      },
      "tpe_bad_prior_means":{
        "type": "array",
        "description":	"Means for the tpe bad prior gaussians. Array must have size 1 or match the number of input parameters. If only one element is passed, the same mean will be used for all parameters. TPE prior can only be used with real parameters for now.",
        "default": [0]
      },
      "tpe_bad_prior_stds":{
        "type": "array",
        "description": "Standard deviations for the tpe bad prior gaussians. Array must have size 1 or match the number of input parameters. If only one element is passed, the same std will be used for all parameters. If -1, the std will be half of the input parameter's range. TPE prior can only be used with real parameters for now.",
        "default": [-1]
      },
      "acquisition_function_optimizer":{
        "type": "string",
        "description": "Which method to use to optimize the acquisition function. Posterior sampling can only be used with PrBO.",
        "enum":["local_search"],
        "default": "local_search"
      }
    },
    "additionalProperties": false,
    "required": ["optimization_objectives"],



    "prbo_modes": {
        "exhaustive": {
            "properties": {
                "mode": { "enum": [ "exhaustive" ] },
                "exhaustive_search_file": {
                    "description": "File containing the exhaustive search. For interesting problems this is usually not available because the space is usually too big. Example: exhaustive_search_file.csv",
                    "type": "string"
                }
            },
            "required": ["mode", "exhaustive_search_file"],
            "additionalProperties": false
        },
        "client-server": {
            "properties": {
                "mode": { "enum": [ "client-server" ] }
            },
            "required": ["mode"],
            "additionalProperties": false
        },
        "default": {
            "properties": {
                "mode": { "enum": [ "default" ] }
            },
            "required": ["mode"],
            "additionalProperties": false
        }
    },
    "models": {
        "randomForest": {
            "properties": {
                "model": { "enum": [ "random_forest" ] },
                "number_of_trees": {"description": "Number of trees in the forest.", "type": "integer", "minimum": 1, "maximum": 1000, "default": 10},
                "max_features": {"description": "Percentage of the features to be used when fitting the forest.", "type": "number", "minimum": 0, "maximum": 1, "default": 0.5},
                "bootstrap": {"description": "Whether to use bagging when fitting the forest.", "type": "boolean", "default": false},
                "min_samples_split": {"description": "Minimum number of samples required to split a node.", "type": "integer", "minimum": 2, "default": 5}
            },
            "required": ["model"]
        },
        "gaussianProcess": {
          "properties": {
              "model": { "enum": [ "gaussian_process" ] }
          },
          "required": ["model"]
      }
    },
    "output_image": {
        "image": {
            "properties": {
                "output_image_pdf_file": {
                  "description": "Output image containing the Pareto and the exploration of PrBO.",
                  "type": "string",
                  "default": "output_image.pdf"
                },
                "optimization_objectives_labels_image_pdf": {
                  "type": "array",
                  "items": { "type": "string" },
                  "description": "The labels of the objectives PrBO will optimize. These are used in the plot script."
                },
                "image_xlog": {
                  "type": "boolean",
                  "default": false,
                  "description": "The x axis of the image will be plot with a log scale if set to true."
                },
                "image_ylog": {
                  "type": "boolean",
                  "default": false,
                  "description": "The y axis of the image will be plot with a log scale if set to true."
                },
                "objective_1_max": {
                  "type": "integer",
                  "description": "This max value if present enables the plot to show axis 1 as a percentage. The value is used to compute the percentage."
                },
                "objective_2_max": {
                  "type": "integer",
                  "description": "This max value if present enables the plot to show axis 2 as a percentage. The value is used to compute the percentage."
                }
            },
            "additionalProperties": false
        }
    },
    "doe_sampling": {
        "doe": {
            "properties": {
                "doe_type": {
                    "enum": [
                          "random sampling",
                          "standard latin hypercube",
                          "k latin hypercube",
                          "grid_search"
                        ],
                    "type": "string",
                    "default": "random sampling"
                },
                "number_of_samples": {
                  "description": "The number of samples during the design of experiment phase.",
                  "type": "integer",
                  "default": 10
                }
            },
            "additionalProperties": false
        }
    },
    "feasible_output": {
        "feasible": {
            "properties": {
                "name": {
                    "description": "Name of the validity bit. Example: 'Valid'.",
                    "type": "string",
                    "default": "Valid"
                },
                "true_value": {
                    "description": "The value that indicates that the sample is valid. Example 1: true. Example 2: 1. Example 3: True.",
                    "default": "true"
                },
                "false_value": {
                    "description": "The value that indicates that the sample is non valid. Example 1: false. Example 2: 0. Example 3: False.",
                    "default": "false"
                },
                "enable_feasible_predictor": {
                    "description": "Enables a classifier (the predictor) that will predict which samples of the space are feasible (i.e. valid) samples. This in turn helps to focus the search on areas that are feasible optimizing the number of samples that are actually run. This field has a negative impact on the speed of PrBO but a positive impact on the final Pareto result.",
                    "type": "boolean",
                    "default": false
                },
                "enable_feasible_predictor_grid_search_on_recall_and_precision": {
                    "description": "Enables a grid search cross-validation on the classifier (the predictor). This is useful for dev purposes to see if the classifier is classifying correctly the samples. An external dataset has to be provided (in the json field feasible_predictor_grid_search_validation_file) to run the cross-validation.",
                    "type": "boolean",
                    "default": false
                },
                "feasible_predictor_grid_search_validation_file": {
                    "description": "Provides the cross-validation dataset enable_feasible_predictor_grid_search_on_recall_and_precision filed of the json.",
                    "type": "string",
                    "default": "apps_classification_test_set/BlackScholes.csv"
                }
            },
            "additionalProperties": false
        }
    }
}
