# Dataset
# Identifies the dataset and where its raw CSV lives.
ds_name: hall2018
data_csv_path: ./raw_data/hall.csv
# Commented-out alternative path: ./raw_data/Hall2018_processed_akhil.csv
# NOTE(review): -1 looks like a sentinel rather than a real column position;
# confirm how the loader interprets it.
index_col: -1
# Spacing between consecutive observations (string value, 5 minutes).
observation_interval: 5min

# Columns
# One entry per dataset column, each with three fields:
#   name       - column name
#   data_type  - one of: categorical, date, real_valued
#   input_type - one of: id, time, target, static_input
# The first three entries are the id, time and target (`gl`) columns; every
# remaining entry is a static_input, and only `diagnosis` and `glucotype`
# among them are categorical.
column_definition:
  - name: id
    data_type: categorical
    input_type: id
  - name: time
    data_type: date
    input_type: time
  - name: gl
    data_type: real_valued
    input_type: target
  # Static inputs start here.
  - name: Age
    data_type: real_valued
    input_type: static_input
  - name: BMI
    data_type: real_valued
    input_type: static_input
  - name: A1C
    data_type: real_valued
    input_type: static_input
  - name: FBG
    data_type: real_valued
    input_type: static_input
  - name: ogtt.2hr
    data_type: real_valued
    input_type: static_input
  - name: insulin
    data_type: real_valued
    input_type: static_input
  - name: hs.CRP
    data_type: real_valued
    input_type: static_input
  - name: Tchol
    data_type: real_valued
    input_type: static_input
  - name: Trg
    data_type: real_valued
    input_type: static_input
  - name: HDL
    data_type: real_valued
    input_type: static_input
  - name: LDL
    data_type: real_valued
    input_type: static_input
  - name: mean_glucose
    data_type: real_valued
    input_type: static_input
  - name: sd_glucose
    data_type: real_valued
    input_type: static_input
  - name: range_glucose
    data_type: real_valued
    input_type: static_input
  - name: min_glucose
    data_type: real_valued
    input_type: static_input
  - name: max_glucose
    data_type: real_valued
    input_type: static_input
  - name: quartile.25_glucose
    data_type: real_valued
    input_type: static_input
  - name: median_glucose
    data_type: real_valued
    input_type: static_input
  - name: quartile.75_glucose
    data_type: real_valued
    input_type: static_input
  - name: mean_slope
    data_type: real_valued
    input_type: static_input
  - name: max_slope
    data_type: real_valued
    input_type: static_input
  - name: number_Random140
    data_type: real_valued
    input_type: static_input
  - name: number_Random200
    data_type: real_valued
    input_type: static_input
  - name: percent_below.80
    data_type: real_valued
    input_type: static_input
  - name: se_glucose_mean
    data_type: real_valued
    input_type: static_input
  - name: numGE
    data_type: real_valued
    input_type: static_input
  - name: mage
    data_type: real_valued
    input_type: static_input
  - name: j_index
    data_type: real_valued
    input_type: static_input
  - name: IQR
    data_type: real_valued
    input_type: static_input
  - name: modd
    data_type: real_valued
    input_type: static_input
  - name: distance_traveled
    data_type: real_valued
    input_type: static_input
  - name: coef_variation
    data_type: real_valued
    input_type: static_input
  - name: number_Random140_normByDays
    data_type: real_valued
    input_type: static_input
  - name: number_Random200_normByDays
    data_type: real_valued
    input_type: static_input
  - name: numGE_normByDays
    data_type: real_valued
    input_type: static_input
  - name: distance_traveled_normByDays
    data_type: real_valued
    input_type: static_input
  # Categorical static input.
  - name: diagnosis
    data_type: categorical
    input_type: static_input
  - name: freq_low
    data_type: real_valued
    input_type: static_input
  - name: freq_moderate
    data_type: real_valued
    input_type: static_input
  - name: freq_severe
    data_type: real_valued
    input_type: static_input
  # Categorical static input.
  - name: glucotype
    data_type: categorical
    input_type: static_input
  - name: Height
    data_type: real_valued
    input_type: static_input
  - name: Weight
    data_type: real_valued
    input_type: static_input
  - name: Insulin_rate_dd
    data_type: real_valued
    input_type: static_input
  - name: perc_cgm_prediabetic_range
    data_type: real_valued
    input_type: static_input
  - name: perc_cgm_diabetic_range
    data_type: real_valued
    input_type: static_input
  - name: SSPG
    data_type: real_valued
    input_type: static_input

# Drop
# Rows and columns to remove from the raw data.
# `rows` lists the entries to drop (NOTE(review): presumably row indices or
# labels of the CSV; confirm against the loader). `columns: null` lists no
# columns.
drop:
  rows:
    - 57309
  columns: null

# NA values abbreviation
# Token that marks a missing value (plain string "NA").
# NOTE(review): presumably applied when the CSV is read; confirm in the loader.
nan_vals: NA

# Interpolation parameters
# gap_threshold is expressed in minutes and min_drop_length in number of
# points (see the inline units).
# NOTE(review): presumably gaps longer than gap_threshold are not interpolated
# and segments shorter than min_drop_length are discarded; confirm against the
# interpolation code.
interpolation_params:
  gap_threshold: 30 # in minutes
  min_drop_length: 192 # number of points

# Splitting parameters
# test_percent_subjects: value 0.1 (NOTE(review): presumably the fraction of
#   subjects held out for testing; confirm against the splitting code).
# length_segment: 192, the same value as interpolation_params.min_drop_length.
# random_state: seed value (presumably for reproducible splits; confirm).
split_params:
  test_percent_subjects: 0.1
  length_segment: 192
  random_state: 0

# Encoding parameters
# `date` lists five calendar components, from year down to minute.
# NOTE(review): presumably these are the parts extracted from `date`-typed
# columns as features; confirm against the encoding code.
encoding_params:
  date:
    - year
    - month
    - day
    - hour
    - minute

# Scaling parameters
# `scaler` holds the literal string 'None', not a YAML null: YAML resolves only
# `null`/`Null`/`NULL`/`~` or an empty value to null. It is quoted so the
# string type is explicit; the parsed value is the same as the unquoted form.
# NOTE(review): presumably the consumer compares against the string 'None' to
# disable scaling; confirm before switching this to a real `null`.
scaling_params:
  scaler: 'None'

# Model params
# Sequence-length settings that sit outside any single model section.
# NOTE(review): presumably counted in observations (input window and forecast
# horizon); confirm against the training code. No per-model `in_len` in this
# file exceeds max_length_input.
max_length_input: 144
length_pred: 12

# Settings for the `linreg` model entry.
# in_len: input length (presumably the number of past observations the model
# consumes; confirm against the training code).
linreg:
  in_len: 84

# Settings for the `linreg_covariates` model entry.
# NOTE(review): presumably the linreg model trained with covariates; confirm.
# in_len: input length (presumably number of past observations; confirm).
linreg_covariates:
  in_len: 60

# Settings for the `tft` model entry.
# NOTE(review): the high-precision floats (dropout, lr, max_grad_norm) look
# like output of an automated hyperparameter search; confirm before editing
# them by hand.
# in_len: input length (presumably number of past observations; confirm).
tft:
  in_len: 96
  max_samples_per_ts: 50
  hidden_size: 160
  num_attention_heads: 2
  dropout: 0.12663651999137013
  lr: 0.0003909069464830342
  batch_size: 48
  max_grad_norm: 0.42691316697261855

# Settings for the `tft_covariates` model entry.
# NOTE(review): presumably the tft model trained with covariates; confirm.
# NOTE(review): hidden_size (64) is not a multiple of num_attention_heads (3);
# confirm the model implementation accepts this combination.
tft_covariates:
  in_len: 132
  max_samples_per_ts: 50
  hidden_size: 64
  num_attention_heads: 3
  dropout: 0.1514203549391074
  lr: 0.002278316839625157
  batch_size: 32
  max_grad_norm: 0.6617473571712074

# Settings for the `xgboost` model entry.
# in_len: input length (presumably number of past observations; confirm).
# NOTE(review): `lambda_` carries a trailing underscore, presumably because
# `lambda` is a reserved word in the consuming code; confirm how it is mapped
# onto the booster parameter.
xgboost:
  in_len: 60
  lr: 0.515
  subsample: 0.9
  min_child_weight: 3.0
  colsample_bytree: 0.9
  max_depth: 6
  gamma: 2.0
  alpha: 0.099
  lambda_: 0.134
  n_estimators: 256

# Settings for the `xgboost_covariates` model entry.
# NOTE(review): presumably the xgboost model trained with covariates; confirm.
# in_len: input length (presumably number of past observations; confirm).
# NOTE(review): lr is stored as 0.17200000000000001 (float representation of
# 0.172); it is left as written.
xgboost_covariates:
  in_len: 120
  lr: 0.17200000000000001
  subsample: 0.7
  min_child_weight: 2.0
  colsample_bytree: 0.9
  max_depth: 6
  gamma: 1.0
  alpha: 0.167
  lambda_: 0.017
  n_estimators: 320

# Settings for the `transformer` model entry.
# in_len: input length; it equals max_length_input (144).
# NOTE(review): the meaning of `lr_epochs` is not evident from this file
# (possibly a learning-rate schedule interval in epochs); confirm.
transformer:
  in_len: 144
  max_samples_per_ts: 200
  d_model: 64
  n_heads: 4
  num_encoder_layers: 1
  num_decoder_layers: 1
  dim_feedforward: 96
  dropout: 0.014744750937083516
  lr: 0.00035186058101597097
  batch_size: 48
  lr_epochs: 14
  max_grad_norm: 0.43187285340924153

# Settings for the `transformer_covariates` model entry.
# NOTE(review): presumably the transformer model trained with covariates;
# confirm.
# NOTE(review): the meaning of `lr_epochs` is not evident from this file
# (possibly a learning-rate schedule interval in epochs); confirm.
transformer_covariates:
  in_len: 132
  max_samples_per_ts: 150
  d_model: 64
  n_heads: 4
  num_encoder_layers: 1
  num_decoder_layers: 3
  dim_feedforward: 192
  dropout: 0.1260638882066075
  lr: 0.0006944648317764303
  batch_size: 48
  lr_epochs: 4
  max_grad_norm: 0.22914229299130273

# Settings for the `nhits` model entry.
# in_len: input length; it equals max_length_input (144).
# NOTE(review): `kernel_sizes` is plural but holds a single integer; confirm
# how the consumer expands it.
nhits:
  in_len: 144
  max_samples_per_ts: 100
  kernel_sizes: 4
  dropout: 0.046869296882493555
  lr: 0.00011524084800602483
  batch_size: 48
  lr_epochs: 2

# Settings for the `nhits_covariates` model entry.
# NOTE(review): presumably the nhits model trained with covariates; confirm.
# NOTE(review): `kernel_sizes` is plural but holds a single integer; confirm
# how the consumer expands it.
nhits_covariates:
  in_len: 120
  max_samples_per_ts: 50
  kernel_sizes: 5
  dropout: 0.18679300209273494
  lr: 0.0004763622305085654
  batch_size: 48
  lr_epochs: 4

# Settings for the `gluformer` model entry.
# Only architecture sizes are listed; unlike the tft/transformer/nhits
# sections there is no lr, batch_size or dropout key here.
# NOTE(review): presumably those fall back to defaults in the training code;
# confirm.
gluformer:
  in_len: 96
  max_samples_per_ts: 200
  d_model: 384
  n_heads: 4
  d_fcn: 1024
  num_enc_layers: 1
  num_dec_layers: 1

# Settings for the `latentode` model entry.
# Only architecture sizes are listed; there is no lr, batch_size or dropout
# key in this section.
# NOTE(review): presumably `rec_*` configure the recognition (encoder) network
# and `gen_layers` the generative network; confirm against the model code.
latentode:
  in_len: 48
  max_samples_per_ts: 100
  latents: 20
  rec_dims: 40
  rec_layers: 3
  gen_layers: 3
  units: 100
  gru_units: 100