# Meta-RL for Quantum Control - Experiment Configuration

# Random seed, fixed for reproducibility
seed: 42

# Quantum System
# NOTE: `psd_model` is deliberately not set in this section. It used to be set
# to 'one_over_f' here and again in the "New parameters" section further down.
# Duplicate keys are invalid YAML, and last-wins parsers silently discarded
# this earlier value, so the later definition is kept as the single source.
# Values previously listed for it: 'one_over_f', 'lorentzian', 'double_exp'.
# Total evolution time (arbitrary units)
horizon: 1
# Options listed: 'hadamard', 'pauli_x', 'pauli_y'
target_gate: 'pauli_x'
noise_type: 'frequency'

# Task Distribution
# Other ranges noted earlier (the first tuple presumably belongs to alpha;
# confirm): "alpha": (0.1, 4.0), "A": (100, 1e5), "omega_c": (0, 800)
task_dist_type: 'uniform'
# Spectral exponent range
alpha_range: [0.1, 2]
# Range for A. NOTE(review): an earlier comment cited [0.01, 0.1] as the
# increased range "for better task diversity and meta-learning"; the value in
# effect is [0.01, 10]. Confirm which one is intended.
A_range: [0.01, 10]
# Cutoff frequency range
omega_c_range: [1, 300]
num_qubits: 1

# Policy Network
# NOTE(review): an earlier comment said this was changed from 3 to 4 to match
# include_model=True (alpha, A, omega_c, model_type), but the value set here
# is 3. Confirm it matches the feature vector the policy actually receives.
task_feature_dim: 3
hidden_dim: 128
n_hidden_layers: 2
n_segments: 60
n_controls: 2
# NOTE(review): an earlier comment described this as "increased from 1.0" to
# allow stronger control signals for better gate fidelity; it is still 1.0.
output_scale: 1.0
activation: 'tanh'

# MAML Hyperparameters
# NOTE(review): an earlier comment said "further reduced from 0.005" for
# stable gradients with the differentiable simulator, yet 0.01 is larger than
# 0.005. Confirm the intended value.
inner_lr: 0.01
# Raised from 3 so the inner loop has enough steps to adapt
inner_steps: 5
meta_lr: 0.001
# FOMAML (First-Order MAML), chosen to avoid gradient issues with the complex
# quantum simulation
first_order: true

# Training
n_iterations: 2000
# Raised from 4 for more stable meta-gradient estimates.
# NOTE(review): the earlier comment also said "should be 16"; confirm 32.
tasks_per_batch: 32
n_support: 10
n_query: 10
log_interval: 10
val_interval: 10
val_tasks: 20

# New parameters
drift_strength: 0.1
sequence: 'ramsey'
model_types: ['one_over_f']
# Was `None`, which YAML reads as the string 'None' rather than a null value;
# `null` is the YAML spelling of "no value".
# NOTE(review): confirm the consumer expects null here.
psd_model: null
# One entry per element of model_types (presumably sampling probabilities;
# confirm against the consumer)
model_probs: [1.0]
dt_training: 0.01
# Canonical lowercase boolean (was `True`)
use_rk4_training: true
omega0: 1
Gamma_h: 100
 

# Checkpointing
# Relative path; the base directory depends on the consuming code
save_dir: 'checkpoints'
