{"type": "Algorithm", "checkpoint_version": "1.1", "format": "cloudpickle", "state_file": "data/logs/tomato/rhard/ORPO/true/model_512-512-512-512/state-action/sqrt_chi2-0.0/seed_0/2025-07-28_05-22-32/checkpoint_000300_with_temp2.0/algorithm_state.pkl", "policy_ids": ["current"], "ray_version": "2.7.1", "ray_commit": "9f07c12615958c3af3760604f6dcacc4b3758a47"}