from utils import ShadowModelDeployer, DHMRMeasurer
from grpos import GRPOS_Trainer
from transformers import AutoModelForCausalLM, AutoTokenizer

# ---------------------------------------------------------------------------
# GRPOS training entry script: build the shadow-model deployer and the DHMR
# measurer, load a policy model and a reference model, then hand everything
# to GRPOS_Trainer and start training.
# ---------------------------------------------------------------------------

# Run settings.
shadow_root = "./shadow_models"  # forwarded to ShadowModelDeployer
device = "cuda"
# Model id or local checkpoint path used for BOTH the policy and the reference
# model. The script previously passed the literal "" to from_pretrained()
# twice; an empty string is not a usable model id or path, so this must be
# filled in before the script can run.
model_name = ""

if not model_name:
    # Fail up front with an actionable message instead of erroring inside
    # from_pretrained() after the other components were already constructed.
    raise ValueError(
        "model_name is empty: set it to a Hugging Face model id or a local "
        "checkpoint directory before running this script."
    )

shadow_deployer = ShadowModelDeployer(shadow_root, device)

dhmr_measurer = DHMRMeasurer()

# Policy and reference models start from the same checkpoint.
policy_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
ref_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
ref_model.eval()
# eval() only switches layer modes (dropout, norm statistics); it does not
# disable autograd. Turn gradients off explicitly so the reference model is
# never updated and no gradient buffers are allocated for it.
ref_model.requires_grad_(False)

# NOTE(review): `config` is not defined or imported anywhere in the visible
# file, so this line raises NameError as written. Define or import it above
# before running — its expected type is set by GRPOS_Trainer and cannot be
# determined from here.
trainer = GRPOS_Trainer(policy_model, ref_model, shadow_deployer, dhmr_measurer, config)

# NOTE(review): `GRPODataset` is likewise not imported in the visible file;
# add the import from whichever module provides it.
train_dataset = GRPODataset()

trainer.train(train_dataset)