import torch
import math
# from data_utils import generate_Cifar
from train_utils import train, noisy_train, test
from init_utils import galore_parse_args, task_init, logger_init
# from LPSGD import LPSGD
# from LMSSGD import LMSSGD
from GaLoreLPSGD import GaLoreLPSGD
from galore_utils import setup_Galore
from fastDP import PrivacyEngine
#PrivacyEngine_Distributed_extending,PrivacyEngine_Distributed_Stage_2_and_3
# from opacus.accountants.utils import get_noise_multiplier
# from opacus.validators import ModuleValidator
import argparse
import warnings
# import timm
# import os
# from datetime import datetime
# import wandb


if __name__ == '__main__':
    # Script entry point: parse CLI arguments, build the task (data, model,
    # device), construct the GaLoreLPSGD optimizer from the coefficient file,
    # optionally attach the fastDP privacy engine, then run the
    # train/evaluate loop for `args.epoch` epochs.
    warnings.filterwarnings("ignore")

    arg_parser = galore_parse_args(
        argparse.ArgumentParser(description='GaLore LP DPSGD')
    )
    args = arg_parser.parse_args()

    (train_dl, test_dl, model, device,
     sample_size, acc_step, noise) = task_init(args)
    log_file = logger_init(
        args,
        noise,
        sample_size // args.mnbs,
        type=args.log_type,
    )

    # The coefficient file holds two comma-separated rows of floats:
    # row 0 is passed to the optimizer as `a`, row 1 as `b`.
    # Blank fields (e.g. from a trailing comma or the newline) are skipped.
    with open(args.coef_file, "r") as coef_fh:
        coef_lines = coef_fh.readlines()
    coef_a = [float(tok) for tok in coef_lines[0].split(",") if tok.strip()]
    coef_b = [float(tok) for tok in coef_lines[1].split(",") if tok.strip()]

    galore_parameters = setup_Galore(model, args)

    # Both supported algorithms use GaLoreLPSGD; the 'adam' variant only
    # differs by the extra `c=0.999` keyword.
    if args.algo not in ("sgd", "adam"):
        print(args.algo)
        raise RuntimeError("Unknown Algorithm!")
    optimizer_kwargs = {"lr": args.lr, "a": coef_a, "b": coef_b}
    if args.algo == 'adam':
        optimizer_kwargs["c"] = 0.999
    optimizer = GaLoreLPSGD(galore_parameters, **optimizer_kwargs)

    # Cosine schedule with warmup: the first cycle spans half of all
    # optimizer steps, the warmup spans one twentieth of them.
    from train_utils import CosineAnnealingWarmupRestarts
    steps_per_epoch = sample_size // args.bs
    first_cycle_steps = (steps_per_epoch * args.epoch) // 2
    warmup_steps = (sample_size * args.epoch) // (args.bs * 20)
    lrscheduler = CosineAnnealingWarmupRestarts(
        optimizer,
        max_lr=args.lr,
        first_cycle_steps=first_cycle_steps,
        warmup_steps=warmup_steps,
    )

    # Alternative schedule kept for reference:
    # from torch.optim import lr_scheduler
    # lrscheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=args.lr, steps_per_epoch=sample_size//args.bs,
    #                                     epochs=args.epoch, pct_start=0.1, cycle_momentum=False, div_factor=20)

    criterion = torch.nn.CrossEntropyLoss(reduction='mean')

    # With clipping enabled, the fastDP engine is attached to the optimizer
    # and receives the noise multiplier returned by task_init.
    if args.clipping:
        privacy_engine = PrivacyEngine(
            model,
            noise_multiplier=noise,
            numerical_stability_constant=1e-3,
            grad_accum_steps=acc_step,
            sample_size=sample_size,
            batch_size=args.bs,
            epochs=args.epoch,
            per_sample_clip=args.clipping,
            torch_seed_is_fixed=False,
            clipping_fn=args.clipping_fn,
            clipping_style=args.clipping_style,
            max_grad_norm=args.clipping_norm,
        )
        privacy_engine.attach(optimizer)

    # Without clipping but with positive noise, use the noisy_train loop
    # instead of the plain one.
    use_manual_noise = not args.clipping and noise > 0

    for epoch_idx in range(args.epoch):
        if not use_manual_noise:
            train(
                model,
                train_dl,
                optimizer,
                criterion,
                log_file,
                device=device,
                epoch=epoch_idx,
                log_frequency=args.log_freq,
                acc_step=acc_step,
                lr_scheduler=lrscheduler,
            )
        else:
            # NOTE(review): the noise passed here comes from args.noise scaled
            # by the batch size, not from the `noise` value returned by
            # task_init — confirm this is intended.
            noisy_train(
                model,
                train_dl,
                optimizer,
                criterion,
                log_file,
                device=device,
                epoch=epoch_idx,
                noise=args.noise / args.bs,
                log_frequency=args.log_freq,
                acc_step=acc_step,
                lr_scheduler=lrscheduler,
            )
        test(model, test_dl, criterion, log_file, device=device, epoch=epoch_idx)
        
        
