Using cuda:0 device
Files already downloaded and verified
Files already downloaded and verified
low rank layers [adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
), adalora.SVDLinear(
  (base_layer): Linear(in_features=768, out_features=768, bias=True)
  (lora_dropout): ModuleDict(
    (dlrt): Identity()
  )
  (lora_A): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_B): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 768x24 (cuda:0)])
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_E): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 24x24 (cuda:0)])
  (ranknum): ParameterDict(  (dlrt): Parameter containing: [torch.cuda.FloatTensor of size 1 (cuda:0)])
)]

====================================================================================================
training with parameters: Namespace(batch_size=128, epochs=30, lr=0.0005, start_cr=0.0, momentum=0.9, wd=0.0, tau=0.1, init_r=32, net_name='vit', opt='adam', dataset_name='cifar10', save_weights=False, device='cuda:0')
Epoch [1/30], Step [300/391], Loss: 0.0659
Accuracy of the network on the test images: 97.86%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [2/30], Step [300/391], Loss: 0.0760
Accuracy of the network on the test images: 98.25%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [3/30], Step [300/391], Loss: 0.1216
Accuracy of the network on the test images: 98.39%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [4/30], Step [300/391], Loss: 0.0427
Accuracy of the network on the test images: 98.41%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [5/30], Step [300/391], Loss: 0.0402
Accuracy of the network on the test images: 98.43%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [6/30], Step [300/391], Loss: 0.0096
Accuracy of the network on the test images: 98.45%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [7/30], Step [300/391], Loss: 0.0133
Accuracy of the network on the test images: 98.35%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [8/30], Step [300/391], Loss: 0.0183
Accuracy of the network on the test images: 98.38%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [9/30], Step [300/391], Loss: 0.0096
Accuracy of the network on the test images: 98.53%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [10/30], Step [300/391], Loss: 0.0386
Accuracy of the network on the test images: 98.45%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [11/30], Step [300/391], Loss: 0.0173
Accuracy of the network on the test images: 98.48%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [12/30], Step [300/391], Loss: 0.0066
Accuracy of the network on the test images: 98.46%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [13/30], Step [300/391], Loss: 0.0464
Accuracy of the network on the test images: 98.44%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [14/30], Step [300/391], Loss: 0.0189
Accuracy of the network on the test images: 98.47%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [15/30], Step [300/391], Loss: 0.0355
Accuracy of the network on the test images: 98.51%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [16/30], Step [300/391], Loss: 0.0204
Accuracy of the network on the test images: 98.44%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [17/30], Step [300/391], Loss: 0.0190
Accuracy of the network on the test images: 98.34%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [18/30], Step [300/391], Loss: 0.0176
Accuracy of the network on the test images: 98.41%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [19/30], Step [300/391], Loss: 0.0027
Accuracy of the network on the test images: 98.47%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [20/30], Step [300/391], Loss: 0.0027
Accuracy of the network on the test images: 98.71%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [21/30], Step [300/391], Loss: 0.0068
Accuracy of the network on the test images: 98.48%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [22/30], Step [300/391], Loss: 0.0134
Accuracy of the network on the test images: 98.39%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [23/30], Step [300/391], Loss: 0.0013
Accuracy of the network on the test images: 98.29%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [24/30], Step [300/391], Loss: 0.0039
Accuracy of the network on the test images: 98.35%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [25/30], Step [300/391], Loss: 0.0039
Accuracy of the network on the test images: 98.45%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [26/30], Step [300/391], Loss: 0.0297
Accuracy of the network on the test images: 98.39%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [27/30], Step [300/391], Loss: 0.0185
Accuracy of the network on the test images: 98.46%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [28/30], Step [300/391], Loss: 0.0092
Accuracy of the network on the test images: 98.39%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [29/30], Step [300/391], Loss: 0.0063
Accuracy of the network on the test images: 98.53%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
Epoch [30/30], Step [300/391], Loss: 0.0046
Accuracy of the network on the test images: 98.54%
ranks: [5, 2, 2, 2, 3, 4, 3, 2, 5, 5, 4, 4, 4, 2, 5, 5, 5, 4, 7, 5, 2, 5, 8, 2, 4, 2, 8, 6, 3, 4, 3, 2, 4, 7, 8, 3]
