{
    "pretrained_model": "configs/stable_diffusion_v1_4.pth",
    "model": {
        "unet_config": {
            "image_size": 64,
            "in_channels": 4,
            "out_channels": 4,
            "model_channels": 320,
            "attention_resolutions": [4, 2, 1],
            "num_res_blocks": 2,
            "channel_mult": [1, 2, 4, 4],
            "num_heads": 8,
            "use_spatial_transformer": true,
            "transformer_depth": 1,
            "context_dim": 768,
            "legacy": false,
            "use_fp16": true
        },
        "first_stage_config": {
            "use_fp16": true,
            "embed_dim": 4,
            "ddconfig": {
                "double_z": true,
                "z_channels": 4,
                "resolution": 512,
                "in_channels": 3,
                "out_ch": 3,
                "ch": 128,
                "ch_mult": [1, 2, 4, 4],
                "num_res_blocks": 2,
                "attn_resolutions": [],
                "dropout": 0.0
            },
            "lossconfig": {
                "type": "torch.nn.Identity",
                "params": {}
            }
        },
        "cond_stage_config": {},
        "num_timesteps_cond": 1,
        "image_size": 64,
        "channels": 4,
        "cond_stage_key": "txt",
        "timesteps": 1000,
        "cond_stage_trainable": false,
        "log_every_t": 200,
        "conditioning_key": "crossattn",
        "first_stage_key": "image",
        "use_fp16": true,
        "scale_factor": 0.18215
    },
    "ddim": {
        "S": 50,
        "unconditional_guidance_scale": 7.5,
        "eta": 0.0,
        "shape": [4, 64, 64]
    }
}