# Encoder selection: `network` names the encoder, and `network_kwargs` holds
# the keyword arguments listed for it. The consumer of this file is not
# visible here, so the per-key notes below are assumptions to confirm.
network: ResnetEncoder
network_kwargs:
  # NOTE(review): presumably toggles loading of pretrained weights — confirm.
  pretrained: false
  # NOTE(review): presumably toggles freezing of encoder parameters — confirm.
  freeze: false
  # NOTE(review): presumably a count of layers to drop from the backbone;
  # verify the exact meaning against the encoder implementation.
  remove_layer_num: 4
  # NOTE(review): presumably disables strided downsampling when true — confirm.
  no_stride: false
  # Kept quoted so the value is always read as a string.
  # NOTE(review): 'film' presumably selects FiLM-style language conditioning;
  # the set of accepted values is not visible here.
  language_fusion: 'film'
