[
  {
    "type": "executor",
    "question": "Is the 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images, serving as a strong baseline across classical benchmarks.' tool good at evaluating the 'Motion Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/00445.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'darken' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: blurs, color distortions, compression, noise, brightness changes, spatial distortions, and sharpness/contrast variations.",
      "LPIPS: This is a Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the top 3 for the following distortions: brightness changes, jitter, sharpness, contrast change, color shift.",
      "FSIM: This is a Full-reference IQA model. A Feature Similarity Index that assesses image quality by comparing low-level features, such as phase congruency and gradient magnitude, reflecting the human visual system's perception. This model is among the top 3 for the following distortions: JPEG compression, color diffusion, color quantization, JPEG2000 compression.",
      "PieAPP: This is a Full-reference IQA model. Perceptual Image-Error Assessment through Pairwise Preference is a learning-based metric that quantifies perceptual differences between images by modeling human pairwise preference judgments."
    ],
    "correct_answer": "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: blurs, color distortions, compression, noise, brightness changes, spatial distortions, and sharpness/contrast variations.",
    "image_path": [
      "waterloo_select/ref_md/00636.bmp",
      "waterloo_select/md/00636_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'pixelate' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/01335_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.' tool appropriate for evaluating sharpness in the image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/10187102065.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.' tool suitable for assessing aesthetic quality of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/00069.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Does the 'WaDIQaM_NR: Applies a deep neural network with weighted average pooling to perform NR-IQA by aggregating spatially varying local quality scores.' tool effectively evaluate 'Color Distortions' in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/I80_12_01.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.' highly suitable for evaluating color distortions in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/08d853e1ae7db96846f3bcaeb6587d.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool would be most appropriate for evaluating geometric distortions and perceptual degradation in the image?",
    "candidates": [
      "BRISQUE: This NR-IQA model captures spatial statistical deviations in natural images and serves as a strong baseline across classical benchmarks.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature."
    ],
    "correct_answer": "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.",
    "image_path": [
      "waterloo_select/micbenc_select/AttnGAN_normal_254.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'noise' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/00231_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'contrast_weaken' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/00027_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'blur' distortion of this image?",
    "candidates": [
      "DISTS: A structural-texture hybrid similarity model, excels in handling Gaussian blur.",
      "GMSD: Measures image quality by capturing local gradient magnitude deviations.",
      "WaDIQaM_FR: A Siamese-network-based FR-IQA framework for quality estimation.",
      "PieAPP: A pairwise preference-based FR-IQA that aligns with subjective quality judgments."
    ],
    "correct_answer": "DISTS: A structural-texture hybrid similarity model, excels in handling Gaussian blur.",
    "image_path": [
      "waterloo_select/ref_md/00519.bmp",
      "waterloo_select/md/00519_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/00178_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most appropriate for assessing the motion blur present in this image?",
    "candidates": [
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images.",
      "UNIQIE: An uncertainty-aware NR-IQA model effective for JPEG compression, lens blur, and spatial distortions.",
      "TReS: A transformer-based blind IQA model focusing on global and local perceptual features."
    ],
    "correct_answer": "UNIQIE: An uncertainty-aware NR-IQA model effective for JPEG compression, lens blur, and spatial distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/63.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool would effectively assess the perceived sharpness in the image?",
    "candidates": [
      "NIQE: Opinion-unaware metric based on statistical regularities.",
      "MUSIQ: Multi-scale transformer processing images at various resolutions.",
      "NIMA: Predicts aesthetic and technical quality using human ratings.",
      "BRISQUE: Captures spatial statistical deviations in natural images."
    ],
    "correct_answer": "MUSIQ: Multi-scale transformer processing images at various resolutions.",
    "image_path": [
      "waterloo_select/micbenc_select/JPEGImages__2008_003378.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'pixelate' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/01886_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is ideal for assessing color distortions evident in an image?",
    "candidates": [
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature."
    ],
    "correct_answer": "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
    "image_path": [
      "waterloo_select/micbenc_select/192.bmp"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is suitable for evaluating motion blur in an image?",
    "candidates": [
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images.",
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "ARNIQA: A self-supervised NR-IQA model that learns a distortion manifold for quality representation."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/micbenc_select/281.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'contrast_strengthen' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
      "LPIPS: A deep feature-based FR-IQA metric that computes perceptual similarity aligned with human visual judgments. It excels in handling distortions such as brightness changes, jitter, contrast variations, and color shifts.",
      "FSIM: A widely used FR-IQA model based on low-level feature similarity, such as phase congruency and gradient magnitude. It is particularly effective for JPEG compression, color quantization, color diffusion, and JPEG2000 compression.",
      "PieAPP: A pairwise preference-based FR-IQA model that learns perceptual differences directly from human annotations. Designed to align with subjective quality judgments."
    ],
    "correct_answer": "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
    "image_path": [
      "waterloo_select/ref_md/03036.bmp",
      "waterloo_select/md/03036_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.' tool effective for handling motion blur observed in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/01026.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.' tool relevant for evaluating lens blur in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/bach-537813_13_04.bmp"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'brighten' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/00730_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.' tool relevant for assessing spatial distortions in the background?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/00469.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images.' tool suitable for assessing spatial distortions in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/10358252894.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.' tool good at evaluating blurring distortions in the image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/10eba82e82686abe792dd84b1c40c37a.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'contrast_strengthen' distortion of this image?",
    "candidates": [
      "FSIM: A widely used FR-IQA model based on low-level feature similarity, effective for JPEG compression, color quantization, color diffusion, and JPEG2000 compression.",
      "LPIPS: A deep feature-based FR-IQA metric that computes perceptual similarity aligned with human visual judgments, excels in handling brightness changes, jitter, contrast variations, and color shifts.",
      "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images. Highly effective for JPEG and JPEG2000 compression.",
      "GMSD: Measures image quality by capturing local gradient magnitude deviations, effective in detecting visually important structural distortions."
    ],
    "correct_answer": "LPIPS: A deep feature-based FR-IQA metric that computes perceptual similarity aligned with human visual judgments, excels in handling brightness changes, jitter, contrast variations, and color shifts.",
    "image_path": [
      "waterloo_select/ref_md/03896.bmp",
      "waterloo_select/md/03896_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'saturate_strengthen' distortion of this image?",
    "candidates": [
      "LPIPS: This is Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the **top 3** for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation.",
      "AHIQ: An attention-guided FR-IQA model tailored to assess distortions commonly introduced by generative models (e.g., GANs). It integrates hybrid mechanisms to improve robustness under complex generation artifacts.",
      "PieAPP: This is Full-reference IQA model. Perceptual Image-Error Assessment through Pairwise Preference is a learning-based metric that quantifies perceptual differences between images by modeling human pairwise preference judgments.",
      "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images. It ranks highly for JPEG and JPEG2000 compression."
    ],
    "correct_answer": "LPIPS: This is Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the **top 3** for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation.",
    "image_path": [
      "waterloo_select/ref_md/00752.bmp",
      "waterloo_select/md/00752_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'contrast_weaken' distortion of this image?",
    "candidates": [
      "LPIPS: This is Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the **top 3** for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation.",
      "WaDIQaM_FR: A Siamese-network-based FR-IQA framework that applies weighted average pooling to fuse predictions from reference and distorted images for quality estimation.",
      "PieAPP: This is a Full-reference IQA model. Perceptual Image-Error Assessment through Pairwise Preference is a learning-based metric that quantifies perceptual differences between images by modeling human pairwise preference judgments.",
      "FSIM: FSIM: This is Full-reference IQA model. A Feature Similarity Index that assesses image quality by comparing low-level features, such as phase congruency and gradient magnitude, reflecting the human visual system's perception."
    ],
    "correct_answer": "LPIPS: This is Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the **top 3** for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation.",
    "image_path": [
      "waterloo_select/ref_md/01284.bmp",
      "waterloo_select/md/01284_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'contrast_weaken' distortion of this image?",
    "candidates": [
      "LPIPS: This is Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the **top 3** for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation.",
      "WaDIQaM_FR: A Siamese-network-based FR-IQA framework that applies weighted average pooling to fuse predictions from reference and distorted images for quality estimation.",
      "PieAPP: This is a Full-reference IQA model. Perceptual Image-Error Assessment through Pairwise Preference is a learning-based metric that quantifies perceptual differences between images by modeling human pairwise preference judgments.",
      "FSIM: FSIM: This is Full-reference IQA model. A Feature Similarity Index that assesses image quality by comparing low-level features, such as phase congruency and gradient magnitude, reflecting the human visual system's perception."
    ],
    "correct_answer": "LPIPS: This is Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the **top 3** for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation.",
    "image_path": [
      "waterloo_select/ref_md/01284.bmp",
      "waterloo_select/md/01284_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool would be most applicable for evaluating motion blur in this image?",
    "candidates": [
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/micbenc_select/00332.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.' suitable for evaluating spatial statistical deviations caused by the distortions present in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/000000448537_elastic_transform_4.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'pixelate' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/01628.bmp",
      "waterloo_select/md/01628_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'saturate_weaken' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/00779_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/14353e3d2ed48f316fa3705abe41b64.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'quantization' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: This is Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is *best* at evaluating:- Blurs (lens blur, motion blur)- Color distortions (color diffusion, color shift, color quantization, color saturation)- Compression (JPEG2000 and JPEG)- Noise (white noise, color component noise, impulse noise, multiplicative noise, denoise artifact)- Brightness change (brighten, darken, mean shift)- Spatial distortions (jitter, non-eccentricity patch, pixelate, otsu quantization, color block)- Sharpness and contrast and brightness changes. Performs best on common distortions in classical image quality benchmarks.",
      "FSIM: This is Full-reference IQA model. A Feature Similarity Index that assesses image quality by comparing low-level features, such as phase congruency and gradient magnitude, reflecting the human visual system's perception. . This model is among the **top 3** for the following distortions: JPEG compression, Color diffusion, Color quantization, JPEG2000 compression.",
      "DISTS: A structural-texture hybrid similarity model that balances sensitivity to structural degradations and tolerance to textural variations. It performs particularly well for Gaussian blur.",
      "WaDIQaM_FR: A Siamese-network-based FR-IQA framework that applies weighted average pooling to fuse predictions from reference and distorted images for quality estimation."
    ],
    "correct_answer": "TOPIQ_FR: This is Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is *best* at evaluating:- Blurs (lens blur, motion blur)- Color distortions (color diffusion, color shift, color quantization, color saturation)- Compression (JPEG2000 and JPEG)- Noise (white noise, color component noise, impulse noise, multiplicative noise, denoise artifact)- Brightness change (brighten, darken, mean shift)- Spatial distortions (jitter, non-eccentricity patch, pixelate, otsu quantization, color block)- Sharpness and contrast and brightness changes. Performs best on common distortions in classical image quality benchmarks.",
    "image_path": [
      "waterloo_select/ref_md/00752.bmp",
      "waterloo_select/md/00752_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'oversharpen' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/01225_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited to evaluate the presence of noise in this image?",
    "candidates": [
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction.",
      "TReS: A transformer-based blind IQA model that incorporates relative ranking and consistency learning to capture both global and local perceptual features.",
      "CLIPIQA: Leveraging CLIP embeddings to measure semantic fidelity and perceptual degradation. Suitable for content-aware quality estimation, but lacks top-tier performance on benchmark distortions."
    ],
    "correct_answer": "TReS: A transformer-based blind IQA model that incorporates relative ranking and consistency learning to capture both global and local perceptual features.",
    "image_path": [
      "waterloo_select/micbenc_select/04928.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Motion blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/00142.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'contrast_strengthen' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions. It performs best on classical distortions such as contrast variations.",
      "FSIM: A FR-IQA model based on low-level feature similarity, effective for JPEG compression and color quantization.",
      "DISTS: A FR-IQA model that balances sensitivity to structural degradations.",
      "PieAPP: A FR-IQA model that learns perceptual differences directly from human annotations."
    ],
    "correct_answer": "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions. It performs best on classical distortions such as contrast variations.",
    "image_path": [
      "waterloo_select/ref_md/00884.bmp",
      "waterloo_select/md/00884_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/02356.bmp",
      "waterloo_select/md/02356_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'brighten' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/04439_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/06635.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.' tool good at evaluating the 'Color saturation distortion' of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/82.bmp"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'NIQE: An opinion-unaware metric based on statistical regularities in natural images.' tool suitable for assessing the blur in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/15f44ad78c1605d747d13fb08355f9.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.' tool optimal for evaluating motion artifacts in the image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/14528d7e5372986a3152889c26032c.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'quantization' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: blurs, color distortions, compression, noise, brightness change, spatial distortions, sharpness, and contrast.",
      "LPIPS: This is a Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the top 3 for the following distortions: brighten, darken, mean shift, jitter, high sharpen, contrast change, color shift, color saturation.",
      "FSIM: This is a Full-reference IQA model. A Feature Similarity Index that assesses image quality by comparing low-level features, such as phase congruency and gradient magnitude, reflecting the human visual system's perception. This model is among the top 3 for the following distortions: JPEG compression, color diffusion, color quantization, JPEG2000 compression.",
      "PieAPP: This is a Full-reference IQA model. Perceptual Image-Error Assessment through Pairwise Preference is a learning-based metric that quantifies perceptual differences between images by modeling human pairwise preference judgments."
    ],
    "correct_answer": "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: blurs, color distortions, compression, noise, brightness change, spatial distortions, sharpness, and contrast.",
    "image_path": [
      "waterloo_select/ref_md/01281.bmp",
      "waterloo_select/md/01281_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/13cb4c358293f5f9173610d920ed96a2.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/15f44ad78c1605d747d13fb08355f9.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'oversharpen' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/03525_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool is most suitable for evaluating potential perceptual degradation in blurred images?",
    "candidates": [
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.",
      "ARNIQA: A self-supervised NR-IQA model that learns a distortion manifold for quality representation, facilitating robust prediction without reference supervision.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks."
    ],
    "correct_answer": "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
    "image_path": [
      "waterloo_select/micbenc_select/3338894282.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'quantization' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
      "LPIPS: A deep feature-based FR-IQA metric that computes perceptual similarity aligned with human visual judgments. It excels in handling distortions such as brightness changes, jitter, contrast variations, and color shifts.",
      "FSIM: A widely used FR-IQA model based on low-level feature similarity, such as phase congruency and gradient magnitude. It is particularly effective for JPEG compression, color quantization, color diffusion, and JPEG2000 compression.",
      "PieAPP: A pairwise preference-based FR-IQA model that learns perceptual differences directly from human annotations. Designed to align with subjective quality judgments."
    ],
    "correct_answer": "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
    "image_path": [
      "waterloo_select/ref_md/01615.bmp",
      "waterloo_select/md/01615_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'pixelate' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: This is Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating pixelate distortions among others.",
      "VSI: Integrates visual saliency into FR-IQA by emphasizing regions likely to draw human attention. Offers enhanced perceptual alignment for saliency-sensitive distortions.",
      "MS-SSIM: An extension of SSIM that computes multi-scale structural similarity. Ideal for comprehensive image structure assessment across resolutions.",
      "GMSD: Measures image quality by capturing local gradient magnitude deviations. Effective in detecting visually important structural distortions."
    ],
    "correct_answer": "TOPIQ_FR: This is Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating pixelate distortions among others.",
    "image_path": [
      "waterloo_select/ref_md/02631.bmp",
      "waterloo_select/md/02631_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool would be least suitable for evaluating semantic content quality in this image with motion blur?",
    "candidates": [
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images.",
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding and perceptual rule learning.",
      "DBCNN: A bilinear CNN architecture for extracting and fusing local-global representations.",
      "CLIPIQA: An NR-IQA method leveraging CLIP embeddings for semantic fidelity."
    ],
    "correct_answer": "CLIPIQA: An NR-IQA method leveraging CLIP embeddings for semantic fidelity.",
    "image_path": [
      "waterloo_select/micbenc_select/63.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'pixelate' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/01217_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool would be least effective at assessing color brightness variations in the butterfly's image?",
    "candidates": [
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation."
    ],
    "correct_answer": "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.",
    "image_path": [
      "waterloo_select/micbenc_select/10397166423.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is 'NIQE: An opinion-unaware metric based on statistical regularities in natural images, frequently used in blind IQA pipelines due to its model-free nature.' effective for assessing color quantization in the image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/08858.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'compression' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/04244.bmp",
      "waterloo_select/md/04244_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/10641.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'CLIPIQA: An NR-IQA method leveraging CLIP embeddings to measure semantic fidelity and perceptual degradation, suitable for content-aware quality estimation.' tool primarily focused on evaluating 'darken' distortion in images?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/sd/04671_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'contrast_weaken' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/00774_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most suitable for evaluating the motion blur distortion evident in the image?",
    "candidates": [
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "MUSIQ: A multi-scale transformer that processes images at native resolutions and varying aspect ratios, offering robust perceptual quality prediction via hierarchical feature fusion.",
      "ARNIQA: A self-supervised NR-IQA model that learns a distortion manifold for quality representation, facilitating robust prediction without reference supervision.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/micbenc_select/14c2c41060a77c169322d3caca9ea14c.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'brighten' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/02411.bmp",
      "waterloo_select/md/02411_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/00069.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'darken' distortion of this image?",
    "candidates": [
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/sd/03101_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/00469.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'darken' distortion of this image?",
    "candidates": [
      "QAlign: A state-of-the-art NR-IQA model addressing Gaussian blur, color distortions, noise types, brightness variations, and more.",
      "CLIPIQA: An NR-IQA method leveraging CLIP embeddings for semantic fidelity and perceptual degradation.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images, frequently used in blind IQA pipelines.",
      "NIMA: Predicts aesthetic quality using probability distributions derived from human ratings."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model addressing Gaussian blur, color distortions, noise types, brightness variations, and more.",
    "image_path": [
      "waterloo_select/sd/01521_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/VOC2012__2008_008632.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'compression' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: A top-down model focused on perceptually important distortion regions. Best for classical distortions including JPEG/JPEG2000 compression.",
      "FSIM: Assesses quality by comparing low-level features, effective for JPEG and JPEG2000 compression.",
      "WaDIQaM_FR: A Siamese-network-based framework applying weighted average pooling for quality estimation.",
      "LPIPS: Evaluates perceptual similarity using deep network features, handling brightness and color shifts."
    ],
    "correct_answer": "TOPIQ_FR: A top-down model focused on perceptually important distortion regions. Best for classical distortions including JPEG/JPEG2000 compression.",
    "image_path": [
      "waterloo_select/ref_md/03638.bmp",
      "waterloo_select/md/03638_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'pixelate' distortion of this image?",
    "candidates": [
      "FSIM: This is a Full-reference IQA model. A Feature Similarity Index that assesses image quality by comparing low-level features, such as phase congruency and gradient magnitude, reflecting the human visual system's perception. This model is among the **top 3** for the following distortions: JPEG compression, Color diffusion, Color quantization, JPEG2000 compression.",
      "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: Blurs (lens blur, motion blur), Color distortions (color diffusion, color shift, color quantization, color saturation), Compression (JPEG2000 and JPEG), Noise (white noise, color component noise, impulse noise, multiplicative noise, denoise artifact), Brightness change (brighten, darken, mean shift), Spatial distortions (jitter, non-eccentricity patch, pixelate, otsu quantization, color block), Sharpness and contrast and brightness changes. Performs best on common distortions in classical image quality benchmarks.",
      "MS-SSIM: An extension of SSIM that computes multi-scale structural similarity, providing a more comprehensive account of image structure across resolutions.",
      "CKDN: A knowledge-distillation-based FR-IQA model that incorporates degraded reference images to improve robustness under partial-reference conditions."
    ],
    "correct_answer": "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: Blurs (lens blur, motion blur), Color distortions (color diffusion, color shift, color quantization, color saturation), Compression (JPEG2000 and JPEG), Noise (white noise, color component noise, impulse noise, multiplicative noise, denoise artifact), Brightness change (brighten, darken, mean shift), Spatial distortions (jitter, non-eccentricity patch, pixelate, otsu quantization, color block), Sharpness and contrast and brightness changes. Performs best on common distortions in classical image quality benchmarks.",
    "image_path": [
      "waterloo_select/ref_md/04539.bmp",
      "waterloo_select/md/04539_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'saturate_weaken' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/02030.bmp",
      "waterloo_select/md/02030_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations.' tool appropriate for evaluating 'JPEG compression' distortions in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/game_2854.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Brightness variation' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/00022.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most appropriate for evaluating the blur in the background of this image?",
    "candidates": [
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. It is suitable for content-aware quality estimation but lacks top-tier performance on benchmark distortions."
    ],
    "correct_answer": "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.",
    "image_path": [
      "waterloo_select/micbenc_select/145.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Does the 'UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.' tool address spatial distortions in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/10007357496.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool would be least relevant for evaluating JPEG compression artifacts in the image?",
    "candidates": [
      "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations, effective for JPEG compression.",
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction.",
      "ARNIQA: A self-supervised NR-IQA model that learns a distortion manifold for quality representation.",
      "MUSIQ: A multi-scale transformer that processes images at native resolutions and varying aspect ratios."
    ],
    "correct_answer": "MUSIQ: A multi-scale transformer that processes images at native resolutions and varying aspect ratios.",
    "image_path": [
      "waterloo_select/micbenc_select/259.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'oversharpen' distortion of this image?",
    "candidates": [
      "DISTS: A structural-texture hybrid similarity model that excels in handling Gaussian blur.",
      "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images.",
      "SSIM: A foundational FR-IQA model based on luminance, contrast, and structure comparisons.",
      "LPIPS: A deep feature-based FR-IQA metric that computes perceptual similarity aligned with human visual judgments, handling distortions like oversharpen effectively."
    ],
    "correct_answer": "LPIPS: A deep feature-based FR-IQA metric that computes perceptual similarity aligned with human visual judgments, handling distortions like oversharpen effectively.",
    "image_path": [
      "waterloo_select/ref_md/03724.bmp",
      "waterloo_select/md/03724_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'pixelate' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: A top-down FR-IQA model that utilizes high-level semantic information to focus on semantically important distortion regions, excels with pixelate and spatial distortions.",
      "PieAPP: A pairwise preference-based FR-IQA model that learns perceptual differences directly from human annotations.",
      "WaDIQaM_FR: A Siamese-network-based FR-IQA framework that fuses predictions from reference and distorted images for quality estimation.",
      "MS-SSIM: An extension of SSIM that computes multi-scale structural similarity, providing a more comprehensive account of image structure."
    ],
    "correct_answer": "TOPIQ_FR: A top-down FR-IQA model that utilizes high-level semantic information to focus on semantically important distortion regions, excels with pixelate and spatial distortions.",
    "image_path": [
      "waterloo_select/ref_md/03896.bmp",
      "waterloo_select/md/03896_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most applicable for dealing with 'color quantization' in an image?",
    "candidates": [
      "MUSIQ: A multi-scale transformer that processes images at native resolutions and varying aspect ratios, offering robust perceptual quality prediction via hierarchical feature fusion.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "ARNIQA: A self-supervised NR-IQA model that learns a distortion manifold for quality representation, facilitating robust prediction without reference supervision.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation."
    ],
    "correct_answer": "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
    "image_path": [
      "waterloo_select/sd/01164_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.' tool relevant for evaluating the technical quality of the color distortion in the image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/game_0498.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'pixelate' distortion of this image?",
    "candidates": [
      "GMSD: Measures image quality by capturing local gradient magnitude deviations. Particularly effective in detecting visually important structural distortions.",
      "WaDIQaM_FR: A Siamese-network-based FR-IQA framework that applies weighted average pooling to fuse predictions from reference and distorted images for quality estimation.",
      "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is *best* at evaluating: blurs (lens blur, motion blur); color distortions (color diffusion, color shift, color quantization, color saturation); compression (JPEG2000 and JPEG); noise (white noise, color component noise, impulse noise, multiplicative noise, denoise artifact); brightness change (brighten, darken, mean shift); spatial distortions (jitter, non-eccentricity patch, pixelate, otsu quantization, color block); and sharpness, contrast, and brightness changes. Performs best on common distortions in classical image quality benchmarks.",
      "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images. It ranks highly for JPEG and JPEG2000 compression."
    ],
    "correct_answer": "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is *best* at evaluating: blurs (lens blur, motion blur); color distortions (color diffusion, color shift, color quantization, color saturation); compression (JPEG2000 and JPEG); noise (white noise, color component noise, impulse noise, multiplicative noise, denoise artifact); brightness change (brighten, darken, mean shift); spatial distortions (jitter, non-eccentricity patch, pixelate, otsu quantization, color block); and sharpness, contrast, and brightness changes. Performs best on common distortions in classical image quality benchmarks.",
    "image_path": [
      "waterloo_select/ref_md/01966.bmp",
      "waterloo_select/md/01966_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is 'CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.' a suitable tool for evaluating the semantic fidelity impacted by the distortions in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/000000448537_elastic_transform_4.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for evaluating color distortions in this image?",
    "candidates": [
      "QAlign: Capable of addressing color distortions such as shift, quantization, and saturation.",
      "NIQE: Opinion-unaware metric based on natural image regularities.",
      "BRISQUE: Captures spatial statistical deviations in natural images.",
      "NIMA: Widely used for aesthetic assessment tasks."
    ],
    "correct_answer": "QAlign: Capable of addressing color distortions such as shift, quantization, and saturation.",
    "image_path": [
      "waterloo_select/micbenc_select/1426c2d6f31a115ccb8cfbeb2682636.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'pixelate' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/03245.bmp",
      "waterloo_select/md/03245_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'oversharpen' distortion of this image?",
    "candidates": [
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. Effective across classical benchmarks.",
      "LIQE: A multitask-learning-based NR-IQA model that demonstrates strong performance on color diffusion and related distortions.",
      "UNIQIE: An uncertainty-aware NR-IQA model designed for spatial distortions such as non-eccentricity patches and pixelation.",
      "ARNIQA: A self-supervised NR-IQA model facilitating robust prediction on various distortions without reference supervision."
    ],
    "correct_answer": "LIQE: A multitask-learning-based NR-IQA model that demonstrates strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/sd/04272_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'saturate_weaken' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/01564.bmp",
      "waterloo_select/md/01564_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.' tool appropriate for assessing the visibility issues caused by low light conditions in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/14895e4dbeffd911034871ea320e0bd.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'quantization' distortion of this image?",
    "candidates": [
      "QAlign: This is an NR-IQA model based on multimodal large language models, capable of addressing Gaussian blur, motion blur, color distortions, noise types, brightness variations, spatial distortions, and sharpness without needing reference images.",
      "CLIPIQA: An NR-IQA method leveraging CLIP embeddings to measure semantic fidelity and perceptual degradation, suitable for content-aware quality estimation.",
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization.",
      "BRISQUE: An NSS-based NR-IQA model capturing spatial statistical deviations in natural images, serving as a strong baseline across classical benchmarks."
    ],
    "correct_answer": "QAlign: This is an NR-IQA model based on multimodal large language models, capable of addressing Gaussian blur, motion blur, color distortions, noise types, brightness variations, spatial distortions, and sharpness without needing reference images.",
    "image_path": [
      "waterloo_select/sd/01864_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/09345.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'quantization' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: A top-down FR-IQA model that utilizes high-level semantic information to enhance assessment for classical distortions.",
      "LPIPS: A deep feature-based FR-IQA metric aligned with human visual judgments.",
      "FSIM: A Feature Similarity Index assessing low-level feature similarity.",
      "PieAPP: A perceptual metric quantifying differences through pairwise preference judgments."
    ],
    "correct_answer": "TOPIQ_FR: A top-down FR-IQA model that utilizes high-level semantic information to enhance assessment for classical distortions.",
    "image_path": [
      "waterloo_select/ref_md/00072.bmp",
      "waterloo_select/md/00072_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images.' tool suitable for handling color shift distortions in images?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/sd/01449_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most appropriate for evaluating JPEG compression effects in this image?",
    "candidates": [
      "QAlign: Capable of addressing various distortions, including compression.",
      "HyperIQA: Decouples IQA into content understanding, flexible across contexts.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images.",
      "MANIQA: Combines visual transformers with attention mechanisms for complex artifacts."
    ],
    "correct_answer": "QAlign: Capable of addressing various distortions, including compression.",
    "image_path": [
      "waterloo_select/micbenc_select/10458591.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'darken' distortion of this image?",
    "candidates": [
      "PieAPP: Perceptual Image-Error Assessment through Pairwise Preference is a learning-based metric that quantifies perceptual differences between images by modeling human pairwise preference judgments.",
      "LPIPS: The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment.",
      "TOPIQ_FR: A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy.",
      "GMSD: Measures image quality by capturing local gradient magnitude deviations. Particularly effective in detecting visually important structural distortions."
    ],
    "correct_answer": "TOPIQ_FR: A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy.",
    "image_path": [
      "waterloo_select/ref_md/04230.bmp",
      "waterloo_select/md/04230_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'darken' distortion of this image?",
    "candidates": [
      "FSIM: This is a Full-reference IQA model. A Feature Similarity Index that assesses image quality by comparing low-level features, such as phase congruency and gradient magnitude, reflecting the human visual system's perception.",
      "DISTS: This is a Full-reference IQA model. A structural-texture hybrid similarity model that balances sensitivity to structural degradations and tolerance to textural variations.",
      "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. Performs best on common distortions in classical image quality benchmarks.",
      "GMSD: This is a Full-reference IQA model. Measures image quality by capturing local gradient magnitude deviations, effective in detecting visually important structural distortions."
    ],
    "correct_answer": "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. Performs best on common distortions in classical image quality benchmarks.",
    "image_path": [
      "waterloo_select/ref_md/02320.bmp",
      "waterloo_select/md/02320_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/06819.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Color distortion' found in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/game_0498.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool would be most suitable to evaluate the perceptual quality considering potential 'color distortions' in this image?",
    "candidates": [
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction.",
      "ARNIQA: A self-supervised NR-IQA model that learns a distortion manifold for quality representation, facilitating robust prediction without reference supervision."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/23.bmp"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'quantization' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/02827.bmp",
      "waterloo_select/md/02827_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is 'MUSIQ: A multi-scale transformer that processes images at native resolutions and varying aspect ratios, offering robust perceptual quality prediction via hierarchical feature fusion.' a relevant tool for evaluating the visual distortion in the image due to lighting conditions?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/00022.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'saturate_weaken' distortion of this image?",
    "candidates": [
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. It is suitable for content-aware quality estimation but lacks top-tier performance on benchmark distortions."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/sd/02120_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'MUSIQ: A multi-scale transformer that processes images at native resolutions and varying aspect ratios, offering robust perceptual quality prediction via hierarchical feature fusion.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/0e731e59345538173144691f0fe.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'blur' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
      "DISTS: A structural-texture hybrid similarity model that balances sensitivity to structural degradations and tolerance to textural variations. It performs particularly well for Gaussian blur.",
      "FSIM: A widely used FR-IQA model based on low-level feature similarity, such as phase congruency and gradient magnitude. It is particularly effective for JPEG compression, color quantization, color diffusion, and JPEG2000 compression.",
      "GMSD: Measures image quality by capturing local gradient magnitude deviations. Particularly effective in detecting visually important structural distortions."
    ],
    "correct_answer": "DISTS: A structural-texture hybrid similarity model that balances sensitivity to structural degradations and tolerance to textural variations. It performs particularly well for Gaussian blur.",
    "image_path": [
      "waterloo_select/ref_md/01299.bmp",
      "waterloo_select/md/01299_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Color distortion' of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/139db25841cde7105bb5949b4341f9.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited to evaluate blurriness in this image?",
    "candidates": [
      "BRISQUE: Captures spatial statistical deviations in natural images. Strong baseline across classical benchmarks.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images.",
      "HyperIQA: A self-adaptive architecture for flexible generalization across diverse image contexts.",
      "UNIQIE: Effective for lens blur, JPEG compression, and denoise artifacts."
    ],
    "correct_answer": "UNIQIE: Effective for lens blur, JPEG compression, and denoise artifacts.",
    "image_path": [
      "waterloo_select/micbenc_select/138891c382ad2f1a885e234981597c72.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool focuses on semantic fidelity and perceptual degradation, making it less effective specifically for distortions like motion blur?",
    "candidates": [
      "CLIPIQA: Leverages CLIP embeddings for semantic fidelity.",
      "BRISQUE: Captures spatial statistical deviations in natural images.",
      "MUSIQ: Processes images at native resolutions for perceptual quality prediction.",
      "UNIQIE: Uncertainty-aware and effective for JPEG compression, lens blur."
    ],
    "correct_answer": "CLIPIQA: Leverages CLIP embeddings for semantic fidelity.",
    "image_path": [
      "waterloo_select/micbenc_select/06956.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the model 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.' suitable for evaluating potential spatial distortions in the image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/game_2850.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' applicable for assessing color distortions in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/08858.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'compression' distortion of this image?",
    "candidates": [
      "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images. Highly effective for JPEG and JPEG2000 compression.",
      "FSIM: A Feature Similarity Index that assesses image quality by comparing low-level features such as phase congruency and gradient magnitude. Effective for JPEG and JPEG2000.",
      "PSNR: A classical pixel-wise metric computing the logarithmic ratio of peak signal power to distortion noise. Used in compression tasks.",
      "LPIPS: A deep feature-based FR-IQA metric that computes perceptual similarity aligned with human visual judgments. Excellent for compression."
    ],
    "correct_answer": "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images. Highly effective for JPEG and JPEG2000 compression.",
    "image_path": [
      "waterloo_select/ref_md/01760.bmp",
      "waterloo_select/md/01760_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.' tool effective for evaluating spatial distortions in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/10536668436.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.' tool effective for analyzing the color contrast issue in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/00107.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/02358_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'quantization' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/04636.bmp",
      "waterloo_select/md/04636_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Color distortion' of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/bald-634460_10_01.bmp"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.' tool effective for evaluating contrast-related distortions in an image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/sd/03901_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.' tool relevant for evaluating technical quality distortions in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/sd/04629_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most appropriate to evaluate spatial distortions such as non-eccentricity patches in the image?",
    "candidates": [
      "UNIQIE: An NR-IQA model effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
      "MUSIQ: A multi-scale transformer with robust perceptual quality prediction via hierarchical feature fusion.",
      "DBCNN: A model that extracts local-global representations for no-reference quality prediction.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images."
    ],
    "correct_answer": "UNIQIE: An NR-IQA model effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
    "image_path": [
      "waterloo_select/micbenc_select/movie_2837.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most effective for evaluating the image quality considering the presence of 'color distortions'?",
    "candidates": [
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction.",
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
      "MUSIQ: A multi-scale transformer that processes images at native resolutions and varying aspect ratios, offering robust perceptual quality prediction via hierarchical feature fusion."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/JPEGImages__2012_002310.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.' tool suitable for the evaluation of synthetic color distortions such as 'brighten'?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/sd/04439_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'darken' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/04671_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'noise' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating noise variations.",
      "DISTS: A structural-texture hybrid similarity model effective for structural degradations.",
      "VIF: Evaluates image quality based on visual information retention, excellent for JPEG compression.",
      "LPIPS: A deep feature-based metric, excelling in perceptual similarity evaluations."
    ],
    "correct_answer": "TOPIQ_FR: A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating noise variations.",
    "image_path": [
      "waterloo_select/ref_md/01224.bmp",
      "waterloo_select/md/01224_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'contrast_weaken' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/00744.bmp",
      "waterloo_select/md/00744_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/game_2850.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which of the following NR-IQA tools is most suitable for assessing color saturation issues?",
    "candidates": [
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction.",
      "LIQE: A multitask-learning-based blind IQA model that demonstrates strong performance on color diffusion and related distortions.",
      "MUSIQ: A multi-scale transformer that processes images at native resolutions and varying aspect ratios.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model that demonstrates strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/04603.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'pixelate' distortion of this image?",
    "candidates": [
      "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks."
    ],
    "correct_answer": "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
    "image_path": [
      "waterloo_select/sd/03082_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited to evaluate color diffusion in this image?",
    "candidates": [
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/movie_0843.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool is most suitable for assessing the 'Blur' and 'Spatial distortion' issues of this image?",
    "candidates": [
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. It is suitable for content-aware quality estimation but lacks top-tier performance on benchmark distortions.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature."
    ],
    "correct_answer": "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. It is suitable for content-aware quality estimation but lacks top-tier performance on benchmark distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/10534454676.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/01385.bmp",
      "waterloo_select/md/01385_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'darken' distortion of this image?",
    "candidates": [
      "LPIPS: Evaluates perceptual similarity using deep network features, capturing differences aligned with human judgment, effective for brightness changes.",
      "PSNR: Computes the ratio of peak signal power to distortion noise, prevalent in compression and restoration tasks.",
      "VIF: Evaluates quality based on visual information retention, ranked highly for JPEG compression.",
      "WaDIQaM_FR: Uses Siamese networks for quality estimation through weighted average pooling."
    ],
    "correct_answer": "LPIPS: Evaluates perceptual similarity using deep network features, capturing differences aligned with human judgment, effective for brightness changes.",
    "image_path": [
      "waterloo_select/ref_md/01592.bmp",
      "waterloo_select/md/01592_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.' tool appropriate for evaluating lighting-related distortions in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/00390.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best for evaluating local-global representations for no-reference quality prediction?",
    "candidates": [
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation."
    ],
    "correct_answer": "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction.",
    "image_path": [
      "waterloo_select/micbenc_select/2622466873.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'quantization' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: lens blur, motion blur, color diffusion, color shift, color quantization, JPEG2000 and JPEG compression, various noise types, brightness changes, spatial distortions, sharpness and contrast.",
      "LPIPS: This is a Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the top 3 for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift.",
      "GMSD: This is a Full-reference IQA model. Measures image quality by capturing local gradient magnitude deviations. Particularly effective in detecting visually important structural distortions.",
      "PieAPP: This is a Full-reference IQA model. Perceptual Image-Error Assessment through Pairwise Preference is a learning-based metric that quantifies perceptual differences between images by modeling human pairwise preference judgments."
    ],
    "correct_answer": "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: lens blur, motion blur, color diffusion, color shift, color quantization, JPEG2000 and JPEG compression, various noise types, brightness changes, spatial distortions, sharpness and contrast.",
    "image_path": [
      "waterloo_select/ref_md/01706.bmp",
      "waterloo_select/md/01706_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'contrast_strengthen' distortion of this image?",
    "candidates": [
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
      "ARNIQA: A self-supervised NR-IQA model that learns a distortion manifold for quality representation, facilitating robust prediction without reference supervision.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/sd/00606_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'contrast_weaken' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
      "LPIPS: This is a Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the top 3 for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation.",
      "FSIM: This is a Full-reference IQA model. A Feature Similarity Index that assesses image quality by comparing low-level features, such as phase congruency and gradient magnitude, reflecting the human visual system's perception. This model is among the top 3 for the following distortions: JPEG compression, Color diffusion, Color quantization, JPEG2000 compression.",
      "PieAPP: This is a Full-reference IQA model. Perceptual Image-Error Assessment through Pairwise Preference is a learning-based metric that quantifies perceptual differences between images by modeling human pairwise preference judgments."
    ],
    "correct_answer": "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
    "image_path": [
      "waterloo_select/ref_md/03685.bmp",
      "waterloo_select/md/03685_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'saturate_strengthen' distortion of this image?",
    "candidates": [
      "PieAPP: Pairwise preference model, learning perceptual differences from human annotations.",
      "SSIM: Focuses on luminance, contrast, and structure comparisons for tasks like denoising, deblurring.",
      "LPIPS: Perceptual similarity metric aligning with human judgments, useful for brightness and color variations like saturation.",
      "MS-SSIM: Extends SSIM to a multi-scale framework but not detailed for saturation distortions."
    ],
    "correct_answer": "LPIPS: Perceptual similarity metric aligning with human judgments, useful for brightness and color variations like saturation.",
    "image_path": [
      "waterloo_select/ref_md/02924.bmp",
      "waterloo_select/md/02924_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'quantization' distortion of this image?",
    "candidates": [
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/sd/03369_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'saturate_weaken' distortion of this image?",
    "candidates": [
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "MANIQA: Combines visual transformers with quality-aware attention mechanisms to evaluate GAN-generated distortions and other complex artifacts."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/sd/00083_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for evaluating spatial distortions such as those seen at the edges of this image?",
    "candidates": [
      "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "MUSIQ: A multi-scale transformer that processes images at native resolutions and varying aspect ratios, offering robust perceptual quality prediction via hierarchical feature fusion.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature."
    ],
    "correct_answer": "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
    "image_path": [
      "waterloo_select/micbenc_select/08412.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'pixelate' distortion of this image?",
    "candidates": [
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. It is suitable for content-aware quality estimation but lacks top-tier performance on benchmark distortions.",
      "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks."
    ],
    "correct_answer": "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
    "image_path": [
      "waterloo_select/sd/01436_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'noise' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
      "DISTS: This is a structural-texture hybrid similarity model that balances sensitivity to structural degradations and tolerance to textural variations. It performs particularly well for Gaussian blur.",
      "FSIM: A Feature Similarity Index that assesses image quality by comparing low-level features, such as phase congruency and gradient magnitude, reflecting the human visual system's perception. It performs best for JPEG compression, color quantization, color diffusion, and JPEG2000 compression.",
      "PieAPP: This is a Full-reference IQA model. Perceptual Image-Error Assessment through Pairwise Preference is a learning-based metric that quantifies perceptual differences between images by modeling human pairwise preference judgments."
    ],
    "correct_answer": "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
    "image_path": [
      "waterloo_select/ref_md/00196.bmp",
      "waterloo_select/md/00196_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Texture' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/13aa95fb4ddeedefdded4388cb0c774.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'noise' distortion of this image?",
    "candidates": [
      "GMSD: Measures image quality by capturing local gradient magnitude deviations. Particularly effective in detecting visually important structural distortions.",
      "SSIM: A foundational FR-IQA model based on luminance, contrast, and structure comparisons. Widely used in denoising, deblurring, and super-resolution evaluations.",
      "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
      "CKDN: A knowledge-distillation-based FR-IQA model that incorporates degraded reference images to improve robustness under partial-reference conditions."
    ],
    "correct_answer": "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
    "image_path": [
      "waterloo_select/ref_md/01615.bmp",
      "waterloo_select/md/01615_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'quantization' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/00035.bmp",
      "waterloo_select/md/00035_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/4535431418.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited to evaluate perceptual degradation due to haziness or faded appearance in distant objects in the image?",
    "candidates": [
      "BRISQUE: Captures spatial statistical deviations in natural images.",
      "DBCNN: Bilinear CNN architecture fusing local-global representations.",
      "TReS: Transformer-based model capturing global and local perceptual features.",
      "NIMA: Predicts aesthetic and technical quality using human ratings."
    ],
    "correct_answer": "TReS: Transformer-based model capturing global and local perceptual features.",
    "image_path": [
      "waterloo_select/micbenc_select/483.bmp"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'compression' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/sd/00238_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/10536668436.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'saturate_strengthen' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/04301.bmp",
      "waterloo_select/md/04301_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool is most suitable for assessing the motion blur in this image?",
    "candidates": [
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/micbenc_select/10c4f2896f26dda410fb926e54b11fb3.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/AttnGAN_normal_113.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'noise' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/03654.bmp",
      "waterloo_select/md/03654_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for assessing the spatial distortion present in the image?",
    "candidates": [
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. It is suitable for content-aware quality estimation but lacks top-tier performance on benchmark distortions.",
      "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
      "WaDIQaM_NR: Applies a deep neural network with weighted average pooling to perform NR-IQA by aggregating spatially varying local quality scores.",
      "ARNIQA: A self-supervised NR-IQA model that learns a distortion manifold for quality representation, facilitating robust prediction without reference supervision."
    ],
    "correct_answer": "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
    "image_path": [
      "waterloo_select/micbenc_select/11310848503.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best for evaluating color distortions in the image?",
    "candidates": [
      "UNIQIE: Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
      "CLIPIQA: Suitable for content-aware quality estimation but lacks top-tier performance on benchmark distortions.",
      "LIQE: Demonstrates strong performance on color diffusion and related distortions.",
      "NIQE: Frequent use in blind IQA pipelines due to its model-free nature."
    ],
    "correct_answer": "LIQE: Demonstrates strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/3197293330.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is 'NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.' suitable for evaluating 'Brightness Variation' in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/1090979049.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which IQA tool would be best for evaluating potential color distortions in this image?",
    "candidates": [
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/10244314384.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'contrast_weaken' distortion of this image?",
    "candidates": [
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions, multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. It is suitable for content-aware quality estimation but lacks top-tier performance on benchmark distortions."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions, multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/sd/02258_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations.' tool suitable for addressing JPEG compression in the image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/04487.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool would best address the difficulty in determining color distortions within this image?",
    "candidates": [
      "ARNIQA: A self-supervised NR-IQA model that learns a distortion manifold for quality representation.",
      "LIQE: A multitask-learning-based blind IQA model showing strong performance on color diffusion and related distortions.",
      "TReS: A transformer-based blind IQA model that incorporates relative ranking and consistency learning.",
      "WaDIQaM_NR: Applies a deep neural network with weighted average pooling to perform NR-IQA."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model showing strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/00361.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.' tool suitable for evaluating complex aesthetic quality issues like color saturation in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/I81_03_01.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'blur' distortion of this image?",
    "candidates": [
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "TReS: A transformer-based blind IQA model that incorporates relative ranking and consistency learning to capture both global and local perceptual features."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/sd/03646_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most appropriate for evaluating potential 'brightness variations' in the image?",
    "candidates": [
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images.",
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/micbenc_select/7268649826.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'saturate_weaken' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/00250_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'noise' distortion of this image?",
    "candidates": [
      "WaDIQaM_FR: A Siamese-network-based FR-IQA framework that applies weighted average pooling to fuse predictions from reference and distorted images for quality estimation.",
      "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is *best* at evaluating:\n- Blurs (lens blur, motion blur)\n- Color distortions (color diffusion, color shift, color quantization, color saturation)\n- Compression (JPEG2000 and JPEG)\n- Noise (white noise, color component noise, impulse noise, multiplicative noise, denoise artifact)\n- Brightness change (brighten, darken, mean shift)\n- Spatial distortions (jitter, non-eccentricity patch, pixelate, otsu quantization, color block)\n- Sharpness, contrast, and brightness changes. Performs best on common distortions in classical image quality benchmarks.",
      "DISTS: A structural-texture hybrid similarity model that balances sensitivity to structural degradations and tolerance to textural variations. It performs particularly well for Gaussian blur.",
      "GMSD: Measures image quality by capturing local gradient magnitude deviations. Particularly effective in detecting visually important structural distortions."
    ],
    "correct_answer": "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is *best* at evaluating:\n- Blurs (lens blur, motion blur)\n- Color distortions (color diffusion, color shift, color quantization, color saturation)\n- Compression (JPEG2000 and JPEG)\n- Noise (white noise, color component noise, impulse noise, multiplicative noise, denoise artifact)\n- Brightness change (brighten, darken, mean shift)\n- Spatial distortions (jitter, non-eccentricity patch, pixelate, otsu quantization, color block)\n- Sharpness, contrast, and brightness changes. Performs best on common distortions in classical image quality benchmarks.",
    "image_path": [
      "waterloo_select/ref_md/01589.bmp",
      "waterloo_select/md/01589_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'compression' distortion of this image?",
    "candidates": [
      "UNIQIE: Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions.",
      "LIQE: Demonstrates strong performance on color diffusion and related distortions.",
      "NIMA: Used for aesthetic and technical quality assessment.",
      "HyperIQA: Enables flexible generalization across diverse image contexts."
    ],
    "correct_answer": "UNIQIE: Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions.",
    "image_path": [
      "waterloo_select/sd/01216_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'contrast_weaken' distortion of this image?",
    "candidates": [
      "WaDIQaM_FR: This is a Full-reference IQA framework. A Siamese-network system employs weighted average pooling to merge predictions from reference and distorted images for enhanced quality estimation.",
      "PSNR: A Full-reference IQA model. A classical pixel-wise metric that computes the logarithmic ratio of peak signal power to distortion noise. Still prevalent in compression and restoration tasks.",
      "LPIPS: This is a Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the **top 3** for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation.",
      "SSIM: This is a Full-reference IQA model. The Structural Similarity Index evaluates image quality by comparing changes in structural information, luminance, and contrast between images. It serves as a robust baseline in deblurring, denoising, and super-resolution contexts."
    ],
    "correct_answer": "LPIPS: This is a Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the **top 3** for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation.",
    "image_path": [
      "waterloo_select/ref_md/03273.bmp",
      "waterloo_select/md/03273_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool is likely less effective for evaluating distortions in this image caused by lighting conditions?",
    "candidates": [
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images.",
      "LIQE: A multitask-learning-based blind IQA model for color diffusion and related distortions.",
      "WaDIQaM_NR: Uses spatially varying local quality scores for NR-IQA.",
      "CLIPIQA: Leverages CLIP embeddings for semantic fidelity and perceptual degradation."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model for color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/126.bmp"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Distortion around edges' of the butterfly in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/I31_01_01.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the optical distortion in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/486.bmp"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited to evaluate motion blur in the image?",
    "candidates": [
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions.",
      "WaDIQaM_NR: Applies a deep neural network with weighted average pooling to perform NR-IQA by aggregating spatially varying local quality scores.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/micbenc_select/00340.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'quantization' distortion of this image?",
    "candidates": [
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/sd/01164_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/00907_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'compression' distortion of this image?",
    "candidates": [
      "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions."
    ],
    "correct_answer": "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
    "image_path": [
      "waterloo_select/sd/04180_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool below demonstrates efficacy in addressing 'spatial distortions' observed in the image?",
    "candidates": [
      "UNIQIE: An uncertainty-aware NR-IQA model effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings, widely used for aesthetic assessment tasks.",
      "BRISQUE: A pioneering NSS-based NR-IQA model capturing spatial statistical deviations in natural images, a strong baseline across classical benchmarks.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images, frequently used in blind IQA pipelines."
    ],
    "correct_answer": "UNIQIE: An uncertainty-aware NR-IQA model effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
    "image_path": [
      "waterloo_select/micbenc_select/market_blur1_jpeg3.bmp"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool would be most appropriate for evaluating color distortions in this image?",
    "candidates": [
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/9283219931.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most appropriate for evaluating color diffusion distortion in this image?",
    "candidates": [
      "WaDIQaM_NR: Applies a deep neural network with weighted average pooling to perform NR-IQA by aggregating spatially varying local quality scores.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.",
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/sd/03369_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/1348aeaa1744112e11a274342ee35b63.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Low light' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/1071b071cd68ae974c7166895f8ecd.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/00390.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'saturate_strengthen' distortion of this image?",
    "candidates": [
      "VIF: This is a Full-reference IQA model. Evaluates quality based on the amount of visual information retained between reference and distorted images. It ranks highly for JPEG and JPEG2000 compression.",
      "AHIQ: This is a Full-reference IQA model. An attention-guided FR-IQA model tailored to assess distortions commonly introduced by generative models. It integrates hybrid mechanisms to improve robustness under complex generation artifacts.",
      "DISTS: This is a Full-reference IQA model. A structural-texture hybrid similarity model that balances sensitivity to structural degradations and tolerance to textural variations.",
      "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: blurs, color distortions, compression, noise, brightness change, spatial distortions, sharpness, and contrast."
    ],
    "correct_answer": "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: blurs, color distortions, compression, noise, brightness change, spatial distortions, sharpness, and contrast.",
    "image_path": [
      "waterloo_select/ref_md/01281.bmp",
      "waterloo_select/md/01281_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'darken' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/03925.bmp",
      "waterloo_select/md/03925_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/01026.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'compression' distortion of this image?",
    "candidates": [
      "FSIM: A Feature Similarity Index that assesses image quality by comparing low-level features, such as phase congruency and gradient magnitude, reflecting the human visual system's perception. This model is among the top 3 for the following distortions: JPEG compression, Color diffusion, Color quantization, JPEG2000 compression.",
      "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images. It ranks highly for JPEG and JPEG2000 compression.",
      "MS-SSIM: An extension of SSIM that computes multi-scale structural similarity, providing a more comprehensive account of image structure across resolutions.",
      "LPIPS: The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment."
    ],
    "correct_answer": "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images. It ranks highly for JPEG and JPEG2000 compression.",
    "image_path": [
      "waterloo_select/ref_md/04230.bmp",
      "waterloo_select/md/04230_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'contrast_strengthen' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/02825.bmp",
      "waterloo_select/md/02825_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is 'MANIQA: combining visual transformers with quality-aware attention mechanisms' effective in evaluating the 'blur' present in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/AttnGAN_normal_113.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which of the following No-Reference IQA tools is most appropriate for assessing color distortions such as quantization or saturation observed in the image?",
    "candidates": [
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
      "ARNIQA: A self-supervised NR-IQA model that learns a distortion manifold for quality representation, facilitating robust prediction without reference supervision.",
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/075ab40d1e4f1eee25ef3e83d6a7b9.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'oversharpen' distortion of this image?",
    "candidates": [
      "SSIM: A foundational FR-IQA model based on luminance, contrast, and structure comparisons. Widely used in denoising, deblurring, and super-resolution evaluations.",
      "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
      "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images. It ranks highly for JPEG and JPEG2000 compression.",
      "DISTS: A structural-texture hybrid similarity model that balances sensitivity to structural degradations and tolerance to textural variations. It performs particularly well for Gaussian blur."
    ],
    "correct_answer": "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
    "image_path": [
      "waterloo_select/ref_md/00732.bmp",
      "waterloo_select/md/00732_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/sd1.5_highcorr_133.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for evaluating color distortions present in the image?",
    "candidates": [
      "UNIQIE: Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
      "QAlign: Capable of addressing color distortions (shift, quantization, saturation) without requiring reference images.",
      "TReS: Captures both global and local perceptual features using transformer-based blind IQA.",
      "MUSIQ: Processes images at native resolutions with multi-scale transformer architecture."
    ],
    "correct_answer": "QAlign: Capable of addressing color distortions (shift, quantization, saturation) without requiring reference images.",
    "image_path": [
      "waterloo_select/micbenc_select/glide_normal_022.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool would be most appropriate for evaluating the perceptual quality focusing on spatial distortions in this image?",
    "candidates": [
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "MANIQA: Combines visual transformers with quality-aware attention mechanisms to evaluate GAN-generated distortions and other complex artifacts."
    ],
    "correct_answer": "MANIQA: Combines visual transformers with quality-aware attention mechanisms to evaluate GAN-generated distortions and other complex artifacts.",
    "image_path": [
      "waterloo_select/micbenc_select/00128.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the lighting condition of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/02d9c7f28046f06ba56fa6f935618.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'brighten' distortion of this image?",
    "candidates": [
      "WaDIQaM_FR: A Siamese-network-based FR-IQA framework that applies weighted average pooling to fuse predictions from reference and distorted images for quality estimation.",
      "VIF: Evaluates quality based on the amount of visual information retained. Highly effective for compression artifacts.",
      "LPIPS: The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity using deep network features, effectively handling brighten distortions.",
      "FSIM: Assesses image quality by comparing low-level features such as phase congruency and gradient magnitude."
    ],
    "correct_answer": "LPIPS: The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity using deep network features, effectively handling brighten distortions.",
    "image_path": [
      "waterloo_select/ref_md/02631.bmp",
      "waterloo_select/md/02631_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'quantization' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/04662.bmp",
      "waterloo_select/md/04662_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA method is particularly designed for estimating quality under both synthetic and real-world degradations, including blur?",
    "candidates": [
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. Suitable for content-aware quality estimation.",
      "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions.",
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction.",
      "ARNIQA: A self-supervised NR-IQA model that learns a distortion manifold for quality representation."
    ],
    "correct_answer": "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/01388.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Texture Overlay' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/000000098283_frost_3.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most suitable for assessing noise and color distortions present in the image?",
    "candidates": [
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images.",
      "HyperIQA: A self-adaptive architecture that enables flexible generalization across diverse image contexts.",
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions, multiple noise types, brightness variations, spatial distortions, and sharpness."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions, multiple noise types, brightness variations, spatial distortions, and sharpness.",
    "image_path": [
      "waterloo_select/micbenc_select/95.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool is most suitable for evaluating motion blur in this image based on its capabilities?",
    "candidates": [
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. Suitable for content-aware quality estimation.",
      "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations.",
      "WaDIQaM_NR: Applies a deep neural network with weighted average pooling to perform NR-IQA by aggregating spatially varying local quality scores.",
      "QAlign: A state-of-the-art NR-IQA model addressing Gaussian blur and motion blur, among other distortions."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model addressing Gaussian blur and motion blur, among other distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/AVA__303546.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'compression' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/01742.bmp",
      "waterloo_select/md/01742_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.' effective for evaluating contrast and brightness issues in images?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/4535431418.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'oversharpen' distortion of this image?",
    "candidates": [
      "GMSD: Measures image quality by capturing local gradient magnitude deviations, particularly effective in detecting visually important structural distortions.",
      "SSIM: A foundational FR-IQA model based on luminance, contrast, and structure comparisons. Widely used in denoising, deblurring, and super-resolution evaluations.",
      "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images. Ranks highly for JPEG and JPEG2000 compression.",
      "LPIPS: A deep feature-based FR-IQA metric computing perceptual similarity aligned with human visual judgments, particularly effective for brightness changes, jitter, contrast variations, and color shifts."
    ],
    "correct_answer": "GMSD: Measures image quality by capturing local gradient magnitude deviations, particularly effective in detecting visually important structural distortions.",
    "image_path": [
      "waterloo_select/ref_md/02917.bmp",
      "waterloo_select/md/02917_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA model is effective in evaluating spatial distortions that may be present in this image?",
    "candidates": [
      "WaDIQaM_NR: Applies deep neural networks with weighted average pooling for blind IQA.",
      "UNIQIE: An uncertainty-aware model effective for JPEG compression and spatial distortions.",
      "LIQE: Leverages multitask learning for distortion awareness, excelling in color diffusion distortions.",
      "ARNIQA: A self-supervised NR-IQA model learning a distortion manifold for quality representation."
    ],
    "correct_answer": "UNIQIE: An uncertainty-aware model effective for JPEG compression and spatial distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/15ddd98856b48de7d181ccefff41d56.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'saturate_strengthen' distortion of this image?",
    "candidates": [
      "MS-SSIM: This is Full-reference IQA model. An extension of SSIM that computes multi-scale structural similarity, providing a more comprehensive account of image structure across resolutions.",
      "DISTS: This is Full-reference IQA model. A structural-texture hybrid similarity model that balances sensitivity to structural degradations and tolerance to textural variations. It performs particularly well for Gaussian blur.",
      "VIF: This is Full-reference IQA model. Evaluates quality based on the amount of visual information retained between reference and distorted images. It ranks highly for JPEG and JPEG2000 compression.",
      "LPIPS: This is Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the **top 3** for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation."
    ],
    "correct_answer": "LPIPS: This is Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the **top 3** for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation.",
    "image_path": [
      "waterloo_select/ref_md/02433.bmp",
      "waterloo_select/md/02433_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'quantization' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/01340_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/I13_17_01.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'saturate_weaken' distortion of this image?",
    "candidates": [
      "LPIPS: This FR-IQA model evaluates perceptual similarity using deep network features, capturing differences aligned with human judgment. Best for brightness and contrast changes.",
      "DISTS: A structural-texture hybrid model effective at balancing structural degradations and tolerating textural variations.",
      "WaDIQaM_FR: A Siamese-network-based FR-IQA framework that fuses predictions for quality estimation.",
      "GMSD: Measures quality by capturing local gradient magnitude deviations, effective for structural distortions."
    ],
    "correct_answer": "LPIPS: This FR-IQA model evaluates perceptual similarity using deep network features, capturing differences aligned with human judgment. Best for brightness and contrast changes.",
    "image_path": [
      "waterloo_select/ref_md/01950.bmp",
      "waterloo_select/md/01950_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Motion Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/1090979049.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA model focuses on semantic fidelity and perceptual degradation?",
    "candidates": [
      "QAlign: Uses MLLMs for diverse distortions.",
      "CLIPIQA: Leverages CLIP embeddings for semantic fidelity.",
      "DBCNN: Uses bilinear CNN for local-global representations.",
      "LIQE: Enhances distortion awareness through multitask learning."
    ],
    "correct_answer": "CLIPIQA: Leverages CLIP embeddings for semantic fidelity.",
    "image_path": [
      "waterloo_select/micbenc_select/movie_0625.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'brighten' distortion of this image?",
    "candidates": [
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "WaDIQaM_NR: Applies a deep neural network with weighted average pooling to perform NR-IQA by aggregating spatially varying local quality scores."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/sd/00813_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'quantization' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: - Blurs (lens blur, motion blur) - Color distortions (color diffusion, color shift, color quantization, color saturation) - Compression (JPEG2000 and JPEG) - Noise (white noise, color component noise, impulse noise, multiplicative noise, denoise artifact) - Brightness change (brighten, darken, mean shift) - Spatial distortions (jitter, non-eccentricity patch, pixelate, otsu quantization, color block) - Sharpness and contrast and brightness changes. Performs best on common distortions in classical image quality benchmarks.",
      "LPIPS: This is a Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the top 3 for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation.",
      "FSIM: This is a Full-reference IQA model. A Feature Similarity Index that assesses image quality by comparing low-level features, such as phase congruency and gradient magnitude, reflecting the human visual system's perception. This model is among the top 3 for the following distortions: JPEG compression, Color diffusion, Color quantization, JPEG2000 compression.",
      "PieAPP: This is a Full-reference IQA model. Perceptual Image-Error Assessment through Pairwise Preference is a learning-based metric that quantifies perceptual differences between images by modeling human pairwise preference judgments."
    ],
    "correct_answer": "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: - Blurs (lens blur, motion blur) - Color distortions (color diffusion, color shift, color quantization, color saturation) - Compression (JPEG2000 and JPEG) - Noise (white noise, color component noise, impulse noise, multiplicative noise, denoise artifact) - Brightness change (brighten, darken, mean shift) - Spatial distortions (jitter, non-eccentricity patch, pixelate, otsu quantization, color block) - Sharpness and contrast and brightness changes. Performs best on common distortions in classical image quality benchmarks.",
    "image_path": [
      "waterloo_select/ref_md/02789.bmp",
      "waterloo_select/md/02789_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'quantization' distortion of this image?",
    "candidates": [
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "MANIQA: Combines visual transformers with quality-aware attention mechanisms to evaluate GAN-generated distortions and other complex artifacts.",
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/sd/03828_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'quantization' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/03138_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'oversharpen' distortion of this image?",
    "candidates": [
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "MANIQA: Combines visual transformers with quality-aware attention mechanisms to evaluate GAN-generated distortions and other complex artifacts.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature."
    ],
    "correct_answer": "MANIQA: Combines visual transformers with quality-aware attention mechanisms to evaluate GAN-generated distortions and other complex artifacts.",
    "image_path": [
      "waterloo_select/sd/04546_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/10442628686.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'contrast_strengthen' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/01208_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. It is suitable for content-aware quality estimation but lacks top-tier performance on benchmark distortions.' tool optimal for assessing blur in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/10641.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool is best suited for evaluating the Gaussian blur in an image without reference?",
    "candidates": [
      "BRISQUE: An NSS-based NR-IQA model capturing spatial statistical deviations, serving as a strong baseline for classical benchmarks.",
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks for enhanced distortion awareness.",
      "TReS: A transformer-based blind IQA model capturing both global and local perceptual features.",
      "DBCNN: Employs a bilinear CNN architecture to fuse local-global representations for no-reference quality prediction."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks for enhanced distortion awareness.",
    "image_path": [
      "waterloo_select/micbenc_select/01604.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'MANIQA: Combines visual transformers with quality-aware attention mechanisms to evaluate GAN-generated distortions and other complex artifacts.' tool suitable for evaluating distortions in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/0b97a8fe8b5439b6d1acf36c02b1992.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool is most appropriate for evaluating JPEG compression in images?",
    "candidates": [
      "UNIQIE: Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "LIQE: Demonstrates strong performance on color diffusion and related distortions.",
      "CLIPIQA: Measures semantic fidelity and perceptual degradation but lacks top-tier performance on benchmark distortions."
    ],
    "correct_answer": "UNIQIE: Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.",
    "image_path": [
      "waterloo_select/micbenc_select/EMOTIC__COCO_train2014_000000499376.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'compression' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/sd/01433_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "What tool is likely most effective at evaluating color diffusion and related distortions in the image?",
    "candidates": [
      "MANIQA: Uses visual transformers with quality-aware attention mechanisms for complex artifacts.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images.",
      "LIQE: Multitask-learning-based model that excels in color diffusion and related distortions.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions from human ratings."
    ],
    "correct_answer": "LIQE: Multitask-learning-based model that excels in color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/11010062705.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.' tool good at evaluating motion blur in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/224.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'pixelate' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: A top-down approach using high-level semantic information to focus on semantically important local distortion regions. Best on spatial distortions like pixelate.",
      "VIF: Evaluates quality based on the amount of visual information retained, ranking highly for compression tasks like JPEG.",
      "GMSD: Measures image quality by capturing local gradient magnitude deviations, effective for structural distortions.",
      "AHIQ: An attention-guided FR-IQA model tailored to assess distortions introduced by generative models."
    ],
    "correct_answer": "TOPIQ_FR: A top-down approach using high-level semantic information to focus on semantically important local distortion regions. Best on spatial distortions like pixelate.",
    "image_path": [
      "waterloo_select/ref_md/01073.bmp",
      "waterloo_select/md/01073_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'saturate_weaken' distortion of this image?",
    "candidates": [
      "QAlign: Capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness.",
      "CLIPIQA: Leverages CLIP embeddings to measure semantic fidelity and perceptual degradation, suitable for content-aware quality estimation.",
      "BRISQUE: Captures spatial statistical deviations in natural images, serving as a strong baseline across classical benchmarks.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images."
    ],
    "correct_answer": "QAlign: Capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness.",
    "image_path": [
      "waterloo_select/sd/04167_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Can the 'CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.' tool be applied effectively to address motion blur in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/09942.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for assessing both semantic fidelity and perceptual degradation in this image with potential focus issues?",
    "candidates": [
      "MUSIQ: A multi-scale transformer that processes images at native resolutions and varying aspect ratios.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.",
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions."
    ],
    "correct_answer": "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.",
    "image_path": [
      "waterloo_select/micbenc_select/14bc46df20d85441a446141a9266e73f.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.' tool effective for evaluating the image with low light conditions?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/1071b071cd68ae974c7166895f8ecd.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool is best suited for assessing semantic fidelity and perceptual degradation due to blurred features in the image?",
    "candidates": [
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. It is suitable for content-aware quality estimation but lacks top-tier performance on benchmark distortions.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature."
    ],
    "correct_answer": "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. It is suitable for content-aware quality estimation but lacks top-tier performance on benchmark distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/259.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'saturate_strengthen' distortion of this image?",
    "candidates": [
      "GMSD: Focuses on local gradient magnitude deviations for detecting structural distortions.",
      "LPIPS: Utilizes deep features for perceptual similarity, effective with color saturation changes.",
      "SSIM: Based on luminance, contrast, and structure comparisons, widely used in various evaluations.",
      "PieAPP: Learns perceptual differences directly from human annotations for subjective quality judgments."
    ],
    "correct_answer": "LPIPS: Utilizes deep features for perceptual similarity, effective with color saturation changes.",
    "image_path": [
      "waterloo_select/ref_md/03638.bmp",
      "waterloo_select/md/03638_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'pixelate' distortion of this image?",
    "candidates": [
      "PieAPP: A learning-based FR-IQA metric modeling human pairwise preference judgments.",
      "GMSD: Measures quality by capturing local gradient magnitude deviations, focusing on structural distortions.",
      "MS-SSIM: An extension of SSIM for multi-scale structural similarity, enhancing image structure assessment.",
      "TOPIQ_FR: A top-down FR-IQA model tailored to spatial distortions, including pixelate."
    ],
    "correct_answer": "TOPIQ_FR: A top-down FR-IQA model tailored to spatial distortions, including pixelate.",
    "image_path": [
      "waterloo_select/ref_md/00209.bmp",
      "waterloo_select/md/00209_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best to evaluate motion blur in the image?",
    "candidates": [
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation. It is suitable for content-aware quality estimation but lacks top-tier performance on benchmark distortions.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/micbenc_select/24.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool is suitable for evaluating distortions such as JPEG compression, lens blur, denoise artifacts, and spatial distortions like pixelation in an image?",
    "candidates": [
      "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations.",
      "ARNIQA: A self-supervised NR-IQA model that learns a distortion manifold for quality representation.",
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations.",
      "LIQE: A multitask-learning-based blind IQA model exploiting auxiliary tasks for distortion awareness."
    ],
    "correct_answer": "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations.",
    "image_path": [
      "waterloo_select/micbenc_select/6325240412.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool should be used to assess color distortions such as shifts and quantization in the image?",
    "candidates": [
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images.",
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/micbenc_select/484.bmp"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/I80_12_01.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'contrast_strengthen' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/00838.bmp",
      "waterloo_select/md/00838_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'pixelate' distortion of this image?",
    "candidates": [
      "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: Blurs (lens blur, motion blur) - Color distortions (color diffusion, color shift, color quantization, color saturation) - Compression (JPEG2000 and JPEG) - Noise (white noise, color component noise, impulse noise, multiplicative noise, denoise artifact) - Brightness change (brighten, darken, mean shift) - Spatial distortions (jitter, non-eccentricity patch, pixelate, otsu quantization, color block) - Sharpness and contrast and brightness changes. Performs best on common distortions in classical image quality benchmarks.",
      "LPIPS: This is a Full-reference IQA model. The Learned Perceptual Image Patch Similarity metric evaluates perceptual similarity between images using deep network features, capturing differences aligned with human judgment. This model is among the top 3 for the following distortions: Brighten, Darken, Mean shift, Jitter, High sharpen, Contrast change, Color shift, Color saturation.",
      "FSIM: This is a Full-reference IQA model. A Feature Similarity Index that assesses image quality by comparing low-level features, such as phase congruency and gradient magnitude, reflecting the human visual system's perception. This model is among the top 3 for the following distortions: JPEG compression, Color diffusion, Color quantization, JPEG2000 compression.",
      "PieAPP: This is a Full-reference IQA model. Perceptual Image-Error Assessment through Pairwise Preference is a learning-based metric that quantifies perceptual differences between images by modeling human pairwise preference judgments."
    ],
    "correct_answer": "TOPIQ_FR: This is a Full-reference IQA model. A top-down approach that utilizes high-level semantic information to guide the IQA network, focusing on semantically important local distortion regions for improved assessment accuracy. This model is best at evaluating: Blurs (lens blur, motion blur) - Color distortions (color diffusion, color shift, color quantization, color saturation) - Compression (JPEG2000 and JPEG) - Noise (white noise, color component noise, impulse noise, multiplicative noise, denoise artifact) - Brightness change (brighten, darken, mean shift) - Spatial distortions (jitter, non-eccentricity patch, pixelate, otsu quantization, color block) - Sharpness and contrast and brightness changes. Performs best on common distortions in classical image quality benchmarks.",
    "image_path": [
      "waterloo_select/ref_md/04467.bmp",
      "waterloo_select/md/04467_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most appropriate for evaluating color distortion in this image?",
    "candidates": [
      "CLIPIQA: An NR-IQA method leveraging CLIP embeddings to measure semantic fidelity and perceptual degradation.",
      "UNIQIE: A model for quality estimation under synthetic and real-world degradations, effective for JPEG compression and spatial distortions.",
      "HyperIQA: A self-adaptive architecture enabling flexibility across various image contexts.",
      "LIQE: A multitask-learning-based blind IQA model showing strong performance on color diffusion and related distortions."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model showing strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/micbenc_select/149e8a6bfdbdbf41fffaf01c2607964.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most suitable for evaluating motion blur in the given image?",
    "candidates": [
      "DBCNN: A bilinear CNN architecture that extracts and fuses local-global representations for no-reference quality prediction.",
      "HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings."
    ],
    "correct_answer": "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.",
    "image_path": [
      "waterloo_select/sd/02451_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'darken' distortion of this image?",
    "candidates": [
      "LPIPS: A deep feature-based FR-IQA metric that computes perceptual similarity aligned with human visual judgments. It excels in handling distortions such as brightness changes, jitter, contrast variations, and color shifts.",
      "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
      "PieAPP: A pairwise preference-based FR-IQA model that learns perceptual differences directly from human annotations. Designed to align with subjective quality judgments.",
      "CKDN: A knowledge-distillation-based FR-IQA model that incorporates degraded reference images to improve robustness under partial-reference conditions."
    ],
    "correct_answer": "TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.",
    "image_path": [
      "waterloo_select/ref_md/01299.bmp",
      "waterloo_select/md/01299_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is best suited for handling the 'contrast_weaken' distortion of this image?",
    "candidates": [
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "NIQE: An opinion-unaware metric based on statistical regularities in natural images. Frequently used in blind IQA pipelines due to its model-free nature.",
      "TReS: A transformer-based blind IQA model that incorporates relative ranking and consistency learning to capture both global and local perceptual features."
    ],
    "correct_answer": "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness. Demonstrates strong performance on color diffusion and related distortions.",
    "image_path": [
      "waterloo_select/sd/02241_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'pixelate' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/01449_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations. Effective for JPEG compression, lens blur, denoise artifacts, and spatial distortions such as non-eccentricity patches and pixelation.' tool effective for JPEG compression artifacts in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/I13_17_01.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images.' tool effective for assessing lighting impacts on this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/8471068172.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'quantization' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/sd/03152_0.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'compression' distortion of this image?",
    "candidates": [
      "PSNR: A classical pixel-wise metric that computes the logarithmic ratio of peak signal power to distortion noise. Still prevalent in compression and restoration tasks.",
      "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images. It ranks highly for JPEG and JPEG2000 compression.",
      "LPIPS: A deep feature-based FR-IQA metric that computes perceptual similarity aligned with human visual judgments. It excels in handling distortions such as brightness changes, jitter, contrast variations, and color shifts.",
      "SSIM: A foundational FR-IQA model based on luminance, contrast, and structure comparisons. Widely used in denoising, deblurring, and super-resolution evaluations."
    ],
    "correct_answer": "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images. It ranks highly for JPEG and JPEG2000 compression.",
    "image_path": [
      "waterloo_select/ref_md/03041.bmp",
      "waterloo_select/md/03041_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'HyperIQA: A self-adaptive architecture that decouples IQA into content understanding, perceptual rule learning, and score prediction, enabling flexible generalization across diverse image contexts' tool effective for assessing motion blur in images?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "No",
    "image_path": [
      "waterloo_select/micbenc_select/10eba82e82686abe792dd84b1c40c37a.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which tool is most appropriate to assess the possible motion blur present in this image?",
    "candidates": [
      "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations.",
      "TReS: A transformer-based blind IQA model that incorporates relative ranking and consistency learning to capture both global and local perceptual features.",
      "CLIPIQA: An NR-IQA method that leverages CLIP embeddings to measure semantic fidelity and perceptual degradation.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images."
    ],
    "correct_answer": "UNIQIE: An uncertainty-aware NR-IQA model designed to estimate quality under both synthetic and real-world degradations.",
    "image_path": [
      "waterloo_select/micbenc_select/000000483833_contrast_5.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is most suitable for assessing brightness variation in this image?",
    "candidates": [
      "MUSIQ: A multi-scale transformer offering robust perceptual quality prediction via hierarchical feature fusion.",
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions, multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images.",
      "LIQE: A multitask-learning-based blind IQA model that exploits auxiliary tasks to enhance distortion awareness."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions, multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/micbenc_select/02547.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is 'NIQE: An opinion-unaware metric based on statistical regularities in natural images, frequently used in blind IQA pipelines.' effective for assessing 'Noise' in this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/10442628686.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/motion0101.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.' tool good at evaluating the 'Motion blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/micbenc_select/04663.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Which NR-IQA tool is specifically designed to handle gaussian blur, motion blur and might be suitable for this image's distortion type?",
    "candidates": [
      "TReS: A transformer-based blind IQA model that incorporates consistency learning.",
      "BRISQUE: A pioneering NSS-based NR-IQA model capturing spatial statistically deviations in natural images.",
      "UNIQIE: An uncertainty-aware NR-IQA model effective for JPEG compression and lens blur.",
      "WaDIQaM_NR: Applies a deep neural network with weighted average pooling for NR-IQA."
    ],
    "correct_answer": "UNIQIE: An uncertainty-aware NR-IQA model effective for JPEG compression and lens blur.",
    "image_path": [
      "waterloo_select/micbenc_select/14bc46df20d85441a446141a9266e73f.jpg"
    ]
  },
  {
    "type": "executor",
    "question": "Is the 'TOPIQ_FR: A top-down FR-IQA model that leverages high-level semantic guidance to focus on perceptually important distortion regions, thereby enhancing assessment accuracy. It performs best on classical distortions such as lens blur, motion blur, color diffusion, color shift, color quantization, JPEG/JPEG2000 compression, various noise types (white, component, impulse, multiplicative, denoise artifact), brightness changes (brighten, darken, mean shift), spatial distortions (jitter, pixelate, non-eccentricity patch, otsu quantization, color block), and contrast/sharpness variations.' tool good at evaluating the 'Blur' distortion of this image?",
    "candidates": [
      "Yes",
      "No"
    ],
    "correct_answer": "Yes",
    "image_path": [
      "waterloo_select/ref_md/02645.bmp",
      "waterloo_select/md/02645_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which FR-IQA tool is best suited for handling the 'saturate_strengthen' distortion of this image?",
    "candidates": [
      "FSIM: A widely used FR-IQA model based on low-level feature similarity, such as phase congruency and gradient magnitude. Effective for JPEG compression, color quantization, color diffusion, and JPEG2000 compression.",
      "LPIPS: A deep feature-based FR-IQA metric that computes perceptual similarity aligned with human visual judgments. Excels in handling distortions such as brightness changes, jitter, contrast variations, and color shifts.",
      "VIF: Evaluates quality based on the amount of visual information retained between reference and distorted images. Ranks highly for JPEG and JPEG2000 compression.",
      "DISTS: A structural-texture hybrid similarity model that balances sensitivity to structural degradations and tolerance to textural variations. Particularly well for Gaussian blur."
    ],
    "correct_answer": "LPIPS: A deep feature-based FR-IQA metric that computes perceptual similarity aligned with human visual judgments. Excels in handling distortions such as brightness changes, jitter, contrast variations, and color shifts.",
    "image_path": [
      "waterloo_select/ref_md/01985.bmp",
      "waterloo_select/md/01985_1.png"
    ]
  },
  {
    "type": "executor",
    "question": "Which No-Reference IQA tool is best suited for evaluating the perceptual quality of text legibility in this image due to distortion types like blurring and color shift?",
    "candidates": [
      "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
      "NIMA: Predicts aesthetic and technical quality using probability distributions derived from human ratings. Widely used for aesthetic assessment tasks.",
      "BRISQUE: A pioneering NSS-based NR-IQA model that captures spatial statistical deviations in natural images. It serves as a strong baseline across classical benchmarks.",
      "MUSIQ: A multi-scale transformer that processes images at native resolutions and varying aspect ratios, offering robust perceptual quality prediction via hierarchical feature fusion."
    ],
    "correct_answer": "QAlign: A state-of-the-art NR-IQA model based on multimodal large language models (MLLMs), capable of addressing Gaussian blur, motion blur, color distortions (shift, quantization, saturation), multiple noise types, brightness variations, spatial distortions, and sharpness without requiring reference images.",
    "image_path": [
      "waterloo_select/micbenc_select/02371.jpg"
    ]
  }
]