model,question,room_type,score
claude-sonnet-4-20250514,Pair Distance,kitchens,0.0
claude-sonnet-4-20250514,Pair Distance,living_rooms,0.0
claude-sonnet-4-20250514,Pair Distance,bedrooms,0.0
claude-sonnet-4-20250514,Pair Distance,hssd_simple,0.0
claude-sonnet-4-20250514,Placement,kitchens,0.0
claude-sonnet-4-20250514,Placement,living_rooms,0.0
claude-sonnet-4-20250514,Placement,bedrooms,0.0
claude-sonnet-4-20250514,Placement,hssd_simple,0.0
claude-sonnet-4-20250514,Repositioning,kitchens,0.0
claude-sonnet-4-20250514,Repositioning,living_rooms,0.0
claude-sonnet-4-20250514,Repositioning,bedrooms,0.0
claude-sonnet-4-20250514,Repositioning,hssd_simple,0.0
claude-sonnet-4-20250514,Free Space,kitchens,0.0
claude-sonnet-4-20250514,Free Space,living_rooms,0.0
claude-sonnet-4-20250514,Free Space,bedrooms,0.0
claude-sonnet-4-20250514,Free Space,hssd_simple,0.0
claude-sonnet-4-20250514,Visibility,kitchens,0.17
claude-sonnet-4-20250514,Visibility,living_rooms,0.17
claude-sonnet-4-20250514,Visibility,bedrooms,1.0
claude-sonnet-4-20250514,Visibility,hssd_simple,0.0
claude-sonnet-4-20250514,View Angle,kitchens,0.0
claude-sonnet-4-20250514,View Angle,living_rooms,0.0
claude-sonnet-4-20250514,View Angle,bedrooms,0.0
claude-sonnet-4-20250514,View Angle,hssd_simple,0.0
claude-sonnet-4-20250514,Max Box,kitchens,0.0
claude-sonnet-4-20250514,Max Box,living_rooms,0.0
claude-sonnet-4-20250514,Max Box,bedrooms,0.0
claude-sonnet-4-20250514,Max Box,hssd_simple,0.0
claude-sonnet-4-20250514,Shortest Path (Valid),kitchens,0.0
claude-sonnet-4-20250514,Shortest Path (Valid),living_rooms,0.0
claude-sonnet-4-20250514,Shortest Path (Valid),bedrooms,0.17
claude-sonnet-4-20250514,Shortest Path (Valid),hssd_simple,0.0
gpt-4.1-2025-04-14,Pair Distance,kitchens,0.0
gpt-4.1-2025-04-14,Pair Distance,living_rooms,0.17
gpt-4.1-2025-04-14,Pair Distance,bedrooms,0.0
gpt-4.1-2025-04-14,Pair Distance,hssd_simple,0.5
gpt-4.1-2025-04-14,Placement,kitchens,0.0
gpt-4.1-2025-04-14,Placement,living_rooms,0.0
gpt-4.1-2025-04-14,Placement,bedrooms,0.0
gpt-4.1-2025-04-14,Placement,hssd_simple,0.0
gpt-4.1-2025-04-14,Repositioning,kitchens,0.17
gpt-4.1-2025-04-14,Repositioning,living_rooms,0.17
gpt-4.1-2025-04-14,Repositioning,bedrooms,0.0
gpt-4.1-2025-04-14,Repositioning,hssd_simple,0.0
gpt-4.1-2025-04-14,Free Space,kitchens,0.0
gpt-4.1-2025-04-14,Free Space,living_rooms,0.33
gpt-4.1-2025-04-14,Free Space,bedrooms,0.0
gpt-4.1-2025-04-14,Free Space,hssd_simple,2.0
gpt-4.1-2025-04-14,Visibility,kitchens,0.0
gpt-4.1-2025-04-14,Visibility,living_rooms,0.0
gpt-4.1-2025-04-14,Visibility,bedrooms,0.0
gpt-4.1-2025-04-14,Visibility,hssd_simple,0.0
gpt-4.1-2025-04-14,View Angle,kitchens,0.0
gpt-4.1-2025-04-14,View Angle,living_rooms,0.0
gpt-4.1-2025-04-14,View Angle,bedrooms,0.0
gpt-4.1-2025-04-14,View Angle,hssd_simple,0.5
gpt-4.1-2025-04-14,Max Box,kitchens,0.0
gpt-4.1-2025-04-14,Max Box,living_rooms,0.0
gpt-4.1-2025-04-14,Max Box,bedrooms,0.0
gpt-4.1-2025-04-14,Max Box,hssd_simple,0.0
gpt-4.1-2025-04-14,Shortest Path (Valid),kitchens,0.17
gpt-4.1-2025-04-14,Shortest Path (Valid),living_rooms,0.0
gpt-4.1-2025-04-14,Shortest Path (Valid),bedrooms,0.0
gpt-4.1-2025-04-14,Shortest Path (Valid),hssd_simple,0.0
moonshotai/Kimi-K2-Instruct,Pair Distance,kitchens,0.0
moonshotai/Kimi-K2-Instruct,Pair Distance,living_rooms,0.0
moonshotai/Kimi-K2-Instruct,Pair Distance,bedrooms,0.17
moonshotai/Kimi-K2-Instruct,Pair Distance,hssd_simple,0.0
moonshotai/Kimi-K2-Instruct,Placement,kitchens,0.0
moonshotai/Kimi-K2-Instruct,Placement,living_rooms,0.0
moonshotai/Kimi-K2-Instruct,Placement,bedrooms,0.0
moonshotai/Kimi-K2-Instruct,Placement,hssd_simple,0.5
moonshotai/Kimi-K2-Instruct,Repositioning,kitchens,0.0
moonshotai/Kimi-K2-Instruct,Repositioning,living_rooms,0.0
moonshotai/Kimi-K2-Instruct,Repositioning,bedrooms,0.0
moonshotai/Kimi-K2-Instruct,Repositioning,hssd_simple,0.0
moonshotai/Kimi-K2-Instruct,Free Space,kitchens,0.17
moonshotai/Kimi-K2-Instruct,Free Space,living_rooms,0.0
moonshotai/Kimi-K2-Instruct,Free Space,bedrooms,0.0
moonshotai/Kimi-K2-Instruct,Free Space,hssd_simple,0.0
moonshotai/Kimi-K2-Instruct,Visibility,kitchens,0.0
moonshotai/Kimi-K2-Instruct,Visibility,living_rooms,0.0
moonshotai/Kimi-K2-Instruct,Visibility,bedrooms,0.0
moonshotai/Kimi-K2-Instruct,Visibility,hssd_simple,0.0
moonshotai/Kimi-K2-Instruct,View Angle,kitchens,0.0
moonshotai/Kimi-K2-Instruct,View Angle,living_rooms,0.17
moonshotai/Kimi-K2-Instruct,View Angle,bedrooms,0.0
moonshotai/Kimi-K2-Instruct,View Angle,hssd_simple,0.0
moonshotai/Kimi-K2-Instruct,Max Box,kitchens,0.0
moonshotai/Kimi-K2-Instruct,Max Box,living_rooms,0.67
moonshotai/Kimi-K2-Instruct,Max Box,bedrooms,1.5
moonshotai/Kimi-K2-Instruct,Max Box,hssd_simple,0.5
moonshotai/Kimi-K2-Instruct,Shortest Path (Valid),kitchens,0.5
moonshotai/Kimi-K2-Instruct,Shortest Path (Valid),living_rooms,0.0
moonshotai/Kimi-K2-Instruct,Shortest Path (Valid),bedrooms,0.0
moonshotai/Kimi-K2-Instruct,Shortest Path (Valid),hssd_simple,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Pair Distance,kitchens,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Pair Distance,living_rooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Pair Distance,bedrooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Pair Distance,hssd_simple,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Placement,kitchens,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Placement,living_rooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Placement,bedrooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Placement,hssd_simple,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Repositioning,kitchens,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Repositioning,living_rooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Repositioning,bedrooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Repositioning,hssd_simple,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Free Space,kitchens,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Free Space,living_rooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Free Space,bedrooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Free Space,hssd_simple,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Visibility,kitchens,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Visibility,living_rooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Visibility,bedrooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Visibility,hssd_simple,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,View Angle,kitchens,0.17
Qwen/Qwen3-Coder-480B-A35B-Instruct,View Angle,living_rooms,0.17
Qwen/Qwen3-Coder-480B-A35B-Instruct,View Angle,bedrooms,0.33
Qwen/Qwen3-Coder-480B-A35B-Instruct,View Angle,hssd_simple,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Max Box,kitchens,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Max Box,living_rooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Max Box,bedrooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Max Box,hssd_simple,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Shortest Path (Valid),kitchens,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Shortest Path (Valid),living_rooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Shortest Path (Valid),bedrooms,0.0
Qwen/Qwen3-Coder-480B-A35B-Instruct,Shortest Path (Valid),hssd_simple,0.0
Qwen/Qwen3-235B-A22B-Instruct-2507,Pair Distance,kitchens,0.0
Qwen/Qwen3-235B-A22B-Instruct-2507,Pair Distance,living_rooms,0.0
Qwen/Qwen3-235B-A22B-Instruct-2507,Pair Distance,bedrooms,0.0
Qwen/Qwen3-235B-A22B-Instruct-2507,Pair Distance,hssd_simple,17.5
Qwen/Qwen3-235B-A22B-Instruct-2507,Placement,kitchens,6.5
Qwen/Qwen3-235B-A22B-Instruct-2507,Placement,living_rooms,6.0
Qwen/Qwen3-235B-A22B-Instruct-2507,Placement,bedrooms,17.17
Qwen/Qwen3-235B-A22B-Instruct-2507,Placement,hssd_simple,6.5
Qwen/Qwen3-235B-A22B-Instruct-2507,Repositioning,kitchens,5.67
Qwen/Qwen3-235B-A22B-Instruct-2507,Repositioning,living_rooms,1.67
Qwen/Qwen3-235B-A22B-Instruct-2507,Repositioning,bedrooms,7.5
Qwen/Qwen3-235B-A22B-Instruct-2507,Repositioning,hssd_simple,47.0
Qwen/Qwen3-235B-A22B-Instruct-2507,Free Space,kitchens,0.17
Qwen/Qwen3-235B-A22B-Instruct-2507,Free Space,living_rooms,4.0
Qwen/Qwen3-235B-A22B-Instruct-2507,Free Space,bedrooms,4.5
Qwen/Qwen3-235B-A22B-Instruct-2507,Free Space,hssd_simple,52.5
Qwen/Qwen3-235B-A22B-Instruct-2507,Visibility,kitchens,0.0
Qwen/Qwen3-235B-A22B-Instruct-2507,Visibility,living_rooms,0.17
Qwen/Qwen3-235B-A22B-Instruct-2507,Visibility,bedrooms,0.17
Qwen/Qwen3-235B-A22B-Instruct-2507,Visibility,hssd_simple,11.5
Qwen/Qwen3-235B-A22B-Instruct-2507,View Angle,kitchens,0.5
Qwen/Qwen3-235B-A22B-Instruct-2507,View Angle,living_rooms,1.5
Qwen/Qwen3-235B-A22B-Instruct-2507,View Angle,bedrooms,0.5
Qwen/Qwen3-235B-A22B-Instruct-2507,View Angle,hssd_simple,26.5
Qwen/Qwen3-235B-A22B-Instruct-2507,Max Box,kitchens,22.5
Qwen/Qwen3-235B-A22B-Instruct-2507,Max Box,living_rooms,49.83
Qwen/Qwen3-235B-A22B-Instruct-2507,Max Box,bedrooms,46.5
Qwen/Qwen3-235B-A22B-Instruct-2507,Max Box,hssd_simple,45.5
Qwen/Qwen3-235B-A22B-Instruct-2507,Shortest Path (Valid),kitchens,44.33
Qwen/Qwen3-235B-A22B-Instruct-2507,Shortest Path (Valid),living_rooms,37.83
Qwen/Qwen3-235B-A22B-Instruct-2507,Shortest Path (Valid),bedrooms,40.17
Qwen/Qwen3-235B-A22B-Instruct-2507,Shortest Path (Valid),hssd_simple,35.5
gpt-4.1-mini-2025-04-14,Pair Distance,kitchens,0.0
gpt-4.1-mini-2025-04-14,Pair Distance,living_rooms,0.0
gpt-4.1-mini-2025-04-14,Pair Distance,bedrooms,0.17
gpt-4.1-mini-2025-04-14,Pair Distance,hssd_simple,8.5
gpt-4.1-mini-2025-04-14,Placement,kitchens,0.0
gpt-4.1-mini-2025-04-14,Placement,living_rooms,0.0
gpt-4.1-mini-2025-04-14,Placement,bedrooms,0.0
gpt-4.1-mini-2025-04-14,Placement,hssd_simple,0.0
gpt-4.1-mini-2025-04-14,Repositioning,kitchens,0.0
gpt-4.1-mini-2025-04-14,Repositioning,living_rooms,0.0
gpt-4.1-mini-2025-04-14,Repositioning,bedrooms,0.17
gpt-4.1-mini-2025-04-14,Repositioning,hssd_simple,2.0
gpt-4.1-mini-2025-04-14,Free Space,kitchens,0.0
gpt-4.1-mini-2025-04-14,Free Space,living_rooms,0.0
gpt-4.1-mini-2025-04-14,Free Space,bedrooms,0.0
gpt-4.1-mini-2025-04-14,Free Space,hssd_simple,1.0
gpt-4.1-mini-2025-04-14,Visibility,kitchens,0.0
gpt-4.1-mini-2025-04-14,Visibility,living_rooms,0.17
gpt-4.1-mini-2025-04-14,Visibility,bedrooms,0.33
gpt-4.1-mini-2025-04-14,Visibility,hssd_simple,2.5
gpt-4.1-mini-2025-04-14,View Angle,kitchens,0.0
gpt-4.1-mini-2025-04-14,View Angle,living_rooms,0.0
gpt-4.1-mini-2025-04-14,View Angle,bedrooms,0.0
gpt-4.1-mini-2025-04-14,View Angle,hssd_simple,3.0
gpt-4.1-mini-2025-04-14,Max Box,kitchens,0.0
gpt-4.1-mini-2025-04-14,Max Box,living_rooms,0.0
gpt-4.1-mini-2025-04-14,Max Box,bedrooms,0.33
gpt-4.1-mini-2025-04-14,Max Box,hssd_simple,0.5
gpt-4.1-mini-2025-04-14,Shortest Path (Valid),kitchens,0.0
gpt-4.1-mini-2025-04-14,Shortest Path (Valid),living_rooms,0.17
gpt-4.1-mini-2025-04-14,Shortest Path (Valid),bedrooms,0.0
gpt-4.1-mini-2025-04-14,Shortest Path (Valid),hssd_simple,0.0
Qwen/Qwen3-30B-A3B-Instruct-2507,Pair Distance,kitchens,0.0
Qwen/Qwen3-30B-A3B-Instruct-2507,Pair Distance,living_rooms,0.0
Qwen/Qwen3-30B-A3B-Instruct-2507,Pair Distance,bedrooms,0.0
Qwen/Qwen3-30B-A3B-Instruct-2507,Pair Distance,hssd_simple,3.5
Qwen/Qwen3-30B-A3B-Instruct-2507,Placement,kitchens,1.17
Qwen/Qwen3-30B-A3B-Instruct-2507,Placement,living_rooms,1.0
Qwen/Qwen3-30B-A3B-Instruct-2507,Placement,bedrooms,2.17
Qwen/Qwen3-30B-A3B-Instruct-2507,Placement,hssd_simple,1.5
Qwen/Qwen3-30B-A3B-Instruct-2507,Repositioning,kitchens,2.33
Qwen/Qwen3-30B-A3B-Instruct-2507,Repositioning,living_rooms,1.0
Qwen/Qwen3-30B-A3B-Instruct-2507,Repositioning,bedrooms,0.83
Qwen/Qwen3-30B-A3B-Instruct-2507,Repositioning,hssd_simple,29.0
Qwen/Qwen3-30B-A3B-Instruct-2507,Free Space,kitchens,0.33
Qwen/Qwen3-30B-A3B-Instruct-2507,Free Space,living_rooms,1.0
Qwen/Qwen3-30B-A3B-Instruct-2507,Free Space,bedrooms,0.33
Qwen/Qwen3-30B-A3B-Instruct-2507,Free Space,hssd_simple,39.0
Qwen/Qwen3-30B-A3B-Instruct-2507,Visibility,kitchens,0.17
Qwen/Qwen3-30B-A3B-Instruct-2507,Visibility,living_rooms,0.5
Qwen/Qwen3-30B-A3B-Instruct-2507,Visibility,bedrooms,1.33
Qwen/Qwen3-30B-A3B-Instruct-2507,Visibility,hssd_simple,11.0
Qwen/Qwen3-30B-A3B-Instruct-2507,View Angle,kitchens,0.0
Qwen/Qwen3-30B-A3B-Instruct-2507,View Angle,living_rooms,0.17
Qwen/Qwen3-30B-A3B-Instruct-2507,View Angle,bedrooms,0.5
Qwen/Qwen3-30B-A3B-Instruct-2507,View Angle,hssd_simple,17.5
Qwen/Qwen3-30B-A3B-Instruct-2507,Max Box,kitchens,6.17
Qwen/Qwen3-30B-A3B-Instruct-2507,Max Box,living_rooms,29.33
Qwen/Qwen3-30B-A3B-Instruct-2507,Max Box,bedrooms,21.83
Qwen/Qwen3-30B-A3B-Instruct-2507,Max Box,hssd_simple,20.0
Qwen/Qwen3-30B-A3B-Instruct-2507,Shortest Path (Valid),kitchens,41.5
Qwen/Qwen3-30B-A3B-Instruct-2507,Shortest Path (Valid),living_rooms,46.33
Qwen/Qwen3-30B-A3B-Instruct-2507,Shortest Path (Valid),bedrooms,49.0
Qwen/Qwen3-30B-A3B-Instruct-2507,Shortest Path (Valid),hssd_simple,43.0
mistralai/Devstral-Small-2505,Pair Distance,kitchens,0.17
mistralai/Devstral-Small-2505,Pair Distance,living_rooms,0.17
mistralai/Devstral-Small-2505,Pair Distance,bedrooms,0.83
mistralai/Devstral-Small-2505,Pair Distance,hssd_simple,1.5
mistralai/Devstral-Small-2505,Placement,kitchens,1.17
mistralai/Devstral-Small-2505,Placement,living_rooms,1.5
mistralai/Devstral-Small-2505,Placement,bedrooms,0.5
mistralai/Devstral-Small-2505,Placement,hssd_simple,4.5
mistralai/Devstral-Small-2505,Repositioning,kitchens,0.17
mistralai/Devstral-Small-2505,Repositioning,living_rooms,1.0
mistralai/Devstral-Small-2505,Repositioning,bedrooms,1.33
mistralai/Devstral-Small-2505,Repositioning,hssd_simple,4.5
mistralai/Devstral-Small-2505,Free Space,kitchens,2.5
mistralai/Devstral-Small-2505,Free Space,living_rooms,10.17
mistralai/Devstral-Small-2505,Free Space,bedrooms,6.33
mistralai/Devstral-Small-2505,Free Space,hssd_simple,28.5
mistralai/Devstral-Small-2505,Visibility,kitchens,0.33
mistralai/Devstral-Small-2505,Visibility,living_rooms,0.33
mistralai/Devstral-Small-2505,Visibility,bedrooms,0.67
mistralai/Devstral-Small-2505,Visibility,hssd_simple,0.0
mistralai/Devstral-Small-2505,View Angle,kitchens,0.33
mistralai/Devstral-Small-2505,View Angle,living_rooms,0.67
mistralai/Devstral-Small-2505,View Angle,bedrooms,1.17
mistralai/Devstral-Small-2505,View Angle,hssd_simple,2.5
mistralai/Devstral-Small-2505,Max Box,kitchens,1.5
mistralai/Devstral-Small-2505,Max Box,living_rooms,0.83
mistralai/Devstral-Small-2505,Max Box,bedrooms,0.83
mistralai/Devstral-Small-2505,Max Box,hssd_simple,3.0
mistralai/Devstral-Small-2505,Shortest Path (Valid),kitchens,6.67
mistralai/Devstral-Small-2505,Shortest Path (Valid),living_rooms,7.83
mistralai/Devstral-Small-2505,Shortest Path (Valid),bedrooms,9.17
mistralai/Devstral-Small-2505,Shortest Path (Valid),hssd_simple,27.0
