{
 "metadata": {
  "kernelspec": {
   "name": "bas",
   "display_name": "bas"
  }
 },
 "nbformat_minor": 5,
 "nbformat": 4,
 "cells": [
  {
   "id": "md-intro",
   "cell_type": "markdown",
   "source": [
    "# Scene layout depth preview\n",
    "\n",
    "Builds a `DiffusionScene` from an entity layout, renders its depth stack, and nudges the camera step by step until `entity_center` accepts the framing of the rendered entities.\n",
    "\n",
    "Run with **Restart Kernel and Run All**. The layout, the camera pitch and the iteration bound are chosen in the configuration block of the first code cell."
   ],
   "metadata": {}
  },
  {
   "id": "6f385e4d-96f0-49f1-b5b7-9ebd25afc73b",
   "cell_type": "code",
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "# Restrict CUDA to one device. Set before the project modules are imported so\n",
    "# that any CUDA initialisation they trigger sees this value.\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "# Checkout that provides the `src` package. Override it with the\n",
    "# OMINICONTROL_ROOT environment variable instead of editing the notebook.\n",
    "OMINICONTROL_ROOT = os.environ.get(\n",
    "    'OMINICONTROL_ROOT',\n",
    "    '/mnt/workspace/workgroup/zheliu.lzy/vision_cot/OminiControl',\n",
    ")\n",
    "# Guard against appending the same entry again when the cell is re-run.\n",
    "if OMINICONTROL_ROOT not in sys.path:\n",
    "    sys.path.append(OMINICONTROL_ROOT)\n",
    "\n",
    "from src.utils.scene import DiffusionScene\n",
    "from src.utils.visualization import imshowp\n",
    "\n",
    "# --- Configuration ---\n",
    "ACTIVE_LAYOUT = 'dog_cat'  # key into LAYOUTS (defined in the layouts cell)\n",
    "CAM_PITCH_OVERRIDE = 90    # degrees; None uses 90 - layout camera_pitch_angle\n",
    "MAX_CAMERA_STEPS = 50      # upper bound on camera framing iterations"
   ],
   "metadata": {
    "trusted": true,
    "libroFormatter": "formatter-string",
    "libroCellType": "code"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "id": "md-helpers",
   "cell_type": "markdown",
   "source": [
    "## Helpers"
   ],
   "metadata": {}
  },
  {
   "id": "bbox-helpers",
   "cell_type": "code",
   "source": [
    "def find_nonzero_bounding_box(vector):\n",
    "    \"\"\"Return the bounding box of the non-zero region of a 2-D array.\n",
    "\n",
    "    Args:\n",
    "        vector: two-dimensional NumPy array; axis 0 is treated as y (rows)\n",
    "            and axis 1 as x (columns).\n",
    "\n",
    "    Returns:\n",
    "        Tuple (x_min, y_min, x_max, y_max) of inclusive indices, or None\n",
    "        when the array has no non-zero element.\n",
    "\n",
    "    Raises:\n",
    "        TypeError: if vector is not a NumPy array.\n",
    "        ValueError: if vector is not two-dimensional.\n",
    "    \"\"\"\n",
    "    if not isinstance(vector, np.ndarray):\n",
    "        raise TypeError('输入必须是 NumPy 数组')\n",
    "    if vector.ndim != 2:\n",
    "        raise ValueError('输入数组必须是二维的')\n",
    "\n",
    "    # np.nonzero returns one index array per axis: rows first, then columns.\n",
    "    y_indices, x_indices = np.nonzero(vector)\n",
    "    if y_indices.size == 0:\n",
    "        return None\n",
    "    return (np.min(x_indices), np.min(y_indices), np.max(x_indices), np.max(y_indices))\n",
    "\n",
    "\n",
    "def entity_center(x_min, y_min, x_max, y_max, shape, c_max=0.15, c_min=0.1):\n",
    "    \"\"\"Pick the camera step that improves the framing of a bounding box.\n",
    "\n",
    "    Args:\n",
    "        x_min, y_min, x_max, y_max: bounding box in pixel indices (x along\n",
    "            columns, y along rows), as returned by find_nonzero_bounding_box.\n",
    "        shape: (height, width) of the image the box was measured in.\n",
    "        c_max: margin fraction; a box farther than this from every border\n",
    "            yields a positive step.\n",
    "        c_min: margin fraction; a box closer than this to any border yields\n",
    "            a negative step.\n",
    "\n",
    "    Returns:\n",
    "        0.1, -0.1 or 0. The caller feeds the value into the camera\n",
    "        translation; 0 means the framing is accepted.\n",
    "    \"\"\"\n",
    "    h, w = shape\n",
    "    # x is compared with the width and y with the height, so non-square\n",
    "    # images are handled correctly.\n",
    "    clear_of_borders = (\n",
    "        x_min > w * c_max and y_min > h * c_max\n",
    "        and x_max < w * (1 - c_max) and y_max < h * (1 - c_max)\n",
    "    )\n",
    "    if clear_of_borders:\n",
    "        return 0.1\n",
    "    near_a_border = (\n",
    "        x_min < w * c_min or y_min < h * c_min\n",
    "        or x_max > w * (1 - c_min) or y_max > h * (1 - c_min)\n",
    "    )\n",
    "    if near_a_border:\n",
    "        return -0.1\n",
    "    return 0"
   ],
   "metadata": {
    "trusted": true,
    "libroFormatter": "formatter-string"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "id": "md-layouts",
   "cell_type": "markdown",
   "source": [
    "## Layouts\n",
    "\n",
    "Every layout tried so far is kept as a named entry; `ACTIVE_LAYOUT` selects the one that is rendered."
   ],
   "metadata": {}
  },
  {
   "id": "scene-layouts",
   "cell_type": "code",
   "source": [
    "def make_entity(name, size, position, orient=0):\n",
    "    \"\"\"Build one entity_layout record from its name, size, position and orient.\"\"\"\n",
    "    return {\n",
    "        'entity_name': name,\n",
    "        'size': list(size),\n",
    "        'position': list(position),\n",
    "        'orient': orient,\n",
    "    }\n",
    "\n",
    "\n",
    "def make_layout(entities, scene_size=3, camera_pitch_angle=15):\n",
    "    \"\"\"Build a layout dict with scene_parameters and entity_layout keys.\"\"\"\n",
    "    return {\n",
    "        'scene_parameters': {\n",
    "            'scene_size': scene_size,\n",
    "            'camera_pitch_angle': camera_pitch_angle,\n",
    "        },\n",
    "        'entity_layout': list(entities),\n",
    "    }\n",
    "\n",
    "\n",
    "LAYOUTS = {\n",
    "    'coat_rack_hat': make_layout([\n",
    "        make_entity('the brown coat rack', [1.0, 1.0, 2.5], [0.0, 1.25, 1.5]),\n",
    "        make_entity('The red hat', [0.8, 0.8, 0.4], [0.0, 2.7, 1.5]),\n",
    "    ]),\n",
    "    # Same entities with position[1] reduced by 0.5.\n",
    "    'coat_rack_hat_alt_a': make_layout([\n",
    "        make_entity('the brown coat rack', [1.0, 1.0, 2.5], [0.0, 0.75, 1.5]),\n",
    "        make_entity('The red hat', [0.8, 0.8, 0.4], [0.0, 2.2, 1.5]),\n",
    "    ]),\n",
    "    # Same entities with position[2] set to 0.0.\n",
    "    'coat_rack_hat_alt_b': make_layout([\n",
    "        make_entity('the brown coat rack', [1.0, 1.0, 2.5], [0.0, 1.25, 0.0]),\n",
    "        make_entity('The red hat', [0.8, 0.8, 0.4], [0.0, 2.7, 0.0]),\n",
    "    ]),\n",
    "    'dog_cat': make_layout([\n",
    "        make_entity('dog', [1.0, 0.4, 0.7], [0.0, 0.35, 1.0]),\n",
    "        make_entity('cat', [0.6, 0.25, 0.4], [-0.8, 0.2, 1.8], orient=45),\n",
    "    ]),\n",
    "    # Enlarged boxes with position[1] = 1.5 (the dog's position[2] was 1.0\n",
    "    # before it was changed to 0.2).\n",
    "    'dog_cat_alt': make_layout([\n",
    "        make_entity('dog', [3, 0.4, 3], [0.0, 1.5, 0.2]),\n",
    "        make_entity('cat', [0.6, 0.25, 3], [-0.8, 1.5, 1.8], orient=45),\n",
    "    ]),\n",
    "}\n",
    "\n",
    "ans_json = LAYOUTS[ACTIVE_LAYOUT]"
   ],
   "metadata": {
    "trusted": true,
    "libroFormatter": "formatter-string"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "id": "md-scene",
   "cell_type": "markdown",
   "source": [
    "## Build the scene"
   ],
   "metadata": {}
  },
  {
   "id": "f3ca71fb-b5be-4f58-b03b-6fdd9c317d6e",
   "cell_type": "code",
   "source": [
    "scene_params = ans_json['scene_parameters']\n",
    "entities = ans_json['entity_layout']\n",
    "\n",
    "scene_size = scene_params['scene_size'] / 2\n",
    "if CAM_PITCH_OVERRIDE is None:\n",
    "    cam_pitch_angle = 90 - scene_params['camera_pitch_angle']\n",
    "else:\n",
    "    cam_pitch_angle = CAM_PITCH_OVERRIDE\n",
    "floor_scale_x = 1\n",
    "floor_scale_y = 1\n",
    "\n",
    "# Centre the floor offset on the layout extent along position[1], using\n",
    "# size[2] as the extent on that axis. Presumably size[2] is the height: the\n",
    "# dog, the cat and the coat rack all have position[1] == size[2] / 2.\n",
    "# TODO confirm the axis convention against DiffusionScene.add_box.\n",
    "# The defaults (100 and 0) only apply to an empty layout.\n",
    "layout_y_min = min((e['position'][1] - e['size'][2] / 2 for e in entities), default=100)\n",
    "layout_y_max = max((e['position'][1] + e['size'][2] / 2 for e in entities), default=0)\n",
    "floor_offset = -(layout_y_max + layout_y_min) / 2\n",
    "\n",
    "scene = DiffusionScene(scene_size=scene_size, fov=(60, 60))\n",
    "# rotation_axis and translation are both ordered (x, z, y).\n",
    "scene.move_camera(rotation_angle=cam_pitch_angle, rotation_axis=[1, 0, 0], translation=[0, 0, 0])\n",
    "scene.build_floor(scale_x=floor_scale_x, scale_y=floor_scale_y, floor_offset=floor_offset)\n",
    "\n",
    "# NOTE: entity['orient'] is not applied to the boxes here.\n",
    "for i, entity in enumerate(entities):\n",
    "    scene.add_box(id=f'box_{i}', size=entity['size'], origin=entity['position'], prompt=entity['entity_name'])"
   ],
   "metadata": {
    "trusted": true,
    "libroFormatter": "formatter-string"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "id": "md-framing",
   "cell_type": "markdown",
   "source": [
    "## Frame the entities\n",
    "\n",
    "Render, measure the non-zero region of the last depth image, and translate the camera by the step `entity_center` suggests until it returns 0 or `MAX_CAMERA_STEPS` is reached."
   ],
   "metadata": {}
  },
  {
   "id": "camera-framing",
   "cell_type": "code",
   "source": [
    "def render_depth():\n",
    "    \"\"\"Render the current scene and return the single array scene.render yields.\"\"\"\n",
    "    # scene.render returns one array here; unpacking it into two names raised\n",
    "    # 'too many values to unpack (expected 2)'.\n",
    "    return scene.render(single=True, floor=False, render_floor=False, depth_max=4 * scene_size)\n",
    "\n",
    "\n",
    "def measure_framing(depth_image):\n",
    "    \"\"\"Return (bbox, move) for one depth image.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: if the image has no non-zero pixel, i.e. there is\n",
    "            nothing to frame.\n",
    "    \"\"\"\n",
    "    bbox = find_nonzero_bounding_box(depth_image)\n",
    "    if bbox is None:\n",
    "        raise RuntimeError('rendered depth image is empty: no entity is visible to the camera')\n",
    "    return bbox, entity_center(*bbox, depth_image.shape)\n",
    "\n",
    "\n",
    "depth_all = render_depth()\n",
    "imshowp(depth_all[-1])\n",
    "bbox, move = measure_framing(depth_all[-1])\n",
    "\n",
    "steps_taken = 0\n",
    "while move != 0 and steps_taken < MAX_CAMERA_STEPS:\n",
    "    # rotation_axis and translation are both ordered (x, z, y).\n",
    "    scene.move_camera(rotation_angle=0, rotation_axis=[1, 0, 0], translation=[0, move, 0])\n",
    "    # Re-render before showing so the displayed image matches the new pose.\n",
    "    depth_all = render_depth()\n",
    "    imshowp(depth_all[-1])\n",
    "    bbox, move = measure_framing(depth_all[-1])\n",
    "    steps_taken += 1\n",
    "\n",
    "if move != 0:\n",
    "    print(f'camera framing did not settle within {MAX_CAMERA_STEPS} steps')"
   ],
   "metadata": {
    "trusted": true,
    "libroFormatter": "formatter-string"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "id": "md-result",
   "cell_type": "markdown",
   "source": [
    "## Result\n",
    "\n",
    "Final pixel bounding box `(x_min, y_min, x_max, y_max)` of the entities and the shape of the rendered depth stack."
   ],
   "metadata": {}
  },
  {
   "id": "9f7efea6-808d-4335-8fd4-5b2a7f58919b",
   "cell_type": "code",
   "source": [
    "x_min, y_min, x_max, y_max = bbox\n",
    "print(x_min, y_min, x_max, y_max)\n",
    "print(depth_all.shape)"
   ],
   "metadata": {
    "trusted": true,
    "libroFormatter": "formatter-string"
   },
   "execution_count": null,
   "outputs": []
  }
 ]
}