[
  "/* Copyright 2024 Stanford University\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"stencil_mapper.h\"\n\n#include \"mappers/default_mapper.h\"\n\n#define SPMD_SHARD_USE_IO_PROC 1\n\nusing namespace Legion;\nusing namespace Legion::Mapping;\n\nstatic Logger log_stencil(\"stencil\");\n\nclass StencilMapper : public DefaultMapper\n{\npublic:\n  StencilMapper(MapperRuntime *rt, Machine machine, Processor local,\n                const char *mapper_name,\n                std::vector<Processor>* procs_list);\n  void default_policy_rank_processor_kinds(\n                                    MapperContext ctx, const Task &task,\n                                    std::vector<Processor::Kind> &ranking) override;\n  Processor default_policy_select_initial_processor(\n                                    MapperContext ctx, const Task &task) override;\n  void default_policy_select_target_processors(\n                                    MapperContext ctx,\n                                    const Task &task,\n                                    std::vector<Processor> &target_procs) override;\n  LogicalRegion default_policy_select_instance_region(\n                                MapperContext ctx, Memory target_memory,\n                                const RegionRequirement &req,\n                                const LayoutConstraintSet &constraints,\n                                bool force_new_instances,\n                                bool meets_constraints) override;\n  void map_task(const MapperContext ctx,\n                        const Task &task,\n                        const MapTaskInput &input,\n                        MapTaskOutput &output) override;\n  void map_copy(const MapperContext ctx,\n                        const Copy &copy,\n                        const MapCopyInput &input,\n                        MapCopyOutput &output) override;\n  template<bool IS_SRC>\n  void stencil_create_copy_instance(MapperContext ctx, const Copy &copy,\n                                    const RegionRequirement &req, unsigned index,\n                                    std::vector<PhysicalInstance> &instances);\nprivate:\n  std::vector<Processor>& procs_list;\n};\n\nStencilMapper::StencilMapper(MapperRuntime *rt, Machine machine, Processor local,\n                             const char *mapper_name,\n                             std::vector<Processor>* _procs_list)\n  : DefaultMapper(rt, machine, local, mapper_name)\n  , procs_list(*_procs_list)\n{\n}\n\nvoid StencilMapper::default_policy_rank_processor_kinds(MapperContext ctx,\n                        const Task &task, std::vector<Processor::Kind> &ranking)\n{\n#if SPMD_SHARD_USE_IO_PROC\n  const char* task_name = task.get_task_name();\n  const char* prefix = \"shard_\";\n  if (strncmp(task_name, prefix, strlen(prefix)) == 0) {\n    // Put shard tasks on IO processors.\n    ranking.resize(5);\n    ranking[0] = Processor::TOC_PROC;\n    ranking[1] = Processor::PROC_SET;\n    ranking[2] = Processor::IO_PROC;\n    ranking[3] = Processor::LOC_PROC;\n    ranking[4] = Processor::PY_PROC;\n  } else {\n#endif\n    ranking.resize(5);\n    ranking[0] = Processor::TOC_PROC;\n    ranking[1] = Processor::PROC_SET;\n    ranking[2] = Processor::LOC_PROC;\n    ranking[3] = Processor::IO_PROC;\n    ranking[4] = Processor::PY_PROC;\n#if SPMD_SHARD_USE_IO_PROC\n  }\n#endif\n}\n\nProcessor StencilMapper::default_policy_select_initial_processor(\n                                    MapperContext ctx, const Task &task)\n{\n  return DefaultMapper::default_policy_select_initial_processor(ctx, task);\n}\n\nvoid StencilMapper::default_policy_select_target_processors(\n                                    MapperContext ctx,\n                                    const Task &task,\n                                    std::vector<Processor> &target_procs)\n{\n  target_procs.push_back(task.target_proc);\n}\n\nLogicalRegion StencilMapper::default_policy_select_instance_region(\n                              MapperContext ctx, Memory target_memory,\n                              const RegionRequirement &req,\n                              const LayoutConstraintSet &constraints,\n                              bool force_new_instances,\n                              bool meets_constraints)\n{\n  return req.region;\n}\n\nvoid StencilMapper::map_task(const MapperContext      ctx,\n                             const Task&              task,\n                             const MapTaskInput&      input,\n                                   MapTaskOutput&     output)\n{\n  if (task.parent_task != NULL && task.parent_task->must_epoch_task) {\n    Processor::Kind target_kind = task.target_proc.kind();\n    // Get the variant that we are going to use to map this task\n    VariantInfo chosen = default_find_preferred_variant(task, ctx,\n                                                        true/*needs tight bound*/, true/*cache*/, target_kind);\n    output.chosen_variant = chosen.variant;\n    // TODO: some criticality analysis to assign priorities\n    output.task_priority = 0;\n    output.postmap_task = false;\n    // Figure out our target processors\n    output.target_procs.push_back(task.target_proc);\n\n    for (unsigned idx = 0; idx < task.regions.size(); idx++) {\n      const RegionRequirement &req = task.regions[idx];\n\n      // Skip any empty regions\n      if ((req.privilege == NO_ACCESS) || (req.privilege_fields.empty()))\n        continue;\n\n      if (input.valid_instances[idx].empty()) {\n        // happens when the region is empty\n        output.chosen_instances[idx].resize(1);\n        const LayoutConstraintSet empty_constraints;\n        const std::vector<LogicalRegion> empty_regions(1, req.region);\n        bool created = false;\n        bool ok = runtime->find_or_create_physical_instance(ctx, \n            default_policy_select_target_memory(ctx, task.target_proc, req),\n            empty_constraints, empty_regions, output.chosen_instances[idx].back(), \n            created, true/*acquire*/);\n        if (!ok) {\n          log_stencil.error(\"failed to find or create empty instance\");\n          assert(false);\n        }\n        continue;\n      }\n      output.chosen_instances[idx] = input.valid_instances[idx];\n      bool ok = runtime->acquire_and_filter_instances(ctx, output.chosen_instances);\n      if (!ok) {\n        log_stencil.error(\"failed to acquire instances\");\n        assert(false);\n      }\n    }\n    return;\n  }\n\n  DefaultMapper::map_task(ctx, task, input, output);\n}\n\nvoid StencilMapper::map_copy(const MapperContext ctx,\n                             const Copy &copy,\n                             const MapCopyInput &input,\n                             MapCopyOutput &output)\n{\n  log_stencil.spew(\"Stencil mapper map_copy\");\n  for (unsigned idx = 0; idx < copy.src_requirements.size(); idx++)\n  {\n    // Always use a virtual instance for the source.\n    output.src_instances[idx].clear();\n    output.src_instances[idx].push_back(\n      PhysicalInstance::get_virtual_instance());\n\n    // Place the destination instance on the remote node.\n    output.dst_instances[idx].clear();\n    if (!copy.dst_requirements[idx].is_restricted()) {\n      // Call a customized method to create an instance on the desired node.\n      stencil_create_copy_instance<false/*is src*/>(ctx, copy, \n        copy.dst_requirements[idx], idx, output.dst_instances[idx]);\n    } else {\n      // If it's restricted, just take the instance. This will only\n      // happen inside the shard task.\n      output.dst_instances[idx] = input.dst_instances[idx];\n      if (!output.dst_instances[idx].empty())\n        runtime->acquire_and_filter_instances(ctx,\n                                output.dst_instances[idx]);\n    }\n  }\n}\n\n//--------------------------------------------------------------------------\ntemplate<bool IS_SRC>\nvoid StencilMapper::stencil_create_copy_instance(MapperContext ctx,\n                     const Copy &copy, const RegionRequirement &req, \n                     unsigned idx, std::vector<PhysicalInstance> &instances)\n//--------------------------------------------------------------------------\n{\n  // This method is identical to the default version except that it\n  // chooses an intelligent memory based on the destination of the\n  // copy.\n\n  // See if we have all the fields covered\n  std::set<FieldID> missing_fields = req.privilege_fields;\n  for (std::vector<PhysicalInstance>::const_iterator it = \n        instances.begin(); it != instances.end(); it++)\n  {\n    it->remove_space_fields(missing_fields);\n    if (missing_fields.empty())\n      break;\n  }\n  if (missing_fields.empty())\n    return;\n  // If we still have fields, we need to make an instance\n  // We clearly need to take a guess, let's see if we can find\n  // one of our instances to use.\n\n  // ELLIOTT: Get the remote node here.\n  Color index = runtime->get_logical_region_color(ctx, copy.src_requirements[idx].region);\n  Memory target_memory = default_policy_select_target_memory(ctx,\n                           procs_list[index % procs_list.size()],\n                           req);\n  log_stencil.warning(\"Building instance for copy of a region with index %u to be in memory %llx\",\n                      index, target_memory.id);\n  bool force_new_instances = false;\n  LayoutConstraintID our_layout_id = \n   default_policy_select_layout_constraints(ctx, target_memory, \n                                            req, COPY_MAPPING,\n                                            true/*needs check*/, \n                                            force_new_instances);\n  LayoutConstraintSet creation_constraints = \n              runtime->find_layout_constraints(ctx, our_layout_id);\n  creation_constraints.add_constraint(\n      FieldConstraint(missing_fields,\n                      false/*contig*/, false/*inorder*/));\n  instances.resize(instances.size() + 1);\n  if (!default_make_instance(ctx, target_memory, \n        creation_constraints, instances.back(), \n        COPY_MAPPING, force_new_instances, true/*meets*/, req))\n  {\n    // If we failed to make it that is bad\n    log_stencil.error(\"Stencil mapper failed allocation for \"\n                   \"%s region requirement %d of explicit \"\n                   \"region-to-region copy operation in task %s \"\n                   \"(ID %lld) in memory \" IDFMT \" for processor \"\n                   IDFMT \". This means the working set of your \"\n                   \"application is too big for the allotted \"\n                   \"capacity of the given memory under the default \"\n                   \"mapper's mapping scheme. You have three \"\n                   \"choices: ask Realm to allocate more memory, \"\n                   \"write a custom mapper to better manage working \"\n                   \"sets, or find a bigger machine. Good luck!\",\n                   IS_SRC ? \"source\" : \"destination\", idx, \n                   copy.parent_task->get_task_name(),\n                   copy.parent_task->get_unique_id(),\n\t\t       target_memory.id,\n\t\t       copy.parent_task->current_proc.id);\n    assert(false);\n  }\n}\n\nstatic void create_mappers(Machine machine, Runtime *runtime, const std::set<Processor> &local_procs)\n{\n  std::vector<Processor>* procs_list = new std::vector<Processor>();\n\n  Machine::ProcessorQuery procs_query(machine);\n  procs_query.only_kind(Processor::LOC_PROC);\n  for (Machine::ProcessorQuery::iterator it = procs_query.begin();\n        it != procs_query.end(); it++)\n    procs_list->push_back(*it);\n\n  for (std::set<Processor>::const_iterator it = local_procs.begin();\n        it != local_procs.end(); it++)\n  {\n    StencilMapper* mapper = new StencilMapper(runtime->get_mapper_runtime(),\n                                              machine, *it, \"stencil_mapper\",\n                                              procs_list);\n    runtime->replace_default_mapper(mapper, *it);\n  }\n}\n\nvoid register_mappers()\n{\n  Runtime::add_registration_callback(create_mappers);\n}"
]