/***************************************************************************************************
 * Copyright (c) 2017-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 *modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright notice,
 *this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *notice, this list of conditions and the following disclaimer in the
 *documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the names of its
 *contributors may be used to endorse or promote products derived from this
 *software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY DIRECT,
 *INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 *OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TOR (INCLUDING
 *NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 *EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 **************************************************************************************************/

/*! \file
  \brief CUTLASS layout visualization tool
*/

#include <map>
#include <iostream>
#include <iomanip>
#include <memory>

#include <cutlass/cutlass.h>

#include "options.h"
#include "register_layout.h"

/////////////////////////////////////////////////////////////////////////////////////////////////

std::map<std::string, std::unique_ptr<VisualizeLayoutBase> > layouts;

/////////////////////////////////////////////////////////////////////////////////////////////////

void print_usage(std::ostream& out) {
    out << "03_visualize_layout <layout> [options]"
        << "\n\n"
        << "  Layouts:\n";

    for (auto const& layout : layouts) {
        out << "    " << layout.first
            << std::string(46 - layout.first.size(), ' ');
        layout.second->print_help(out);
        out << "\n";
    }

    out << "\n";

    Options::print_usage(out);

    out << "\nExamples:\n\n"
        << "$ 03_visualize_layout RowMajor --extent=16,16\n"
        << "$ 03_visualize_layout \"ColumnMajorInterleaved<4>\" --extent=32,8 "
           "--output-shape=16 --vectorize=4\n"
        << "$ 03_visualize_layout \"TensorOpMultiplicand<4,64>\" "
           "--extent=64,64 --vectorize=32 --output-shape=256,4\n"
        << "$ 03_visualize_layout \"TensorOpMultiplicand<4,128>\" "
           "--extent=128,32 --vectorize=32 --output-shape=256,4\n"
        << "$ 03_visualize_layout \"TensorOpMultiplicand<4,256>\" "
           "--extent=256,16 --vectorize=32 --output-shape=256,4\n"
        << "$ 03_visualize_layout \"TensorOpMultiplicand<8,32>\" "
           "--extent=32,64 --vectorize=16 --output-shape=128,4\n"
        << "$ 03_visualize_layout \"TensorOpMultiplicand<8,64>\" "
           "--extent=64,32 --vectorize=16 --output-shape=128,4\n"
        << "$ 03_visualize_layout \"TensorOpMultiplicand<8,128>\" "
           "--extent=128,16 --vectorize=16 --output-shape=128,4\n"
        << "$ 03_visualize_layout \"TensorOpMultiplicand<16,32>\" "
           "--extent=32,32 --vectorize=8 --output-shape=64,4\n"
        << "$ 03_visualize_layout \"TensorOpMultiplicand<16,64>\" "
           "--extent=64,16 --vectorize=8 --output-shape=64,4\n"
        << "$ 03_visualize_layout \"TensorOpMultiplicand<32,16>\" "
           "--extent=16,32 --vectorize=4 --output-shape=32,4\n"
        << "$ 03_visualize_layout \"TensorOpMultiplicand<32,32>\" "
           "--extent=32,16 --vectorize=4 --output-shape=32,4\n"
        << "$ 03_visualize_layout \"TensorOpMultiplicandCongruous<32,32>\" "
           "--extent=32,16 --vectorize=4 --output-shape=32,4\n"
        << "$ 03_visualize_layout \"TensorOpMultiplicandCongruous<64, 16>\" "
           "--extent=16,16 --vectorize=2 --output-shape=16,4\n"
        << "$ 03_visualize_layout "
           "\"VoltaTensorOpMultiplicandCrosswise<16,32>\" "
           "--extent=32,64 --vectorize=4 --output-shape=64,4\n"
        << "$ 03_visualize_layout \"VotlaTensorOpMultiplicandCongruous<16>\" "
           "--extent=64,32 --vectorize=8 --output-shape=64,4\n";

    out << std::endl;
}

/////////////////////////////////////////////////////////////////////////////////////////////////

/// Entry point
int main(int argc, char const* arg[]) {
    RegisterLayouts(layouts);

    if (argc == 1 ||
        (std::string(arg[0]) == "-h" || std::string(arg[1]) == "--help")) {
        print_usage(std::cout);
        return 0;
    }

    // parse command line, skipping layout name
    cutlass::CommandLine cmd_line(argc - 1, arg + 1);
    Options options(cmd_line);

    if (options.help) {
        print_usage(std::cout);
        return 0;
    }

    if (!options.good) {
        return -1;
    }

    std::string layout_name = arg[1];

    auto layout_it = layouts.find(layout_name);
    if (layout_it == layouts.end()) {
        std::cerr << "Layout '" << layout_name << "' not supported."
                  << std::endl;
        return -1;
    }

    bool passed = layout_it->second->visualize(options);
    if (!passed) {
        return -1;
    }

    layout_it->second->print_csv(std::cout);

    cudaFree(0);  // Ensure CUDA is available.

    return 0;
}

/////////////////////////////////////////////////////////////////////////////////////////////////
