// Copyright © 2023-2025 Advanced Micro Devices, Inc.
// SPDX-License-Identifier: MIT

// clang-format off
#include "shim.bwd_postprocess.h"
#include <aotriton/util.h>
#include <tuple>
#include <iostream>
#include "iface.op_attn_bwd.h"

namespace AOTRITON_NS::v3::flash {

#if 1
using AOTRITON_NS::v3::flash::OpAttnBwdParams;
#endif

// Turns the address of any object (const or not) into the plain void* that the
// kernel argument list stores.
#define CAST(x) const_cast<void*>(static_cast<const void*>(x))

// Signature shared by the argument-preparation functions: given the operator
// parameters and the auxiliary arguments, produce the list of kernel argument
// pointers.
using PP_FUNC = std::vector<void*> (*)(const OpAttnBwdParams& context, const TritonAuxiliaryArguments&);

namespace {
// Forward declaration only; the table is defined further down in this file,
// after the argument-preparation functions it refers to.
extern PP_FUNC prepare_arguments[ 1 ];
}

/// Encode the functional parameters of this context as one mixed-radix index.
///
/// Digits, from most to least significant:
///   - DQ dtype      (2 choices,  weight 24)
///   - BLOCK_DMODEL  (12 choices, weight 2)
///   - PADDED_HEAD   (2 choices,  weight 1)
/// so a valid result lies in [0, 48).
///
/// @return the index, or -1 as soon as one parameter holds a value outside its
///         list of supported choices (a diagnostic is written to std::cerr in
///         builds without NDEBUG).
int64_t BwdPostprocessContext::godel_number() const
{
    const auto& args = *params;

    // Digit 1: element type of DQ.
    const auto dq_dtype = args.DQ->dtype();
    int64_t dq_digit = -1;
    if (dq_dtype == DType::kFloat16) {
        dq_digit = 0;
    } else if (dq_dtype == DType::kBFloat16) {
        dq_digit = 1;
    }
    if (dq_digit < 0) {
#ifndef NDEBUG
        std::cerr << __FILE__ << ":" << __LINE__ << ": Unsupported DQ, value: " << dq_dtype << std::endl;
#endif
        return -1;
    }

    // Digit 2: position of BLOCK_DMODEL within its list of supported values.
    static constexpr int kBlockDmodelChoices[] = { 16, 32, 48, 64, 80, 96, 128, 160, 192, 224, 256, 512 };
    int64_t dmodel_digit = -1;
    int64_t candidate = 0;
    for (const int choice : kBlockDmodelChoices) {
        if (args.BLOCK_DMODEL == choice) {
            dmodel_digit = candidate;
        }
        ++candidate;
    }
    if (dmodel_digit < 0) {
#ifndef NDEBUG
        std::cerr << __FILE__ << ":" << __LINE__ << ": Unsupported BLOCK_DMODEL, value: " << +args.BLOCK_DMODEL << std::endl;
#endif
        return -1;
    }

    // Digit 3: PADDED_HEAD flag.
    int64_t padded_digit = -1;
    if (args.PADDED_HEAD == false) {
        padded_digit = 0;
    } else if (args.PADDED_HEAD == true) {
        padded_digit = 1;
    }
    if (padded_digit < 0) {
#ifndef NDEBUG
        std::cerr << __FILE__ << ":" << __LINE__ << ": Unsupported PADDED_HEAD, value: " << args.PADDED_HEAD << std::endl;
#endif
        return -1;
    }

    return dq_digit * 24 + dmodel_digit * 2 + padded_digit * 1;
}

/// Select the kernel to run for `gpu` and the current functional parameters.
///
/// The autotune table entry addressed by (architecture number, godel number)
/// is called with this context; it is expected to fill in kernel_on_device.
///
/// @param gpu GPU identifier to look up.
/// @return hipSuccess when a kernel was selected, otherwise:
///         - hipErrorNoBinaryForGpu             : `gpu` has no architecture number
///         - hipErrorNotSupported               : godel_number() rejected the parameters
///         - hipErrorProfilerNotInitialized     : the table entry is empty
///         - hipErrorSharedObjectSymbolNotFound : the entry ran but left kernel_on_device unset
hipError_t
BwdPostprocessContext::lookup_optimal(Gpu gpu) {
    const auto archmod = get_archmod_number(gpu);
    const int arch_number = std::get<0>(archmod);
    const int mod_number = std::get<1>(archmod);
    if (arch_number < 0) {
        return hipErrorNoBinaryForGpu;
    }

    // Forget any previous selection before attempting a new one.
    kernel_on_device = nullptr;

    const auto functional_index = godel_number();
    if (functional_index < 0) {
        return hipErrorNotSupported;
    }

    auto tuner = autotune_table[arch_number][functional_index];
    if (!tuner) {
        return hipErrorProfilerNotInitialized;
    }
    tuner(*this, mod_number);

    if (!kernel_on_device) {
        return hipErrorSharedObjectSymbolNotFound;
    }
    return hipSuccess;
}

/// Launch the "bwd_postprocess" kernel previously selected by lookup_optimal().
///
/// Kernel arguments are marshalled by the prepare_arguments entry chosen by
/// pp_args_index. The launch grid comes from custom_grid_calculator when one
/// is installed, otherwise from grid_calculator().
///
/// @param stream HIP stream passed through to the kernel's invoke().
/// @return hipErrorSharedObjectSymbolNotFound when no kernel has been selected
///         (kernel_on_device is null); otherwise the result of invoke().
hipError_t
BwdPostprocessContext::launch(hipStream_t stream) const {
    constexpr std::string_view triton_kernel_name { "bwd_postprocess" };

    // lookup_optimal() resets kernel_on_device to nullptr and may fail before
    // assigning it. Report that state with the same error code lookup_optimal()
    // uses instead of dereferencing a null pointer below.
    if (!kernel_on_device) {
        return hipErrorSharedObjectSymbolNotFound;
    }

    // `aux` must stay alive until invoke() returns: the argument list stores
    // the addresses of its fields.
    TritonAuxiliaryArguments aux;
    auto args = prepare_arguments[pp_args_index](*this->params, aux);

    dim3 grid;
    if (custom_grid_calculator) {
        grid = custom_grid_calculator(*this);
    } else {
        grid = grid_calculator();
    }
#if AOTRITON_BUILD_FOR_TUNING
    // Tuning builds pass one extra argument, peek_kernel_image.
    return kernel_on_device->invoke(triton_kernel_name,
                                    package_path,
                                    func_name,
                                    arch_name,
                                    grid,
                                    args,
                                    peek_kernel_image,
                                    stream);
#else
    return kernel_on_device->invoke(triton_kernel_name,
                                    package_path,
                                    func_name,
                                    arch_name,
                                    grid,
                                    args,
                                    stream);
#endif
}

/// Translate a GPU identifier into the (arch_number, mod_number) pair used for
/// table lookups.
///
/// @param gpu GPU identifier to translate.
/// @return (arch_number, 0) for the GPUs listed below, (-1, 0) for any other.
std::tuple<int, int>
BwdPostprocessContext::get_archmod_number(Gpu gpu) {
    int arch_number = -1;
    if (gpu == GPU_AMD_ARCH_GFX950_MOD0) {
        arch_number = 0;
    } else if (gpu == GPU_AMD_ARCH_GFX1100_MOD0) {
        arch_number = 1;
    } else if (gpu == GPU_AMD_ARCH_GFX1101_MOD0) {
        arch_number = 2;
    } else if (gpu == GPU_AMD_ARCH_GFX1102_MOD0) {
        arch_number = 3;
    } else if (gpu == GPU_AMD_ARCH_GFX1103_MOD0) {
        arch_number = 4;
    } else if (gpu == GPU_AMD_ARCH_GFX1150_MOD0) {
        arch_number = 5;
    } else if (gpu == GPU_AMD_ARCH_GFX1151_MOD0) {
        arch_number = 6;
    } else if (gpu == GPU_AMD_ARCH_GFX1152_MOD0) {
        arch_number = 7;
    } else if (gpu == GPU_AMD_ARCH_GFX1153_MOD0) {
        arch_number = 8;
    } else if (gpu == GPU_AMD_ARCH_GFX1201_MOD0) {
        arch_number = 9;
    } else if (gpu == GPU_AMD_ARCH_GFX1200_MOD0) {
        arch_number = 10;
    }
    // TODO: print a warning when tuning for this GPU mod has not been built.
    // Note: if a mod has no tuning info in the database at all,
    //       getGpuFromStream should not return that mod in the first place.
    // Every recognised GPU above uses mod number 0, as does the "unknown" result.
    return std::make_tuple(arch_number, 0);
}


// Builds the kernel argument list for bwd_postprocess.
//
// The entries appear in this order: DQ_ACC and DQ data pointers, three strides
// of DQ_ACC, three strides of DQ, max_seqlen_q, head_dim, then the two
// auxiliary scratch arguments. The scalar and auxiliary entries are addresses
// of fields inside `params` and `aux`, so both objects must outlive any use of
// the returned vector.
static std::vector<void*>
bwd_postprocess_pp_args_0(const OpAttnBwdParams& params,
                          const TritonAuxiliaryArguments& aux) {
  std::vector<void*> kernel_args;
  kernel_args.reserve(12);

  // Tensors
  kernel_args.push_back(params.DQ_ACC->kparam_data_ptr());  // DQ_ACC
  kernel_args.push_back(params.DQ->kparam_data_ptr());      // DQ

  // Strides of DQ_ACC
  kernel_args.push_back(params.DQ_ACC->kparam_stride(0));   // stride_accz
  kernel_args.push_back(params.DQ_ACC->kparam_stride(1));   // stride_acch
  kernel_args.push_back(params.DQ_ACC->kparam_stride(2));   // stride_accm

  // Strides of DQ
  kernel_args.push_back(params.DQ->kparam_stride(0));       // stride_dqz
  kernel_args.push_back(params.DQ->kparam_stride(1));       // stride_dqh
  kernel_args.push_back(params.DQ->kparam_stride(2));       // stride_dqm

  // Scalars, passed by address
  kernel_args.push_back(CAST(&params.max_seqlen_q));        // max_seqlen_q
  kernel_args.push_back(CAST(&params.head_dim));            // head_dim

  // Auxiliary arguments, passed by address
  kernel_args.push_back(CAST(&aux.global_scratch));
  kernel_args.push_back(CAST(&aux.profile_scratch));

  return kernel_args;
}

namespace {
// Argument-preparation functions, indexed by pp_args_index in launch().
// This kernel has a single variant.
PP_FUNC prepare_arguments[ 1 ] = { &bwd_postprocess_pp_args_0 };
}


// Type strings accepted for the DQ_ACC argument.
const std::vector<std::string>&
BwdPostprocessMetadata::get_DQ_ACC_choices()
{
    static const std::vector<std::string> kDqAccChoices{ "*fp32:16" };
    return kDqAccChoices;
}

// Type strings accepted for the DQ argument, in the same order as the DQ digit
// of godel_number() (fp16 first, bf16 second).
const std::vector<std::string>&
BwdPostprocessMetadata::get_DQ_choices()
{
    static const std::vector<std::string> kDqChoices{ "*fp16:16", "*bf16:16" };
    return kDqChoices;
}

// Type strings accepted for the max_seqlen_q argument.
const std::vector<std::string>&
BwdPostprocessMetadata::get_max_seqlen_q_choices()
{
    static const std::vector<std::string> kMaxSeqlenQChoices{ "i32" };
    return kMaxSeqlenQChoices;
}

// Type strings accepted for the head_dim argument.
const std::vector<std::string>&
BwdPostprocessMetadata::get_head_dim_choices()
{
    static const std::vector<std::string> kHeadDimChoices{ "i32" };
    return kHeadDimChoices;
}

// Supported BLOCK_DMODEL values, in the same order as the BLOCK_DMODEL digit
// of godel_number().
const std::vector<int>&
BwdPostprocessMetadata::get_BLOCK_DMODEL_choices()
{
    static const std::vector<int> kBlockDmodelChoices{
        16, 32, 48, 64, 80, 96, 128, 160, 192, 224, 256, 512
    };
    return kBlockDmodelChoices;
}

// Supported PADDED_HEAD values, in the same order as the PADDED_HEAD digit of
// godel_number().
const std::vector<bool>&
BwdPostprocessMetadata::get_PADDED_HEAD_choices()
{
    static const std::vector<bool> kPaddedHeadChoices{ false, true };
    return kPaddedHeadChoices;
}

namespace autotune {

// Two NUL-terminated strings stored back to back in one array:
// "128" followed by "wave2_warp4_stg1".
const char bwd_postprocess_packed_string[] =
    "128\0"
    "wave2_warp4_stg1\0";

// Returns the lookup-table entry for `mod_number`. The table has a single
// column, so `params` does not take part in the lookup.
int bwd_postprocess__lut_lambda__0 (const OpAttnBwdParams& params, int mod_number, int8_t lut[1][1]) {
    const int8_t selected = lut[mod_number][0];
    return selected;
}

} // namespace autotune

// Autotune dispatch table, read by lookup_optimal() as
// autotune_table[arch_number][godel_number].
//
// - Rows follow the arch_number values returned by get_archmod_number().
// - Each row has 48 entries, one per value godel_number() can produce
//   (2 DQ dtypes x 12 BLOCK_DMODEL values x 2 PADDED_HEAD values).
BwdPostprocessContext::AutoTuneTableEntry
BwdPostprocessContext::autotune_table[][ 48 ] = {
    // arch_number 0: GPU_AMD_ARCH_GFX950_MOD0
    {
        &autotune::Autotune_bwd_postprocess__A0__F0,
        &autotune::Autotune_bwd_postprocess__A0__F1,
        &autotune::Autotune_bwd_postprocess__A0__F2,
        &autotune::Autotune_bwd_postprocess__A0__F3,
        &autotune::Autotune_bwd_postprocess__A0__F4,
        &autotune::Autotune_bwd_postprocess__A0__F5,
        &autotune::Autotune_bwd_postprocess__A0__F6,
        &autotune::Autotune_bwd_postprocess__A0__F7,
        &autotune::Autotune_bwd_postprocess__A0__F8,
        &autotune::Autotune_bwd_postprocess__A0__F9,
        &autotune::Autotune_bwd_postprocess__A0__F10,
        &autotune::Autotune_bwd_postprocess__A0__F11,
        &autotune::Autotune_bwd_postprocess__A0__F12,
        &autotune::Autotune_bwd_postprocess__A0__F13,
        &autotune::Autotune_bwd_postprocess__A0__F14,
        &autotune::Autotune_bwd_postprocess__A0__F15,
        &autotune::Autotune_bwd_postprocess__A0__F16,
        &autotune::Autotune_bwd_postprocess__A0__F17,
        &autotune::Autotune_bwd_postprocess__A0__F18,
        &autotune::Autotune_bwd_postprocess__A0__F19,
        &autotune::Autotune_bwd_postprocess__A0__F20,
        &autotune::Autotune_bwd_postprocess__A0__F21,
        &autotune::Autotune_bwd_postprocess__A0__F22,
        &autotune::Autotune_bwd_postprocess__A0__F23,
        &autotune::Autotune_bwd_postprocess__A0__F24,
        &autotune::Autotune_bwd_postprocess__A0__F25,
        &autotune::Autotune_bwd_postprocess__A0__F26,
        &autotune::Autotune_bwd_postprocess__A0__F27,
        &autotune::Autotune_bwd_postprocess__A0__F28,
        &autotune::Autotune_bwd_postprocess__A0__F29,
        &autotune::Autotune_bwd_postprocess__A0__F30,
        &autotune::Autotune_bwd_postprocess__A0__F31,
        &autotune::Autotune_bwd_postprocess__A0__F32,
        &autotune::Autotune_bwd_postprocess__A0__F33,
        &autotune::Autotune_bwd_postprocess__A0__F34,
        &autotune::Autotune_bwd_postprocess__A0__F35,
        &autotune::Autotune_bwd_postprocess__A0__F36,
        &autotune::Autotune_bwd_postprocess__A0__F37,
        &autotune::Autotune_bwd_postprocess__A0__F38,
        &autotune::Autotune_bwd_postprocess__A0__F39,
        &autotune::Autotune_bwd_postprocess__A0__F40,
        &autotune::Autotune_bwd_postprocess__A0__F41,
        &autotune::Autotune_bwd_postprocess__A0__F42,
        &autotune::Autotune_bwd_postprocess__A0__F43,
        &autotune::Autotune_bwd_postprocess__A0__F44,
        &autotune::Autotune_bwd_postprocess__A0__F45,
        &autotune::Autotune_bwd_postprocess__A0__F46,
        &autotune::Autotune_bwd_postprocess__A0__F47,
    },
    // arch_number 1: GPU_AMD_ARCH_GFX1100_MOD0
    {
        &autotune::Autotune_bwd_postprocess__A1__F0,
        &autotune::Autotune_bwd_postprocess__A1__F1,
        &autotune::Autotune_bwd_postprocess__A1__F2,
        &autotune::Autotune_bwd_postprocess__A1__F3,
        &autotune::Autotune_bwd_postprocess__A1__F4,
        &autotune::Autotune_bwd_postprocess__A1__F5,
        &autotune::Autotune_bwd_postprocess__A1__F6,
        &autotune::Autotune_bwd_postprocess__A1__F7,
        &autotune::Autotune_bwd_postprocess__A1__F8,
        &autotune::Autotune_bwd_postprocess__A1__F9,
        &autotune::Autotune_bwd_postprocess__A1__F10,
        &autotune::Autotune_bwd_postprocess__A1__F11,
        &autotune::Autotune_bwd_postprocess__A1__F12,
        &autotune::Autotune_bwd_postprocess__A1__F13,
        &autotune::Autotune_bwd_postprocess__A1__F14,
        &autotune::Autotune_bwd_postprocess__A1__F15,
        &autotune::Autotune_bwd_postprocess__A1__F16,
        &autotune::Autotune_bwd_postprocess__A1__F17,
        &autotune::Autotune_bwd_postprocess__A1__F18,
        &autotune::Autotune_bwd_postprocess__A1__F19,
        &autotune::Autotune_bwd_postprocess__A1__F20,
        &autotune::Autotune_bwd_postprocess__A1__F21,
        &autotune::Autotune_bwd_postprocess__A1__F22,
        &autotune::Autotune_bwd_postprocess__A1__F23,
        &autotune::Autotune_bwd_postprocess__A1__F24,
        &autotune::Autotune_bwd_postprocess__A1__F25,
        &autotune::Autotune_bwd_postprocess__A1__F26,
        &autotune::Autotune_bwd_postprocess__A1__F27,
        &autotune::Autotune_bwd_postprocess__A1__F28,
        &autotune::Autotune_bwd_postprocess__A1__F29,
        &autotune::Autotune_bwd_postprocess__A1__F30,
        &autotune::Autotune_bwd_postprocess__A1__F31,
        &autotune::Autotune_bwd_postprocess__A1__F32,
        &autotune::Autotune_bwd_postprocess__A1__F33,
        &autotune::Autotune_bwd_postprocess__A1__F34,
        &autotune::Autotune_bwd_postprocess__A1__F35,
        &autotune::Autotune_bwd_postprocess__A1__F36,
        &autotune::Autotune_bwd_postprocess__A1__F37,
        &autotune::Autotune_bwd_postprocess__A1__F38,
        &autotune::Autotune_bwd_postprocess__A1__F39,
        &autotune::Autotune_bwd_postprocess__A1__F40,
        &autotune::Autotune_bwd_postprocess__A1__F41,
        &autotune::Autotune_bwd_postprocess__A1__F42,
        &autotune::Autotune_bwd_postprocess__A1__F43,
        &autotune::Autotune_bwd_postprocess__A1__F44,
        &autotune::Autotune_bwd_postprocess__A1__F45,
        &autotune::Autotune_bwd_postprocess__A1__F46,
        &autotune::Autotune_bwd_postprocess__A1__F47,
    },
    // arch_number 2: GPU_AMD_ARCH_GFX1101_MOD0
    {
        &autotune::Autotune_bwd_postprocess__A2__F0,
        &autotune::Autotune_bwd_postprocess__A2__F1,
        &autotune::Autotune_bwd_postprocess__A2__F2,
        &autotune::Autotune_bwd_postprocess__A2__F3,
        &autotune::Autotune_bwd_postprocess__A2__F4,
        &autotune::Autotune_bwd_postprocess__A2__F5,
        &autotune::Autotune_bwd_postprocess__A2__F6,
        &autotune::Autotune_bwd_postprocess__A2__F7,
        &autotune::Autotune_bwd_postprocess__A2__F8,
        &autotune::Autotune_bwd_postprocess__A2__F9,
        &autotune::Autotune_bwd_postprocess__A2__F10,
        &autotune::Autotune_bwd_postprocess__A2__F11,
        &autotune::Autotune_bwd_postprocess__A2__F12,
        &autotune::Autotune_bwd_postprocess__A2__F13,
        &autotune::Autotune_bwd_postprocess__A2__F14,
        &autotune::Autotune_bwd_postprocess__A2__F15,
        &autotune::Autotune_bwd_postprocess__A2__F16,
        &autotune::Autotune_bwd_postprocess__A2__F17,
        &autotune::Autotune_bwd_postprocess__A2__F18,
        &autotune::Autotune_bwd_postprocess__A2__F19,
        &autotune::Autotune_bwd_postprocess__A2__F20,
        &autotune::Autotune_bwd_postprocess__A2__F21,
        &autotune::Autotune_bwd_postprocess__A2__F22,
        &autotune::Autotune_bwd_postprocess__A2__F23,
        &autotune::Autotune_bwd_postprocess__A2__F24,
        &autotune::Autotune_bwd_postprocess__A2__F25,
        &autotune::Autotune_bwd_postprocess__A2__F26,
        &autotune::Autotune_bwd_postprocess__A2__F27,
        &autotune::Autotune_bwd_postprocess__A2__F28,
        &autotune::Autotune_bwd_postprocess__A2__F29,
        &autotune::Autotune_bwd_postprocess__A2__F30,
        &autotune::Autotune_bwd_postprocess__A2__F31,
        &autotune::Autotune_bwd_postprocess__A2__F32,
        &autotune::Autotune_bwd_postprocess__A2__F33,
        &autotune::Autotune_bwd_postprocess__A2__F34,
        &autotune::Autotune_bwd_postprocess__A2__F35,
        &autotune::Autotune_bwd_postprocess__A2__F36,
        &autotune::Autotune_bwd_postprocess__A2__F37,
        &autotune::Autotune_bwd_postprocess__A2__F38,
        &autotune::Autotune_bwd_postprocess__A2__F39,
        &autotune::Autotune_bwd_postprocess__A2__F40,
        &autotune::Autotune_bwd_postprocess__A2__F41,
        &autotune::Autotune_bwd_postprocess__A2__F42,
        &autotune::Autotune_bwd_postprocess__A2__F43,
        &autotune::Autotune_bwd_postprocess__A2__F44,
        &autotune::Autotune_bwd_postprocess__A2__F45,
        &autotune::Autotune_bwd_postprocess__A2__F46,
        &autotune::Autotune_bwd_postprocess__A2__F47,
    },
    // arch_number 3: GPU_AMD_ARCH_GFX1102_MOD0
    {
        &autotune::Autotune_bwd_postprocess__A3__F0,
        &autotune::Autotune_bwd_postprocess__A3__F1,
        &autotune::Autotune_bwd_postprocess__A3__F2,
        &autotune::Autotune_bwd_postprocess__A3__F3,
        &autotune::Autotune_bwd_postprocess__A3__F4,
        &autotune::Autotune_bwd_postprocess__A3__F5,
        &autotune::Autotune_bwd_postprocess__A3__F6,
        &autotune::Autotune_bwd_postprocess__A3__F7,
        &autotune::Autotune_bwd_postprocess__A3__F8,
        &autotune::Autotune_bwd_postprocess__A3__F9,
        &autotune::Autotune_bwd_postprocess__A3__F10,
        &autotune::Autotune_bwd_postprocess__A3__F11,
        &autotune::Autotune_bwd_postprocess__A3__F12,
        &autotune::Autotune_bwd_postprocess__A3__F13,
        &autotune::Autotune_bwd_postprocess__A3__F14,
        &autotune::Autotune_bwd_postprocess__A3__F15,
        &autotune::Autotune_bwd_postprocess__A3__F16,
        &autotune::Autotune_bwd_postprocess__A3__F17,
        &autotune::Autotune_bwd_postprocess__A3__F18,
        &autotune::Autotune_bwd_postprocess__A3__F19,
        &autotune::Autotune_bwd_postprocess__A3__F20,
        &autotune::Autotune_bwd_postprocess__A3__F21,
        &autotune::Autotune_bwd_postprocess__A3__F22,
        &autotune::Autotune_bwd_postprocess__A3__F23,
        &autotune::Autotune_bwd_postprocess__A3__F24,
        &autotune::Autotune_bwd_postprocess__A3__F25,
        &autotune::Autotune_bwd_postprocess__A3__F26,
        &autotune::Autotune_bwd_postprocess__A3__F27,
        &autotune::Autotune_bwd_postprocess__A3__F28,
        &autotune::Autotune_bwd_postprocess__A3__F29,
        &autotune::Autotune_bwd_postprocess__A3__F30,
        &autotune::Autotune_bwd_postprocess__A3__F31,
        &autotune::Autotune_bwd_postprocess__A3__F32,
        &autotune::Autotune_bwd_postprocess__A3__F33,
        &autotune::Autotune_bwd_postprocess__A3__F34,
        &autotune::Autotune_bwd_postprocess__A3__F35,
        &autotune::Autotune_bwd_postprocess__A3__F36,
        &autotune::Autotune_bwd_postprocess__A3__F37,
        &autotune::Autotune_bwd_postprocess__A3__F38,
        &autotune::Autotune_bwd_postprocess__A3__F39,
        &autotune::Autotune_bwd_postprocess__A3__F40,
        &autotune::Autotune_bwd_postprocess__A3__F41,
        &autotune::Autotune_bwd_postprocess__A3__F42,
        &autotune::Autotune_bwd_postprocess__A3__F43,
        &autotune::Autotune_bwd_postprocess__A3__F44,
        &autotune::Autotune_bwd_postprocess__A3__F45,
        &autotune::Autotune_bwd_postprocess__A3__F46,
        &autotune::Autotune_bwd_postprocess__A3__F47,
    },
    // arch_number 4: GPU_AMD_ARCH_GFX1103_MOD0
    {
        &autotune::Autotune_bwd_postprocess__A4__F0,
        &autotune::Autotune_bwd_postprocess__A4__F1,
        &autotune::Autotune_bwd_postprocess__A4__F2,
        &autotune::Autotune_bwd_postprocess__A4__F3,
        &autotune::Autotune_bwd_postprocess__A4__F4,
        &autotune::Autotune_bwd_postprocess__A4__F5,
        &autotune::Autotune_bwd_postprocess__A4__F6,
        &autotune::Autotune_bwd_postprocess__A4__F7,
        &autotune::Autotune_bwd_postprocess__A4__F8,
        &autotune::Autotune_bwd_postprocess__A4__F9,
        &autotune::Autotune_bwd_postprocess__A4__F10,
        &autotune::Autotune_bwd_postprocess__A4__F11,
        &autotune::Autotune_bwd_postprocess__A4__F12,
        &autotune::Autotune_bwd_postprocess__A4__F13,
        &autotune::Autotune_bwd_postprocess__A4__F14,
        &autotune::Autotune_bwd_postprocess__A4__F15,
        &autotune::Autotune_bwd_postprocess__A4__F16,
        &autotune::Autotune_bwd_postprocess__A4__F17,
        &autotune::Autotune_bwd_postprocess__A4__F18,
        &autotune::Autotune_bwd_postprocess__A4__F19,
        &autotune::Autotune_bwd_postprocess__A4__F20,
        &autotune::Autotune_bwd_postprocess__A4__F21,
        &autotune::Autotune_bwd_postprocess__A4__F22,
        &autotune::Autotune_bwd_postprocess__A4__F23,
        &autotune::Autotune_bwd_postprocess__A4__F24,
        &autotune::Autotune_bwd_postprocess__A4__F25,
        &autotune::Autotune_bwd_postprocess__A4__F26,
        &autotune::Autotune_bwd_postprocess__A4__F27,
        &autotune::Autotune_bwd_postprocess__A4__F28,
        &autotune::Autotune_bwd_postprocess__A4__F29,
        &autotune::Autotune_bwd_postprocess__A4__F30,
        &autotune::Autotune_bwd_postprocess__A4__F31,
        &autotune::Autotune_bwd_postprocess__A4__F32,
        &autotune::Autotune_bwd_postprocess__A4__F33,
        &autotune::Autotune_bwd_postprocess__A4__F34,
        &autotune::Autotune_bwd_postprocess__A4__F35,
        &autotune::Autotune_bwd_postprocess__A4__F36,
        &autotune::Autotune_bwd_postprocess__A4__F37,
        &autotune::Autotune_bwd_postprocess__A4__F38,
        &autotune::Autotune_bwd_postprocess__A4__F39,
        &autotune::Autotune_bwd_postprocess__A4__F40,
        &autotune::Autotune_bwd_postprocess__A4__F41,
        &autotune::Autotune_bwd_postprocess__A4__F42,
        &autotune::Autotune_bwd_postprocess__A4__F43,
        &autotune::Autotune_bwd_postprocess__A4__F44,
        &autotune::Autotune_bwd_postprocess__A4__F45,
        &autotune::Autotune_bwd_postprocess__A4__F46,
        &autotune::Autotune_bwd_postprocess__A4__F47,
    },
    // arch_number 5: GPU_AMD_ARCH_GFX1150_MOD0
    {
        &autotune::Autotune_bwd_postprocess__A5__F0,
        &autotune::Autotune_bwd_postprocess__A5__F1,
        &autotune::Autotune_bwd_postprocess__A5__F2,
        &autotune::Autotune_bwd_postprocess__A5__F3,
        &autotune::Autotune_bwd_postprocess__A5__F4,
        &autotune::Autotune_bwd_postprocess__A5__F5,
        &autotune::Autotune_bwd_postprocess__A5__F6,
        &autotune::Autotune_bwd_postprocess__A5__F7,
        &autotune::Autotune_bwd_postprocess__A5__F8,
        &autotune::Autotune_bwd_postprocess__A5__F9,
        &autotune::Autotune_bwd_postprocess__A5__F10,
        &autotune::Autotune_bwd_postprocess__A5__F11,
        &autotune::Autotune_bwd_postprocess__A5__F12,
        &autotune::Autotune_bwd_postprocess__A5__F13,
        &autotune::Autotune_bwd_postprocess__A5__F14,
        &autotune::Autotune_bwd_postprocess__A5__F15,
        &autotune::Autotune_bwd_postprocess__A5__F16,
        &autotune::Autotune_bwd_postprocess__A5__F17,
        &autotune::Autotune_bwd_postprocess__A5__F18,
        &autotune::Autotune_bwd_postprocess__A5__F19,
        &autotune::Autotune_bwd_postprocess__A5__F20,
        &autotune::Autotune_bwd_postprocess__A5__F21,
        &autotune::Autotune_bwd_postprocess__A5__F22,
        &autotune::Autotune_bwd_postprocess__A5__F23,
        &autotune::Autotune_bwd_postprocess__A5__F24,
        &autotune::Autotune_bwd_postprocess__A5__F25,
        &autotune::Autotune_bwd_postprocess__A5__F26,
        &autotune::Autotune_bwd_postprocess__A5__F27,
        &autotune::Autotune_bwd_postprocess__A5__F28,
        &autotune::Autotune_bwd_postprocess__A5__F29,
        &autotune::Autotune_bwd_postprocess__A5__F30,
        &autotune::Autotune_bwd_postprocess__A5__F31,
        &autotune::Autotune_bwd_postprocess__A5__F32,
        &autotune::Autotune_bwd_postprocess__A5__F33,
        &autotune::Autotune_bwd_postprocess__A5__F34,
        &autotune::Autotune_bwd_postprocess__A5__F35,
        &autotune::Autotune_bwd_postprocess__A5__F36,
        &autotune::Autotune_bwd_postprocess__A5__F37,
        &autotune::Autotune_bwd_postprocess__A5__F38,
        &autotune::Autotune_bwd_postprocess__A5__F39,
        &autotune::Autotune_bwd_postprocess__A5__F40,
        &autotune::Autotune_bwd_postprocess__A5__F41,
        &autotune::Autotune_bwd_postprocess__A5__F42,
        &autotune::Autotune_bwd_postprocess__A5__F43,
        &autotune::Autotune_bwd_postprocess__A5__F44,
        &autotune::Autotune_bwd_postprocess__A5__F45,
        &autotune::Autotune_bwd_postprocess__A5__F46,
        &autotune::Autotune_bwd_postprocess__A5__F47,
    },
    // arch_number 6: GPU_AMD_ARCH_GFX1151_MOD0
    {
        &autotune::Autotune_bwd_postprocess__A6__F0,
        &autotune::Autotune_bwd_postprocess__A6__F1,
        &autotune::Autotune_bwd_postprocess__A6__F2,
        &autotune::Autotune_bwd_postprocess__A6__F3,
        &autotune::Autotune_bwd_postprocess__A6__F4,
        &autotune::Autotune_bwd_postprocess__A6__F5,
        &autotune::Autotune_bwd_postprocess__A6__F6,
        &autotune::Autotune_bwd_postprocess__A6__F7,
        &autotune::Autotune_bwd_postprocess__A6__F8,
        &autotune::Autotune_bwd_postprocess__A6__F9,
        &autotune::Autotune_bwd_postprocess__A6__F10,
        &autotune::Autotune_bwd_postprocess__A6__F11,
        &autotune::Autotune_bwd_postprocess__A6__F12,
        &autotune::Autotune_bwd_postprocess__A6__F13,
        &autotune::Autotune_bwd_postprocess__A6__F14,
        &autotune::Autotune_bwd_postprocess__A6__F15,
        &autotune::Autotune_bwd_postprocess__A6__F16,
        &autotune::Autotune_bwd_postprocess__A6__F17,
        &autotune::Autotune_bwd_postprocess__A6__F18,
        &autotune::Autotune_bwd_postprocess__A6__F19,
        &autotune::Autotune_bwd_postprocess__A6__F20,
        &autotune::Autotune_bwd_postprocess__A6__F21,
        &autotune::Autotune_bwd_postprocess__A6__F22,
        &autotune::Autotune_bwd_postprocess__A6__F23,
        &autotune::Autotune_bwd_postprocess__A6__F24,
        &autotune::Autotune_bwd_postprocess__A6__F25,
        &autotune::Autotune_bwd_postprocess__A6__F26,
        &autotune::Autotune_bwd_postprocess__A6__F27,
        &autotune::Autotune_bwd_postprocess__A6__F28,
        &autotune::Autotune_bwd_postprocess__A6__F29,
        &autotune::Autotune_bwd_postprocess__A6__F30,
        &autotune::Autotune_bwd_postprocess__A6__F31,
        &autotune::Autotune_bwd_postprocess__A6__F32,
        &autotune::Autotune_bwd_postprocess__A6__F33,
        &autotune::Autotune_bwd_postprocess__A6__F34,
        &autotune::Autotune_bwd_postprocess__A6__F35,
        &autotune::Autotune_bwd_postprocess__A6__F36,
        &autotune::Autotune_bwd_postprocess__A6__F37,
        &autotune::Autotune_bwd_postprocess__A6__F38,
        &autotune::Autotune_bwd_postprocess__A6__F39,
        &autotune::Autotune_bwd_postprocess__A6__F40,
        &autotune::Autotune_bwd_postprocess__A6__F41,
        &autotune::Autotune_bwd_postprocess__A6__F42,
        &autotune::Autotune_bwd_postprocess__A6__F43,
        &autotune::Autotune_bwd_postprocess__A6__F44,
        &autotune::Autotune_bwd_postprocess__A6__F45,
        &autotune::Autotune_bwd_postprocess__A6__F46,
        &autotune::Autotune_bwd_postprocess__A6__F47,
    },
    // arch_number 7: GPU_AMD_ARCH_GFX1152_MOD0
    {
        &autotune::Autotune_bwd_postprocess__A7__F0,
        &autotune::Autotune_bwd_postprocess__A7__F1,
        &autotune::Autotune_bwd_postprocess__A7__F2,
        &autotune::Autotune_bwd_postprocess__A7__F3,
        &autotune::Autotune_bwd_postprocess__A7__F4,
        &autotune::Autotune_bwd_postprocess__A7__F5,
        &autotune::Autotune_bwd_postprocess__A7__F6,
        &autotune::Autotune_bwd_postprocess__A7__F7,
        &autotune::Autotune_bwd_postprocess__A7__F8,
        &autotune::Autotune_bwd_postprocess__A7__F9,
        &autotune::Autotune_bwd_postprocess__A7__F10,
        &autotune::Autotune_bwd_postprocess__A7__F11,
        &autotune::Autotune_bwd_postprocess__A7__F12,
        &autotune::Autotune_bwd_postprocess__A7__F13,
        &autotune::Autotune_bwd_postprocess__A7__F14,
        &autotune::Autotune_bwd_postprocess__A7__F15,
        &autotune::Autotune_bwd_postprocess__A7__F16,
        &autotune::Autotune_bwd_postprocess__A7__F17,
        &autotune::Autotune_bwd_postprocess__A7__F18,
        &autotune::Autotune_bwd_postprocess__A7__F19,
        &autotune::Autotune_bwd_postprocess__A7__F20,
        &autotune::Autotune_bwd_postprocess__A7__F21,
        &autotune::Autotune_bwd_postprocess__A7__F22,
        &autotune::Autotune_bwd_postprocess__A7__F23,
        &autotune::Autotune_bwd_postprocess__A7__F24,
        &autotune::Autotune_bwd_postprocess__A7__F25,
        &autotune::Autotune_bwd_postprocess__A7__F26,
        &autotune::Autotune_bwd_postprocess__A7__F27,
        &autotune::Autotune_bwd_postprocess__A7__F28,
        &autotune::Autotune_bwd_postprocess__A7__F29,
        &autotune::Autotune_bwd_postprocess__A7__F30,
        &autotune::Autotune_bwd_postprocess__A7__F31,
        &autotune::Autotune_bwd_postprocess__A7__F32,
        &autotune::Autotune_bwd_postprocess__A7__F33,
        &autotune::Autotune_bwd_postprocess__A7__F34,
        &autotune::Autotune_bwd_postprocess__A7__F35,
        &autotune::Autotune_bwd_postprocess__A7__F36,
        &autotune::Autotune_bwd_postprocess__A7__F37,
        &autotune::Autotune_bwd_postprocess__A7__F38,
        &autotune::Autotune_bwd_postprocess__A7__F39,
        &autotune::Autotune_bwd_postprocess__A7__F40,
        &autotune::Autotune_bwd_postprocess__A7__F41,
        &autotune::Autotune_bwd_postprocess__A7__F42,
        &autotune::Autotune_bwd_postprocess__A7__F43,
        &autotune::Autotune_bwd_postprocess__A7__F44,
        &autotune::Autotune_bwd_postprocess__A7__F45,
        &autotune::Autotune_bwd_postprocess__A7__F46,
        &autotune::Autotune_bwd_postprocess__A7__F47,
    },
    // arch_number 8: GPU_AMD_ARCH_GFX1153_MOD0
    {
        &autotune::Autotune_bwd_postprocess__A8__F0,
        &autotune::Autotune_bwd_postprocess__A8__F1,
        &autotune::Autotune_bwd_postprocess__A8__F2,
        &autotune::Autotune_bwd_postprocess__A8__F3,
        &autotune::Autotune_bwd_postprocess__A8__F4,
        &autotune::Autotune_bwd_postprocess__A8__F5,
        &autotune::Autotune_bwd_postprocess__A8__F6,
        &autotune::Autotune_bwd_postprocess__A8__F7,
        &autotune::Autotune_bwd_postprocess__A8__F8,
        &autotune::Autotune_bwd_postprocess__A8__F9,
        &autotune::Autotune_bwd_postprocess__A8__F10,
        &autotune::Autotune_bwd_postprocess__A8__F11,
        &autotune::Autotune_bwd_postprocess__A8__F12,
        &autotune::Autotune_bwd_postprocess__A8__F13,
        &autotune::Autotune_bwd_postprocess__A8__F14,
        &autotune::Autotune_bwd_postprocess__A8__F15,
        &autotune::Autotune_bwd_postprocess__A8__F16,
        &autotune::Autotune_bwd_postprocess__A8__F17,
        &autotune::Autotune_bwd_postprocess__A8__F18,
        &autotune::Autotune_bwd_postprocess__A8__F19,
        &autotune::Autotune_bwd_postprocess__A8__F20,
        &autotune::Autotune_bwd_postprocess__A8__F21,
        &autotune::Autotune_bwd_postprocess__A8__F22,
        &autotune::Autotune_bwd_postprocess__A8__F23,
        &autotune::Autotune_bwd_postprocess__A8__F24,
        &autotune::Autotune_bwd_postprocess__A8__F25,
        &autotune::Autotune_bwd_postprocess__A8__F26,
        &autotune::Autotune_bwd_postprocess__A8__F27,
        &autotune::Autotune_bwd_postprocess__A8__F28,
        &autotune::Autotune_bwd_postprocess__A8__F29,
        &autotune::Autotune_bwd_postprocess__A8__F30,
        &autotune::Autotune_bwd_postprocess__A8__F31,
        &autotune::Autotune_bwd_postprocess__A8__F32,
        &autotune::Autotune_bwd_postprocess__A8__F33,
        &autotune::Autotune_bwd_postprocess__A8__F34,
        &autotune::Autotune_bwd_postprocess__A8__F35,
        &autotune::Autotune_bwd_postprocess__A8__F36,
        &autotune::Autotune_bwd_postprocess__A8__F37,
        &autotune::Autotune_bwd_postprocess__A8__F38,
        &autotune::Autotune_bwd_postprocess__A8__F39,
        &autotune::Autotune_bwd_postprocess__A8__F40,
        &autotune::Autotune_bwd_postprocess__A8__F41,
        &autotune::Autotune_bwd_postprocess__A8__F42,
        &autotune::Autotune_bwd_postprocess__A8__F43,
        &autotune::Autotune_bwd_postprocess__A8__F44,
        &autotune::Autotune_bwd_postprocess__A8__F45,
        &autotune::Autotune_bwd_postprocess__A8__F46,
        &autotune::Autotune_bwd_postprocess__A8__F47,
    },
    // arch_number 9: GPU_AMD_ARCH_GFX1201_MOD0
    {
        &autotune::Autotune_bwd_postprocess__A9__F0,
        &autotune::Autotune_bwd_postprocess__A9__F1,
        &autotune::Autotune_bwd_postprocess__A9__F2,
        &autotune::Autotune_bwd_postprocess__A9__F3,
        &autotune::Autotune_bwd_postprocess__A9__F4,
        &autotune::Autotune_bwd_postprocess__A9__F5,
        &autotune::Autotune_bwd_postprocess__A9__F6,
        &autotune::Autotune_bwd_postprocess__A9__F7,
        &autotune::Autotune_bwd_postprocess__A9__F8,
        &autotune::Autotune_bwd_postprocess__A9__F9,
        &autotune::Autotune_bwd_postprocess__A9__F10,
        &autotune::Autotune_bwd_postprocess__A9__F11,
        &autotune::Autotune_bwd_postprocess__A9__F12,
        &autotune::Autotune_bwd_postprocess__A9__F13,
        &autotune::Autotune_bwd_postprocess__A9__F14,
        &autotune::Autotune_bwd_postprocess__A9__F15,
        &autotune::Autotune_bwd_postprocess__A9__F16,
        &autotune::Autotune_bwd_postprocess__A9__F17,
        &autotune::Autotune_bwd_postprocess__A9__F18,
        &autotune::Autotune_bwd_postprocess__A9__F19,
        &autotune::Autotune_bwd_postprocess__A9__F20,
        &autotune::Autotune_bwd_postprocess__A9__F21,
        &autotune::Autotune_bwd_postprocess__A9__F22,
        &autotune::Autotune_bwd_postprocess__A9__F23,
        &autotune::Autotune_bwd_postprocess__A9__F24,
        &autotune::Autotune_bwd_postprocess__A9__F25,
        &autotune::Autotune_bwd_postprocess__A9__F26,
        &autotune::Autotune_bwd_postprocess__A9__F27,
        &autotune::Autotune_bwd_postprocess__A9__F28,
        &autotune::Autotune_bwd_postprocess__A9__F29,
        &autotune::Autotune_bwd_postprocess__A9__F30,
        &autotune::Autotune_bwd_postprocess__A9__F31,
        &autotune::Autotune_bwd_postprocess__A9__F32,
        &autotune::Autotune_bwd_postprocess__A9__F33,
        &autotune::Autotune_bwd_postprocess__A9__F34,
        &autotune::Autotune_bwd_postprocess__A9__F35,
        &autotune::Autotune_bwd_postprocess__A9__F36,
        &autotune::Autotune_bwd_postprocess__A9__F37,
        &autotune::Autotune_bwd_postprocess__A9__F38,
        &autotune::Autotune_bwd_postprocess__A9__F39,
        &autotune::Autotune_bwd_postprocess__A9__F40,
        &autotune::Autotune_bwd_postprocess__A9__F41,
        &autotune::Autotune_bwd_postprocess__A9__F42,
        &autotune::Autotune_bwd_postprocess__A9__F43,
        &autotune::Autotune_bwd_postprocess__A9__F44,
        &autotune::Autotune_bwd_postprocess__A9__F45,
        &autotune::Autotune_bwd_postprocess__A9__F46,
        &autotune::Autotune_bwd_postprocess__A9__F47,
    },
    // arch_number 10: GPU_AMD_ARCH_GFX1200_MOD0
    {
        &autotune::Autotune_bwd_postprocess__A10__F0,
        &autotune::Autotune_bwd_postprocess__A10__F1,
        &autotune::Autotune_bwd_postprocess__A10__F2,
        &autotune::Autotune_bwd_postprocess__A10__F3,
        &autotune::Autotune_bwd_postprocess__A10__F4,
        &autotune::Autotune_bwd_postprocess__A10__F5,
        &autotune::Autotune_bwd_postprocess__A10__F6,
        &autotune::Autotune_bwd_postprocess__A10__F7,
        &autotune::Autotune_bwd_postprocess__A10__F8,
        &autotune::Autotune_bwd_postprocess__A10__F9,
        &autotune::Autotune_bwd_postprocess__A10__F10,
        &autotune::Autotune_bwd_postprocess__A10__F11,
        &autotune::Autotune_bwd_postprocess__A10__F12,
        &autotune::Autotune_bwd_postprocess__A10__F13,
        &autotune::Autotune_bwd_postprocess__A10__F14,
        &autotune::Autotune_bwd_postprocess__A10__F15,
        &autotune::Autotune_bwd_postprocess__A10__F16,
        &autotune::Autotune_bwd_postprocess__A10__F17,
        &autotune::Autotune_bwd_postprocess__A10__F18,
        &autotune::Autotune_bwd_postprocess__A10__F19,
        &autotune::Autotune_bwd_postprocess__A10__F20,
        &autotune::Autotune_bwd_postprocess__A10__F21,
        &autotune::Autotune_bwd_postprocess__A10__F22,
        &autotune::Autotune_bwd_postprocess__A10__F23,
        &autotune::Autotune_bwd_postprocess__A10__F24,
        &autotune::Autotune_bwd_postprocess__A10__F25,
        &autotune::Autotune_bwd_postprocess__A10__F26,
        &autotune::Autotune_bwd_postprocess__A10__F27,
        &autotune::Autotune_bwd_postprocess__A10__F28,
        &autotune::Autotune_bwd_postprocess__A10__F29,
        &autotune::Autotune_bwd_postprocess__A10__F30,
        &autotune::Autotune_bwd_postprocess__A10__F31,
        &autotune::Autotune_bwd_postprocess__A10__F32,
        &autotune::Autotune_bwd_postprocess__A10__F33,
        &autotune::Autotune_bwd_postprocess__A10__F34,
        &autotune::Autotune_bwd_postprocess__A10__F35,
        &autotune::Autotune_bwd_postprocess__A10__F36,
        &autotune::Autotune_bwd_postprocess__A10__F37,
        &autotune::Autotune_bwd_postprocess__A10__F38,
        &autotune::Autotune_bwd_postprocess__A10__F39,
        &autotune::Autotune_bwd_postprocess__A10__F40,
        &autotune::Autotune_bwd_postprocess__A10__F41,
        &autotune::Autotune_bwd_postprocess__A10__F42,
        &autotune::Autotune_bwd_postprocess__A10__F43,
        &autotune::Autotune_bwd_postprocess__A10__F44,
        &autotune::Autotune_bwd_postprocess__A10__F45,
        &autotune::Autotune_bwd_postprocess__A10__F46,
        &autotune::Autotune_bwd_postprocess__A10__F47,
    },
};

}

// vim: set fileencoding=utf-8

