// Copyright 2025 RnD Center "ELVEES", JSC

#include "tests_tile_segmentation.hpp"

/// Runs one dot-product-with-square comparison: the `reference` callable versus
/// the tile-segmentation path driven through `run_calc`.
///
/// Both input vectors are (re)filled via `create_vector(ptr, size, -3, 3)`, the
/// reference routine and the configured calculation are each executed once
/// between `count_tics` samples, and the tick / instruction deltas are printed
/// to std::cout.
///
/// @tparam Ret_Type  type of the returned result and of the "sqr" accumulator
/// @tparam Type      element type of the input vectors
/// @param src0, src1     input buffers, overwritten by `create_vector`
/// @param create_vector  callable used to fill the inputs
/// @param reference      callable invoked as `reference(G, src0, src1, &sqr, size)`
/// @param run_calc       callable invoked as `run_calc(&config)`
/// @param size           element count forwarded to the generator and both routines
/// @param localmem       scratch pointer forwarded (as `uint16_t*`) to the config builder
/// @return true when the two implementations DISAGREE (i.e. on failure),
///         false when both the returned value and the sqr output match.
template <typename Ret_Type, typename Type, class create_func, class ref_func, class run_calc_ptr>
bool test_dotprod_sqr(Type* src0, Type* src1, create_func create_vector, ref_func reference, run_calc_ptr run_calc,
                      int size, int* localmem) {
  create_vector(src0, size, -3, 3);
  create_vector(src1, size, -3, 3);

  // First argument handed to both implementations; its meaning is defined by
  // the routines under test (not visible from this file).
  Ret_Type G = 4;
  Ret_Type expected_sqr = 0;
  Ret_Type actual_sqr = 0;

  FLUSH_ALL_CACHES();
  // Index 0 holds the sample taken before the call, index 1 the one after.
  uint32_t ticks[2], instrs[2];
  count_tics(&ticks[0], &instrs[0]);
  Ret_Type expected = reference(G, src0, src1, &expected_sqr, size);
  count_tics(&ticks[1], &instrs[1]);

  std::cout << "Ref func result (size = " << size << "): tic = " << ticks[1] - ticks[0]
            << " instr = " << instrs[1] - instrs[0] << std::endl;

  TileSegConfig config;
  CreateTileSegConfigDotprodSqr(G, src0, src1, &actual_sqr, size, &config, reinterpret_cast<uint16_t*>(localmem));

  // Flush again so the second measurement does not benefit from data left in
  // the caches by the reference run.
  FLUSH_ALL_CACHES();

  count_tics(&ticks[0], &instrs[0]);
  Ret_Type actual = run_calc(&config);
  count_tics(&ticks[1], &instrs[1]);

  std::cout << "Opt func result (size = " << size << "): tic = " << ticks[1] - ticks[0]
            << " instr = " << instrs[1] - instrs[0] << std::endl;

  // NOTE(review): the comparison is exact, including for float/double
  // instantiations — confirm bit-identical results are really expected there.
  const bool results_match = (expected == actual) && (expected_sqr == actual_sqr);
  return !results_match;
}

/// Test driver for the dotprod_sqr kernels.
///
/// Disables the L2 cache, allocates two 64-byte-aligned buffers of
/// `SIZE * sizeof(int64_t)` bytes that are reused for every element type, and
/// for each power-of-two length `i` in [1, SIZE] runs the int16, int32, float
/// and double variants of `test_dotprod_sqr`. When `USE_REF_VER` is defined the
/// `ref_*` routines are used as the reference, otherwise the non-prefixed ones.
///
/// @return 0 when every comparison matched, non-zero if any variant failed or
///         the input buffers could not be allocated.
int main() {
  disable_l2_cache();

  void* src0 = memalign(64, SIZE * sizeof(int64_t));
  void* src1 = memalign(64, SIZE * sizeof(int64_t));

  // memalign reports failure with a null pointer; bail out before the test
  // helpers write through it. free() on a null pointer is a no-op, so both
  // buffers can be released unconditionally.
  if (!src0 || !src1) {
    std::cout << "Failed to allocate test buffers\n";
    free(src0);
    free(src1);
    enable_l2_cache(L2_CACHE_SIZE);
    return 1;
  }

  int ret = 0;
  int test_status = 0;  // sticky: once any variant fails it stays non-zero

  for (int i = 1; i <= SIZE; i *= 2) {
    std::cout << "dotprod_sqr_s16" << std::endl;
#ifdef USE_REF_VER
    ret = test_dotprod_sqr<int32_t>(static_cast<int16_t*>(src0), static_cast<int16_t*>(src1), data_generator<int16_t>,
                                    ref_dotp_sqr, RunCalculationDotprodSqr16, i, &__local_mem);
#else
    ret = test_dotprod_sqr<int32_t>(static_cast<int16_t*>(src0), static_cast<int16_t*>(src1), data_generator<int16_t>,
                                    dotp_sqr, RunCalculationDotprodSqr16, i, &__local_mem);
#endif

    test_status |= ret;
    if (ret) std::cout << "dotprod_sqr16 error!\n";

    std::cout << "dotprod_sqr_s32" << std::endl;
#ifdef USE_REF_VER
    ret = test_dotprod_sqr<int64_t>(static_cast<int32_t*>(src0), static_cast<int32_t*>(src1), data_generator<int32_t>,
                                    ref_dotp_sqr32, RunCalculationDotprodSqr32, i, &__local_mem);
#else
    ret = test_dotprod_sqr<int64_t>(static_cast<int32_t*>(src0), static_cast<int32_t*>(src1), data_generator<int32_t>,
                                    dotp_sqr32, RunCalculationDotprodSqr32, i, &__local_mem);
#endif

    test_status |= ret;
    if (ret) std::cout << "dotprod_sqr32 error!\n";

    std::cout << "dotprod_sqr_fl" << std::endl;
#ifdef USE_REF_VER
    ret = test_dotprod_sqr<float>(static_cast<float*>(src0), static_cast<float*>(src1), data_generator<float>,
                                  ref_dotp_sqr_fl, RunCalculationDotprodSqrFl, i, &__local_mem);
#else
    ret = test_dotprod_sqr<float>(static_cast<float*>(src0), static_cast<float*>(src1), data_generator<float>,
                                  dotp_sqr_fl, RunCalculationDotprodSqrFl, i, &__local_mem);
#endif

    test_status |= ret;
    if (ret) std::cout << "dotprod_sqr_fl error!\n";

    std::cout << "dotprod_sqr_db" << std::endl;
#ifdef USE_REF_VER
    ret = test_dotprod_sqr<double>(static_cast<double*>(src0), static_cast<double*>(src1), data_generator<double>,
                                   ref_dotp_sqr_db, RunCalculationDotprodSqrDb, i, &__local_mem);
#else
    ret = test_dotprod_sqr<double>(static_cast<double*>(src0), static_cast<double*>(src1), data_generator<double>,
                                   dotp_sqr_db, RunCalculationDotprodSqrDb, i, &__local_mem);
#endif

    test_status |= ret;
    if (ret) std::cout << "dotprod_sqr_db error!\n";

    // Reported after every size; reflects the cumulative status so far.
    if (!test_status)
      std::cout << "Test passed" << std::endl;
    else
      std::cout << "Test failed" << std::endl;
  }

  free(src0);
  free(src1);

  enable_l2_cache(L2_CACHE_SIZE);

  return test_status;
}
