// Copyright 2018-2025 RnD Center "ELVEES", JSC

/*! \file
 *  \brief Тестирование функции mul16i16f
 *  \author Фролов Андрей
 */

#include "tests.h"

/*! \brief Entry point of the mul16i16f test program.
 *
 *  Prepares two source buffers and two destination buffers sized for the
 *  largest entry of the length table, then calls test_mul16i16f once per
 *  table entry and accumulates its return values.
 *
 *  Buffer placement depends on the build configuration:
 *   - LOCAL_MEM undefined: four separate 64-byte-aligned heap allocations;
 *   - LOCAL_MEM + BARE_METAL: consecutive regions starting at __local_mem;
 *   - LOCAL_MEM only: consecutive regions starting at xyram_data, with
 *     disable_l2_cache() called before and enable_l2_cache() after the run.
 *
 *  \return Sum of the values returned by test_mul16i16f (0 when every run
 *          reported success), or 1 if the heap buffers cannot be allocated.
 */
int main(void) {
  int failed_count = 0;
  int size[TEST_COUNT] = {512, 1024, 2048, 8192, 16384};
  int print = 0;

  // All buffers are sized once for the last (largest) table entry and reused for every run.
  const int max_size = size[TEST_COUNT - 1];
  const size_t buf_bytes = max_size * sizeof(int32_t);

  print_table_header();
  float rel_eps = 0.002f;

#ifndef LOCAL_MEM
  void* src0 = memalign(64, buf_bytes);
  void* src1 = memalign(64, buf_bytes);
  void* dst_opt = memalign(64, buf_bytes);
  void* dst_ref = memalign(64, buf_bytes);

  // Bail out instead of handing NULL buffers to the routines under test.
  if (!src0 || !src1 || !dst_opt || !dst_ref) {
    printf("mul16i16f: failed to allocate test buffers\n");
    free(src0);  // free(NULL) is a no-op, so partial allocations are released safely
    free(src1);
    free(dst_opt);
    free(dst_ref);
    return 1;
  }
#else
#ifdef BARE_METAL
  void* src0 = &__local_mem;
#else
  disable_l2_cache();
  void* src0 = &xyram_data;
#endif
  // Byte offsets go through char* because arithmetic on void* is a GNU extension, not standard C.
  void* src1 = (char*)src0 + buf_bytes;
  void* dst_opt = (char*)src1 + buf_bytes;
  void* dst_ref = (char*)dst_opt + buf_bytes;
#endif

  create_vector_s32(src0, max_size, 0);
  create_vector_s32(src1, max_size, 0);

  for (int i = 0; i < TEST_COUNT; ++i) {
    // Two input vectors of size[i] 32-bit elements each.
    int32_t input_bytes = size[i] * sizeof(int32_t) * 2;
    // Cycle figure forwarded to print_performance; presumably a published reference
    // cycle count for the same operation -- TODO confirm the origin of the formula.
    int32_t ti_tics = 3 * size[i] / 4 + 26;
    printf("| mul16i16f              | %14d |", size[i]);
    failed_count += test_mul16i16f((int32_t*)src0, (int32_t*)src1, (int32_t*)dst_opt, (int32_t*)dst_ref, size[i],
                                   print, rel_eps, input_bytes, ti_tics);
  }

#ifndef LOCAL_MEM
  free(src0);
  free(src1);
  free(dst_opt);
  free(dst_ref);
#else
#ifndef BARE_METAL
  enable_l2_cache(L2_CACHE_SIZE);
#endif
#endif

  return failed_count;
}

/*! \brief Runs one mul16i16f test case and prints its result row.
 *
 *  Calls ref_mul16i16f and then mul16i16f on the same inputs, bracketing each
 *  call with count_tics, compares the two outputs with compare_16i16f_eps and
 *  forwards the collected counters to print_performance. Finishes the table
 *  row with " passed |" or " failed |".
 *
 *  \param src0         First input vector.
 *  \param src1         Second input vector.
 *  \param dst_opt      Output buffer written by mul16i16f.
 *  \param dst_ref      Output buffer written by ref_mul16i16f.
 *  \param size         Length value passed to both implementations and to the comparison.
 *  \param print        Non-zero to dump the inputs and both outputs.
 *  \param rel_eps      Tolerance passed to compare_16i16f_eps.
 *  \param input_bytes  Input size in bytes, forwarded to print_performance.
 *  \param ti_tics      Cycle figure forwarded to print_performance.
 *  \return The value returned by compare_16i16f_eps (0 is treated as success).
 */
int test_mul16i16f(int32_t* src0, int32_t* src1, int32_t* dst_opt, int32_t* dst_ref, int size, int print,
                   float rel_eps, int32_t input_bytes, int32_t ti_tics) {
  // Slot 0 of each counter pair is filled before the measured call, slot 1 after it.
  enum { BEFORE = 0, AFTER = 1 };

  uint32_t ref_tics[2], ref_instrs[2];
  uint32_t opt_tics[2], opt_instrs[2];

  // Reference implementation.
  count_tics(&ref_tics[BEFORE], &ref_instrs[BEFORE]);
  ref_mul16i16f(src0, src1, dst_ref, size);
  count_tics(&ref_tics[AFTER], &ref_instrs[AFTER]);

  // Implementation under test.
  count_tics(&opt_tics[BEFORE], &opt_instrs[BEFORE]);
  mul16i16f(src0, src1, dst_opt, size);
  count_tics(&opt_tics[AFTER], &opt_instrs[AFTER]);

  // Optional dump of both inputs followed by both outputs.
  if (print != 0) {
    printf("vect1:");
    print_vector_s32(src0, size);

    printf("vect2:");
    print_vector_s32(src1, size);

    printf("dsp_res:");
    print_vector_s32(dst_opt, size);

    printf("ref_res:");
    print_vector_s32(dst_ref, size);
  }

  const int status = compare_16i16f_eps(dst_ref, dst_opt, size, rel_eps);

  print_performance(ref_tics, ref_instrs, opt_tics, opt_instrs, input_bytes, ti_tics);

  // Close the table row with the verdict.
  if (status != 0) {
    printf(" failed |\n");
  } else {
    printf(" passed |\n");
  }

  return status;
}
