// Copyright 2018-2025 RnD Center "ELVEES", JSC

/*! \file
 *  \brief Тестирование функции w_vec
 *  \author Фролов Андрей
 */

#include "tests.h"

/*! \brief Runs the w_vec test series for int16_t, int32_t, float and double data.
 *
 *  Four work buffers (two inputs, reference output, optimized output) are shared by all
 *  series. Each buffer holds size[TEST_COUNT - 1] elements of sizeof(int64_t) bytes.
 *  Without LOCAL_MEM the buffers come from memalign() with 64-byte alignment; with
 *  LOCAL_MEM they are laid out back to back starting at __local_mem (BARE_METAL) or
 *  xyram_data (otherwise, with the L2 cache disabled for the duration of the run).
 *
 *  \return Sum of the values returned by the individual test_w_vec* calls, or 1 if a
 *          heap buffer could not be allocated.
 */
int main(void) {
  int failed_count = 0;
  int size[TEST_COUNT] = {512, 1024, 2048, 8192, 16384};
  int print = 0;
  // Byte size of every work buffer: the largest test length times the widest element used.
  const size_t buf_bytes = size[TEST_COUNT - 1] * sizeof(int64_t);

  print_table_header();
  int16_t w[TEST_COUNT] = {1, 1, 2, -2, -1};

#ifndef LOCAL_MEM
  void* src0 = memalign(64, buf_bytes);
  void* src1 = memalign(64, buf_bytes);
  void* dst_ref = memalign(64, buf_bytes);
  void* dst_opt = memalign(64, buf_bytes);
  if (!src0 || !src1 || !dst_ref || !dst_opt) {
    // Bail out before any buffer is touched; free(NULL) is a no-op, so release all four.
    printf("memalign failed: cannot allocate test buffers\n");
    free(src0);
    free(src1);
    free(dst_ref);
    free(dst_opt);
    return 1;
  }
#else
#ifdef BARE_METAL
  void* src0 = &__local_mem;
#else
  disable_l2_cache();
  void* src0 = &xyram_data;
#endif
  // Byte-wise offsets go through char*, because arithmetic on void* is a GNU extension.
  void* src1 = (char*)src0 + buf_bytes;
  void* dst_ref = (char*)src1 + buf_bytes;
  void* dst_opt = (char*)dst_ref + buf_bytes;
#endif

  // int16_t series. ti_tics is forwarded to print_performance; presumably a reference
  // cycle estimate for the given length — TODO confirm against print_performance.
  create_vector_s16((int16_t*)src0, size[TEST_COUNT - 1], 0);
  create_vector_s16((int16_t*)src1, size[TEST_COUNT - 1], 0);
  for (int i = 0; i < TEST_COUNT; ++i) {
    int32_t input_bytes = size[i] * sizeof(int16_t) * 2;
    int32_t ti_tics = 3 * size[i] / 8 + 35;
    printf("| w_vec_short            | %14d |", size[i]);
    failed_count += test_w_vec((int16_t*)src0, (int16_t*)src1, (int16_t*)dst_ref, (int16_t*)dst_opt, size[i], print,
                               w[i], input_bytes, ti_tics);
  }

  // int32_t series (ti_tics is passed as 0 here).
  create_vector_s32((int32_t*)src0, size[TEST_COUNT - 1], 0);
  create_vector_s32((int32_t*)src1, size[TEST_COUNT - 1], 0);
  for (int i = 0; i < TEST_COUNT; ++i) {
    int32_t input_bytes = size[i] * sizeof(int32_t) * 2;
    int32_t ti_tics = 0;
    printf("| w_vec_int              | %14d |", size[i]);
    failed_count += test_w_vec32((int32_t*)src0, (int32_t*)src1, (int32_t*)dst_ref, (int32_t*)dst_opt, size[i], print,
                                 (int32_t)w[i], input_bytes, ti_tics);
  }

  // float series.
  create_vector_float((float*)src0, size[TEST_COUNT - 1], 0);
  create_vector_float((float*)src1, size[TEST_COUNT - 1], 0);
  for (int i = 0; i < TEST_COUNT; ++i) {
    int32_t input_bytes = size[i] * sizeof(float) * 2;
    int32_t ti_tics = (int)(0.75 * size[i] + 27);
    printf("| w_vec_fl               | %14d |", size[i]);
    failed_count += test_w_vec_fl((float*)src0, (float*)src1, (float*)dst_ref, (float*)dst_opt, size[i], print,
                                  (float)w[i], input_bytes, ti_tics);
  }

  // double series (ti_tics is passed as 0 here).
  create_vector_double((double*)src0, size[TEST_COUNT - 1], 0);
  create_vector_double((double*)src1, size[TEST_COUNT - 1], 0);
  for (int i = 0; i < TEST_COUNT; ++i) {
    int32_t input_bytes = size[i] * sizeof(double) * 2;
    int32_t ti_tics = 0;
    printf("| w_vec_db               | %14d |", size[i]);
    failed_count += test_w_vec_db((double*)src0, (double*)src1, (double*)dst_ref, (double*)dst_opt, size[i], print,
                                  (double)w[i], input_bytes, ti_tics);
  }

#ifndef LOCAL_MEM
  free(src0);
  free(src1);
  free(dst_ref);
  free(dst_opt);
#else
#ifndef BARE_METAL
  enable_l2_cache(L2_CACHE_SIZE);
#endif
#endif

  return failed_count;
}

/*! \brief Runs ref_w_vec and w_vec on the same int16_t inputs and compares their outputs.
 *
 *  count_tics() is called immediately before and immediately after each of the two
 *  calls. The resulting value pairs are passed to print_performance() together with
 *  input_bytes and ti_tics.
 *
 *  \param src0        First input vector.
 *  \param src1        Second input vector.
 *  \param dst_ref     Output buffer handed to ref_w_vec.
 *  \param dst_opt     Output buffer handed to w_vec.
 *  \param size        Element count passed to both implementations and to compare_s16.
 *  \param print       Non-zero to print the inputs, w and both outputs.
 *  \param w           Scalar argument forwarded to both implementations.
 *  \param input_bytes Forwarded unchanged to print_performance.
 *  \param ti_tics     Forwarded unchanged to print_performance.
 *  \return The value returned by compare_s16 (0 is reported as " passed").
 */
int test_w_vec(int16_t* src0, int16_t* src1, int16_t* dst_ref, int16_t* dst_opt, int size, int print, int16_t w,
               int32_t input_bytes, int32_t ti_tics) {
  // Element 0 receives the count_tics values taken before a call, element 1 those taken after.
  uint32_t ref_tics[2], ref_instr[2];
  uint32_t opt_tics[2], opt_instr[2];

  // Reference implementation.
  count_tics(&ref_tics[0], &ref_instr[0]);
  ref_w_vec(src0, src1, w, dst_ref, size);
  count_tics(&ref_tics[1], &ref_instr[1]);

  // Implementation under test.
  count_tics(&opt_tics[0], &opt_instr[0]);
  w_vec(src0, src1, w, dst_opt, size);
  count_tics(&opt_tics[1], &opt_instr[1]);

  if (print != 0) {
    printf("vect1:");
    print_vector_s16(src0, size);
    printf("vect2:");
    print_vector_s16(src1, size);
    printf("w:");
    printf("%d\n", w);
    printf("dsp_res:");
    print_vector_s16(dst_opt, size);
    printf("ref_res:");
    print_vector_s16(dst_ref, size);
  }

  const int mismatch = compare_s16(dst_ref, dst_opt, size);

  print_performance(ref_tics, ref_instr, opt_tics, opt_instr, input_bytes, ti_tics);

  // The verdict closes the table row that the caller started.
  if (mismatch != 0) {
    printf(" failed |\n");
  } else {
    printf(" passed |\n");
  }

  return mismatch;
}

/*! \brief Runs ref_w_vec32 and w_vec32 on the same int32_t inputs and compares their outputs.
 *
 *  count_tics() is called immediately before and immediately after each of the two
 *  calls. The resulting value pairs are passed to print_performance() together with
 *  input_bytes and ti_tics.
 *
 *  \param src0        First input vector.
 *  \param src1        Second input vector.
 *  \param dst_ref     Output buffer handed to ref_w_vec32.
 *  \param dst_opt     Output buffer handed to w_vec32.
 *  \param size        Element count passed to both implementations and to compare_s32.
 *  \param print       Non-zero to print the inputs, w and both outputs.
 *  \param w           Scalar argument forwarded to both implementations.
 *  \param input_bytes Forwarded unchanged to print_performance.
 *  \param ti_tics     Forwarded unchanged to print_performance.
 *  \return The value returned by compare_s32 (0 is reported as " passed").
 */
int test_w_vec32(int32_t* src0, int32_t* src1, int32_t* dst_ref, int32_t* dst_opt, int size, int print, int32_t w,
                 int32_t input_bytes, int32_t ti_tics) {
  // Element 0 receives the count_tics values taken before a call, element 1 those taken after.
  uint32_t ref_tics[2], ref_instr[2];
  uint32_t opt_tics[2], opt_instr[2];

  // Reference implementation.
  count_tics(&ref_tics[0], &ref_instr[0]);
  ref_w_vec32(src0, src1, w, dst_ref, size);
  count_tics(&ref_tics[1], &ref_instr[1]);

  // Implementation under test.
  count_tics(&opt_tics[0], &opt_instr[0]);
  w_vec32(src0, src1, w, dst_opt, size);
  count_tics(&opt_tics[1], &opt_instr[1]);

  if (print != 0) {
    printf("vect1:");
    print_vector_s32(src0, size);
    printf("vect2:");
    print_vector_s32(src1, size);
    printf("w:");
    printf("%d\n", w);
    printf("dsp_res:");
    print_vector_s32(dst_opt, size);
    printf("ref_res:");
    print_vector_s32(dst_ref, size);
  }

  const int mismatch = compare_s32(dst_ref, dst_opt, size);

  print_performance(ref_tics, ref_instr, opt_tics, opt_instr, input_bytes, ti_tics);

  // The verdict closes the table row that the caller started.
  if (mismatch != 0) {
    printf(" failed |\n");
  } else {
    printf(" passed |\n");
  }

  return mismatch;
}

/*! \brief Runs ref_w_vec_fl and w_vec_fl on the same float inputs and compares their outputs.
 *
 *  count_tics() is called immediately before and immediately after each of the two
 *  calls. The resulting value pairs are passed to print_performance() together with
 *  input_bytes and ti_tics.
 *
 *  \param src0        First input vector.
 *  \param src1        Second input vector.
 *  \param dst_ref     Output buffer handed to ref_w_vec_fl.
 *  \param dst_opt     Output buffer handed to w_vec_fl.
 *  \param size        Element count passed to both implementations and to compare_float.
 *  \param print       Non-zero to print the inputs, w and both outputs.
 *  \param w           Scalar argument forwarded to both implementations.
 *  \param input_bytes Forwarded unchanged to print_performance.
 *  \param ti_tics     Forwarded unchanged to print_performance.
 *  \return The value returned by compare_float (0 is reported as " passed").
 */
int test_w_vec_fl(float* src0, float* src1, float* dst_ref, float* dst_opt, int size, int print, float w,
                  int32_t input_bytes, int32_t ti_tics) {
  // Element 0 receives the count_tics values taken before a call, element 1 those taken after.
  uint32_t ref_tics[2], ref_instr[2];
  uint32_t opt_tics[2], opt_instr[2];

  // Reference implementation.
  count_tics(&ref_tics[0], &ref_instr[0]);
  ref_w_vec_fl(src0, src1, w, dst_ref, size);
  count_tics(&ref_tics[1], &ref_instr[1]);

  // Implementation under test.
  count_tics(&opt_tics[0], &opt_instr[0]);
  w_vec_fl(src0, src1, w, dst_opt, size);
  count_tics(&opt_tics[1], &opt_instr[1]);

  if (print != 0) {
    printf("vect1:");
    print_vector_float(src0, size);
    printf("vect2:");
    print_vector_float(src1, size);
    printf("w:");
    printf("%f\n", w);
    printf("dsp_res:");
    print_vector_float(dst_opt, size);
    printf("ref_res:");
    print_vector_float(dst_ref, size);
  }

  const int mismatch = compare_float(dst_ref, dst_opt, size);

  print_performance(ref_tics, ref_instr, opt_tics, opt_instr, input_bytes, ti_tics);

  // The verdict closes the table row that the caller started.
  if (mismatch != 0) {
    printf(" failed |\n");
  } else {
    printf(" passed |\n");
  }

  return mismatch;
}

/*! \brief Runs ref_w_vec_db and w_vec_db on the same double inputs and compares their outputs.
 *
 *  count_tics() is called immediately before and immediately after each of the two
 *  calls. The resulting value pairs are passed to print_performance() together with
 *  input_bytes and ti_tics.
 *
 *  \param src0        First input vector.
 *  \param src1        Second input vector.
 *  \param dst_ref     Output buffer handed to ref_w_vec_db.
 *  \param dst_opt     Output buffer handed to w_vec_db.
 *  \param size        Element count passed to both implementations and to compare_double.
 *  \param print       Non-zero to print the inputs, w and both outputs.
 *  \param w           Scalar argument forwarded to both implementations.
 *  \param input_bytes Forwarded unchanged to print_performance.
 *  \param ti_tics     Forwarded unchanged to print_performance.
 *  \return The value returned by compare_double (0 is reported as " passed").
 */
int test_w_vec_db(double* src0, double* src1, double* dst_ref, double* dst_opt, int size, int print, double w,
                  int32_t input_bytes, int32_t ti_tics) {
  // Element 0 receives the count_tics values taken before a call, element 1 those taken after.
  uint32_t ref_tics[2], ref_instr[2];
  uint32_t opt_tics[2], opt_instr[2];

  // Reference implementation.
  count_tics(&ref_tics[0], &ref_instr[0]);
  ref_w_vec_db(src0, src1, w, dst_ref, size);
  count_tics(&ref_tics[1], &ref_instr[1]);

  // Implementation under test.
  count_tics(&opt_tics[0], &opt_instr[0]);
  w_vec_db(src0, src1, w, dst_opt, size);
  count_tics(&opt_tics[1], &opt_instr[1]);

  if (print != 0) {
    printf("vect1:");
    print_vector_double(src0, size);
    printf("vect2:");
    print_vector_double(src1, size);
    printf("w:");
    printf("%f\n", w);
    printf("dsp_res:");
    print_vector_double(dst_opt, size);
    printf("ref_res:");
    print_vector_double(dst_ref, size);
  }

  const int mismatch = compare_double(dst_ref, dst_opt, size);

  print_performance(ref_tics, ref_instr, opt_tics, opt_instr, input_bytes, ti_tics);

  // The verdict closes the table row that the caller started.
  if (mismatch != 0) {
    printf(" failed |\n");
  } else {
    printf(" passed |\n");
  }

  return mismatch;
}
