cpu_fp16.cpp 953 B

12345678910111213141516171819202122232425262728293031323334
  1. #include <stdio.h>
  2. #if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700 && defined __AVX__) || (defined __INTEL_COMPILER && defined __AVX__)
  3. #include <immintrin.h>
  // F16C probe: converts four zero floats to half precision with
  // _mm_cvtps_ph and returns the first stored 16-bit value as an int.
  int test()
  {
      const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
      short dst[8];
      // src is an ordinary automatic array with no 16-byte alignment guarantee,
      // so use the unaligned load; _mm_load_ps requires an aligned address.
      __m128 v_src = _mm_loadu_ps(src);
      // The second argument (0) is the rounding-control immediate.
      __m128i v_dst = _mm_cvtps_ph(v_src, 0);
      // Writes only the low 64 bits of v_dst, i.e. dst[0..3].
      _mm_storel_epi64((__m128i*)dst, v_dst);
      return (int)dst[0];
  }
  13. #elif (defined __GNUC__ && (defined __arm__ || defined __aarch64__)) /*|| (defined _MSC_VER && defined _M_ARM64)*/
  14. // Windows + ARM64 case disabled: https://github.com/opencv/opencv/issues/25052
  15. #include "arm_neon.h"
  // NEON FP16 probe: narrows four zero floats to half precision with
  // vcvt_f16_f32 and returns the first stored 16-bit value as an int.
  int test()
  {
      const float zeros[] = { 0.0f, 0.0f, 0.0f, 0.0f };
      short half_bits[8];
      // Load the four floats as one vector through a pointer cast and narrow
      // them to half precision in a single expression.
      float16x4_t narrowed = vcvt_f16_f32(*(float32x4_t*)zeros);
      // Store the four half values into the first four shorts of half_bits.
      *(float16x4_t*)half_bits = narrowed;
      return (int)half_bits[0];
  }
  25. #else
  26. #error "FP16 is not supported"
  27. #endif
  28. int main()
  29. {
  30. printf("%d\n", test());
  31. return 0;
  32. }