; jsimdcpu.asm
  1. ;
  2. ; SIMD instruction support check
  3. ;
  4. ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  5. ; Copyright (C) 2016, D. R. Commander.
  6. ; Copyright (C) 2023, Aliaksiej Kandracienka.
  7. ;
  8. ; Based on
  9. ; x86 SIMD extension for IJG JPEG library
  10. ; Copyright (C) 1999-2006, MIYASAKA Masaru.
  11. ; For conditions of distribution and use, see copyright notice in jsimdext.inc
  12. ;
  13. ; This file should be assembled with NASM (Netwide Assembler) or Yasm.
  14. %include "jsimdext.inc"
  15. ; --------------------------------------------------------------------------
  16. SECTION SEG_TEXT
  17. BITS 64
  18. ;
  19. ; Check if the CPU supports SIMD instructions
  20. ;
  21. ; GLOBAL(unsigned int)
  22. ; jpeg_simd_cpu_support(void)
  23. ;
  24. align 32
  25. GLOBAL_FUNCTION(jpeg_simd_cpu_support)
  26. EXTN(jpeg_simd_cpu_support):
  27. push rbp
  28. mov rbp, rsp
  29. push rbx
  30. push rdi
  31. xor rdi, rdi ; simd support flag
  32. ; Assume that all x86-64 processors support SSE & SSE2 instructions
  33. or rdi, JSIMD_SSE2
  34. or rdi, JSIMD_SSE
  35. ; Check whether CPUID leaf 07H is supported
  36. ; (leaf 07H is used to check for AVX2 instruction support)
  37. mov rax, 0
  38. cpuid
  39. cmp rax, 7
  40. jl short .return ; Maximum leaf < 07H
  41. ; Check for AVX2 instruction support
  42. mov rax, 7
  43. xor rcx, rcx
  44. cpuid
  45. mov rax, rbx ; rax = Extended feature flags
  46. test rax, 1<<5 ; bit5:AVX2
  47. jz short .return
  48. ; Check for AVX2 O/S support
  49. mov rax, 1
  50. xor rcx, rcx
  51. cpuid
  52. test rcx, 1<<27
  53. jz short .return ; O/S does not support XSAVE
  54. test rcx, 1<<28
  55. jz short .return ; CPU does not support AVX2
  56. xor rcx, rcx
  57. xgetbv
  58. and rax, 6
  59. cmp rax, 6 ; O/S does not manage XMM/YMM state
  60. ; using XSAVE
  61. jnz short .return
  62. or rdi, JSIMD_AVX2
  63. .return:
  64. mov rax, rdi
  65. pop rdi
  66. pop rbx
  67. pop rbp
  68. ret
  69. ; For some reason, the OS X linker does not honor the request to align the
  70. ; segment unless we do this.
  71. align 32