; jsimdcpu.asm
  ;
  ; SIMD instruction support check
  ;
  ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  ; Copyright (C) 2016, D. R. Commander.
  ;
  ; Based on the x86 SIMD extension for IJG JPEG library
  ; Copyright (C) 1999-2006, MIYASAKA Masaru.
  ; For conditions of distribution and use, see copyright notice in jsimdext.inc
  ;
  ; This file should be assembled with NASM (Netwide Assembler) or Yasm.
  %include "jsimdext.inc"

  ; --------------------------------------------------------------------------
  ; Named masks for the feature bits tested below.

  ; EFLAGS.ID (bit 21): software can toggle it only if CPUID is implemented.
  %define EFLAGS_ID               (1<<21)

  ; CPUID.(EAX=07H,ECX=0):EBX
  %define CPUID7_EBX_AVX2         (1<<5)

  ; CPUID.01H:ECX
  %define CPUID1_ECX_OSXSAVE      (1<<27)
  %define CPUID1_ECX_AVX          (1<<28)

  ; CPUID.01H:EDX
  %define CPUID1_EDX_MMX          (1<<23)
  %define CPUID1_EDX_SSE          (1<<25)
  %define CPUID1_EDX_SSE2         (1<<26)

  ; CPUID.80000001H:EDX (3DNow!, vendor independent)
  %define CPUIDX1_EDX_3DNOW       (1<<31)

  ; XCR0 bit 1 (XMM state) | bit 2 (YMM state)
  %define XCR0_XMM_YMM_STATE      6

  ; --------------------------------------------------------------------------
          SECTION SEG_TEXT
          BITS    32

  ;
  ; Check if the CPU supports SIMD instructions
  ;
  ; GLOBAL(unsigned int)
  ; jpeg_simd_cpu_support(void)
  ;
  ; Target: 32-bit x86 code (BITS 32); takes no arguments.
  ; Out:    eax = bitwise OR of the JSIMD_MMX, JSIMD_SSE, JSIMD_SSE2,
  ;               JSIMD_3DNOW and JSIMD_AVX2 flags that were detected;
  ;               0 if CPUID is unavailable or its maximum standard leaf is 0.
  ; Saved:  ebx, edi (pushed on entry, popped on exit).
  ; Clobb:  ecx, edx, EFLAGS (ecx/edx need not be preserved; esi is unused).
  ;         When CPUID is present, EFLAGS.ID is left in its toggled state.
  ; Regs:   edi accumulates the result for the whole function.
  ;
  ; SEG_TEXT, GLOBAL_FUNCTION, EXTN and the JSIMD_* flags are not defined in
  ; this file; presumably they come from jsimdext.inc -- confirm there.
  ;
          align   32
  GLOBAL_FUNCTION(jpeg_simd_cpu_support)

  EXTN(jpeg_simd_cpu_support):
          push    ebx                     ; cpuid overwrites ebx
          push    edi

          xor     edi, edi                ; edi = SIMD support flags (none yet)

          ; CPUID is available iff the ID bit in EFLAGS can be flipped.
          pushfd
          pop     eax
          mov     edx, eax                ; edx = EFLAGS as found on entry
          xor     eax, EFLAGS_ID
          push    eax
          popfd                           ; try to write the flipped ID bit
          pushfd
          pop     eax                     ; eax = EFLAGS as actually stored
          xor     eax, edx                ; nonzero iff the ID bit changed
          jz      near .return            ; CPUID is not supported

          ; Leaf 00H: eax = highest supported standard leaf.
          xor     eax, eax
          cpuid
          test    eax, eax
          jz      near .return            ; leaf 01H is not available
          cmp     eax, 7
          jb      short .no_avx2          ; max leaf < 07H (unsigned compare:
                                          ; the leaf number is not signed)

          ; Check for AVX2 instruction support (leaf 07H, sub-leaf 0).
          mov     eax, 7
          xor     ecx, ecx
          cpuid
          test    ebx, CPUID7_EBX_AVX2
          jz      short .no_avx2

          ; Check for O/S support of the AVX register state.
          mov     eax, 1
          xor     ecx, ecx
          cpuid
          test    ecx, CPUID1_ECX_OSXSAVE
          jz      short .no_avx2          ; O/S has not enabled XSAVE/XGETBV
          test    ecx, CPUID1_ECX_AVX
          jz      short .no_avx2          ; CPU does not support AVX

          xor     ecx, ecx                ; select XCR0
          xgetbv
          and     eax, XCR0_XMM_YMM_STATE
          cmp     eax, XCR0_XMM_YMM_STATE
          jne     short .no_avx2          ; O/S does not manage XMM/YMM state
                                          ; using XSAVE

          or      edi, JSIMD_AVX2

  .no_avx2:
          ; Check CPUID leaf 01H for MMX, SSE, and SSE2 support.
          mov     eax, 1
          cpuid                           ; edx = standard feature flags

          test    edx, CPUID1_EDX_MMX
          jz      short .no_mmx
          or      edi, byte JSIMD_MMX
  .no_mmx:
          test    edx, CPUID1_EDX_SSE
          jz      short .no_sse
          or      edi, byte JSIMD_SSE
  .no_sse:
          test    edx, CPUID1_EDX_SSE2
          jz      short .no_sse2
          or      edi, byte JSIMD_SSE2
  .no_sse2:

          ; Check for 3DNow! instruction support (extended leaves).
          mov     eax, 0x80000000
          cpuid                           ; eax = highest extended leaf
          cmp     eax, 0x80000000
          jbe     short .return           ; leaf 80000001H is not available

          mov     eax, 0x80000001
          cpuid                           ; edx = extended feature flags
          test    edx, CPUIDX1_EDX_3DNOW
          jz      short .no_3dnow
          or      edi, byte JSIMD_3DNOW
  .no_3dnow:

  .return:
          mov     eax, edi                ; return the accumulated flags
          pop     edi
          pop     ebx
          ret

  ; For some reason, the OS X linker does not honor the request to align the
  ; segment unless we do this.
          align   32