jsimd.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310
  1. /*
  2. * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  3. * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2024, D. R. Commander.
  4. * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  5. *
  6. * Based on the x86 SIMD extension for IJG JPEG library,
  7. * Copyright (C) 1999-2006, MIYASAKA Masaru.
  8. * For conditions of distribution and use, see copyright notice in jsimdext.inc
  9. *
  10. * This file contains the interface between the "normal" portions
  11. * of the library and the SIMD implementations when running on a
  12. * 32-bit x86 architecture.
  13. */
  14. #define JPEG_INTERNALS
  15. #include "../../src/jinclude.h"
  16. #include "../../src/jpeglib.h"
  17. #include "../../src/jsimd.h"
  18. #include "../../src/jdct.h"
  19. #include "../../src/jsimddct.h"
  20. #include "../jsimd.h"
  21. /*
  22. * In the PIC cases, we have no guarantee that constants will keep
  23. * their alignment. This macro allows us to verify it at runtime.
  24. */
  25. #define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0)
  26. #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
  27. #define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
  28. static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
  29. static THREAD_LOCAL unsigned int simd_huffman = 1;
  30. /*
  31. * Check what SIMD accelerations are supported.
  32. */
  33. LOCAL(void)
  34. init_simd(void)
  35. {
  36. #ifndef NO_GETENV
  37. char env[2] = { 0 };
  38. #endif
  39. if (simd_support != ~0U)
  40. return;
  41. simd_support = jpeg_simd_cpu_support();
  42. #ifndef NO_GETENV
  43. /* Force different settings through environment variables */
  44. if (!GETENV_S(env, 2, "JSIMD_FORCEMMX") && !strcmp(env, "1"))
  45. simd_support &= JSIMD_MMX;
  46. if (!GETENV_S(env, 2, "JSIMD_FORCE3DNOW") && !strcmp(env, "1"))
  47. simd_support &= JSIMD_3DNOW | JSIMD_MMX;
  48. if (!GETENV_S(env, 2, "JSIMD_FORCESSE") && !strcmp(env, "1"))
  49. simd_support &= JSIMD_SSE | JSIMD_MMX;
  50. if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
  51. simd_support &= JSIMD_SSE2;
  52. if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
  53. simd_support &= JSIMD_AVX2;
  54. if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
  55. simd_support = 0;
  56. if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
  57. simd_huffman = 0;
  58. #endif
  59. }
  60. GLOBAL(int)
  61. jsimd_can_rgb_ycc(void)
  62. {
  63. init_simd();
  64. /* The code is optimised for these values only */
  65. if (BITS_IN_JSAMPLE != 8)
  66. return 0;
  67. if (sizeof(JDIMENSION) != 4)
  68. return 0;
  69. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  70. return 0;
  71. if ((simd_support & JSIMD_AVX2) &&
  72. IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
  73. return 1;
  74. if ((simd_support & JSIMD_SSE2) &&
  75. IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
  76. return 1;
  77. if (simd_support & JSIMD_MMX)
  78. return 1;
  79. return 0;
  80. }
  81. GLOBAL(int)
  82. jsimd_can_rgb_gray(void)
  83. {
  84. init_simd();
  85. /* The code is optimised for these values only */
  86. if (BITS_IN_JSAMPLE != 8)
  87. return 0;
  88. if (sizeof(JDIMENSION) != 4)
  89. return 0;
  90. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  91. return 0;
  92. if ((simd_support & JSIMD_AVX2) &&
  93. IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
  94. return 1;
  95. if ((simd_support & JSIMD_SSE2) &&
  96. IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
  97. return 1;
  98. if (simd_support & JSIMD_MMX)
  99. return 1;
  100. return 0;
  101. }
  102. GLOBAL(int)
  103. jsimd_can_ycc_rgb(void)
  104. {
  105. init_simd();
  106. /* The code is optimised for these values only */
  107. if (BITS_IN_JSAMPLE != 8)
  108. return 0;
  109. if (sizeof(JDIMENSION) != 4)
  110. return 0;
  111. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  112. return 0;
  113. if ((simd_support & JSIMD_AVX2) &&
  114. IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
  115. return 1;
  116. if ((simd_support & JSIMD_SSE2) &&
  117. IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
  118. return 1;
  119. if (simd_support & JSIMD_MMX)
  120. return 1;
  121. return 0;
  122. }
  123. GLOBAL(int)
  124. jsimd_can_ycc_rgb565(void)
  125. {
  126. return 0;
  127. }
  128. GLOBAL(void)
  129. jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  130. JSAMPIMAGE output_buf, JDIMENSION output_row,
  131. int num_rows)
  132. {
  133. void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  134. void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  135. void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  136. if (simd_support == ~0U)
  137. init_simd();
  138. switch (cinfo->in_color_space) {
  139. case JCS_EXT_RGB:
  140. avx2fct = jsimd_extrgb_ycc_convert_avx2;
  141. sse2fct = jsimd_extrgb_ycc_convert_sse2;
  142. mmxfct = jsimd_extrgb_ycc_convert_mmx;
  143. break;
  144. case JCS_EXT_RGBX:
  145. case JCS_EXT_RGBA:
  146. avx2fct = jsimd_extrgbx_ycc_convert_avx2;
  147. sse2fct = jsimd_extrgbx_ycc_convert_sse2;
  148. mmxfct = jsimd_extrgbx_ycc_convert_mmx;
  149. break;
  150. case JCS_EXT_BGR:
  151. avx2fct = jsimd_extbgr_ycc_convert_avx2;
  152. sse2fct = jsimd_extbgr_ycc_convert_sse2;
  153. mmxfct = jsimd_extbgr_ycc_convert_mmx;
  154. break;
  155. case JCS_EXT_BGRX:
  156. case JCS_EXT_BGRA:
  157. avx2fct = jsimd_extbgrx_ycc_convert_avx2;
  158. sse2fct = jsimd_extbgrx_ycc_convert_sse2;
  159. mmxfct = jsimd_extbgrx_ycc_convert_mmx;
  160. break;
  161. case JCS_EXT_XBGR:
  162. case JCS_EXT_ABGR:
  163. avx2fct = jsimd_extxbgr_ycc_convert_avx2;
  164. sse2fct = jsimd_extxbgr_ycc_convert_sse2;
  165. mmxfct = jsimd_extxbgr_ycc_convert_mmx;
  166. break;
  167. case JCS_EXT_XRGB:
  168. case JCS_EXT_ARGB:
  169. avx2fct = jsimd_extxrgb_ycc_convert_avx2;
  170. sse2fct = jsimd_extxrgb_ycc_convert_sse2;
  171. mmxfct = jsimd_extxrgb_ycc_convert_mmx;
  172. break;
  173. default:
  174. avx2fct = jsimd_rgb_ycc_convert_avx2;
  175. sse2fct = jsimd_rgb_ycc_convert_sse2;
  176. mmxfct = jsimd_rgb_ycc_convert_mmx;
  177. break;
  178. }
  179. if (simd_support & JSIMD_AVX2)
  180. avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  181. else if (simd_support & JSIMD_SSE2)
  182. sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  183. else
  184. mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  185. }
  186. GLOBAL(void)
  187. jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  188. JSAMPIMAGE output_buf, JDIMENSION output_row,
  189. int num_rows)
  190. {
  191. void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  192. void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  193. void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  194. if (simd_support == ~0U)
  195. init_simd();
  196. switch (cinfo->in_color_space) {
  197. case JCS_EXT_RGB:
  198. avx2fct = jsimd_extrgb_gray_convert_avx2;
  199. sse2fct = jsimd_extrgb_gray_convert_sse2;
  200. mmxfct = jsimd_extrgb_gray_convert_mmx;
  201. break;
  202. case JCS_EXT_RGBX:
  203. case JCS_EXT_RGBA:
  204. avx2fct = jsimd_extrgbx_gray_convert_avx2;
  205. sse2fct = jsimd_extrgbx_gray_convert_sse2;
  206. mmxfct = jsimd_extrgbx_gray_convert_mmx;
  207. break;
  208. case JCS_EXT_BGR:
  209. avx2fct = jsimd_extbgr_gray_convert_avx2;
  210. sse2fct = jsimd_extbgr_gray_convert_sse2;
  211. mmxfct = jsimd_extbgr_gray_convert_mmx;
  212. break;
  213. case JCS_EXT_BGRX:
  214. case JCS_EXT_BGRA:
  215. avx2fct = jsimd_extbgrx_gray_convert_avx2;
  216. sse2fct = jsimd_extbgrx_gray_convert_sse2;
  217. mmxfct = jsimd_extbgrx_gray_convert_mmx;
  218. break;
  219. case JCS_EXT_XBGR:
  220. case JCS_EXT_ABGR:
  221. avx2fct = jsimd_extxbgr_gray_convert_avx2;
  222. sse2fct = jsimd_extxbgr_gray_convert_sse2;
  223. mmxfct = jsimd_extxbgr_gray_convert_mmx;
  224. break;
  225. case JCS_EXT_XRGB:
  226. case JCS_EXT_ARGB:
  227. avx2fct = jsimd_extxrgb_gray_convert_avx2;
  228. sse2fct = jsimd_extxrgb_gray_convert_sse2;
  229. mmxfct = jsimd_extxrgb_gray_convert_mmx;
  230. break;
  231. default:
  232. avx2fct = jsimd_rgb_gray_convert_avx2;
  233. sse2fct = jsimd_rgb_gray_convert_sse2;
  234. mmxfct = jsimd_rgb_gray_convert_mmx;
  235. break;
  236. }
  237. if (simd_support & JSIMD_AVX2)
  238. avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  239. else if (simd_support & JSIMD_SSE2)
  240. sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  241. else
  242. mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  243. }
  244. GLOBAL(void)
  245. jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  246. JDIMENSION input_row, JSAMPARRAY output_buf,
  247. int num_rows)
  248. {
  249. void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  250. void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  251. void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  252. if (simd_support == ~0U)
  253. init_simd();
  254. switch (cinfo->out_color_space) {
  255. case JCS_EXT_RGB:
  256. avx2fct = jsimd_ycc_extrgb_convert_avx2;
  257. sse2fct = jsimd_ycc_extrgb_convert_sse2;
  258. mmxfct = jsimd_ycc_extrgb_convert_mmx;
  259. break;
  260. case JCS_EXT_RGBX:
  261. case JCS_EXT_RGBA:
  262. avx2fct = jsimd_ycc_extrgbx_convert_avx2;
  263. sse2fct = jsimd_ycc_extrgbx_convert_sse2;
  264. mmxfct = jsimd_ycc_extrgbx_convert_mmx;
  265. break;
  266. case JCS_EXT_BGR:
  267. avx2fct = jsimd_ycc_extbgr_convert_avx2;
  268. sse2fct = jsimd_ycc_extbgr_convert_sse2;
  269. mmxfct = jsimd_ycc_extbgr_convert_mmx;
  270. break;
  271. case JCS_EXT_BGRX:
  272. case JCS_EXT_BGRA:
  273. avx2fct = jsimd_ycc_extbgrx_convert_avx2;
  274. sse2fct = jsimd_ycc_extbgrx_convert_sse2;
  275. mmxfct = jsimd_ycc_extbgrx_convert_mmx;
  276. break;
  277. case JCS_EXT_XBGR:
  278. case JCS_EXT_ABGR:
  279. avx2fct = jsimd_ycc_extxbgr_convert_avx2;
  280. sse2fct = jsimd_ycc_extxbgr_convert_sse2;
  281. mmxfct = jsimd_ycc_extxbgr_convert_mmx;
  282. break;
  283. case JCS_EXT_XRGB:
  284. case JCS_EXT_ARGB:
  285. avx2fct = jsimd_ycc_extxrgb_convert_avx2;
  286. sse2fct = jsimd_ycc_extxrgb_convert_sse2;
  287. mmxfct = jsimd_ycc_extxrgb_convert_mmx;
  288. break;
  289. default:
  290. avx2fct = jsimd_ycc_rgb_convert_avx2;
  291. sse2fct = jsimd_ycc_rgb_convert_sse2;
  292. mmxfct = jsimd_ycc_rgb_convert_mmx;
  293. break;
  294. }
  295. if (simd_support & JSIMD_AVX2)
  296. avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  297. else if (simd_support & JSIMD_SSE2)
  298. sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  299. else
  300. mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  301. }
  302. GLOBAL(void)
  303. jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  304. JDIMENSION input_row, JSAMPARRAY output_buf,
  305. int num_rows)
  306. {
  307. }
  308. GLOBAL(int)
  309. jsimd_can_h2v2_downsample(void)
  310. {
  311. init_simd();
  312. /* The code is optimised for these values only */
  313. if (BITS_IN_JSAMPLE != 8)
  314. return 0;
  315. if (sizeof(JDIMENSION) != 4)
  316. return 0;
  317. if (simd_support & JSIMD_AVX2)
  318. return 1;
  319. if (simd_support & JSIMD_SSE2)
  320. return 1;
  321. if (simd_support & JSIMD_MMX)
  322. return 1;
  323. return 0;
  324. }
  325. GLOBAL(int)
  326. jsimd_can_h2v1_downsample(void)
  327. {
  328. init_simd();
  329. /* The code is optimised for these values only */
  330. if (BITS_IN_JSAMPLE != 8)
  331. return 0;
  332. if (sizeof(JDIMENSION) != 4)
  333. return 0;
  334. if (simd_support & JSIMD_AVX2)
  335. return 1;
  336. if (simd_support & JSIMD_SSE2)
  337. return 1;
  338. if (simd_support & JSIMD_MMX)
  339. return 1;
  340. return 0;
  341. }
  342. GLOBAL(void)
  343. jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
  344. JSAMPARRAY input_data, JSAMPARRAY output_data)
  345. {
  346. if (simd_support == ~0U)
  347. init_simd();
  348. if (simd_support & JSIMD_AVX2)
  349. jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
  350. compptr->v_samp_factor,
  351. compptr->width_in_blocks, input_data,
  352. output_data);
  353. else if (simd_support & JSIMD_SSE2)
  354. jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
  355. compptr->v_samp_factor,
  356. compptr->width_in_blocks, input_data,
  357. output_data);
  358. else
  359. jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
  360. compptr->v_samp_factor, compptr->width_in_blocks,
  361. input_data, output_data);
  362. }
  363. GLOBAL(void)
  364. jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
  365. JSAMPARRAY input_data, JSAMPARRAY output_data)
  366. {
  367. if (simd_support == ~0U)
  368. init_simd();
  369. if (simd_support & JSIMD_AVX2)
  370. jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
  371. compptr->v_samp_factor,
  372. compptr->width_in_blocks, input_data,
  373. output_data);
  374. else if (simd_support & JSIMD_SSE2)
  375. jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
  376. compptr->v_samp_factor,
  377. compptr->width_in_blocks, input_data,
  378. output_data);
  379. else
  380. jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
  381. compptr->v_samp_factor, compptr->width_in_blocks,
  382. input_data, output_data);
  383. }
  384. GLOBAL(int)
  385. jsimd_can_h2v2_upsample(void)
  386. {
  387. init_simd();
  388. /* The code is optimised for these values only */
  389. if (BITS_IN_JSAMPLE != 8)
  390. return 0;
  391. if (sizeof(JDIMENSION) != 4)
  392. return 0;
  393. if (simd_support & JSIMD_AVX2)
  394. return 1;
  395. if (simd_support & JSIMD_SSE2)
  396. return 1;
  397. if (simd_support & JSIMD_MMX)
  398. return 1;
  399. return 0;
  400. }
  401. GLOBAL(int)
  402. jsimd_can_h2v1_upsample(void)
  403. {
  404. init_simd();
  405. /* The code is optimised for these values only */
  406. if (BITS_IN_JSAMPLE != 8)
  407. return 0;
  408. if (sizeof(JDIMENSION) != 4)
  409. return 0;
  410. if (simd_support & JSIMD_AVX2)
  411. return 1;
  412. if (simd_support & JSIMD_SSE2)
  413. return 1;
  414. if (simd_support & JSIMD_MMX)
  415. return 1;
  416. return 0;
  417. }
  418. GLOBAL(void)
  419. jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  420. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  421. {
  422. if (simd_support == ~0U)
  423. init_simd();
  424. if (simd_support & JSIMD_AVX2)
  425. jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
  426. input_data, output_data_ptr);
  427. else if (simd_support & JSIMD_SSE2)
  428. jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
  429. input_data, output_data_ptr);
  430. else
  431. jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
  432. input_data, output_data_ptr);
  433. }
  434. GLOBAL(void)
  435. jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  436. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  437. {
  438. if (simd_support == ~0U)
  439. init_simd();
  440. if (simd_support & JSIMD_AVX2)
  441. jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
  442. input_data, output_data_ptr);
  443. else if (simd_support & JSIMD_SSE2)
  444. jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
  445. input_data, output_data_ptr);
  446. else
  447. jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
  448. input_data, output_data_ptr);
  449. }
  450. GLOBAL(int)
  451. jsimd_can_h2v2_fancy_upsample(void)
  452. {
  453. init_simd();
  454. /* The code is optimised for these values only */
  455. if (BITS_IN_JSAMPLE != 8)
  456. return 0;
  457. if (sizeof(JDIMENSION) != 4)
  458. return 0;
  459. if ((simd_support & JSIMD_AVX2) &&
  460. IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
  461. return 1;
  462. if ((simd_support & JSIMD_SSE2) &&
  463. IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
  464. return 1;
  465. if (simd_support & JSIMD_MMX)
  466. return 1;
  467. return 0;
  468. }
  469. GLOBAL(int)
  470. jsimd_can_h2v1_fancy_upsample(void)
  471. {
  472. init_simd();
  473. /* The code is optimised for these values only */
  474. if (BITS_IN_JSAMPLE != 8)
  475. return 0;
  476. if (sizeof(JDIMENSION) != 4)
  477. return 0;
  478. if ((simd_support & JSIMD_AVX2) &&
  479. IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
  480. return 1;
  481. if ((simd_support & JSIMD_SSE2) &&
  482. IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
  483. return 1;
  484. if (simd_support & JSIMD_MMX)
  485. return 1;
  486. return 0;
  487. }
  488. GLOBAL(void)
  489. jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  490. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  491. {
  492. if (simd_support == ~0U)
  493. init_simd();
  494. if (simd_support & JSIMD_AVX2)
  495. jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
  496. compptr->downsampled_width, input_data,
  497. output_data_ptr);
  498. else if (simd_support & JSIMD_SSE2)
  499. jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
  500. compptr->downsampled_width, input_data,
  501. output_data_ptr);
  502. else
  503. jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
  504. compptr->downsampled_width, input_data,
  505. output_data_ptr);
  506. }
  507. GLOBAL(void)
  508. jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  509. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  510. {
  511. if (simd_support == ~0U)
  512. init_simd();
  513. if (simd_support & JSIMD_AVX2)
  514. jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
  515. compptr->downsampled_width, input_data,
  516. output_data_ptr);
  517. else if (simd_support & JSIMD_SSE2)
  518. jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
  519. compptr->downsampled_width, input_data,
  520. output_data_ptr);
  521. else
  522. jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
  523. compptr->downsampled_width, input_data,
  524. output_data_ptr);
  525. }
  526. GLOBAL(int)
  527. jsimd_can_h2v2_merged_upsample(void)
  528. {
  529. init_simd();
  530. /* The code is optimised for these values only */
  531. if (BITS_IN_JSAMPLE != 8)
  532. return 0;
  533. if (sizeof(JDIMENSION) != 4)
  534. return 0;
  535. if ((simd_support & JSIMD_AVX2) &&
  536. IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
  537. return 1;
  538. if ((simd_support & JSIMD_SSE2) &&
  539. IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
  540. return 1;
  541. if (simd_support & JSIMD_MMX)
  542. return 1;
  543. return 0;
  544. }
  545. GLOBAL(int)
  546. jsimd_can_h2v1_merged_upsample(void)
  547. {
  548. init_simd();
  549. /* The code is optimised for these values only */
  550. if (BITS_IN_JSAMPLE != 8)
  551. return 0;
  552. if (sizeof(JDIMENSION) != 4)
  553. return 0;
  554. if ((simd_support & JSIMD_AVX2) &&
  555. IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
  556. return 1;
  557. if ((simd_support & JSIMD_SSE2) &&
  558. IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
  559. return 1;
  560. if (simd_support & JSIMD_MMX)
  561. return 1;
  562. return 0;
  563. }
  564. GLOBAL(void)
  565. jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  566. JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
  567. {
  568. void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  569. void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  570. void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  571. if (simd_support == ~0U)
  572. init_simd();
  573. switch (cinfo->out_color_space) {
  574. case JCS_EXT_RGB:
  575. avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
  576. sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
  577. mmxfct = jsimd_h2v2_extrgb_merged_upsample_mmx;
  578. break;
  579. case JCS_EXT_RGBX:
  580. case JCS_EXT_RGBA:
  581. avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
  582. sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
  583. mmxfct = jsimd_h2v2_extrgbx_merged_upsample_mmx;
  584. break;
  585. case JCS_EXT_BGR:
  586. avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
  587. sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
  588. mmxfct = jsimd_h2v2_extbgr_merged_upsample_mmx;
  589. break;
  590. case JCS_EXT_BGRX:
  591. case JCS_EXT_BGRA:
  592. avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
  593. sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
  594. mmxfct = jsimd_h2v2_extbgrx_merged_upsample_mmx;
  595. break;
  596. case JCS_EXT_XBGR:
  597. case JCS_EXT_ABGR:
  598. avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
  599. sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
  600. mmxfct = jsimd_h2v2_extxbgr_merged_upsample_mmx;
  601. break;
  602. case JCS_EXT_XRGB:
  603. case JCS_EXT_ARGB:
  604. avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
  605. sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
  606. mmxfct = jsimd_h2v2_extxrgb_merged_upsample_mmx;
  607. break;
  608. default:
  609. avx2fct = jsimd_h2v2_merged_upsample_avx2;
  610. sse2fct = jsimd_h2v2_merged_upsample_sse2;
  611. mmxfct = jsimd_h2v2_merged_upsample_mmx;
  612. break;
  613. }
  614. if (simd_support & JSIMD_AVX2)
  615. avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  616. else if (simd_support & JSIMD_SSE2)
  617. sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  618. else
  619. mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  620. }
  621. GLOBAL(void)
  622. jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  623. JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
  624. {
  625. void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  626. void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  627. void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  628. if (simd_support == ~0U)
  629. init_simd();
  630. switch (cinfo->out_color_space) {
  631. case JCS_EXT_RGB:
  632. avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
  633. sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
  634. mmxfct = jsimd_h2v1_extrgb_merged_upsample_mmx;
  635. break;
  636. case JCS_EXT_RGBX:
  637. case JCS_EXT_RGBA:
  638. avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
  639. sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
  640. mmxfct = jsimd_h2v1_extrgbx_merged_upsample_mmx;
  641. break;
  642. case JCS_EXT_BGR:
  643. avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
  644. sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
  645. mmxfct = jsimd_h2v1_extbgr_merged_upsample_mmx;
  646. break;
  647. case JCS_EXT_BGRX:
  648. case JCS_EXT_BGRA:
  649. avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
  650. sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
  651. mmxfct = jsimd_h2v1_extbgrx_merged_upsample_mmx;
  652. break;
  653. case JCS_EXT_XBGR:
  654. case JCS_EXT_ABGR:
  655. avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
  656. sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
  657. mmxfct = jsimd_h2v1_extxbgr_merged_upsample_mmx;
  658. break;
  659. case JCS_EXT_XRGB:
  660. case JCS_EXT_ARGB:
  661. avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
  662. sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
  663. mmxfct = jsimd_h2v1_extxrgb_merged_upsample_mmx;
  664. break;
  665. default:
  666. avx2fct = jsimd_h2v1_merged_upsample_avx2;
  667. sse2fct = jsimd_h2v1_merged_upsample_sse2;
  668. mmxfct = jsimd_h2v1_merged_upsample_mmx;
  669. break;
  670. }
  671. if (simd_support & JSIMD_AVX2)
  672. avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  673. else if (simd_support & JSIMD_SSE2)
  674. sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  675. else
  676. mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  677. }
  678. GLOBAL(int)
  679. jsimd_can_convsamp(void)
  680. {
  681. init_simd();
  682. /* The code is optimised for these values only */
  683. if (DCTSIZE != 8)
  684. return 0;
  685. if (BITS_IN_JSAMPLE != 8)
  686. return 0;
  687. if (sizeof(JDIMENSION) != 4)
  688. return 0;
  689. if (sizeof(DCTELEM) != 2)
  690. return 0;
  691. if (simd_support & JSIMD_AVX2)
  692. return 1;
  693. if (simd_support & JSIMD_SSE2)
  694. return 1;
  695. if (simd_support & JSIMD_MMX)
  696. return 1;
  697. return 0;
  698. }
  699. GLOBAL(int)
  700. jsimd_can_convsamp_float(void)
  701. {
  702. init_simd();
  703. /* The code is optimised for these values only */
  704. if (DCTSIZE != 8)
  705. return 0;
  706. if (BITS_IN_JSAMPLE != 8)
  707. return 0;
  708. if (sizeof(JDIMENSION) != 4)
  709. return 0;
  710. if (sizeof(FAST_FLOAT) != 4)
  711. return 0;
  712. if (simd_support & JSIMD_SSE2)
  713. return 1;
  714. if (simd_support & JSIMD_SSE)
  715. return 1;
  716. if (simd_support & JSIMD_3DNOW)
  717. return 1;
  718. return 0;
  719. }
  720. GLOBAL(void)
  721. jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
  722. DCTELEM *workspace)
  723. {
  724. if (simd_support == ~0U)
  725. init_simd();
  726. if (simd_support & JSIMD_AVX2)
  727. jsimd_convsamp_avx2(sample_data, start_col, workspace);
  728. else if (simd_support & JSIMD_SSE2)
  729. jsimd_convsamp_sse2(sample_data, start_col, workspace);
  730. else
  731. jsimd_convsamp_mmx(sample_data, start_col, workspace);
  732. }
  733. GLOBAL(void)
  734. jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
  735. FAST_FLOAT *workspace)
  736. {
  737. if (simd_support == ~0U)
  738. init_simd();
  739. if (simd_support & JSIMD_SSE2)
  740. jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
  741. else if (simd_support & JSIMD_SSE)
  742. jsimd_convsamp_float_sse(sample_data, start_col, workspace);
  743. else
  744. jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
  745. }
  746. GLOBAL(int)
  747. jsimd_can_fdct_islow(void)
  748. {
  749. init_simd();
  750. /* The code is optimised for these values only */
  751. if (DCTSIZE != 8)
  752. return 0;
  753. if (sizeof(DCTELEM) != 2)
  754. return 0;
  755. if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
  756. return 1;
  757. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
  758. return 1;
  759. if (simd_support & JSIMD_MMX)
  760. return 1;
  761. return 0;
  762. }
  763. GLOBAL(int)
  764. jsimd_can_fdct_ifast(void)
  765. {
  766. init_simd();
  767. /* The code is optimised for these values only */
  768. if (DCTSIZE != 8)
  769. return 0;
  770. if (sizeof(DCTELEM) != 2)
  771. return 0;
  772. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
  773. return 1;
  774. if (simd_support & JSIMD_MMX)
  775. return 1;
  776. return 0;
  777. }
  778. GLOBAL(int)
  779. jsimd_can_fdct_float(void)
  780. {
  781. init_simd();
  782. /* The code is optimised for these values only */
  783. if (DCTSIZE != 8)
  784. return 0;
  785. if (sizeof(FAST_FLOAT) != 4)
  786. return 0;
  787. if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
  788. return 1;
  789. if (simd_support & JSIMD_3DNOW)
  790. return 1;
  791. return 0;
  792. }
  793. GLOBAL(void)
  794. jsimd_fdct_islow(DCTELEM *data)
  795. {
  796. if (simd_support == ~0U)
  797. init_simd();
  798. if (simd_support & JSIMD_AVX2)
  799. jsimd_fdct_islow_avx2(data);
  800. else if (simd_support & JSIMD_SSE2)
  801. jsimd_fdct_islow_sse2(data);
  802. else
  803. jsimd_fdct_islow_mmx(data);
  804. }
  805. GLOBAL(void)
  806. jsimd_fdct_ifast(DCTELEM *data)
  807. {
  808. if (simd_support == ~0U)
  809. init_simd();
  810. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
  811. jsimd_fdct_ifast_sse2(data);
  812. else
  813. jsimd_fdct_ifast_mmx(data);
  814. }
  815. GLOBAL(void)
  816. jsimd_fdct_float(FAST_FLOAT *data)
  817. {
  818. if (simd_support == ~0U)
  819. init_simd();
  820. if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
  821. jsimd_fdct_float_sse(data);
  822. else if (simd_support & JSIMD_3DNOW)
  823. jsimd_fdct_float_3dnow(data);
  824. }
  825. GLOBAL(int)
  826. jsimd_can_quantize(void)
  827. {
  828. init_simd();
  829. /* The code is optimised for these values only */
  830. if (DCTSIZE != 8)
  831. return 0;
  832. if (sizeof(JCOEF) != 2)
  833. return 0;
  834. if (sizeof(DCTELEM) != 2)
  835. return 0;
  836. if (simd_support & JSIMD_AVX2)
  837. return 1;
  838. if (simd_support & JSIMD_SSE2)
  839. return 1;
  840. if (simd_support & JSIMD_MMX)
  841. return 1;
  842. return 0;
  843. }
  844. GLOBAL(int)
  845. jsimd_can_quantize_float(void)
  846. {
  847. init_simd();
  848. /* The code is optimised for these values only */
  849. if (DCTSIZE != 8)
  850. return 0;
  851. if (sizeof(JCOEF) != 2)
  852. return 0;
  853. if (sizeof(FAST_FLOAT) != 4)
  854. return 0;
  855. if (simd_support & JSIMD_SSE2)
  856. return 1;
  857. if (simd_support & JSIMD_SSE)
  858. return 1;
  859. if (simd_support & JSIMD_3DNOW)
  860. return 1;
  861. return 0;
  862. }
  863. GLOBAL(void)
  864. jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
  865. {
  866. if (simd_support == ~0U)
  867. init_simd();
  868. if (simd_support & JSIMD_AVX2)
  869. jsimd_quantize_avx2(coef_block, divisors, workspace);
  870. else if (simd_support & JSIMD_SSE2)
  871. jsimd_quantize_sse2(coef_block, divisors, workspace);
  872. else
  873. jsimd_quantize_mmx(coef_block, divisors, workspace);
  874. }
  875. GLOBAL(void)
  876. jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
  877. FAST_FLOAT *workspace)
  878. {
  879. if (simd_support == ~0U)
  880. init_simd();
  881. if (simd_support & JSIMD_SSE2)
  882. jsimd_quantize_float_sse2(coef_block, divisors, workspace);
  883. else if (simd_support & JSIMD_SSE)
  884. jsimd_quantize_float_sse(coef_block, divisors, workspace);
  885. else
  886. jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
  887. }
  888. GLOBAL(int)
  889. jsimd_can_idct_2x2(void)
  890. {
  891. init_simd();
  892. /* The code is optimised for these values only */
  893. if (DCTSIZE != 8)
  894. return 0;
  895. if (sizeof(JCOEF) != 2)
  896. return 0;
  897. if (BITS_IN_JSAMPLE != 8)
  898. return 0;
  899. if (sizeof(JDIMENSION) != 4)
  900. return 0;
  901. if (sizeof(ISLOW_MULT_TYPE) != 2)
  902. return 0;
  903. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
  904. return 1;
  905. if (simd_support & JSIMD_MMX)
  906. return 1;
  907. return 0;
  908. }
  909. GLOBAL(int)
  910. jsimd_can_idct_4x4(void)
  911. {
  912. init_simd();
  913. /* The code is optimised for these values only */
  914. if (DCTSIZE != 8)
  915. return 0;
  916. if (sizeof(JCOEF) != 2)
  917. return 0;
  918. if (BITS_IN_JSAMPLE != 8)
  919. return 0;
  920. if (sizeof(JDIMENSION) != 4)
  921. return 0;
  922. if (sizeof(ISLOW_MULT_TYPE) != 2)
  923. return 0;
  924. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
  925. return 1;
  926. if (simd_support & JSIMD_MMX)
  927. return 1;
  928. return 0;
  929. }
  930. GLOBAL(void)
  931. jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  932. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  933. JDIMENSION output_col)
  934. {
  935. if (simd_support == ~0U)
  936. init_simd();
  937. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
  938. jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
  939. output_col);
  940. else
  941. jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
  942. }
  943. GLOBAL(void)
  944. jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  945. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  946. JDIMENSION output_col)
  947. {
  948. if (simd_support == ~0U)
  949. init_simd();
  950. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
  951. jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
  952. output_col);
  953. else
  954. jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
  955. }
  956. GLOBAL(int)
  957. jsimd_can_idct_islow(void)
  958. {
  959. init_simd();
  960. /* The code is optimised for these values only */
  961. if (DCTSIZE != 8)
  962. return 0;
  963. if (sizeof(JCOEF) != 2)
  964. return 0;
  965. if (BITS_IN_JSAMPLE != 8)
  966. return 0;
  967. if (sizeof(JDIMENSION) != 4)
  968. return 0;
  969. if (sizeof(ISLOW_MULT_TYPE) != 2)
  970. return 0;
  971. if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
  972. return 1;
  973. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
  974. return 1;
  975. if (simd_support & JSIMD_MMX)
  976. return 1;
  977. return 0;
  978. }
  979. GLOBAL(int)
  980. jsimd_can_idct_ifast(void)
  981. {
  982. init_simd();
  983. /* The code is optimised for these values only */
  984. if (DCTSIZE != 8)
  985. return 0;
  986. if (sizeof(JCOEF) != 2)
  987. return 0;
  988. if (BITS_IN_JSAMPLE != 8)
  989. return 0;
  990. if (sizeof(JDIMENSION) != 4)
  991. return 0;
  992. if (sizeof(IFAST_MULT_TYPE) != 2)
  993. return 0;
  994. if (IFAST_SCALE_BITS != 2)
  995. return 0;
  996. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
  997. return 1;
  998. if (simd_support & JSIMD_MMX)
  999. return 1;
  1000. return 0;
  1001. }
  1002. GLOBAL(int)
  1003. jsimd_can_idct_float(void)
  1004. {
  1005. init_simd();
  1006. if (DCTSIZE != 8)
  1007. return 0;
  1008. if (sizeof(JCOEF) != 2)
  1009. return 0;
  1010. if (BITS_IN_JSAMPLE != 8)
  1011. return 0;
  1012. if (sizeof(JDIMENSION) != 4)
  1013. return 0;
  1014. if (sizeof(FAST_FLOAT) != 4)
  1015. return 0;
  1016. if (sizeof(FLOAT_MULT_TYPE) != 4)
  1017. return 0;
  1018. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
  1019. return 1;
  1020. if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
  1021. return 1;
  1022. if (simd_support & JSIMD_3DNOW)
  1023. return 1;
  1024. return 0;
  1025. }
  1026. GLOBAL(void)
  1027. jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  1028. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  1029. JDIMENSION output_col)
  1030. {
  1031. if (simd_support == ~0U)
  1032. init_simd();
  1033. if (simd_support & JSIMD_AVX2)
  1034. jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
  1035. output_col);
  1036. else if (simd_support & JSIMD_SSE2)
  1037. jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
  1038. output_col);
  1039. else
  1040. jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
  1041. output_col);
  1042. }
  1043. GLOBAL(void)
  1044. jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  1045. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  1046. JDIMENSION output_col)
  1047. {
  1048. if (simd_support == ~0U)
  1049. init_simd();
  1050. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
  1051. jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
  1052. output_col);
  1053. else
  1054. jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
  1055. output_col);
  1056. }
  1057. GLOBAL(void)
  1058. jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  1059. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  1060. JDIMENSION output_col)
  1061. {
  1062. if (simd_support == ~0U)
  1063. init_simd();
  1064. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
  1065. jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
  1066. output_col);
  1067. else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
  1068. jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
  1069. output_col);
  1070. else
  1071. jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
  1072. output_col);
  1073. }
  1074. GLOBAL(int)
  1075. jsimd_can_huff_encode_one_block(void)
  1076. {
  1077. init_simd();
  1078. if (DCTSIZE != 8)
  1079. return 0;
  1080. if (sizeof(JCOEF) != 2)
  1081. return 0;
  1082. if ((simd_support & JSIMD_SSE2) && simd_huffman &&
  1083. IS_ALIGNED_SSE(jconst_huff_encode_one_block))
  1084. return 1;
  1085. return 0;
  1086. }
  1087. GLOBAL(JOCTET *)
  1088. jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
  1089. int last_dc_val, c_derived_tbl *dctbl,
  1090. c_derived_tbl *actbl)
  1091. {
  1092. return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
  1093. dctbl, actbl);
  1094. }
  1095. GLOBAL(int)
  1096. jsimd_can_encode_mcu_AC_first_prepare(void)
  1097. {
  1098. init_simd();
  1099. if (DCTSIZE != 8)
  1100. return 0;
  1101. if (sizeof(JCOEF) != 2)
  1102. return 0;
  1103. if (SIZEOF_SIZE_T != 4)
  1104. return 0;
  1105. if (simd_support & JSIMD_SSE2)
  1106. return 1;
  1107. return 0;
  1108. }
  1109. GLOBAL(void)
  1110. jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
  1111. const int *jpeg_natural_order_start, int Sl,
  1112. int Al, UJCOEF *values, size_t *zerobits)
  1113. {
  1114. jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
  1115. Sl, Al, values, zerobits);
  1116. }
  1117. GLOBAL(int)
  1118. jsimd_can_encode_mcu_AC_refine_prepare(void)
  1119. {
  1120. init_simd();
  1121. if (DCTSIZE != 8)
  1122. return 0;
  1123. if (sizeof(JCOEF) != 2)
  1124. return 0;
  1125. if (SIZEOF_SIZE_T != 4)
  1126. return 0;
  1127. if (simd_support & JSIMD_SSE2)
  1128. return 1;
  1129. return 0;
  1130. }
  1131. GLOBAL(int)
  1132. jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
  1133. const int *jpeg_natural_order_start, int Sl,
  1134. int Al, UJCOEF *absvalues, size_t *bits)
  1135. {
  1136. return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
  1137. jpeg_natural_order_start,
  1138. Sl, Al, absvalues, bits);
  1139. }