jsimd.c 27 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108
  1. /*
  2. * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  3. * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2024, D. R. Commander.
  4. * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  5. *
  6. * Based on the x86 SIMD extension for IJG JPEG library,
  7. * Copyright (C) 1999-2006, MIYASAKA Masaru.
  8. * For conditions of distribution and use, see copyright notice in jsimdext.inc
  9. *
  10. * This file contains the interface between the "normal" portions
  11. * of the library and the SIMD implementations when running on a
  12. * 64-bit x86 architecture.
  13. */
  14. #define JPEG_INTERNALS
  15. #include "../../src/jinclude.h"
  16. #include "../../src/jpeglib.h"
  17. #include "../../src/jsimd.h"
  18. #include "../../src/jdct.h"
  19. #include "../../src/jsimddct.h"
  20. #include "../jsimd.h"
  21. /*
  22. * In the PIC cases, we have no guarantee that constants will keep
  23. * their alignment. This macro allows us to verify it at runtime.
  24. */
  25. #define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0)
  26. #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
  27. #define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
  28. static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
  29. static THREAD_LOCAL unsigned int simd_huffman = 1;
  30. /*
  31. * Check what SIMD accelerations are supported.
  32. */
  33. LOCAL(void)
  34. init_simd(void)
  35. {
  36. #ifndef NO_GETENV
  37. char env[2] = { 0 };
  38. #endif
  39. if (simd_support != ~0U)
  40. return;
  41. simd_support = jpeg_simd_cpu_support();
  42. #ifndef NO_GETENV
  43. /* Force different settings through environment variables */
  44. if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
  45. simd_support &= JSIMD_SSE2;
  46. if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
  47. simd_support &= JSIMD_AVX2;
  48. if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
  49. simd_support = 0;
  50. if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
  51. simd_huffman = 0;
  52. #endif
  53. }
  54. GLOBAL(int)
  55. jsimd_can_rgb_ycc(void)
  56. {
  57. init_simd();
  58. /* The code is optimised for these values only */
  59. if (BITS_IN_JSAMPLE != 8)
  60. return 0;
  61. if (sizeof(JDIMENSION) != 4)
  62. return 0;
  63. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  64. return 0;
  65. if ((simd_support & JSIMD_AVX2) &&
  66. IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
  67. return 1;
  68. if ((simd_support & JSIMD_SSE2) &&
  69. IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
  70. return 1;
  71. return 0;
  72. }
  73. GLOBAL(int)
  74. jsimd_can_rgb_gray(void)
  75. {
  76. init_simd();
  77. /* The code is optimised for these values only */
  78. if (BITS_IN_JSAMPLE != 8)
  79. return 0;
  80. if (sizeof(JDIMENSION) != 4)
  81. return 0;
  82. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  83. return 0;
  84. if ((simd_support & JSIMD_AVX2) &&
  85. IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
  86. return 1;
  87. if ((simd_support & JSIMD_SSE2) &&
  88. IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
  89. return 1;
  90. return 0;
  91. }
  92. GLOBAL(int)
  93. jsimd_can_ycc_rgb(void)
  94. {
  95. init_simd();
  96. /* The code is optimised for these values only */
  97. if (BITS_IN_JSAMPLE != 8)
  98. return 0;
  99. if (sizeof(JDIMENSION) != 4)
  100. return 0;
  101. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  102. return 0;
  103. if ((simd_support & JSIMD_AVX2) &&
  104. IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
  105. return 1;
  106. if ((simd_support & JSIMD_SSE2) &&
  107. IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
  108. return 1;
  109. return 0;
  110. }
  111. GLOBAL(int)
  112. jsimd_can_ycc_rgb565(void)
  113. {
  114. return 0;
  115. }
  116. GLOBAL(void)
  117. jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  118. JSAMPIMAGE output_buf, JDIMENSION output_row,
  119. int num_rows)
  120. {
  121. void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  122. void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  123. if (simd_support == ~0U)
  124. init_simd();
  125. switch (cinfo->in_color_space) {
  126. case JCS_EXT_RGB:
  127. avx2fct = jsimd_extrgb_ycc_convert_avx2;
  128. sse2fct = jsimd_extrgb_ycc_convert_sse2;
  129. break;
  130. case JCS_EXT_RGBX:
  131. case JCS_EXT_RGBA:
  132. avx2fct = jsimd_extrgbx_ycc_convert_avx2;
  133. sse2fct = jsimd_extrgbx_ycc_convert_sse2;
  134. break;
  135. case JCS_EXT_BGR:
  136. avx2fct = jsimd_extbgr_ycc_convert_avx2;
  137. sse2fct = jsimd_extbgr_ycc_convert_sse2;
  138. break;
  139. case JCS_EXT_BGRX:
  140. case JCS_EXT_BGRA:
  141. avx2fct = jsimd_extbgrx_ycc_convert_avx2;
  142. sse2fct = jsimd_extbgrx_ycc_convert_sse2;
  143. break;
  144. case JCS_EXT_XBGR:
  145. case JCS_EXT_ABGR:
  146. avx2fct = jsimd_extxbgr_ycc_convert_avx2;
  147. sse2fct = jsimd_extxbgr_ycc_convert_sse2;
  148. break;
  149. case JCS_EXT_XRGB:
  150. case JCS_EXT_ARGB:
  151. avx2fct = jsimd_extxrgb_ycc_convert_avx2;
  152. sse2fct = jsimd_extxrgb_ycc_convert_sse2;
  153. break;
  154. default:
  155. avx2fct = jsimd_rgb_ycc_convert_avx2;
  156. sse2fct = jsimd_rgb_ycc_convert_sse2;
  157. break;
  158. }
  159. if (simd_support & JSIMD_AVX2)
  160. avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  161. else
  162. sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  163. }
  164. GLOBAL(void)
  165. jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  166. JSAMPIMAGE output_buf, JDIMENSION output_row,
  167. int num_rows)
  168. {
  169. void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  170. void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  171. if (simd_support == ~0U)
  172. init_simd();
  173. switch (cinfo->in_color_space) {
  174. case JCS_EXT_RGB:
  175. avx2fct = jsimd_extrgb_gray_convert_avx2;
  176. sse2fct = jsimd_extrgb_gray_convert_sse2;
  177. break;
  178. case JCS_EXT_RGBX:
  179. case JCS_EXT_RGBA:
  180. avx2fct = jsimd_extrgbx_gray_convert_avx2;
  181. sse2fct = jsimd_extrgbx_gray_convert_sse2;
  182. break;
  183. case JCS_EXT_BGR:
  184. avx2fct = jsimd_extbgr_gray_convert_avx2;
  185. sse2fct = jsimd_extbgr_gray_convert_sse2;
  186. break;
  187. case JCS_EXT_BGRX:
  188. case JCS_EXT_BGRA:
  189. avx2fct = jsimd_extbgrx_gray_convert_avx2;
  190. sse2fct = jsimd_extbgrx_gray_convert_sse2;
  191. break;
  192. case JCS_EXT_XBGR:
  193. case JCS_EXT_ABGR:
  194. avx2fct = jsimd_extxbgr_gray_convert_avx2;
  195. sse2fct = jsimd_extxbgr_gray_convert_sse2;
  196. break;
  197. case JCS_EXT_XRGB:
  198. case JCS_EXT_ARGB:
  199. avx2fct = jsimd_extxrgb_gray_convert_avx2;
  200. sse2fct = jsimd_extxrgb_gray_convert_sse2;
  201. break;
  202. default:
  203. avx2fct = jsimd_rgb_gray_convert_avx2;
  204. sse2fct = jsimd_rgb_gray_convert_sse2;
  205. break;
  206. }
  207. if (simd_support & JSIMD_AVX2)
  208. avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  209. else
  210. sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  211. }
  212. GLOBAL(void)
  213. jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  214. JDIMENSION input_row, JSAMPARRAY output_buf,
  215. int num_rows)
  216. {
  217. void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  218. void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  219. if (simd_support == ~0U)
  220. init_simd();
  221. switch (cinfo->out_color_space) {
  222. case JCS_EXT_RGB:
  223. avx2fct = jsimd_ycc_extrgb_convert_avx2;
  224. sse2fct = jsimd_ycc_extrgb_convert_sse2;
  225. break;
  226. case JCS_EXT_RGBX:
  227. case JCS_EXT_RGBA:
  228. avx2fct = jsimd_ycc_extrgbx_convert_avx2;
  229. sse2fct = jsimd_ycc_extrgbx_convert_sse2;
  230. break;
  231. case JCS_EXT_BGR:
  232. avx2fct = jsimd_ycc_extbgr_convert_avx2;
  233. sse2fct = jsimd_ycc_extbgr_convert_sse2;
  234. break;
  235. case JCS_EXT_BGRX:
  236. case JCS_EXT_BGRA:
  237. avx2fct = jsimd_ycc_extbgrx_convert_avx2;
  238. sse2fct = jsimd_ycc_extbgrx_convert_sse2;
  239. break;
  240. case JCS_EXT_XBGR:
  241. case JCS_EXT_ABGR:
  242. avx2fct = jsimd_ycc_extxbgr_convert_avx2;
  243. sse2fct = jsimd_ycc_extxbgr_convert_sse2;
  244. break;
  245. case JCS_EXT_XRGB:
  246. case JCS_EXT_ARGB:
  247. avx2fct = jsimd_ycc_extxrgb_convert_avx2;
  248. sse2fct = jsimd_ycc_extxrgb_convert_sse2;
  249. break;
  250. default:
  251. avx2fct = jsimd_ycc_rgb_convert_avx2;
  252. sse2fct = jsimd_ycc_rgb_convert_sse2;
  253. break;
  254. }
  255. if (simd_support & JSIMD_AVX2)
  256. avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  257. else
  258. sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  259. }
  260. GLOBAL(void)
  261. jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  262. JDIMENSION input_row, JSAMPARRAY output_buf,
  263. int num_rows)
  264. {
  265. }
  266. GLOBAL(int)
  267. jsimd_can_h2v2_downsample(void)
  268. {
  269. init_simd();
  270. /* The code is optimised for these values only */
  271. if (BITS_IN_JSAMPLE != 8)
  272. return 0;
  273. if (sizeof(JDIMENSION) != 4)
  274. return 0;
  275. if (simd_support & JSIMD_AVX2)
  276. return 1;
  277. if (simd_support & JSIMD_SSE2)
  278. return 1;
  279. return 0;
  280. }
  281. GLOBAL(int)
  282. jsimd_can_h2v1_downsample(void)
  283. {
  284. init_simd();
  285. /* The code is optimised for these values only */
  286. if (BITS_IN_JSAMPLE != 8)
  287. return 0;
  288. if (sizeof(JDIMENSION) != 4)
  289. return 0;
  290. if (simd_support & JSIMD_AVX2)
  291. return 1;
  292. if (simd_support & JSIMD_SSE2)
  293. return 1;
  294. return 0;
  295. }
  296. GLOBAL(void)
  297. jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
  298. JSAMPARRAY input_data, JSAMPARRAY output_data)
  299. {
  300. if (simd_support == ~0U)
  301. init_simd();
  302. if (simd_support & JSIMD_AVX2)
  303. jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
  304. compptr->v_samp_factor,
  305. compptr->width_in_blocks, input_data,
  306. output_data);
  307. else
  308. jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
  309. compptr->v_samp_factor,
  310. compptr->width_in_blocks, input_data,
  311. output_data);
  312. }
  313. GLOBAL(void)
  314. jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
  315. JSAMPARRAY input_data, JSAMPARRAY output_data)
  316. {
  317. if (simd_support == ~0U)
  318. init_simd();
  319. if (simd_support & JSIMD_AVX2)
  320. jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
  321. compptr->v_samp_factor,
  322. compptr->width_in_blocks, input_data,
  323. output_data);
  324. else
  325. jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
  326. compptr->v_samp_factor,
  327. compptr->width_in_blocks, input_data,
  328. output_data);
  329. }
  330. GLOBAL(int)
  331. jsimd_can_h2v2_upsample(void)
  332. {
  333. init_simd();
  334. /* The code is optimised for these values only */
  335. if (BITS_IN_JSAMPLE != 8)
  336. return 0;
  337. if (sizeof(JDIMENSION) != 4)
  338. return 0;
  339. if (simd_support & JSIMD_AVX2)
  340. return 1;
  341. if (simd_support & JSIMD_SSE2)
  342. return 1;
  343. return 0;
  344. }
  345. GLOBAL(int)
  346. jsimd_can_h2v1_upsample(void)
  347. {
  348. init_simd();
  349. /* The code is optimised for these values only */
  350. if (BITS_IN_JSAMPLE != 8)
  351. return 0;
  352. if (sizeof(JDIMENSION) != 4)
  353. return 0;
  354. if (simd_support & JSIMD_AVX2)
  355. return 1;
  356. if (simd_support & JSIMD_SSE2)
  357. return 1;
  358. return 0;
  359. }
  360. GLOBAL(void)
  361. jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  362. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  363. {
  364. if (simd_support == ~0U)
  365. init_simd();
  366. if (simd_support & JSIMD_AVX2)
  367. jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
  368. input_data, output_data_ptr);
  369. else
  370. jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
  371. input_data, output_data_ptr);
  372. }
  373. GLOBAL(void)
  374. jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  375. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  376. {
  377. if (simd_support == ~0U)
  378. init_simd();
  379. if (simd_support & JSIMD_AVX2)
  380. jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
  381. input_data, output_data_ptr);
  382. else
  383. jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
  384. input_data, output_data_ptr);
  385. }
  386. GLOBAL(int)
  387. jsimd_can_h2v2_fancy_upsample(void)
  388. {
  389. init_simd();
  390. /* The code is optimised for these values only */
  391. if (BITS_IN_JSAMPLE != 8)
  392. return 0;
  393. if (sizeof(JDIMENSION) != 4)
  394. return 0;
  395. if ((simd_support & JSIMD_AVX2) &&
  396. IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
  397. return 1;
  398. if ((simd_support & JSIMD_SSE2) &&
  399. IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
  400. return 1;
  401. return 0;
  402. }
  403. GLOBAL(int)
  404. jsimd_can_h2v1_fancy_upsample(void)
  405. {
  406. init_simd();
  407. /* The code is optimised for these values only */
  408. if (BITS_IN_JSAMPLE != 8)
  409. return 0;
  410. if (sizeof(JDIMENSION) != 4)
  411. return 0;
  412. if ((simd_support & JSIMD_AVX2) &&
  413. IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
  414. return 1;
  415. if ((simd_support & JSIMD_SSE2) &&
  416. IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
  417. return 1;
  418. return 0;
  419. }
  420. GLOBAL(void)
  421. jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  422. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  423. {
  424. if (simd_support == ~0U)
  425. init_simd();
  426. if (simd_support & JSIMD_AVX2)
  427. jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
  428. compptr->downsampled_width, input_data,
  429. output_data_ptr);
  430. else
  431. jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
  432. compptr->downsampled_width, input_data,
  433. output_data_ptr);
  434. }
  435. GLOBAL(void)
  436. jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  437. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  438. {
  439. if (simd_support == ~0U)
  440. init_simd();
  441. if (simd_support & JSIMD_AVX2)
  442. jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
  443. compptr->downsampled_width, input_data,
  444. output_data_ptr);
  445. else
  446. jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
  447. compptr->downsampled_width, input_data,
  448. output_data_ptr);
  449. }
  450. GLOBAL(int)
  451. jsimd_can_h2v2_merged_upsample(void)
  452. {
  453. init_simd();
  454. /* The code is optimised for these values only */
  455. if (BITS_IN_JSAMPLE != 8)
  456. return 0;
  457. if (sizeof(JDIMENSION) != 4)
  458. return 0;
  459. if ((simd_support & JSIMD_AVX2) &&
  460. IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
  461. return 1;
  462. if ((simd_support & JSIMD_SSE2) &&
  463. IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
  464. return 1;
  465. return 0;
  466. }
  467. GLOBAL(int)
  468. jsimd_can_h2v1_merged_upsample(void)
  469. {
  470. init_simd();
  471. /* The code is optimised for these values only */
  472. if (BITS_IN_JSAMPLE != 8)
  473. return 0;
  474. if (sizeof(JDIMENSION) != 4)
  475. return 0;
  476. if ((simd_support & JSIMD_AVX2) &&
  477. IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
  478. return 1;
  479. if ((simd_support & JSIMD_SSE2) &&
  480. IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
  481. return 1;
  482. return 0;
  483. }
  484. GLOBAL(void)
  485. jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  486. JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
  487. {
  488. void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  489. void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  490. if (simd_support == ~0U)
  491. init_simd();
  492. switch (cinfo->out_color_space) {
  493. case JCS_EXT_RGB:
  494. avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
  495. sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
  496. break;
  497. case JCS_EXT_RGBX:
  498. case JCS_EXT_RGBA:
  499. avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
  500. sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
  501. break;
  502. case JCS_EXT_BGR:
  503. avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
  504. sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
  505. break;
  506. case JCS_EXT_BGRX:
  507. case JCS_EXT_BGRA:
  508. avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
  509. sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
  510. break;
  511. case JCS_EXT_XBGR:
  512. case JCS_EXT_ABGR:
  513. avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
  514. sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
  515. break;
  516. case JCS_EXT_XRGB:
  517. case JCS_EXT_ARGB:
  518. avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
  519. sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
  520. break;
  521. default:
  522. avx2fct = jsimd_h2v2_merged_upsample_avx2;
  523. sse2fct = jsimd_h2v2_merged_upsample_sse2;
  524. break;
  525. }
  526. if (simd_support & JSIMD_AVX2)
  527. avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  528. else
  529. sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  530. }
  531. GLOBAL(void)
  532. jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  533. JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
  534. {
  535. void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  536. void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  537. if (simd_support == ~0U)
  538. init_simd();
  539. switch (cinfo->out_color_space) {
  540. case JCS_EXT_RGB:
  541. avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
  542. sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
  543. break;
  544. case JCS_EXT_RGBX:
  545. case JCS_EXT_RGBA:
  546. avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
  547. sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
  548. break;
  549. case JCS_EXT_BGR:
  550. avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
  551. sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
  552. break;
  553. case JCS_EXT_BGRX:
  554. case JCS_EXT_BGRA:
  555. avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
  556. sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
  557. break;
  558. case JCS_EXT_XBGR:
  559. case JCS_EXT_ABGR:
  560. avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
  561. sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
  562. break;
  563. case JCS_EXT_XRGB:
  564. case JCS_EXT_ARGB:
  565. avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
  566. sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
  567. break;
  568. default:
  569. avx2fct = jsimd_h2v1_merged_upsample_avx2;
  570. sse2fct = jsimd_h2v1_merged_upsample_sse2;
  571. break;
  572. }
  573. if (simd_support & JSIMD_AVX2)
  574. avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  575. else
  576. sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  577. }
  578. GLOBAL(int)
  579. jsimd_can_convsamp(void)
  580. {
  581. init_simd();
  582. /* The code is optimised for these values only */
  583. if (DCTSIZE != 8)
  584. return 0;
  585. if (BITS_IN_JSAMPLE != 8)
  586. return 0;
  587. if (sizeof(JDIMENSION) != 4)
  588. return 0;
  589. if (sizeof(DCTELEM) != 2)
  590. return 0;
  591. if (simd_support & JSIMD_AVX2)
  592. return 1;
  593. if (simd_support & JSIMD_SSE2)
  594. return 1;
  595. return 0;
  596. }
  597. GLOBAL(int)
  598. jsimd_can_convsamp_float(void)
  599. {
  600. init_simd();
  601. /* The code is optimised for these values only */
  602. if (DCTSIZE != 8)
  603. return 0;
  604. if (BITS_IN_JSAMPLE != 8)
  605. return 0;
  606. if (sizeof(JDIMENSION) != 4)
  607. return 0;
  608. if (sizeof(FAST_FLOAT) != 4)
  609. return 0;
  610. if (simd_support & JSIMD_SSE2)
  611. return 1;
  612. return 0;
  613. }
  614. GLOBAL(void)
  615. jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
  616. DCTELEM *workspace)
  617. {
  618. if (simd_support == ~0U)
  619. init_simd();
  620. if (simd_support & JSIMD_AVX2)
  621. jsimd_convsamp_avx2(sample_data, start_col, workspace);
  622. else
  623. jsimd_convsamp_sse2(sample_data, start_col, workspace);
  624. }
  625. GLOBAL(void)
  626. jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
  627. FAST_FLOAT *workspace)
  628. {
  629. jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
  630. }
  631. GLOBAL(int)
  632. jsimd_can_fdct_islow(void)
  633. {
  634. init_simd();
  635. /* The code is optimised for these values only */
  636. if (DCTSIZE != 8)
  637. return 0;
  638. if (sizeof(DCTELEM) != 2)
  639. return 0;
  640. if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
  641. return 1;
  642. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
  643. return 1;
  644. return 0;
  645. }
  646. GLOBAL(int)
  647. jsimd_can_fdct_ifast(void)
  648. {
  649. init_simd();
  650. /* The code is optimised for these values only */
  651. if (DCTSIZE != 8)
  652. return 0;
  653. if (sizeof(DCTELEM) != 2)
  654. return 0;
  655. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
  656. return 1;
  657. return 0;
  658. }
  659. GLOBAL(int)
  660. jsimd_can_fdct_float(void)
  661. {
  662. init_simd();
  663. /* The code is optimised for these values only */
  664. if (DCTSIZE != 8)
  665. return 0;
  666. if (sizeof(FAST_FLOAT) != 4)
  667. return 0;
  668. if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
  669. return 1;
  670. return 0;
  671. }
  672. GLOBAL(void)
  673. jsimd_fdct_islow(DCTELEM *data)
  674. {
  675. if (simd_support == ~0U)
  676. init_simd();
  677. if (simd_support & JSIMD_AVX2)
  678. jsimd_fdct_islow_avx2(data);
  679. else
  680. jsimd_fdct_islow_sse2(data);
  681. }
  682. GLOBAL(void)
  683. jsimd_fdct_ifast(DCTELEM *data)
  684. {
  685. jsimd_fdct_ifast_sse2(data);
  686. }
  687. GLOBAL(void)
  688. jsimd_fdct_float(FAST_FLOAT *data)
  689. {
  690. jsimd_fdct_float_sse(data);
  691. }
  692. GLOBAL(int)
  693. jsimd_can_quantize(void)
  694. {
  695. init_simd();
  696. /* The code is optimised for these values only */
  697. if (DCTSIZE != 8)
  698. return 0;
  699. if (sizeof(JCOEF) != 2)
  700. return 0;
  701. if (sizeof(DCTELEM) != 2)
  702. return 0;
  703. if (simd_support & JSIMD_AVX2)
  704. return 1;
  705. if (simd_support & JSIMD_SSE2)
  706. return 1;
  707. return 0;
  708. }
  709. GLOBAL(int)
  710. jsimd_can_quantize_float(void)
  711. {
  712. init_simd();
  713. /* The code is optimised for these values only */
  714. if (DCTSIZE != 8)
  715. return 0;
  716. if (sizeof(JCOEF) != 2)
  717. return 0;
  718. if (sizeof(FAST_FLOAT) != 4)
  719. return 0;
  720. if (simd_support & JSIMD_SSE2)
  721. return 1;
  722. return 0;
  723. }
  724. GLOBAL(void)
  725. jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
  726. {
  727. if (simd_support == ~0U)
  728. init_simd();
  729. if (simd_support & JSIMD_AVX2)
  730. jsimd_quantize_avx2(coef_block, divisors, workspace);
  731. else
  732. jsimd_quantize_sse2(coef_block, divisors, workspace);
  733. }
  734. GLOBAL(void)
  735. jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
  736. FAST_FLOAT *workspace)
  737. {
  738. jsimd_quantize_float_sse2(coef_block, divisors, workspace);
  739. }
  740. GLOBAL(int)
  741. jsimd_can_idct_2x2(void)
  742. {
  743. init_simd();
  744. /* The code is optimised for these values only */
  745. if (DCTSIZE != 8)
  746. return 0;
  747. if (sizeof(JCOEF) != 2)
  748. return 0;
  749. if (BITS_IN_JSAMPLE != 8)
  750. return 0;
  751. if (sizeof(JDIMENSION) != 4)
  752. return 0;
  753. if (sizeof(ISLOW_MULT_TYPE) != 2)
  754. return 0;
  755. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
  756. return 1;
  757. return 0;
  758. }
  759. GLOBAL(int)
  760. jsimd_can_idct_4x4(void)
  761. {
  762. init_simd();
  763. /* The code is optimised for these values only */
  764. if (DCTSIZE != 8)
  765. return 0;
  766. if (sizeof(JCOEF) != 2)
  767. return 0;
  768. if (BITS_IN_JSAMPLE != 8)
  769. return 0;
  770. if (sizeof(JDIMENSION) != 4)
  771. return 0;
  772. if (sizeof(ISLOW_MULT_TYPE) != 2)
  773. return 0;
  774. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
  775. return 1;
  776. return 0;
  777. }
  778. GLOBAL(void)
  779. jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  780. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  781. JDIMENSION output_col)
  782. {
  783. jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
  784. }
  785. GLOBAL(void)
  786. jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  787. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  788. JDIMENSION output_col)
  789. {
  790. jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
  791. }
  792. GLOBAL(int)
  793. jsimd_can_idct_islow(void)
  794. {
  795. init_simd();
  796. /* The code is optimised for these values only */
  797. if (DCTSIZE != 8)
  798. return 0;
  799. if (sizeof(JCOEF) != 2)
  800. return 0;
  801. if (BITS_IN_JSAMPLE != 8)
  802. return 0;
  803. if (sizeof(JDIMENSION) != 4)
  804. return 0;
  805. if (sizeof(ISLOW_MULT_TYPE) != 2)
  806. return 0;
  807. if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
  808. return 1;
  809. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
  810. return 1;
  811. return 0;
  812. }
  813. GLOBAL(int)
  814. jsimd_can_idct_ifast(void)
  815. {
  816. init_simd();
  817. /* The code is optimised for these values only */
  818. if (DCTSIZE != 8)
  819. return 0;
  820. if (sizeof(JCOEF) != 2)
  821. return 0;
  822. if (BITS_IN_JSAMPLE != 8)
  823. return 0;
  824. if (sizeof(JDIMENSION) != 4)
  825. return 0;
  826. if (sizeof(IFAST_MULT_TYPE) != 2)
  827. return 0;
  828. if (IFAST_SCALE_BITS != 2)
  829. return 0;
  830. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
  831. return 1;
  832. return 0;
  833. }
  834. GLOBAL(int)
  835. jsimd_can_idct_float(void)
  836. {
  837. init_simd();
  838. if (DCTSIZE != 8)
  839. return 0;
  840. if (sizeof(JCOEF) != 2)
  841. return 0;
  842. if (BITS_IN_JSAMPLE != 8)
  843. return 0;
  844. if (sizeof(JDIMENSION) != 4)
  845. return 0;
  846. if (sizeof(FAST_FLOAT) != 4)
  847. return 0;
  848. if (sizeof(FLOAT_MULT_TYPE) != 4)
  849. return 0;
  850. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
  851. return 1;
  852. return 0;
  853. }
  854. GLOBAL(void)
  855. jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  856. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  857. JDIMENSION output_col)
  858. {
  859. if (simd_support == ~0U)
  860. init_simd();
  861. if (simd_support & JSIMD_AVX2)
  862. jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
  863. output_col);
  864. else
  865. jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
  866. output_col);
  867. }
  868. GLOBAL(void)
  869. jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  870. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  871. JDIMENSION output_col)
  872. {
  873. jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
  874. output_col);
  875. }
  876. GLOBAL(void)
  877. jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  878. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  879. JDIMENSION output_col)
  880. {
  881. jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
  882. output_col);
  883. }
  884. GLOBAL(int)
  885. jsimd_can_huff_encode_one_block(void)
  886. {
  887. init_simd();
  888. if (DCTSIZE != 8)
  889. return 0;
  890. if (sizeof(JCOEF) != 2)
  891. return 0;
  892. if ((simd_support & JSIMD_SSE2) && simd_huffman &&
  893. IS_ALIGNED_SSE(jconst_huff_encode_one_block))
  894. return 1;
  895. return 0;
  896. }
  897. GLOBAL(JOCTET *)
  898. jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
  899. int last_dc_val, c_derived_tbl *dctbl,
  900. c_derived_tbl *actbl)
  901. {
  902. return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
  903. dctbl, actbl);
  904. }
  905. GLOBAL(int)
  906. jsimd_can_encode_mcu_AC_first_prepare(void)
  907. {
  908. init_simd();
  909. if (DCTSIZE != 8)
  910. return 0;
  911. if (sizeof(JCOEF) != 2)
  912. return 0;
  913. if (simd_support & JSIMD_SSE2)
  914. return 1;
  915. return 0;
  916. }
  917. GLOBAL(void)
  918. jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
  919. const int *jpeg_natural_order_start, int Sl,
  920. int Al, UJCOEF *values, size_t *zerobits)
  921. {
  922. jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
  923. Sl, Al, values, zerobits);
  924. }
  925. GLOBAL(int)
  926. jsimd_can_encode_mcu_AC_refine_prepare(void)
  927. {
  928. init_simd();
  929. if (DCTSIZE != 8)
  930. return 0;
  931. if (sizeof(JCOEF) != 2)
  932. return 0;
  933. if (simd_support & JSIMD_SSE2)
  934. return 1;
  935. return 0;
  936. }
  937. GLOBAL(int)
  938. jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
  939. const int *jpeg_natural_order_start, int Sl,
  940. int Al, UJCOEF *absvalues, size_t *bits)
  941. {
  942. return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
  943. jpeg_natural_order_start,
  944. Sl, Al, absvalues, bits);
  945. }