jfdctint-neon.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. /*
  2. * Accurate integer FDCT (Arm Neon)
  3. *
  4. * Copyright (C) 2020, Arm Limited. All Rights Reserved.
  5. * Copyright (C) 2020, 2024, D. R. Commander. All Rights Reserved.
  6. *
  7. * This software is provided 'as-is', without any express or implied
  8. * warranty. In no event will the authors be held liable for any damages
  9. * arising from the use of this software.
  10. *
  11. * Permission is granted to anyone to use this software for any purpose,
  12. * including commercial applications, and to alter it and redistribute it
  13. * freely, subject to the following restrictions:
  14. *
  15. * 1. The origin of this software must not be misrepresented; you must not
  16. * claim that you wrote the original software. If you use this software
  17. * in a product, an acknowledgment in the product documentation would be
  18. * appreciated but is not required.
  19. * 2. Altered source versions must be plainly marked as such, and must not be
  20. * misrepresented as being the original software.
  21. * 3. This notice may not be removed or altered from any source distribution.
  22. */
  23. #define JPEG_INTERNALS
  24. #include "../../src/jinclude.h"
  25. #include "../../src/jpeglib.h"
  26. #include "../../src/jsimd.h"
  27. #include "../../src/jdct.h"
  28. #include "../../src/jsimddct.h"
  29. #include "../jsimd.h"
  30. #include "align.h"
  31. #include "neon-compat.h"
  32. #include <arm_neon.h>
  33. /* jsimd_fdct_islow_neon() performs a slower but more accurate forward DCT
  34. * (Discrete Cosine Transform) on one block of samples. It uses the same
  35. * calculations and produces exactly the same output as IJG's original
  36. * jpeg_fdct_islow() function, which can be found in jfdctint.c.
  37. *
  38. * Scaled integer constants are used to avoid floating-point arithmetic:
  39. * 0.298631336 = 2446 * 2^-13
  40. * 0.390180644 = 3196 * 2^-13
  41. * 0.541196100 = 4433 * 2^-13
  42. * 0.765366865 = 6270 * 2^-13
  43. * 0.899976223 = 7373 * 2^-13
  44. * 1.175875602 = 9633 * 2^-13
  45. * 1.501321110 = 12299 * 2^-13
  46. * 1.847759065 = 15137 * 2^-13
  47. * 1.961570560 = 16069 * 2^-13
  48. * 2.053119869 = 16819 * 2^-13
  49. * 2.562915447 = 20995 * 2^-13
  50. * 3.072711026 = 25172 * 2^-13
  51. *
  52. * See jfdctint.c for further details of the DCT algorithm. Where possible,
  53. * the variable names and comments here in jsimd_fdct_islow_neon() match up
  54. * with those in jpeg_fdct_islow().
  55. */
  /* Fixed-point parameters.
   *
   * CONST_BITS is the number of fractional bits carried by the F_* constants:
   * each F_x_yyy below is the real value x.yyy... multiplied by 2^13 and
   * rounded to the nearest integer (see the table in the comment above).
   *
   * PASS1_BITS is the number of extra low-order bits that pass 1 leaves in its
   * output; pass 2 shifts them back out.
   */
  56. #define CONST_BITS 13
  57. #define PASS1_BITS 2
  /* Right-shift applied to 32-bit products in pass 1 (13 - 2 = 11) and in
   * pass 2 (13 + 2 = 15).
   */
  58. #define DESCALE_P1 (CONST_BITS - PASS1_BITS)
  59. #define DESCALE_P2 (CONST_BITS + PASS1_BITS)
  /* Magnitudes of the scaled DCT constants.  Signs are applied where the
   * constants are placed into the lookup table, not here.
   */
  60. #define F_0_298 2446
  61. #define F_0_390 3196
  62. #define F_0_541 4433
  63. #define F_0_765 6270
  64. #define F_0_899 7373
  65. #define F_1_175 9633
  66. #define F_1_501 12299
  67. #define F_1_847 15137
  68. #define F_1_961 16069
  69. #define F_2_053 16819
  70. #define F_2_562 20995
  71. #define F_3_072 25172
  /* Table of the twelve scaled DCT constants, laid out as three groups of
   * four 16-bit values.  jsimd_fdct_islow_neon() loads the groups into
   * consts.val[0..2] and picks individual constants by (group, lane), so the
   * order of the entries must not change.  Constants that the algorithm
   * always uses negated are stored with the minus sign already applied.
   *
   *   group 0:  F_0_298  -F_0_390   F_0_541   F_0_765
   *   group 1: -F_0_899   F_1_175   F_1_501  -F_1_847
   *   group 2: -F_1_961   F_2_053  -F_2_562   F_3_072
   */
  72. ALIGN(16) static const int16_t jsimd_fdct_islow_neon_consts[] = {
  73. F_0_298, -F_0_390, F_0_541, F_0_765,
  74. -F_0_899, F_1_175, F_1_501, -F_1_847,
  75. -F_1_961, F_2_053, -F_2_562, F_3_072
  76. };
  /* Forward DCT of one block of samples, computed in place with Neon
   * intrinsics.
   *
   * data: pointer to the block.  The function reads 64 elements through two
   *       vld4q_s16() loads (at data and at data + 4 * DCTSIZE) and overwrites
   *       them with eight vst1q_s16() stores at data + k * DCTSIZE, k = 0..7.
   *       NOTE(review): this access pattern assumes DCTSIZE == 8 and that
   *       DCTELEM is a 16-bit signed type; both are defined outside this
   *       file -- confirm.
   *
   * The work is done in two passes with a transpose in between.  Each vector
   * holds one column (pass 1) or one row (pass 2) of the block, so every
   * intrinsic operates on all eight rows/columns at once.  Pass 1 leaves
   * PASS1_BITS extra low-order bits in its results; pass 2 shifts them out.
   *
   * Sums and differences are kept in 16-bit lanes; products with the scaled
   * constants are widened to 32 bits (vmull/vmlal), accumulated, then narrowed
   * back to 16 bits with a rounding right shift (vrshrn).
   */
  77. void jsimd_fdct_islow_neon(DCTELEM *data)
  78. {
  79. /* Load DCT constants. */
  80. #ifdef HAVE_VLD1_S16_X3
  81. const int16x4x3_t consts = vld1_s16_x3(jsimd_fdct_islow_neon_consts);
  82. #else
  83. /* GCC does not currently support the intrinsic vld1_<type>_x3(). */
  84. const int16x4_t consts1 = vld1_s16(jsimd_fdct_islow_neon_consts);
  85. const int16x4_t consts2 = vld1_s16(jsimd_fdct_islow_neon_consts + 4);
  86. const int16x4_t consts3 = vld1_s16(jsimd_fdct_islow_neon_consts + 8);
  87. const int16x4x3_t consts = { { consts1, consts2, consts3 } };
  88. #endif
  /* Either way, the constants end up as:
   *   consts.val[0] = {  F_0_298, -F_0_390,  F_0_541,  F_0_765 }
   *   consts.val[1] = { -F_0_899,  F_1_175,  F_1_501, -F_1_847 }
   *   consts.val[2] = { -F_1_961,  F_2_053, -F_2_562,  F_3_072 }
   * The vmull_lane/vmlal_lane calls below select one constant by
   * (val[] index, lane index).
   */
  89. /* Load an 8x8 block of samples into Neon registers. De-interleaving loads
  90. * are used, followed by vuzp to transpose the block such that we have a
  91. * column of samples per vector - allowing all rows to be processed at once.
  92. */
  /* Each vld4q_s16() reads 32 consecutive elements and de-interleaves them
   * with a stride of 4, so val[k] receives elements k, k + 4, k + 8, ...
   * With 8 elements per row (see the DCTSIZE note above) that is columns k
   * and k + 4 of four rows, alternating.
   */
  93. int16x8x4_t s_rows_0123 = vld4q_s16(data);
  94. int16x8x4_t s_rows_4567 = vld4q_s16(data + 4 * DCTSIZE);
  /* vuzpq_s16() splits even and odd lanes of the concatenated inputs:
   * .val[0] becomes column k for rows 0-7, .val[1] becomes column k + 4.
   */
  95. int16x8x2_t cols_04 = vuzpq_s16(s_rows_0123.val[0], s_rows_4567.val[0]);
  96. int16x8x2_t cols_15 = vuzpq_s16(s_rows_0123.val[1], s_rows_4567.val[1]);
  97. int16x8x2_t cols_26 = vuzpq_s16(s_rows_0123.val[2], s_rows_4567.val[2]);
  98. int16x8x2_t cols_37 = vuzpq_s16(s_rows_0123.val[3], s_rows_4567.val[3]);
  99. int16x8_t col0 = cols_04.val[0];
  100. int16x8_t col1 = cols_15.val[0];
  101. int16x8_t col2 = cols_26.val[0];
  102. int16x8_t col3 = cols_37.val[0];
  103. int16x8_t col4 = cols_04.val[1];
  104. int16x8_t col5 = cols_15.val[1];
  105. int16x8_t col6 = cols_26.val[1];
  106. int16x8_t col7 = cols_37.val[1];
  107. /* Pass 1: process rows. */
  /* Butterflies: tmpN = colN + col(7-N) and tmp(7-N) = colN - col(7-N)
   * for N = 0..3.  tmp0..tmp3 feed the even part, tmp4..tmp7 the odd part.
   */
  108. int16x8_t tmp0 = vaddq_s16(col0, col7);
  109. int16x8_t tmp7 = vsubq_s16(col0, col7);
  110. int16x8_t tmp1 = vaddq_s16(col1, col6);
  111. int16x8_t tmp6 = vsubq_s16(col1, col6);
  112. int16x8_t tmp2 = vaddq_s16(col2, col5);
  113. int16x8_t tmp5 = vsubq_s16(col2, col5);
  114. int16x8_t tmp3 = vaddq_s16(col3, col4);
  115. int16x8_t tmp4 = vsubq_s16(col3, col4);
  116. /* Even part */
  117. int16x8_t tmp10 = vaddq_s16(tmp0, tmp3);
  118. int16x8_t tmp13 = vsubq_s16(tmp0, tmp3);
  119. int16x8_t tmp11 = vaddq_s16(tmp1, tmp2);
  120. int16x8_t tmp12 = vsubq_s16(tmp1, tmp2);
  /* Outputs 0 and 4 need no multiplication; they are only scaled up by
   * 2^PASS1_BITS to match the scaling of the other pass-1 outputs.
   */
  121. col0 = vshlq_n_s16(vaddq_s16(tmp10, tmp11), PASS1_BITS);
  122. col4 = vshlq_n_s16(vsubq_s16(tmp10, tmp11), PASS1_BITS);
  /* z1 = (tmp12 + tmp13) * F_0_541, widened to 32 bits and split into low
   * and high halves.  It is shared by outputs 2 and 6; vmlal returns a new
   * value, so z1_l/z1_h are not modified by the accumulations below.
   */
  123. int16x8_t tmp12_add_tmp13 = vaddq_s16(tmp12, tmp13);
  124. int32x4_t z1_l =
  125. vmull_lane_s16(vget_low_s16(tmp12_add_tmp13), consts.val[0], 2);
  126. int32x4_t z1_h =
  127. vmull_lane_s16(vget_high_s16(tmp12_add_tmp13), consts.val[0], 2);
  /* col2 = round((z1 + tmp13 * F_0_765) >> DESCALE_P1) */
  128. int32x4_t col2_scaled_l =
  129. vmlal_lane_s16(z1_l, vget_low_s16(tmp13), consts.val[0], 3);
  130. int32x4_t col2_scaled_h =
  131. vmlal_lane_s16(z1_h, vget_high_s16(tmp13), consts.val[0], 3);
  132. col2 = vcombine_s16(vrshrn_n_s32(col2_scaled_l, DESCALE_P1),
  133. vrshrn_n_s32(col2_scaled_h, DESCALE_P1));
  /* col6 = round((z1 + tmp12 * -F_1_847) >> DESCALE_P1) */
  134. int32x4_t col6_scaled_l =
  135. vmlal_lane_s16(z1_l, vget_low_s16(tmp12), consts.val[1], 3);
  136. int32x4_t col6_scaled_h =
  137. vmlal_lane_s16(z1_h, vget_high_s16(tmp12), consts.val[1], 3);
  138. col6 = vcombine_s16(vrshrn_n_s32(col6_scaled_l, DESCALE_P1),
  139. vrshrn_n_s32(col6_scaled_h, DESCALE_P1));
  140. /* Odd part */
  141. int16x8_t z1 = vaddq_s16(tmp4, tmp7);
  142. int16x8_t z2 = vaddq_s16(tmp5, tmp6);
  143. int16x8_t z3 = vaddq_s16(tmp4, tmp6);
  144. int16x8_t z4 = vaddq_s16(tmp5, tmp7);
  /* z5 = (z3 + z4) * F_1_175, formed as two multiply(-accumulate)s with the
   * same constant.
   */
  145. /* sqrt(2) * c3 */
  146. int32x4_t z5_l = vmull_lane_s16(vget_low_s16(z3), consts.val[1], 1);
  147. int32x4_t z5_h = vmull_lane_s16(vget_high_s16(z3), consts.val[1], 1);
  148. z5_l = vmlal_lane_s16(z5_l, vget_low_s16(z4), consts.val[1], 1);
  149. z5_h = vmlal_lane_s16(z5_h, vget_high_s16(z4), consts.val[1], 1);
  /* tmp4 * F_0_298 */
  150. /* sqrt(2) * (-c1+c3+c5-c7) */
  151. int32x4_t tmp4_l = vmull_lane_s16(vget_low_s16(tmp4), consts.val[0], 0);
  152. int32x4_t tmp4_h = vmull_lane_s16(vget_high_s16(tmp4), consts.val[0], 0);
  /* tmp5 * F_2_053 */
  153. /* sqrt(2) * ( c1+c3-c5+c7) */
  154. int32x4_t tmp5_l = vmull_lane_s16(vget_low_s16(tmp5), consts.val[2], 1);
  155. int32x4_t tmp5_h = vmull_lane_s16(vget_high_s16(tmp5), consts.val[2], 1);
  /* tmp6 * F_3_072 */
  156. /* sqrt(2) * ( c1+c3+c5-c7) */
  157. int32x4_t tmp6_l = vmull_lane_s16(vget_low_s16(tmp6), consts.val[2], 3);
  158. int32x4_t tmp6_h = vmull_lane_s16(vget_high_s16(tmp6), consts.val[2], 3);
  /* tmp7 * F_1_501 */
  159. /* sqrt(2) * ( c1+c3-c5-c7) */
  160. int32x4_t tmp7_l = vmull_lane_s16(vget_low_s16(tmp7), consts.val[1], 2);
  161. int32x4_t tmp7_h = vmull_lane_s16(vget_high_s16(tmp7), consts.val[1], 2);
  /* z1 * -F_0_899.  z1_l/z1_h are reused here; their even-part contents are
   * no longer needed.
   */
  162. /* sqrt(2) * (c7-c3) */
  163. z1_l = vmull_lane_s16(vget_low_s16(z1), consts.val[1], 0);
  164. z1_h = vmull_lane_s16(vget_high_s16(z1), consts.val[1], 0);
  /* z2 * -F_2_562 */
  165. /* sqrt(2) * (-c1-c3) */
  166. int32x4_t z2_l = vmull_lane_s16(vget_low_s16(z2), consts.val[2], 2);
  167. int32x4_t z2_h = vmull_lane_s16(vget_high_s16(z2), consts.val[2], 2);
  /* z3 * -F_1_961 */
  168. /* sqrt(2) * (-c3-c5) */
  169. int32x4_t z3_l = vmull_lane_s16(vget_low_s16(z3), consts.val[2], 0);
  170. int32x4_t z3_h = vmull_lane_s16(vget_high_s16(z3), consts.val[2], 0);
  /* z4 * -F_0_390 */
  171. /* sqrt(2) * (c5-c3) */
  172. int32x4_t z4_l = vmull_lane_s16(vget_low_s16(z4), consts.val[0], 1);
  173. int32x4_t z4_h = vmull_lane_s16(vget_high_s16(z4), consts.val[0], 1);
  /* Fold the shared term z5 into z3 and z4. */
  174. z3_l = vaddq_s32(z3_l, z5_l);
  175. z3_h = vaddq_s32(z3_h, z5_h);
  176. z4_l = vaddq_s32(z4_l, z5_l);
  177. z4_h = vaddq_s32(z4_h, z5_h);
  /* Output 7 = round((tmp4 + z1 + z3) >> DESCALE_P1) */
  178. tmp4_l = vaddq_s32(tmp4_l, z1_l);
  179. tmp4_h = vaddq_s32(tmp4_h, z1_h);
  180. tmp4_l = vaddq_s32(tmp4_l, z3_l);
  181. tmp4_h = vaddq_s32(tmp4_h, z3_h);
  182. col7 = vcombine_s16(vrshrn_n_s32(tmp4_l, DESCALE_P1),
  183. vrshrn_n_s32(tmp4_h, DESCALE_P1));
  /* Output 5 = round((tmp5 + z2 + z4) >> DESCALE_P1) */
  184. tmp5_l = vaddq_s32(tmp5_l, z2_l);
  185. tmp5_h = vaddq_s32(tmp5_h, z2_h);
  186. tmp5_l = vaddq_s32(tmp5_l, z4_l);
  187. tmp5_h = vaddq_s32(tmp5_h, z4_h);
  188. col5 = vcombine_s16(vrshrn_n_s32(tmp5_l, DESCALE_P1),
  189. vrshrn_n_s32(tmp5_h, DESCALE_P1));
  /* Output 3 = round((tmp6 + z2 + z3) >> DESCALE_P1) */
  190. tmp6_l = vaddq_s32(tmp6_l, z2_l);
  191. tmp6_h = vaddq_s32(tmp6_h, z2_h);
  192. tmp6_l = vaddq_s32(tmp6_l, z3_l);
  193. tmp6_h = vaddq_s32(tmp6_h, z3_h);
  194. col3 = vcombine_s16(vrshrn_n_s32(tmp6_l, DESCALE_P1),
  195. vrshrn_n_s32(tmp6_h, DESCALE_P1));
  /* Output 1 = round((tmp7 + z1 + z4) >> DESCALE_P1) */
  196. tmp7_l = vaddq_s32(tmp7_l, z1_l);
  197. tmp7_h = vaddq_s32(tmp7_h, z1_h);
  198. tmp7_l = vaddq_s32(tmp7_l, z4_l);
  199. tmp7_h = vaddq_s32(tmp7_h, z4_h);
  200. col1 = vcombine_s16(vrshrn_n_s32(tmp7_l, DESCALE_P1),
  201. vrshrn_n_s32(tmp7_h, DESCALE_P1));
  202. /* Transpose to work on columns in pass 2. */
  /* Stage 1: 16-bit vtrn interleaves adjacent column pairs, giving
   * (colA[i], colB[i]) pairs for even i in .val[0] and odd i in .val[1].
   */
  203. int16x8x2_t cols_01 = vtrnq_s16(col0, col1);
  204. int16x8x2_t cols_23 = vtrnq_s16(col2, col3);
  205. int16x8x2_t cols_45 = vtrnq_s16(col4, col5);
  206. int16x8x2_t cols_67 = vtrnq_s16(col6, col7);
  /* Stage 2: 32-bit vtrn.  Pairs 01 are combined with 45 (and 23 with 67)
   * rather than with their neighbours so that the zip in stage 3 produces
   * elements in 0..7 order.
   */
  207. int32x4x2_t cols_0145_l = vtrnq_s32(vreinterpretq_s32_s16(cols_01.val[0]),
  208. vreinterpretq_s32_s16(cols_45.val[0]));
  209. int32x4x2_t cols_0145_h = vtrnq_s32(vreinterpretq_s32_s16(cols_01.val[1]),
  210. vreinterpretq_s32_s16(cols_45.val[1]));
  211. int32x4x2_t cols_2367_l = vtrnq_s32(vreinterpretq_s32_s16(cols_23.val[0]),
  212. vreinterpretq_s32_s16(cols_67.val[0]));
  213. int32x4x2_t cols_2367_h = vtrnq_s32(vreinterpretq_s32_s16(cols_23.val[1]),
  214. vreinterpretq_s32_s16(cols_67.val[1]));
  /* Stage 3: 32-bit zip of the 0145 and 2367 halves.  .val[0] is row N and
   * .val[1] is row N + 4, each holding elements 0..7 in order.
   */
  215. int32x4x2_t rows_04 = vzipq_s32(cols_0145_l.val[0], cols_2367_l.val[0]);
  216. int32x4x2_t rows_15 = vzipq_s32(cols_0145_h.val[0], cols_2367_h.val[0]);
  217. int32x4x2_t rows_26 = vzipq_s32(cols_0145_l.val[1], cols_2367_l.val[1]);
  218. int32x4x2_t rows_37 = vzipq_s32(cols_0145_h.val[1], cols_2367_h.val[1]);
  219. int16x8_t row0 = vreinterpretq_s16_s32(rows_04.val[0]);
  220. int16x8_t row1 = vreinterpretq_s16_s32(rows_15.val[0]);
  221. int16x8_t row2 = vreinterpretq_s16_s32(rows_26.val[0]);
  222. int16x8_t row3 = vreinterpretq_s16_s32(rows_37.val[0]);
  223. int16x8_t row4 = vreinterpretq_s16_s32(rows_04.val[1]);
  224. int16x8_t row5 = vreinterpretq_s16_s32(rows_15.val[1]);
  225. int16x8_t row6 = vreinterpretq_s16_s32(rows_26.val[1]);
  226. int16x8_t row7 = vreinterpretq_s16_s32(rows_37.val[1]);
  227. /* Pass 2: process columns. */
  /* Same arithmetic as pass 1, applied to the transposed data and reusing the
   * pass-1 temporaries.  The only differences are the final shifts: outputs 0
   * and 4 are rounded and shifted right by PASS1_BITS, and all other outputs
   * are descaled by DESCALE_P2 instead of DESCALE_P1.
   */
  228. tmp0 = vaddq_s16(row0, row7);
  229. tmp7 = vsubq_s16(row0, row7);
  230. tmp1 = vaddq_s16(row1, row6);
  231. tmp6 = vsubq_s16(row1, row6);
  232. tmp2 = vaddq_s16(row2, row5);
  233. tmp5 = vsubq_s16(row2, row5);
  234. tmp3 = vaddq_s16(row3, row4);
  235. tmp4 = vsubq_s16(row3, row4);
  236. /* Even part */
  237. tmp10 = vaddq_s16(tmp0, tmp3);
  238. tmp13 = vsubq_s16(tmp0, tmp3);
  239. tmp11 = vaddq_s16(tmp1, tmp2);
  240. tmp12 = vsubq_s16(tmp1, tmp2);
  /* Rounding right shift removes the PASS1_BITS scaling added in pass 1. */
  241. row0 = vrshrq_n_s16(vaddq_s16(tmp10, tmp11), PASS1_BITS);
  242. row4 = vrshrq_n_s16(vsubq_s16(tmp10, tmp11), PASS1_BITS);
  /* z1 = (tmp12 + tmp13) * F_0_541 */
  243. tmp12_add_tmp13 = vaddq_s16(tmp12, tmp13);
  244. z1_l = vmull_lane_s16(vget_low_s16(tmp12_add_tmp13), consts.val[0], 2);
  245. z1_h = vmull_lane_s16(vget_high_s16(tmp12_add_tmp13), consts.val[0], 2);
  /* row2 = round((z1 + tmp13 * F_0_765) >> DESCALE_P2) */
  246. int32x4_t row2_scaled_l =
  247. vmlal_lane_s16(z1_l, vget_low_s16(tmp13), consts.val[0], 3);
  248. int32x4_t row2_scaled_h =
  249. vmlal_lane_s16(z1_h, vget_high_s16(tmp13), consts.val[0], 3);
  250. row2 = vcombine_s16(vrshrn_n_s32(row2_scaled_l, DESCALE_P2),
  251. vrshrn_n_s32(row2_scaled_h, DESCALE_P2));
  /* row6 = round((z1 + tmp12 * -F_1_847) >> DESCALE_P2) */
  252. int32x4_t row6_scaled_l =
  253. vmlal_lane_s16(z1_l, vget_low_s16(tmp12), consts.val[1], 3);
  254. int32x4_t row6_scaled_h =
  255. vmlal_lane_s16(z1_h, vget_high_s16(tmp12), consts.val[1], 3);
  256. row6 = vcombine_s16(vrshrn_n_s32(row6_scaled_l, DESCALE_P2),
  257. vrshrn_n_s32(row6_scaled_h, DESCALE_P2));
  258. /* Odd part */
  259. z1 = vaddq_s16(tmp4, tmp7);
  260. z2 = vaddq_s16(tmp5, tmp6);
  261. z3 = vaddq_s16(tmp4, tmp6);
  262. z4 = vaddq_s16(tmp5, tmp7);
  /* z5 = (z3 + z4) * F_1_175 */
  263. /* sqrt(2) * c3 */
  264. z5_l = vmull_lane_s16(vget_low_s16(z3), consts.val[1], 1);
  265. z5_h = vmull_lane_s16(vget_high_s16(z3), consts.val[1], 1);
  266. z5_l = vmlal_lane_s16(z5_l, vget_low_s16(z4), consts.val[1], 1);
  267. z5_h = vmlal_lane_s16(z5_h, vget_high_s16(z4), consts.val[1], 1);
  /* tmp4 * F_0_298 */
  268. /* sqrt(2) * (-c1+c3+c5-c7) */
  269. tmp4_l = vmull_lane_s16(vget_low_s16(tmp4), consts.val[0], 0);
  270. tmp4_h = vmull_lane_s16(vget_high_s16(tmp4), consts.val[0], 0);
  /* tmp5 * F_2_053 */
  271. /* sqrt(2) * ( c1+c3-c5+c7) */
  272. tmp5_l = vmull_lane_s16(vget_low_s16(tmp5), consts.val[2], 1);
  273. tmp5_h = vmull_lane_s16(vget_high_s16(tmp5), consts.val[2], 1);
  /* tmp6 * F_3_072 */
  274. /* sqrt(2) * ( c1+c3+c5-c7) */
  275. tmp6_l = vmull_lane_s16(vget_low_s16(tmp6), consts.val[2], 3);
  276. tmp6_h = vmull_lane_s16(vget_high_s16(tmp6), consts.val[2], 3);
  /* tmp7 * F_1_501 */
  277. /* sqrt(2) * ( c1+c3-c5-c7) */
  278. tmp7_l = vmull_lane_s16(vget_low_s16(tmp7), consts.val[1], 2);
  279. tmp7_h = vmull_lane_s16(vget_high_s16(tmp7), consts.val[1], 2);
  /* z1 * -F_0_899 (z1_l/z1_h reused; even-part value no longer needed) */
  280. /* sqrt(2) * (c7-c3) */
  281. z1_l = vmull_lane_s16(vget_low_s16(z1), consts.val[1], 0);
  282. z1_h = vmull_lane_s16(vget_high_s16(z1), consts.val[1], 0);
  /* z2 * -F_2_562 */
  283. /* sqrt(2) * (-c1-c3) */
  284. z2_l = vmull_lane_s16(vget_low_s16(z2), consts.val[2], 2);
  285. z2_h = vmull_lane_s16(vget_high_s16(z2), consts.val[2], 2);
  /* z3 * -F_1_961 */
  286. /* sqrt(2) * (-c3-c5) */
  287. z3_l = vmull_lane_s16(vget_low_s16(z3), consts.val[2], 0);
  288. z3_h = vmull_lane_s16(vget_high_s16(z3), consts.val[2], 0);
  /* z4 * -F_0_390 */
  289. /* sqrt(2) * (c5-c3) */
  290. z4_l = vmull_lane_s16(vget_low_s16(z4), consts.val[0], 1);
  291. z4_h = vmull_lane_s16(vget_high_s16(z4), consts.val[0], 1);
  /* Fold the shared term z5 into z3 and z4. */
  292. z3_l = vaddq_s32(z3_l, z5_l);
  293. z3_h = vaddq_s32(z3_h, z5_h);
  294. z4_l = vaddq_s32(z4_l, z5_l);
  295. z4_h = vaddq_s32(z4_h, z5_h);
  /* row7 = round((tmp4 + z1 + z3) >> DESCALE_P2) */
  296. tmp4_l = vaddq_s32(tmp4_l, z1_l);
  297. tmp4_h = vaddq_s32(tmp4_h, z1_h);
  298. tmp4_l = vaddq_s32(tmp4_l, z3_l);
  299. tmp4_h = vaddq_s32(tmp4_h, z3_h);
  300. row7 = vcombine_s16(vrshrn_n_s32(tmp4_l, DESCALE_P2),
  301. vrshrn_n_s32(tmp4_h, DESCALE_P2));
  /* row5 = round((tmp5 + z2 + z4) >> DESCALE_P2) */
  302. tmp5_l = vaddq_s32(tmp5_l, z2_l);
  303. tmp5_h = vaddq_s32(tmp5_h, z2_h);
  304. tmp5_l = vaddq_s32(tmp5_l, z4_l);
  305. tmp5_h = vaddq_s32(tmp5_h, z4_h);
  306. row5 = vcombine_s16(vrshrn_n_s32(tmp5_l, DESCALE_P2),
  307. vrshrn_n_s32(tmp5_h, DESCALE_P2));
  /* row3 = round((tmp6 + z2 + z3) >> DESCALE_P2) */
  308. tmp6_l = vaddq_s32(tmp6_l, z2_l);
  309. tmp6_h = vaddq_s32(tmp6_h, z2_h);
  310. tmp6_l = vaddq_s32(tmp6_l, z3_l);
  311. tmp6_h = vaddq_s32(tmp6_h, z3_h);
  312. row3 = vcombine_s16(vrshrn_n_s32(tmp6_l, DESCALE_P2),
  313. vrshrn_n_s32(tmp6_h, DESCALE_P2));
  /* row1 = round((tmp7 + z1 + z4) >> DESCALE_P2) */
  314. tmp7_l = vaddq_s32(tmp7_l, z1_l);
  315. tmp7_h = vaddq_s32(tmp7_h, z1_h);
  316. tmp7_l = vaddq_s32(tmp7_l, z4_l);
  317. tmp7_h = vaddq_s32(tmp7_h, z4_h);
  318. row1 = vcombine_s16(vrshrn_n_s32(tmp7_l, DESCALE_P2),
  319. vrshrn_n_s32(tmp7_h, DESCALE_P2));
  /* Write the eight result rows back over the input block. */
  320. vst1q_s16(data + 0 * DCTSIZE, row0);
  321. vst1q_s16(data + 1 * DCTSIZE, row1);
  322. vst1q_s16(data + 2 * DCTSIZE, row2);
  323. vst1q_s16(data + 3 * DCTSIZE, row3);
  324. vst1q_s16(data + 4 * DCTSIZE, row4);
  325. vst1q_s16(data + 5 * DCTSIZE, row5);
  326. vst1q_s16(data + 6 * DCTSIZE, row6);
  327. vst1q_s16(data + 7 * DCTSIZE, row7);
  328. }