mirror of
https://github.com/libjpeg-turbo/libjpeg-turbo.git
synced 2026-01-12 00:04:17 +08:00
MMI: Format comments consistently with MMX code
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
* Loongson MMI optimizations for libjpeg-turbo
|
||||
*
|
||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
* Copyright (C) 2015, 2019, D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C) 2015, 2019, 2025, D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
|
||||
* All Rights Reserved.
|
||||
* Authors: ZhuChen <zhuchen@loongson.cn>
|
||||
@@ -208,29 +208,39 @@ void jsimd_ycc_rgb_convert_mmi(JDIMENSION out_width, JSAMPIMAGE input_buf,
|
||||
|
||||
#if RGB_PIXELSIZE == 3
|
||||
|
||||
/* mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) */
|
||||
/* mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) */
|
||||
mmA = _mm_unpacklo_pi8(mmA, mmC); /* (00 10 02 12 04 14 06 16) */
|
||||
mmE = _mm_unpacklo_pi8(mmE, mmB); /* (20 01 22 03 24 05 26 07) */
|
||||
mmD = _mm_unpacklo_pi8(mmD, mmF); /* (11 21 13 23 15 25 17 27) */
|
||||
/* NOTE: The values of RGB_RED, RGB_GREEN, and RGB_BLUE determine the
|
||||
* mapping of components A, B, and C to red, green, and blue.
|
||||
*
|
||||
* mmA = (A0 A2 A4 A6 ** ** ** **) = AE
|
||||
* mmB = (A1 A3 A5 A7 ** ** ** **) = AO
|
||||
* mmC = (B0 B2 B4 B6 ** ** ** **) = BE
|
||||
* mmD = (B1 B3 B5 B7 ** ** ** **) = BO
|
||||
* mmE = (C0 C2 C4 C6 ** ** ** **) = CE
|
||||
* mmF = (C1 C3 C5 C7 ** ** ** **) = CO
|
||||
* mmG = (** ** ** ** ** ** ** **)
|
||||
* mmH = (** ** ** ** ** ** ** **)
|
||||
*/
|
||||
mmA = _mm_unpacklo_pi8(mmA, mmC); /* (A0 B0 A2 B2 A4 B4 A6 B6) */
|
||||
mmE = _mm_unpacklo_pi8(mmE, mmB); /* (C0 A1 C2 A3 C4 A5 C6 A7) */
|
||||
mmD = _mm_unpacklo_pi8(mmD, mmF); /* (B1 C1 B3 C3 B5 C5 B7 C7) */
|
||||
|
||||
mmH = _mm_srli_si64(mmA, 2 * BYTE_BIT);
|
||||
|
||||
mmG = _mm_unpackhi_pi16(mmA, mmE); /* (04 14 24 05 06 16 26 07) */
|
||||
mmA = _mm_unpacklo_pi16(mmA, mmE); /* (00 10 20 01 02 12 22 03) */
|
||||
mmG = _mm_unpackhi_pi16(mmA, mmE); /* (A4 B4 C4 A5 A6 B6 C6 A7) */
|
||||
mmA = _mm_unpacklo_pi16(mmA, mmE); /* (A0 B0 C0 A1 A2 B2 C2 A3) */
|
||||
|
||||
mmE = _mm_srli_si64(mmE, 2 * BYTE_BIT);
|
||||
mmB = _mm_srli_si64(mmD, 2 * BYTE_BIT); /* (13 23 15 25 17 27 -- --) */
|
||||
mmB = _mm_srli_si64(mmD, 2 * BYTE_BIT); /* (B3 C3 B5 C5 B7 C7 -- --) */
|
||||
|
||||
mmC = _mm_unpackhi_pi16(mmD, mmH); /* (15 25 06 16 17 27 -- --) */
|
||||
mmD = _mm_unpacklo_pi16(mmD, mmH); /* (11 21 02 12 13 23 04 14) */
|
||||
mmC = _mm_unpackhi_pi16(mmD, mmH); /* (B5 C5 A6 B6 B7 C7 -- --) */
|
||||
mmD = _mm_unpacklo_pi16(mmD, mmH); /* (B1 C1 A2 B2 B3 C3 A4 B4) */
|
||||
|
||||
mmF = _mm_unpackhi_pi16(mmE, mmB); /* (26 07 17 27 -- -- -- --) */
|
||||
mmE = _mm_unpacklo_pi16(mmE, mmB); /* (22 03 13 23 24 05 15 25) */
|
||||
mmF = _mm_unpackhi_pi16(mmE, mmB); /* (C6 A7 B7 C7 -- -- -- --) */
|
||||
mmE = _mm_unpacklo_pi16(mmE, mmB); /* (C2 A3 B3 C3 C4 A5 B5 C5) */
|
||||
|
||||
mmA = _mm_unpacklo_pi32(mmA, mmD); /* (00 10 20 01 11 21 02 12) */
|
||||
mmE = _mm_unpacklo_pi32(mmE, mmG); /* (22 03 13 23 04 14 24 05) */
|
||||
mmC = _mm_unpacklo_pi32(mmC, mmF); /* (15 25 06 16 26 07 17 27) */
|
||||
mmA = _mm_unpacklo_pi32(mmA, mmD); /* (A0 B0 C0 A1 B1 C1 A2 B2) */
|
||||
mmE = _mm_unpacklo_pi32(mmE, mmG); /* (C2 A3 B3 C3 A4 B4 C4 A5) */
|
||||
mmC = _mm_unpacklo_pi32(mmC, mmF); /* (B5 C5 A6 B6 C6 A7 B7 C7) */
|
||||
|
||||
if (num_cols >= 8) {
|
||||
if (!(((long)outptr) & 7)) {
|
||||
@@ -320,25 +330,33 @@ void jsimd_ycc_rgb_convert_mmi(JDIMENSION out_width, JSAMPIMAGE input_buf,
|
||||
xe = _mm_xor_si64(xe, xe);
|
||||
xo = _mm_xor_si64(xo, xo);
|
||||
#endif
|
||||
/* mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) */
|
||||
/* mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) */
|
||||
/* mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) */
|
||||
/* mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **) */
|
||||
|
||||
mmA = _mm_unpacklo_pi8(mmA, mmC); /* (00 10 02 12 04 14 06 16) */
|
||||
mmE = _mm_unpacklo_pi8(mmE, mmG); /* (20 30 22 32 24 34 26 36) */
|
||||
mmB = _mm_unpacklo_pi8(mmB, mmD); /* (01 11 03 13 05 15 07 17) */
|
||||
mmF = _mm_unpacklo_pi8(mmF, mmH); /* (21 31 23 33 25 35 27 37) */
|
||||
/* NOTE: The values of RGB_RED, RGB_GREEN, and RGB_BLUE determine the
|
||||
* mapping of components A, B, C, and D to red, green, and blue.
|
||||
*
|
||||
* mmA = (A0 A2 A4 A6 ** ** ** **) = AE
|
||||
* mmB = (A1 A3 A5 A7 ** ** ** **) = AO
|
||||
* mmC = (B0 B2 B4 B6 ** ** ** **) = BE
|
||||
* mmD = (B1 B3 B5 B7 ** ** ** **) = BO
|
||||
* mmE = (C0 C2 C4 C6 ** ** ** **) = CE
|
||||
* mmF = (C1 C3 C5 C7 ** ** ** **) = CO
|
||||
* mmG = (D0 D2 D4 D6 ** ** ** **) = DE
|
||||
* mmH = (D1 D3 D5 D7 ** ** ** **) = DO
|
||||
*/
|
||||
mmA = _mm_unpacklo_pi8(mmA, mmC); /* (A0 B0 A2 B2 A4 B4 A6 B6) */
|
||||
mmE = _mm_unpacklo_pi8(mmE, mmG); /* (C0 D0 C2 D2 C4 D4 C6 D6) */
|
||||
mmB = _mm_unpacklo_pi8(mmB, mmD); /* (A1 B1 A3 B3 A5 B5 A7 B7) */
|
||||
mmF = _mm_unpacklo_pi8(mmF, mmH); /* (C1 D1 C3 D3 C5 D5 C7 D7) */
|
||||
|
||||
mmC = _mm_unpackhi_pi16(mmA, mmE); /* (04 14 24 34 06 16 26 36) */
|
||||
mmA = _mm_unpacklo_pi16(mmA, mmE); /* (00 10 20 30 02 12 22 32) */
|
||||
mmG = _mm_unpackhi_pi16(mmB, mmF); /* (05 15 25 35 07 17 27 37) */
|
||||
mmB = _mm_unpacklo_pi16(mmB, mmF); /* (01 11 21 31 03 13 23 33) */
|
||||
mmC = _mm_unpackhi_pi16(mmA, mmE); /* (A4 B4 C4 D4 A6 B6 C6 D6) */
|
||||
mmA = _mm_unpacklo_pi16(mmA, mmE); /* (A0 B0 C0 D0 A2 B2 C2 D2) */
|
||||
mmG = _mm_unpackhi_pi16(mmB, mmF); /* (A5 B5 C5 D5 A7 B7 C7 D7) */
|
||||
mmB = _mm_unpacklo_pi16(mmB, mmF); /* (A1 B1 C1 D1 A3 B3 C3 D3) */
|
||||
|
||||
mmD = _mm_unpackhi_pi32(mmA, mmB); /* (02 12 22 32 03 13 23 33) */
|
||||
mmA = _mm_unpacklo_pi32(mmA, mmB); /* (00 10 20 30 01 11 21 31) */
|
||||
mmH = _mm_unpackhi_pi32(mmC, mmG); /* (06 16 26 36 07 17 27 37) */
|
||||
mmC = _mm_unpacklo_pi32(mmC, mmG); /* (04 14 24 34 05 15 25 35) */
|
||||
mmD = _mm_unpackhi_pi32(mmA, mmB); /* (A2 B2 C2 D2 A3 B3 C3 D3) */
|
||||
mmA = _mm_unpacklo_pi32(mmA, mmB); /* (A0 B0 C0 D0 A1 B1 C1 D1) */
|
||||
mmH = _mm_unpackhi_pi32(mmC, mmG); /* (A6 B6 C6 D6 A7 B7 C7 D7) */
|
||||
mmC = _mm_unpacklo_pi32(mmC, mmG); /* (A4 B4 C4 D4 A5 B5 C5 D5) */
|
||||
|
||||
if (num_cols >= 8) {
|
||||
if (!(((long)outptr) & 7)) {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* Loongson MMI optimizations for libjpeg-turbo
|
||||
*
|
||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
* Copyright (C) 2015, 2019, D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C) 2015, 2019, 2025, D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
|
||||
* All Rights Reserved.
|
||||
* Authors: ZhangLixia <zhanglixia-hf@loongson.cn>
|
||||
@@ -220,38 +220,47 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
|
||||
|
||||
#if RGB_PIXELSIZE == 3
|
||||
|
||||
/* mmA=(00 02 04 06 08 0A 0C 0E), mmB=(01 03 05 07 09 0B 0D 0F) */
|
||||
/* mmC=(10 12 14 16 18 1A 1C 1E), mmD=(11 13 15 17 19 1B 1D 1F) */
|
||||
/* mmE=(20 22 24 26 28 2A 2C 2E), mmF=(21 23 25 27 29 2B 2D 2F) */
|
||||
mmG = _mm_unpacklo_pi8(mmA, mmC); /* (00 10 02 12 04 14 06 16) */
|
||||
mmA = _mm_unpackhi_pi8(mmA, mmC); /* (08 18 0A 1A 0C 1C 0E 1E) */
|
||||
mmH = _mm_unpacklo_pi8(mmE, mmB); /* (20 01 22 03 24 05 26 07) */
|
||||
mmE = _mm_unpackhi_pi8(mmE, mmB); /* (28 09 2A 0B 2C 0D 2E 0F) */
|
||||
mmC = _mm_unpacklo_pi8(mmD, mmF); /* (11 21 13 23 15 25 17 27) */
|
||||
mmD = _mm_unpackhi_pi8(mmD, mmF); /* (19 29 1B 2B 1D 2D 1F 2F) */
|
||||
/* NOTE: The values of RGB_RED, RGB_GREEN, and RGB_BLUE determine the
|
||||
* mapping of components A, B, and C to red, green, and blue.
|
||||
*
|
||||
* mmA = (A0 A2 A4 A6 A8 Aa Ac Ae) = AE
|
||||
* mmB = (A1 A3 A5 A7 A9 Ab Ad Af) = AO
|
||||
* mmC = (B0 B2 B4 B6 B8 Ba Bc Be) = BE
|
||||
* mmD = (B1 B3 B5 B7 B9 Bb Bd Bf) = BO
|
||||
* mmE = (C0 C2 C4 C6 C8 Ca Cc Ce) = CE
|
||||
* mmF = (C1 C3 C5 C7 C9 Cb Cd Cf) = CO
|
||||
* mmG = (** ** ** ** ** ** ** **)
|
||||
* mmH = (** ** ** ** ** ** ** **)
|
||||
*/
|
||||
mmG = _mm_unpacklo_pi8(mmA, mmC); /* (A0 B0 A2 B2 A4 B4 A6 B6) */
|
||||
mmA = _mm_unpackhi_pi8(mmA, mmC); /* (A8 B8 Aa Ba Ac Bc Ae Be) */
|
||||
mmH = _mm_unpacklo_pi8(mmE, mmB); /* (C0 A1 C2 A3 C4 A5 C6 A7) */
|
||||
mmE = _mm_unpackhi_pi8(mmE, mmB); /* (C8 A9 Ca Ab Cc Ad Ce Af) */
|
||||
mmC = _mm_unpacklo_pi8(mmD, mmF); /* (B1 C1 B3 C3 B5 C5 B7 C7) */
|
||||
mmD = _mm_unpackhi_pi8(mmD, mmF); /* (B9 C9 Bb Cb Bd Cd Bf Cf) */
|
||||
|
||||
mmB = _mm_unpacklo_pi16(mmG, mmA); /* (00 10 08 18 02 12 0A 1A) */
|
||||
mmA = _mm_unpackhi_pi16(mmG, mmA); /* (04 14 0C 1C 06 16 0E 1E) */
|
||||
mmF = _mm_unpacklo_pi16(mmH, mmE); /* (20 01 28 09 22 03 2A 0B) */
|
||||
mmE = _mm_unpackhi_pi16(mmH, mmE); /* (24 05 2C 0D 26 07 2E 0F) */
|
||||
mmH = _mm_unpacklo_pi16(mmC, mmD); /* (11 21 19 29 13 23 1B 2B) */
|
||||
mmG = _mm_unpackhi_pi16(mmC, mmD); /* (15 25 1D 2D 17 27 1F 2F) */
|
||||
mmB = _mm_unpacklo_pi16(mmG, mmA); /* (A0 B0 A8 B8 A2 B2 Aa Ba) */
|
||||
mmA = _mm_unpackhi_pi16(mmG, mmA); /* (A4 B4 Ac Bc A6 B6 Ae Be) */
|
||||
mmF = _mm_unpacklo_pi16(mmH, mmE); /* (C0 A1 C8 A9 C2 A3 Ca Ab) */
|
||||
mmE = _mm_unpackhi_pi16(mmH, mmE); /* (C4 A5 Cc Ad C6 A7 Ce Af) */
|
||||
mmH = _mm_unpacklo_pi16(mmC, mmD); /* (B1 C1 B9 C9 B3 C3 Bb Cb) */
|
||||
mmG = _mm_unpackhi_pi16(mmC, mmD); /* (B5 C5 Bd Cd B7 C7 Bf Cf) */
|
||||
|
||||
mmC = _mm_unpacklo_pi16(mmB, mmF); /* (00 10 20 01 08 18 28 09) */
|
||||
mmC = _mm_unpacklo_pi16(mmB, mmF); /* (A0 B0 C0 A1 A8 B8 C8 A9) */
|
||||
mmB = _mm_srli_si64(mmB, 4 * BYTE_BIT);
|
||||
mmB = _mm_unpacklo_pi16(mmH, mmB); /* (11 21 02 12 19 29 0A 1A) */
|
||||
mmD = _mm_unpackhi_pi16(mmF, mmH); /* (22 03 13 23 2A 0B 1B 2B) */
|
||||
mmF = _mm_unpacklo_pi16(mmA, mmE); /* (04 14 24 05 0C 1C 2C 0D) */
|
||||
mmB = _mm_unpacklo_pi16(mmH, mmB); /* (B1 C1 A2 B2 B9 C9 Aa Ba) */
|
||||
mmD = _mm_unpackhi_pi16(mmF, mmH); /* (C2 A3 B3 C3 Ca Ab Bb Cb) */
|
||||
mmF = _mm_unpacklo_pi16(mmA, mmE); /* (A4 B4 C4 A5 Ac Bc Cc Ad) */
|
||||
mmA = _mm_srli_si64(mmA, 4 * BYTE_BIT);
|
||||
mmH = _mm_unpacklo_pi16(mmG, mmA); /* (15 25 06 16 1D 2D 0E 1E) */
|
||||
mmG = _mm_unpackhi_pi16(mmE, mmG); /* (26 07 17 27 2E 0F 1F 2F) */
|
||||
mmH = _mm_unpacklo_pi16(mmG, mmA); /* (B5 C5 A6 B6 Bd Cd Ae Be) */
|
||||
mmG = _mm_unpackhi_pi16(mmE, mmG); /* (C6 A7 B7 C7 Ce Af Bf Cf) */
|
||||
|
||||
mmA = _mm_unpacklo_pi32(mmC, mmB); /* (00 10 20 01 11 21 02 12) */
|
||||
mmE = _mm_unpackhi_pi32(mmC, mmB); /* (08 18 28 09 19 29 0A 1A) */
|
||||
mmB = _mm_unpacklo_pi32(mmD, mmF); /* (22 03 13 23 04 14 24 05) */
|
||||
mmF = _mm_unpackhi_pi32(mmD, mmF); /* (2A 0B 1B 2B 0C 1C 2C 0D) */
|
||||
mmC = _mm_unpacklo_pi32(mmH, mmG); /* (15 25 06 16 26 07 17 27) */
|
||||
mmG = _mm_unpackhi_pi32(mmH, mmG); /* (1D 2D 0E 1E 2E 0F 1F 2F) */
|
||||
mmA = _mm_unpacklo_pi32(mmC, mmB); /* (A0 B0 C0 A1 B1 C1 A2 B2) */
|
||||
mmE = _mm_unpackhi_pi32(mmC, mmB); /* (A8 B8 C8 A9 B9 C9 Aa Ba) */
|
||||
mmB = _mm_unpacklo_pi32(mmD, mmF); /* (C2 A3 B3 C3 A4 B4 C4 A5) */
|
||||
mmF = _mm_unpackhi_pi32(mmD, mmF); /* (Ca Ab Bb Cb Ac Bc Cc Ad) */
|
||||
mmC = _mm_unpacklo_pi32(mmH, mmG); /* (B5 C5 A6 B6 C6 A7 B7 C7) */
|
||||
mmG = _mm_unpackhi_pi32(mmH, mmG); /* (Bd Cd Ae Be Ce Af Bf Cf) */
|
||||
|
||||
if (num_cols >= 8) {
|
||||
if (!(((long)outptr) & 7)) {
|
||||
@@ -367,40 +376,48 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
|
||||
xe = _mm_xor_si64(xe, xe);
|
||||
xo = _mm_xor_si64(xo, xo);
|
||||
#endif
|
||||
/* mmA=(00 02 04 06 08 0A 0C 0E), mmB=(01 03 05 07 09 0B 0D 0F) */
|
||||
/* mmC=(10 12 14 16 18 1A 1C 1E), mmD=(11 13 15 17 19 1B 1D 1F) */
|
||||
/* mmE=(20 22 24 26 28 2A 2C 2E), mmF=(21 23 25 27 29 2B 2D 2F) */
|
||||
/* mmG=(30 32 34 36 38 3A 3C 3E), mmH=(31 33 35 37 39 3B 3D 3F) */
|
||||
|
||||
mm8 = _mm_unpacklo_pi8(mmA, mmC); /* (00 10 02 12 04 14 06 16) */
|
||||
mm9 = _mm_unpackhi_pi8(mmA, mmC); /* (08 18 0A 1A 0C 1C 0E 1E) */
|
||||
mmA = _mm_unpacklo_pi8(mmE, mmG); /* (20 30 22 32 24 34 26 36) */
|
||||
mmE = _mm_unpackhi_pi8(mmE, mmG); /* (28 38 2A 3A 2C 3C 2E 3E) */
|
||||
/* NOTE: The values of RGB_RED, RGB_GREEN, and RGB_BLUE determine the
|
||||
* mapping of components A, B, C, and D to red, green, and blue.
|
||||
*
|
||||
* mmA = (A0 A2 A4 A6 A8 Aa Ac Ae) = AE
|
||||
* mmB = (A1 A3 A5 A7 A9 Ab Ad Af) = AO
|
||||
* mmC = (B0 B2 B4 B6 B8 Ba Bc Be) = BE
|
||||
* mmD = (B1 B3 B5 B7 B9 Bb Bd Bf) = BO
|
||||
* mmE = (C0 C2 C4 C6 C8 Ca Cc Ce) = CE
|
||||
* mmF = (C1 C3 C5 C7 C9 Cb Cd Cf) = CO
|
||||
* mmG = (D0 D2 D4 D6 D8 Da Dc De) = DE
|
||||
* mmH = (D1 D3 D5 D7 D9 Db Dd Df) = DO
|
||||
*/
|
||||
mm8 = _mm_unpacklo_pi8(mmA, mmC); /* (A0 B0 A2 B2 A4 B4 A6 B6) */
|
||||
mm9 = _mm_unpackhi_pi8(mmA, mmC); /* (A8 B8 Aa Ba Ac Bc Ae Be) */
|
||||
mmA = _mm_unpacklo_pi8(mmE, mmG); /* (C0 D0 C2 D2 C4 D4 C6 D6) */
|
||||
mmE = _mm_unpackhi_pi8(mmE, mmG); /* (C8 D8 Ca Da Cc Dc Ce De) */
|
||||
|
||||
mmG = _mm_unpacklo_pi8(mmB, mmD); /* (01 11 03 13 05 15 07 17) */
|
||||
mmB = _mm_unpackhi_pi8(mmB, mmD); /* (09 19 0B 1B 0D 1D 0F 1F) */
|
||||
mmD = _mm_unpacklo_pi8(mmF, mmH); /* (21 31 23 33 25 35 27 37) */
|
||||
mmF = _mm_unpackhi_pi8(mmF, mmH); /* (29 39 2B 3B 2D 3D 2F 3F) */
|
||||
mmG = _mm_unpacklo_pi8(mmB, mmD); /* (A1 B1 A3 B3 A5 B5 A7 B7) */
|
||||
mmB = _mm_unpackhi_pi8(mmB, mmD); /* (A9 B9 Ab Bb Ad Bd Af Bf) */
|
||||
mmD = _mm_unpacklo_pi8(mmF, mmH); /* (C1 D1 C3 D3 C5 D5 C7 D7) */
|
||||
mmF = _mm_unpackhi_pi8(mmF, mmH); /* (C9 D9 Cb Db Cd Dd Cf Df) */
|
||||
|
||||
mmH = _mm_unpacklo_pi16(mm8, mmA); /* (00 10 20 30 02 12 22 32) */
|
||||
mm8 = _mm_unpackhi_pi16(mm8, mmA); /* (04 14 24 34 06 16 26 36) */
|
||||
mmA = _mm_unpacklo_pi16(mmG, mmD); /* (01 11 21 31 03 13 23 33) */
|
||||
mmD = _mm_unpackhi_pi16(mmG, mmD); /* (05 15 25 35 07 17 27 37) */
|
||||
mmH = _mm_unpacklo_pi16(mm8, mmA); /* (A0 B0 C0 D0 A2 B2 C2 D2) */
|
||||
mm8 = _mm_unpackhi_pi16(mm8, mmA); /* (A4 B4 C4 D4 A6 B6 C6 D6) */
|
||||
mmA = _mm_unpacklo_pi16(mmG, mmD); /* (A1 B1 C1 D1 A3 B3 C3 D3) */
|
||||
mmD = _mm_unpackhi_pi16(mmG, mmD); /* (A5 B5 C5 D5 A7 B7 C7 D7) */
|
||||
|
||||
mmG = _mm_unpackhi_pi16(mm9, mmE); /* (0C 1C 2C 3C 0E 1E 2E 3E) */
|
||||
mm9 = _mm_unpacklo_pi16(mm9, mmE); /* (08 18 28 38 0A 1A 2A 3A) */
|
||||
mmE = _mm_unpacklo_pi16(mmB, mmF); /* (09 19 29 39 0B 1B 2B 3B) */
|
||||
mmF = _mm_unpackhi_pi16(mmB, mmF); /* (0D 1D 2D 3D 0F 1F 2F 3F) */
|
||||
mmG = _mm_unpackhi_pi16(mm9, mmE); /* (Ac Bc Cc Dc Ae Be Ce De) */
|
||||
mm9 = _mm_unpacklo_pi16(mm9, mmE); /* (A8 B8 C8 D8 Aa Ba Ca Da) */
|
||||
mmE = _mm_unpacklo_pi16(mmB, mmF); /* (A9 B9 C9 D9 Ab Bb Cb Db) */
|
||||
mmF = _mm_unpackhi_pi16(mmB, mmF); /* (Ad Bd Cd Dd Af Bf Cf Df) */
|
||||
|
||||
mmB = _mm_unpackhi_pi32(mmH, mmA); /* (02 12 22 32 03 13 23 33) */
|
||||
mmA = _mm_unpacklo_pi32(mmH, mmA); /* (00 10 20 30 01 11 21 31) */
|
||||
mmC = _mm_unpacklo_pi32(mm8, mmD); /* (04 14 24 34 05 15 25 35) */
|
||||
mmD = _mm_unpackhi_pi32(mm8, mmD); /* (06 16 26 36 07 17 27 37) */
|
||||
mmB = _mm_unpackhi_pi32(mmH, mmA); /* (A2 B2 C2 D2 A3 B3 C3 D3) */
|
||||
mmA = _mm_unpacklo_pi32(mmH, mmA); /* (A0 B0 C0 D0 A1 B1 C1 D1) */
|
||||
mmC = _mm_unpacklo_pi32(mm8, mmD); /* (A4 B4 C4 D4 A5 B5 C5 D5) */
|
||||
mmD = _mm_unpackhi_pi32(mm8, mmD); /* (A6 B6 C6 D6 A7 B7 C7 D7) */
|
||||
|
||||
mmH = _mm_unpackhi_pi32(mmG, mmF); /* (0E 1E 2E 3E 0F 1F 2F 3F) */
|
||||
mmG = _mm_unpacklo_pi32(mmG, mmF); /* (0C 1C 2C 3C 0D 1D 2D 3D) */
|
||||
mmF = _mm_unpackhi_pi32(mm9, mmE); /* (0A 1A 2A 3A 0B 1B 2B 3B) */
|
||||
mmE = _mm_unpacklo_pi32(mm9, mmE); /* (08 18 28 38 09 19 29 39) */
|
||||
mmH = _mm_unpackhi_pi32(mmG, mmF); /* (Ae Be Ce De Af Bf Cf Df) */
|
||||
mmG = _mm_unpacklo_pi32(mmG, mmF); /* (Ac Bc Cc Dc Ad Bd Cd Dd) */
|
||||
mmF = _mm_unpackhi_pi32(mm9, mmE); /* (Aa Ba Ca Da Ab Bb Cb Db) */
|
||||
mmE = _mm_unpacklo_pi32(mm9, mmE); /* (A8 B8 C8 D8 A9 B9 C9 D9) */
|
||||
|
||||
if (num_cols >= 8) {
|
||||
if (!(((long)outptr) & 7)) {
|
||||
|
||||
@@ -234,10 +234,11 @@ static uint64_t const_value[] = {
|
||||
\
|
||||
DO_IDCT_COMMON() \
|
||||
\
|
||||
/* out0=(00 10 20 30), out1=(01 11 21 31) */ \
|
||||
/* out2=(02 12 22 32), out3=(03 13 23 33) */ \
|
||||
/* out4=(04 14 24 34), out5=(05 15 25 35) */ \
|
||||
/* out6=(06 16 26 36), out7=(07 17 27 37) */ \
|
||||
/* out0 = (00 10 20 30), out1 = (01 11 21 31) \
|
||||
* out2 = (02 12 22 32), out3 = (03 13 23 33) \
|
||||
* out4 = (04 14 24 34), out5 = (05 15 25 35) \
|
||||
* out6 = (06 16 26 36), out7 = (07 17 27 37) \
|
||||
*/ \
|
||||
\
|
||||
/* Transpose coefficients */ \
|
||||
\
|
||||
@@ -312,11 +313,11 @@ static uint64_t const_value[] = {
|
||||
\
|
||||
DO_IDCT_COMMON() \
|
||||
\
|
||||
/* out0=(00 01 02 03), out1=(10 11 12 13) */ \
|
||||
/* out2=(20 21 22 23), out3=(30 31 32 33) */ \
|
||||
/* out4=(40 41 42 43), out5=(50 51 52 53) */ \
|
||||
/* out6=(60 61 62 63), out7=(70 71 72 73) */ \
|
||||
\
|
||||
/* out0 = (00 01 02 03), out1 = (10 11 12 13) \
|
||||
* out2 = (20 21 22 23), out3 = (30 31 32 33) \
|
||||
* out4 = (40 41 42 43), out5 = (50 51 52 53) \
|
||||
* out6 = (60 61 62 63), out7 = (70 71 72 73) \
|
||||
*/ \
|
||||
out0 = _mm_srai_pi16(out0, PASS1_BITS + 3); \
|
||||
out1 = _mm_srai_pi16(out1, PASS1_BITS + 3); \
|
||||
out2 = _mm_srai_pi16(out2, PASS1_BITS + 3); \
|
||||
|
||||
@@ -392,10 +392,11 @@ static uint64_t const_value[] = {
|
||||
\
|
||||
DO_IDCT_COMMON(1) \
|
||||
\
|
||||
/* out0=(00 10 20 30), out1=(01 11 21 31) */ \
|
||||
/* out2=(02 12 22 32), out3=(03 13 23 33) */ \
|
||||
/* out4=(04 14 24 34), out5=(05 15 25 35) */ \
|
||||
/* out6=(06 16 26 36), out7=(07 17 27 37) */ \
|
||||
/* out0 = (00 10 20 30), out1 = (01 11 21 31) \
|
||||
* out2 = (02 12 22 32), out3 = (03 13 23 33) \
|
||||
* out4 = (04 14 24 34), out5 = (05 15 25 35) \
|
||||
* out6 = (06 16 26 36), out7 = (07 17 27 37) \
|
||||
*/ \
|
||||
\
|
||||
/* Transpose coefficients */ \
|
||||
\
|
||||
@@ -497,11 +498,11 @@ static uint64_t const_value[] = {
|
||||
\
|
||||
DO_IDCT_COMMON(2) \
|
||||
\
|
||||
/* out0=(00 01 02 03), out1=(10 11 12 13) */ \
|
||||
/* out2=(20 21 22 23), out3=(30 31 32 33) */ \
|
||||
/* out4=(40 41 42 43), out5=(50 51 52 53) */ \
|
||||
/* out6=(60 61 62 63), out7=(70 71 72 73) */ \
|
||||
\
|
||||
/* out0 = (00 01 02 03), out1 = (10 11 12 13) \
|
||||
* out2 = (20 21 22 23), out3 = (30 31 32 33) \
|
||||
* out4 = (40 41 42 43), out5 = (50 51 52 53) \
|
||||
* out6 = (60 61 62 63), out7 = (70 71 72 73) \
|
||||
*/ \
|
||||
row06 = _mm_packs_pi16(out0, out6); /* row06 = (00 01 02 03 60 61 62 63) */ \
|
||||
row17 = _mm_packs_pi16(out1, out7); /* row17 = (10 11 12 13 70 71 72 73) */ \
|
||||
row24 = _mm_packs_pi16(out2, out4); /* row24 = (20 21 22 23 40 41 42 43) */ \
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
* Authors: ZhuChen <zhuchen@loongson.cn>
|
||||
* CaiWanwei <caiwanwei@loongson.cn>
|
||||
* SunZhangzhi <sunzhangzhi-cq@loongson.cn>
|
||||
* Copyright (C) 2018-2019, D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C) 2018-2019, 2025, D. R. Commander. All Rights Reserved.
|
||||
*
|
||||
* Based on the x86 SIMD extension for IJG JPEG library
|
||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -62,9 +62,10 @@
|
||||
rowl = _mm_mulhi_pi16(rowl, recipl); \
|
||||
rowh = _mm_mulhi_pi16(rowh, reciph); \
|
||||
\
|
||||
/* reciprocal is always negative (MSB=1), so we always need to add the */ \
|
||||
/* initial value (input value is never negative as we inverted it at the */ \
|
||||
/* start of this routine) */ \
|
||||
/* The reciprocal is always negative (MSB = 1), so we always need to add the \
|
||||
* initial value. (The input value is never negative, as we inverted it at \
|
||||
* the start of this routine.) \
|
||||
*/ \
|
||||
rowlsave = rowl = _mm_add_pi16(rowl, rowlsave); \
|
||||
rowhsave = rowh = _mm_add_pi16(rowh, rowhsave); \
|
||||
\
|
||||
|
||||
Reference in New Issue
Block a user