Mirror of https://github.com/FFmpeg/FFmpeg.git (synced 2026-01-12 00:06:51 +08:00)
avcodec/ppc/vc1dsp_altivec: Don't read too much data
vc1_inv_trans_8x4_altivec() is supposed to process a block of 8x4 words, yet it read and processed eight lines. This led to ASAN failures (see [1]) that this commit intends to fix. It should also lead to performance improvements, but I don't have real hardware to bench it.

[1]: https://fate.ffmpeg.org/report.cgi?time=20251207214004&slot=ppc64-linux-gcc-14.3-asan

Reviewed-by: Sean McGovern <gseanmcg@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -235,7 +235,7 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride,
|
||||
{
|
||||
vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
|
||||
vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
|
||||
vector signed int s8, s9, sA, sB;
|
||||
vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
|
||||
const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));
|
||||
const vector unsigned int vec_7 = vec_splat_u32(7);
|
||||
@@ -253,40 +253,42 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride,
|
||||
src1 = vec_ld( 16, block);
|
||||
src2 = vec_ld( 32, block);
|
||||
src3 = vec_ld( 48, block);
|
||||
src4 = vec_ld( 64, block);
|
||||
src5 = vec_ld( 80, block);
|
||||
src6 = vec_ld( 96, block);
|
||||
src7 = vec_ld(112, block);
|
||||
|
||||
TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
s0 = vec_unpackl(src0);
|
||||
s1 = vec_unpackl(src1);
|
||||
s2 = vec_unpackl(src2);
|
||||
s3 = vec_unpackl(src3);
|
||||
s4 = vec_unpackl(src4);
|
||||
s5 = vec_unpackl(src5);
|
||||
s6 = vec_unpackl(src6);
|
||||
s7 = vec_unpackl(src7);
|
||||
s8 = vec_unpackh(src0);
|
||||
s9 = vec_unpackh(src1);
|
||||
sA = vec_unpackh(src2);
|
||||
sB = vec_unpackh(src3);
|
||||
sC = vec_unpackh(src4);
|
||||
sD = vec_unpackh(src5);
|
||||
sE = vec_unpackh(src6);
|
||||
sF = vec_unpackh(src7);
|
||||
// Transpose 8x4 matrix of 16-bit elements (in-place)
|
||||
vec_s16 A1, B1, C1, D1;
|
||||
vec_s16 A2, B2, C2, D2;
|
||||
|
||||
A1 = vec_mergeh(src0, src2);
|
||||
B1 = vec_mergel(src0, src2);
|
||||
C1 = vec_mergeh(src1, src3);
|
||||
D1 = vec_mergel(src1, src3);
|
||||
|
||||
A2 = vec_mergeh(A1, C1);
|
||||
B2 = vec_mergel(A1, C1);
|
||||
C2 = vec_mergeh(B1, D1);
|
||||
D2 = vec_mergel(B1, D1);
|
||||
|
||||
s0 = vec_unpackh(A2);
|
||||
s1 = vec_unpackl(A2);
|
||||
s2 = vec_unpackh(B2);
|
||||
s3 = vec_unpackl(B2);
|
||||
s4 = vec_unpackh(C2);
|
||||
s5 = vec_unpackl(C2);
|
||||
s6 = vec_unpackh(D2);
|
||||
s7 = vec_unpackl(D2);
|
||||
|
||||
STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
|
||||
SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
|
||||
STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
|
||||
SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
|
||||
src0 = vec_pack(s8, s0);
|
||||
src1 = vec_pack(s9, s1);
|
||||
src2 = vec_pack(sA, s2);
|
||||
src3 = vec_pack(sB, s3);
|
||||
src4 = vec_pack(sC, s4);
|
||||
src5 = vec_pack(sD, s5);
|
||||
src6 = vec_pack(sE, s6);
|
||||
src7 = vec_pack(sF, s7);
|
||||
|
||||
src0 = vec_pack(s0, s0);
|
||||
src1 = vec_pack(s1, s1);
|
||||
src2 = vec_pack(s2, s2);
|
||||
src3 = vec_pack(s3, s3);
|
||||
src4 = vec_pack(s4, s4);
|
||||
src5 = vec_pack(s5, s5);
|
||||
src6 = vec_pack(s6, s6);
|
||||
src7 = vec_pack(s7, s7);
|
||||
|
||||
TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
|
||||
s0 = vec_unpackh(src0);
|
||||
|
||||
Reference in New Issue
Block a user