mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2026-01-12 00:06:51 +08:00
avcodec/{arm,neon}/mpegvideo: Fix h263 unquantize functions
These functions currently operate on the assumption that the number of coefficients to process is always of the form 16k+m with m <= 4 or m > 8. Yet this is not true when the IDCT permutation is of type FF_IDCT_PERM_LIBMPEG2 (i.e. when FF_IDCT_INT is in use).
Reviewed-by: Martin Storsjö <martin@martin.st>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -36,7 +36,7 @@ function ff_dct_unquantize_h263_neon, export=1
|
||||
vdup.16 q15, r0 @ qmul
|
||||
vdup.16 q14, r2 @ qadd
|
||||
vneg.s16 q13, q14
|
||||
cmp r3, #4
|
||||
cmp r3, #8
|
||||
mov r0, r1
|
||||
ble 2f
|
||||
1:
|
||||
@@ -62,14 +62,14 @@ function ff_dct_unquantize_h263_neon, export=1
|
||||
cmp r3, #8
|
||||
bgt 1b
|
||||
2:
|
||||
vld1.16 {d0}, [r0,:64]
|
||||
vclt.s16 d3, d0, #0
|
||||
vceq.s16 d1, d0, #0
|
||||
vmul.s16 d2, d0, d30
|
||||
vbsl d3, d26, d28
|
||||
vadd.s16 d2, d2, d3
|
||||
vbif d0, d2, d1
|
||||
vst1.16 {d0}, [r1,:64]
|
||||
vld1.16 {q0}, [r0,:128]
|
||||
vclt.s16 q3, q0, #0
|
||||
vceq.s16 q1, q0, #0
|
||||
vmul.s16 q2, q0, q15
|
||||
vbsl q3, q13, q14
|
||||
vadd.s16 q2, q2, q3
|
||||
vbif q0, q2, q1
|
||||
vst1.16 {q0}, [r1,:128]
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
|
||||
@@ -39,12 +39,7 @@ static void inline ff_dct_unquantize_h263_neon(int qscale, int qadd, int nCoeffs
|
||||
{
|
||||
int16x8_t q0s16, q2s16, q3s16, q8s16, q10s16, q11s16, q13s16;
|
||||
int16x8_t q14s16, q15s16, qzs16;
|
||||
int16x4_t d0s16, d2s16, d3s16, dzs16;
|
||||
uint16x8_t q1u16, q9u16;
|
||||
uint16x4_t d1u16;
|
||||
|
||||
dzs16 = vdup_n_s16(0);
|
||||
qzs16 = vdupq_n_s16(0);
|
||||
|
||||
q15s16 = vdupq_n_s16(qscale << 1);
|
||||
q14s16 = vdupq_n_s16(qadd);
|
||||
@@ -73,15 +68,14 @@ static void inline ff_dct_unquantize_h263_neon(int qscale, int qadd, int nCoeffs
|
||||
if (nCoeffs <= 0)
|
||||
return;
|
||||
|
||||
d0s16 = vld1_s16(block);
|
||||
d3s16 = vreinterpret_s16_u16(vclt_s16(d0s16, dzs16));
|
||||
d1u16 = vceq_s16(d0s16, dzs16);
|
||||
d2s16 = vmul_s16(d0s16, vget_high_s16(q15s16));
|
||||
d3s16 = vbsl_s16(vreinterpret_u16_s16(d3s16),
|
||||
vget_high_s16(q13s16), vget_high_s16(q14s16));
|
||||
d2s16 = vadd_s16(d2s16, d3s16);
|
||||
d0s16 = vbsl_s16(d1u16, d0s16, d2s16);
|
||||
vst1_s16(block, d0s16);
|
||||
q0s16 = vld1q_s16(block);
|
||||
q3s16 = vreinterpretq_s16_u16(vcltq_s16(q0s16, qzs16));
|
||||
q1u16 = vceqq_s16(q0s16, qzs16);
|
||||
q2s16 = vmulq_s16(q0s16, q15s16);
|
||||
q3s16 = vbslq_s16(vreinterpretq_u16_s16(q3s16), q13s16, q14s16);
|
||||
q2s16 = vaddq_s16(q2s16, q3s16);
|
||||
q0s16 = vbslq_s16(q1u16, q0s16, q2s16);
|
||||
vst1q_s16(block, q0s16);
|
||||
}
|
||||
|
||||
static void dct_unquantize_h263_inter_neon(const MPVContext *s, int16_t *block,
|
||||
|
||||
Reference in New Issue
Block a user