avcodec/vp9mc: Remove MMXEXT functions overridden by SSSE3

SSSE3 is already quite old (introduced 2006 for Intel, 2011 for AMD),
so that the overwhelming majority of our users (particularly those
that actually update their FFmpeg) will be using the SSSE3 versions.
This commit therefore removes the MMXEXT functions overridden
by them (which don't abide by the ABI) to get closer to a removal
of emms_c.

Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt
2025-11-30 18:08:38 +01:00
parent a8df08f628
commit 6e418af810
4 changed files with 29 additions and 33 deletions

View File

@@ -41,7 +41,6 @@ decl_fpel_func(put, 64, , avx);
decl_fpel_func(avg, 32, _8, avx2);
decl_fpel_func(avg, 64, _8, avx2);
decl_mc_funcs(4, mmxext, int16_t, 8, 8);
decl_mc_funcs(8, sse2, int16_t, 8, 8);
decl_mc_funcs(4, ssse3, int8_t, 32, 8);
decl_mc_funcs(8, ssse3, int8_t, 32, 8);
@@ -70,10 +69,11 @@ mc_rep_funcs(64, 32, 32, avx2, int8_t, 32, 8)
extern const int8_t ff_filters_ssse3[3][15][4][32];
extern const int16_t ff_filters_sse2[3][15][8][8];
filters_8tap_2d_fn2(put, 16, 8, 1, mmxext, sse2, sse2)
filters_8tap_2d_fn2(avg, 16, 8, 1, mmxext, sse2, sse2)
filters_8tap_2d_fn2(put, 16, 8, 1, ssse3, ssse3, ssse3)
filters_8tap_2d_fn2(avg, 16, 8, 1, ssse3, ssse3, ssse3)
filters_8tap_2d_fn2(put, 16, 8, 1, sse2, sse2)
filters_8tap_2d_fn2(avg, 16, 8, 1, sse2, sse2)
filters_8tap_2d_fn3(put, 16, 8, 1, ssse3, ssse3)
filters_8tap_2d_fn3(avg, 16, 8, 1, ssse3, ssse3)
#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
filters_8tap_2d_fn(put, 64, 32, 8, 1, avx2, ssse3)
filters_8tap_2d_fn(put, 32, 32, 8, 1, avx2, ssse3)
@@ -81,10 +81,10 @@ filters_8tap_2d_fn(avg, 64, 32, 8, 1, avx2, ssse3)
filters_8tap_2d_fn(avg, 32, 32, 8, 1, avx2, ssse3)
#endif
filters_8tap_1d_fn3(put, 8, mmxext, sse2, sse2)
filters_8tap_1d_fn3(avg, 8, mmxext, sse2, sse2)
filters_8tap_1d_fn3(put, 8, ssse3, ssse3, ssse3)
filters_8tap_1d_fn3(avg, 8, ssse3, ssse3, ssse3)
filters_8tap_1d_fn3(put, 8, sse2, sse2)
filters_8tap_1d_fn3(avg, 8, sse2, sse2)
filters_8tap_1d_fn4(put, 8, ssse3, ssse3)
filters_8tap_1d_fn4(avg, 8, ssse3, ssse3)
#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
filters_8tap_1d_fn2(put, 64, 8, avx2, ssse3)
filters_8tap_1d_fn2(put, 32, 8, avx2, ssse3)
@@ -285,8 +285,6 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
dsp->loop_filter_8[0][1] = ff_vp9_loop_filter_v_4_8_mmxext;
dsp->loop_filter_8[1][0] = ff_vp9_loop_filter_h_8_8_mmxext;
dsp->loop_filter_8[1][1] = ff_vp9_loop_filter_v_8_8_mmxext;
init_subpel2(4, 0, 4, put, 8, mmxext);
init_subpel2(4, 1, 4, avg, 8, mmxext);
init_fpel_func(4, 1, 4, avg, _8, mmxext);
init_fpel_func(3, 1, 8, avg, _8, mmxext);
dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext;

View File

@@ -107,12 +107,15 @@ filter_8tap_1d_fn(op, sz, FILTER_8TAP_SMOOTH, f_opt, smooth, dir, dvar, bpp, o
filters_8tap_1d_fn(op, sz, h, mx, bpp, opt, f_opt) \
filters_8tap_1d_fn(op, sz, v, my, bpp, opt, f_opt)
#define filters_8tap_1d_fn3(op, bpp, opt4, opt8, f_opt) \
#define filters_8tap_1d_fn3(op, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 64, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 32, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 16, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 8, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 4, bpp, opt4, f_opt)
#define filters_8tap_1d_fn4(op, bpp, opt, f_opt) \
filters_8tap_1d_fn3(op, bpp, opt, f_opt) \
filters_8tap_1d_fn2(op, 4, bpp, opt, f_opt) \
#define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, bpp, bytes, opt) \
static void op##_8tap_##fname##_##sz##hv_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
@@ -133,12 +136,15 @@ filter_8tap_2d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, align, bpp, bytes
filter_8tap_2d_fn(op, sz, FILTER_8TAP_SHARP, f_opt, sharp, align, bpp, bytes, opt) \
filter_8tap_2d_fn(op, sz, FILTER_8TAP_SMOOTH, f_opt, smooth, align, bpp, bytes, opt)
#define filters_8tap_2d_fn2(op, align, bpp, bytes, opt4, opt8, f_opt) \
#define filters_8tap_2d_fn2(op, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 64, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 32, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 16, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 8, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt)
#define filters_8tap_2d_fn3(op, align, bpp, bytes, opt, f_opt) \
filters_8tap_2d_fn2(op, align, bpp, bytes, opt, f_opt) \
filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt, f_opt)
#define init_fpel_func(idx1, idx2, sz, type, bpp, opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \

View File

@@ -40,8 +40,8 @@ mc_rep_funcs(32, 16, 32, avx2, int16_t, 16, BPC)
mc_rep_funcs(64, 32, 64, avx2, int16_t, 16, BPC)
#endif
filters_8tap_2d_fn2(put, 16, BPC, 2, sse2, sse2, 16bpp)
filters_8tap_2d_fn2(avg, 16, BPC, 2, sse2, sse2, 16bpp)
filters_8tap_2d_fn3(put, 16, BPC, 2, sse2, 16bpp)
filters_8tap_2d_fn3(avg, 16, BPC, 2, sse2, 16bpp)
#if HAVE_AVX2_EXTERNAL
filters_8tap_2d_fn(put, 64, 32, BPC, 2, avx2, 16bpp)
filters_8tap_2d_fn(avg, 64, 32, BPC, 2, avx2, 16bpp)
@@ -51,8 +51,8 @@ filters_8tap_2d_fn(put, 16, 32, BPC, 2, avx2, 16bpp)
filters_8tap_2d_fn(avg, 16, 32, BPC, 2, avx2, 16bpp)
#endif
filters_8tap_1d_fn3(put, BPC, sse2, sse2, 16bpp)
filters_8tap_1d_fn3(avg, BPC, sse2, sse2, 16bpp)
filters_8tap_1d_fn4(put, BPC, sse2, 16bpp)
filters_8tap_1d_fn4(avg, BPC, sse2, 16bpp)
#if HAVE_AVX2_EXTERNAL
filters_8tap_1d_fn2(put, 64, BPC, avx2, 16bpp)
filters_8tap_1d_fn2(avg, 64, BPC, avx2, 16bpp)

View File

@@ -205,7 +205,7 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 15, dst, dstride, src, sstride, h
pxor m5, m5
mova m6, [pw_64]
mova m7, [filteryq+ 0]
%if ARCH_X86_64 && mmsize > 8
%if ARCH_X86_64
mova m8, [filteryq+ 16]
mova m9, [filteryq+ 32]
mova m10, [filteryq+ 48]
@@ -226,7 +226,7 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 15, dst, dstride, src, sstride, h
punpcklbw m3, m5
punpcklbw m4, m5
pmullw m0, m7
%if ARCH_X86_64 && mmsize > 8
%if ARCH_X86_64
pmullw m1, m8
pmullw m2, m9
pmullw m3, m10
@@ -247,7 +247,7 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 15, dst, dstride, src, sstride, h
punpcklbw m1, m5
punpcklbw m3, m5
punpcklbw m4, m5
%if ARCH_X86_64 && mmsize > 8
%if ARCH_X86_64
pmullw m1, m12
pmullw m3, m13
pmullw m4, m14
@@ -276,10 +276,6 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 15, dst, dstride, src, sstride, h
RET
%endmacro
INIT_MMX mmxext
filter_sse2_h_fn put
filter_sse2_h_fn avg
INIT_XMM sse2
filter_sse2_h_fn put
filter_sse2_h_fn avg
@@ -421,7 +417,7 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 15, dst, dstride, src, sstride, f
lea src4q, [srcq+sstrideq]
sub srcq, sstride3q
mova m7, [filteryq+ 0]
%if ARCH_X86_64 && mmsize > 8
%ifdef m8
mova m8, [filteryq+ 16]
mova m9, [filteryq+ 32]
mova m10, [filteryq+ 48]
@@ -446,7 +442,7 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 15, dst, dstride, src, sstride, f
punpcklbw m3, m5
punpcklbw m4, m5
pmullw m0, m7
%if ARCH_X86_64 && mmsize > 8
%ifdef m8
pmullw m1, m8
pmullw m2, m9
pmullw m3, m10
@@ -467,7 +463,7 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 15, dst, dstride, src, sstride, f
punpcklbw m1, m5
punpcklbw m3, m5
punpcklbw m4, m5
%if ARCH_X86_64 && mmsize > 8
%ifdef m8
pmullw m1, m12
pmullw m3, m13
pmullw m4, m14
@@ -496,10 +492,6 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 15, dst, dstride, src, sstride, f
RET
%endmacro
INIT_MMX mmxext
filter_sse2_v_fn put
filter_sse2_v_fn avg
INIT_XMM sse2
filter_sse2_v_fn put
filter_sse2_v_fn avg