mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2026-02-04 14:30:55 +08:00
avcodec/x86/fpel: Add blocksize x blocksize avg/put functions
This commit deduplicates the wrappers around the fpel functions for copying whole blocks (i.e. height equaling width). It does this in a manner which avoids having to push/pop function arguments when the calling convention forces one to pass them on the stack (as on 32-bit systems).
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -46,36 +46,10 @@ static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride)
|
||||
|
||||
#endif /* HAVE_SSE2_EXTERNAL */
|
||||
|
||||
#if HAVE_MMX_EXTERNAL
|
||||
static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
|
||||
ptrdiff_t stride)
|
||||
{
|
||||
ff_put_pixels8_mmx(dst, src, stride, 8);
|
||||
}
|
||||
|
||||
static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src,
|
||||
ptrdiff_t stride)
|
||||
{
|
||||
ff_avg_pixels8_mmxext(dst, src, stride, 8);
|
||||
}
|
||||
|
||||
static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
|
||||
ptrdiff_t stride)
|
||||
{
|
||||
ff_put_pixels16_sse2(dst, src, stride, 16);
|
||||
}
|
||||
|
||||
static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
|
||||
ptrdiff_t stride)
|
||||
{
|
||||
ff_avg_pixels16_sse2(dst, src, stride, 16);
|
||||
}
|
||||
#endif
|
||||
|
||||
static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c)
|
||||
{
|
||||
#if HAVE_MMX_EXTERNAL
|
||||
c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx;
|
||||
c->put_cavs_qpel_pixels_tab[1][0] = ff_put_pixels8x8_mmx;
|
||||
#endif /* HAVE_MMX_EXTERNAL */
|
||||
}
|
||||
|
||||
@@ -129,12 +103,12 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c)
|
||||
|
||||
#if HAVE_MMX_EXTERNAL
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext;
|
||||
c->avg_cavs_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
|
||||
}
|
||||
#endif
|
||||
#if HAVE_SSE2_EXTERNAL
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
c->put_cavs_qpel_pixels_tab[0][ 0] = put_cavs_qpel16_mc00_sse2;
|
||||
c->put_cavs_qpel_pixels_tab[0][ 0] = ff_put_pixels16x16_sse2;
|
||||
c->put_cavs_qpel_pixels_tab[0][ 2] = put_cavs_qpel16_mc20_sse2;
|
||||
c->put_cavs_qpel_pixels_tab[0][ 4] = put_cavs_qpel16_mc01_sse2;
|
||||
c->put_cavs_qpel_pixels_tab[0][ 8] = put_cavs_qpel16_mc02_sse2;
|
||||
@@ -144,7 +118,7 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c)
|
||||
c->put_cavs_qpel_pixels_tab[1][ 8] = ff_put_cavs_qpel8_mc02_sse2;
|
||||
c->put_cavs_qpel_pixels_tab[1][12] = ff_put_cavs_qpel8_mc03_sse2;
|
||||
|
||||
c->avg_cavs_qpel_pixels_tab[0][ 0] = avg_cavs_qpel16_mc00_sse2;
|
||||
c->avg_cavs_qpel_pixels_tab[0][ 0] = ff_avg_pixels16x16_sse2;
|
||||
c->avg_cavs_qpel_pixels_tab[0][ 2] = avg_cavs_qpel16_mc20_sse2;
|
||||
c->avg_cavs_qpel_pixels_tab[0][ 4] = avg_cavs_qpel16_mc01_sse2;
|
||||
c->avg_cavs_qpel_pixels_tab[0][ 8] = avg_cavs_qpel16_mc02_sse2;
|
||||
|
||||
@@ -35,7 +35,12 @@ SECTION .text
|
||||
%define LOAD movu
|
||||
%define SAVE mova
|
||||
%endif
|
||||
cglobal %1_pixels%2x%2, 3,5,4
|
||||
mov r3d, %2
|
||||
jmp %1_pixels%2_after_prologue
|
||||
|
||||
cglobal %1_pixels%2, 4,5,4
|
||||
%1_pixels%2_after_prologue:
|
||||
lea r4, [r2*3]
|
||||
.loop:
|
||||
LOAD m0, [r1]
|
||||
|
||||
@@ -24,12 +24,20 @@
|
||||
|
||||
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_avg_pixels8x8_mmxext(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size);
|
||||
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_avg_pixels16x16_sse2(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size);
|
||||
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_put_pixels8x8_mmx(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size);
|
||||
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_put_pixels16x16_sse2(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size);
|
||||
|
||||
|
||||
#endif /* AVCODEC_X86_FPEL_H */
|
||||
|
||||
@@ -163,23 +163,6 @@ H264_MC_V(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT)\
|
||||
H264_MC_H(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT)\
|
||||
H264_MC_HV(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT)\
|
||||
|
||||
static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src,
|
||||
ptrdiff_t stride)
|
||||
{
|
||||
ff_put_pixels16_sse2(dst, src, stride, 16);
|
||||
}
|
||||
static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src,
|
||||
ptrdiff_t stride)
|
||||
{
|
||||
ff_avg_pixels16_sse2(dst, src, stride, 16);
|
||||
}
|
||||
|
||||
static void avg_h264_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src,
|
||||
ptrdiff_t stride)
|
||||
{
|
||||
ff_avg_pixels8_mmxext(dst, src, stride, 8);
|
||||
}
|
||||
|
||||
#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN, UNUSED) \
|
||||
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
|
||||
{\
|
||||
@@ -424,7 +407,7 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
if (!high_bit_depth) {
|
||||
SET_QPEL_FUNCS_1PP(put_h264_qpel, 2, 4, mmxext, );
|
||||
c->avg_h264_qpel_pixels_tab[1][0] = avg_h264_qpel8_mc00_mmxext;
|
||||
c->avg_h264_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
|
||||
SET_QPEL_FUNCS_1PP(avg_h264_qpel, 2, 4, mmxext, );
|
||||
c->avg_h264_qpel_pixels_tab[2][0] = ff_avg_pixels4_mmxext;
|
||||
} else if (bit_depth == 10) {
|
||||
@@ -447,8 +430,8 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
|
||||
H264_QPEL_FUNCS(3, 1, sse2);
|
||||
H264_QPEL_FUNCS(3, 2, sse2);
|
||||
H264_QPEL_FUNCS(3, 3, sse2);
|
||||
c->put_h264_qpel_pixels_tab[0][0] = put_h264_qpel16_mc00_sse2;
|
||||
c->avg_h264_qpel_pixels_tab[0][0] = avg_h264_qpel16_mc00_sse2;
|
||||
c->put_h264_qpel_pixels_tab[0][0] = ff_put_pixels16x16_sse2;
|
||||
c->avg_h264_qpel_pixels_tab[0][0] = ff_avg_pixels16x16_sse2;
|
||||
}
|
||||
|
||||
if (bit_depth == 10) {
|
||||
|
||||
@@ -489,19 +489,6 @@ QPEL_OP(put_, _, mmxext, PASSTHROUGH)
|
||||
QPEL_OP(avg_, _, mmxext, STRIP_HEIGHT)
|
||||
QPEL_OP(put_no_rnd_, _no_rnd_, mmxext, PASSTHROUGH)
|
||||
|
||||
#define MC00(OPNAME, SIZE, EXT) \
|
||||
static void OPNAME ## _qpel ## SIZE ## _mc00_ ## EXT(uint8_t *dst, \
|
||||
const uint8_t *src,\
|
||||
ptrdiff_t stride) \
|
||||
{ \
|
||||
ff_ ## OPNAME ## _pixels ## SIZE ##_ ## EXT(dst, src, stride, SIZE);\
|
||||
}
|
||||
|
||||
MC00(put, 8, mmx)
|
||||
MC00(avg, 8, mmxext)
|
||||
MC00(put, 16, sse2)
|
||||
MC00(avg, 16, sse2)
|
||||
|
||||
#endif /* HAVE_X86ASM */
|
||||
|
||||
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
|
||||
@@ -530,12 +517,12 @@ av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
|
||||
if (X86_MMXEXT(cpu_flags)) {
|
||||
#if HAVE_MMXEXT_EXTERNAL
|
||||
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
|
||||
c->avg_qpel_pixels_tab[1][0] = avg_qpel8_mc00_mmxext;
|
||||
c->avg_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
|
||||
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
|
||||
|
||||
SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
|
||||
c->put_no_rnd_qpel_pixels_tab[1][0] =
|
||||
c->put_qpel_pixels_tab[1][0] = put_qpel8_mc00_mmx;
|
||||
c->put_qpel_pixels_tab[1][0] = ff_put_pixels8x8_mmx;
|
||||
SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
|
||||
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
|
||||
SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
|
||||
@@ -544,8 +531,8 @@ av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
|
||||
#if HAVE_SSE2_EXTERNAL
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
c->put_no_rnd_qpel_pixels_tab[0][0] =
|
||||
c->put_qpel_pixels_tab[0][0] = put_qpel16_mc00_sse2;
|
||||
c->avg_qpel_pixels_tab[0][0] = avg_qpel16_mc00_sse2;
|
||||
c->put_qpel_pixels_tab[0][0] = ff_put_pixels16x16_sse2;
|
||||
c->avg_qpel_pixels_tab[0][0] = ff_avg_pixels16x16_sse2;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user