Files
FFmpeg/libavcodec/bswapdsp.c
Zhao Zhili 8777fa60e6 avcodec/bswapdsp: improve performance by removing manual unrolling
Manually unrolling loops increases code size. It can sometimes improve
performance, but more often than not it degrades it.
Keep the C version simple, and add assembly optimizations when needed.

                 x86-clang    x86-gcc-arch-native  x86-msvc     m1-clang      rpi5-clang      rpi5-gcc-14
-------------------------------------------------------------------------------------------------------------
bswap_buf_c      57.3 ( 1.00x)  19.4 ( 1.00x)   55.4 ( 1.00x)   0.5 ( 1.00x)  143.5 ( 1.00x)   59.8 ( 1.00x)
bswap_buf_this*  49.0 ( 1.17x)  12.5 ( 1.56x)   17.7 ( 3.13x)   0.3 ( 2.04x)   57.9 ( 2.48x)   73.5 ( 0.81x)
bswap_buf_sse2   28.4 ( 2.02x)  24.3 ( 0.80x)   25.5 ( 2.18x)   -              -               -
bswap_buf_ssse3  24.6 ( 2.32x)  16.0 ( 1.22x)   19.0 ( 2.92x)   -              -               -
bswap_buf_avx2   21.2 ( 2.70x)  11.1 ( 1.74x)   11.2 ( 4.95x)   -              -               -

bswap_buf_c: C implementation before this patch
bswap_buf_this: C implementation after this patch

Signed-off-by: Zhao Zhili <zhilizhao@tencent.com>
2026-01-10 18:56:26 +00:00

48 lines
1.3 KiB
C

/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/bswap.h"
#include "bswapdsp.h"
/**
 * Byte-swap a buffer of 32-bit words.
 *
 * Stores av_bswap32(src[i]) into dst[i] for every index i in [0, w).
 *
 * @param dst destination buffer, written at indices 0 .. w-1
 * @param src source buffer, read at indices 0 .. w-1
 * @param w   number of 32-bit elements to process; nothing is touched
 *            when w is zero or negative
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
{
    int idx = 0;

    /* Plain element-by-element loop; deliberately not unrolled by hand. */
    while (idx < w) {
        dst[idx] = av_bswap32(src[idx]);
        idx++;
    }
}
/**
 * Byte-swap a buffer of 16-bit words.
 *
 * Stores av_bswap16(src[i]) into dst[i] for every index i in [0, len).
 *
 * @param dst destination buffer, written at indices 0 .. len-1
 * @param src source buffer, read at indices 0 .. len-1
 * @param len number of 16-bit elements to process; nothing is touched
 *            when len is zero or negative
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    /* Bound the loop with i < len instead of counting len down to zero:
     * with a negative len a count-down loop would not stop at the buffer
     * end and would read and write far outside both arrays. */
    for (int i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
/**
 * Initialise a BswapDSPContext.
 *
 * Both function pointers are first set to the C implementations in this
 * file. At most one architecture-specific init routine is then called on
 * the same context, selected at compile time; presumably it replaces
 * entries with optimised versions (those routines are defined elsewhere).
 *
 * @param c context whose bswap_buf and bswap16_buf pointers are filled in
 */
av_cold void ff_bswapdsp_init(BswapDSPContext *c)
{
    /* Portable C defaults; must be assigned before any arch init runs. */
    c->bswap16_buf = bswap16_buf;
    c->bswap_buf   = bswap_buf;

    /* Compile-time dispatch to the architecture-specific initialiser. */
#if ARCH_RISCV
    ff_bswapdsp_init_riscv(c);
#elif ARCH_X86 && HAVE_X86ASM
    ff_bswapdsp_init_x86(c);
#endif
}