mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2026-01-12 00:06:51 +08:00
Manually unrolling loops increases code size, which can sometimes
improve performance, but more often than not, it degrades performance.
Keep the C version simple, and add assembly optimizations when needed.
                  x86-clang       x86-gcc-arch-native  x86-msvc        m1-clang        rpi5-clang      pi5-gcc-14
---------------------------------------------------------------------------------------------------------------------
bswap_buf_c        57.3 ( 1.00x)   19.4 ( 1.00x)        55.4 ( 1.00x)    0.5 ( 1.00x)  143.5 ( 1.00x)   59.8 ( 1.00x)
bswap_buf_this*    49.0 ( 1.17x)   12.5 ( 1.56x)        17.7 ( 3.13x)    0.3 ( 2.04x)   57.9 ( 2.48x)   73.5 ( 0.81x)
bswap_buf_sse2     28.4 ( 2.02x)   24.3 ( 0.80x)        25.5 ( 2.18x)      -               -               -
bswap_buf_ssse3    24.6 ( 2.32x)   16.0 ( 1.22x)        19.0 ( 2.92x)      -               -               -
bswap_buf_avx2     21.2 ( 2.70x)   11.1 ( 1.74x)        11.2 ( 4.95x)      -               -               -
bswap_buf_c: C implementation before this patch
bswap_buf_this: C implementation after this patch
Signed-off-by: Zhao Zhili <zhilizhao@tencent.com>
48 lines
1.3 KiB
C
48 lines
1.3 KiB
C
/*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include <stdint.h>
|
|
|
|
#include "libavutil/attributes.h"
|
|
#include "libavutil/bswap.h"
|
|
#include "bswapdsp.h"
|
|
|
|
/**
 * Portable C fallback: store av_bswap32() of each 32-bit word of src
 * into the matching slot of dst.
 *
 * @param dst destination buffer, written at indices 0 .. w-1
 * @param src source buffer, read at indices 0 .. w-1
 * @param w   number of 32-bit words to process; nothing is touched
 *            when w <= 0
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
{
    /* Deliberately a plain indexed loop with no manual unrolling. */
    for (int n = 0; n < w; n++) {
        const uint32_t word = src[n];

        dst[n] = av_bswap32(word);
    }
}
|
|
|
|
/**
 * Portable C fallback: store av_bswap16() of each 16-bit element of src
 * into the matching slot of dst.
 *
 * @param dst destination buffer, written at indices 0 .. len-1
 * @param src source buffer, read at indices 0 .. len-1
 * @param len number of 16-bit elements to process; nothing is touched
 *            when len <= 0
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    /*
     * Bounded indexed loop rather than `while (len--)`: a negative len
     * would otherwise keep decrementing, writing far out of bounds until
     * the counter overflows. This matches the loop shape of bswap_buf().
     */
    for (int i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
|
|
|
|
/**
 * Populate a BswapDSPContext with byte-swap routines.
 *
 * Both function pointers are first set to the C implementations in this
 * file; the context is then passed to at most one architecture-specific
 * initializer, selected at compile time.
 *
 * @param c context whose bswap_buf and bswap16_buf pointers are set
 */
av_cold void ff_bswapdsp_init(BswapDSPContext *c)
{
    /* Start from the portable C implementations. */
    c->bswap16_buf = bswap16_buf;
    c->bswap_buf   = bswap_buf;

    /*
     * RISC-V takes precedence; the x86 initializer is only called when
     * x86 assembly is enabled. Presumably these replace the pointers above
     * with optimized versions -- their bodies are not visible here.
     */
#if ARCH_RISCV
    ff_bswapdsp_init_riscv(c);
#elif ARCH_X86 && HAVE_X86ASM
    ff_bswapdsp_init_x86(c);
#endif
}
|