lavc/mpv_unquantize: R-V V H.263 DCT unquantize

SpacemiT X60:
dct_unquantize_h263_inter_c:                           417.8 ( 1.00x)
dct_unquantize_h263_inter_rvv_i32:                      66.0 ( 6.33x)
dct_unquantize_h263_intra_c:                           140.2 ( 1.00x)
dct_unquantize_h263_intra_rvv_i32:                      67.7 ( 2.07x)

Note that the C benchmarks are not stable, depending heavily on the
number of coefficients picked by the RNG. The R-V V benchmarks are
however very stable and generally better than C's.
This commit is contained in:
Rémi Denis-Courmont
2024-06-08 23:08:21 +03:00
parent c384b1e803
commit f222eb2b08
5 changed files with 120 additions and 0 deletions

View File

@@ -280,6 +280,8 @@ av_cold void ff_mpv_unquantize_init(MPVUnquantDSPContext *s,
ff_mpv_unquantize_init_arm(s, bitexact);
#elif ARCH_PPC
ff_mpv_unquantize_init_ppc(s, bitexact);
#elif ARCH_RISCV
ff_mpv_unquantize_init_riscv(s, bitexact);
#elif ARCH_X86
ff_mpv_unquantize_init_x86(s, bitexact);
#elif ARCH_MIPS

View File

@@ -55,6 +55,7 @@ void ff_mpv_unquantize_init(MPVUnquantDSPContext *s,
void ff_mpv_unquantize_init_arm (MPVUnquantDSPContext *s, int bitexact);
void ff_mpv_unquantize_init_neon(MPVUnquantDSPContext *s, int bitexact);
void ff_mpv_unquantize_init_ppc (MPVUnquantDSPContext *s, int bitexact);
void ff_mpv_unquantize_init_riscv(MPVUnquantDSPContext *s, int bitexact);
void ff_mpv_unquantize_init_x86 (MPVUnquantDSPContext *s, int bitexact);
void ff_mpv_unquantize_init_mips(MPVUnquantDSPContext *s, int bitexact,
int q_scale_type);

View File

@@ -51,6 +51,8 @@ OBJS-$(CONFIG_LPC) += riscv/lpc_init.o
RVV-OBJS-$(CONFIG_LPC) += riscv/lpc_rvv.o
OBJS-$(CONFIG_ME_CMP) += riscv/me_cmp_init.o
RVV-OBJS-$(CONFIG_ME_CMP) += riscv/me_cmp_rvv.o
OBJS-$(CONFIG_MPEGVIDEO) += riscv/mpegvideo_init.o
RVV-OBJS-$(CONFIG_MPEGVIDEO) += riscv/mpegvideo_rvv.o
OBJS-$(CONFIG_MPEGVIDEOENCDSP) += riscv/mpegvideoencdsp_init.o
RVV-OBJS-$(CONFIG_MPEGVIDEOENCDSP) += riscv/mpegvideoencdsp_rvv.o
OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_init.o

View File

@@ -0,0 +1,62 @@
/*
* Copyright © 2022 Rémi Denis-Courmont.
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavcodec/mpegvideo.h"
#include "libavcodec/mpegvideo_unquantize.h"
/*
 * Assembly routine for H.263 intra blocks. It leaves block[0] alone and
 * rescales `len` coefficients starting at block[1]. A non-zero `aic` makes
 * the routine use a zero additive term; otherwise the additive term is
 * derived from qscale.
 */
void ff_h263_dct_unquantize_intra_rvv(const MPVContext *s, int16_t *block,
ptrdiff_t len, int qscale, int aic);
/*
 * Assembly routine for H.263 inter blocks. It rescales `len + 1`
 * coefficients starting at block[0], i.e. `len` is the index of the last
 * coefficient to process.
 */
void ff_h263_dct_unquantize_inter_rvv(const MPVContext *s, int16_t *block,
ptrdiff_t len, int qscale);
/*
 * Intra-block adapter: scales the DC coefficient in C when advanced intra
 * coding (h263_aic) is off, works out how many AC coefficients need to be
 * processed, and delegates the AC part to the vector routine.
 */
static void dct_unquantize_h263_intra_rvv(const MPVContext *s,
                                          int16_t *block, int n, int qscale)
{
    ptrdiff_t last_coeff;

    /* The DC term is only rescaled here when AIC is disabled. */
    if (!s->h263_aic) {
        if (n < 4)
            block[0] *= s->y_dc_scale;
        else
            block[0] *= s->c_dc_scale;
    }

    /* With AC prediction every coefficient up to index 63 is processed. */
    if (s->ac_pred)
        last_coeff = 63;
    else
        last_coeff = s->intra_scantable.raster_end[s->block_last_index[n]];

    ff_h263_dct_unquantize_intra_rvv(s, block, last_coeff, qscale,
                                     s->h263_aic);
}
/*
 * Inter-block adapter: looks up the raster position of the last coded
 * coefficient of block `n` and hands the block to the vector routine.
 */
static void dct_unquantize_h263_inter_rvv(const MPVContext *s,
                                          int16_t *block, int n, int qscale)
{
    const ptrdiff_t last_coeff =
        s->inter_scantable.raster_end[s->block_last_index[n]];

    ff_h263_dct_unquantize_inter_rvv(s, block, last_coeff, qscale);
}
/*
 * Installs the RVV H.263 unquantize functions into `c` when the build has
 * RVV support and the running CPU reports both AV_CPU_FLAG_RVV_I32 and
 * AV_CPU_FLAG_RVB. Otherwise `c` is left untouched. `bitexact` is not
 * consulted.
 */
av_cold
void ff_mpv_unquantize_init_riscv(MPVUnquantDSPContext *c, int bitexact)
{
#if HAVE_RVV
    const int cpu_flags = av_get_cpu_flags();

    /* Both capabilities are required; bail out if either is missing. */
    if (!(cpu_flags & AV_CPU_FLAG_RVV_I32) || !(cpu_flags & AV_CPU_FLAG_RVB))
        return;

    c->dct_unquantize_h263_intra = dct_unquantize_h263_intra_rvv;
    c->dct_unquantize_h263_inter = dct_unquantize_h263_inter_rvv;
#endif
}

View File

@@ -0,0 +1,53 @@
/*
* Copyright © 2024 Rémi Denis-Courmont.
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/riscv/asm.S"
/*
 * Intra entry point. Register roles below assume the standard RISC-V
 * integer calling convention applied to the C prototype:
 * a1 = block, a2 = len, a3 = qscale, a4 = aic.
 *
 * This function contains no loop of its own: it only prepares registers
 * and then continues at local label 1 or 2, both of which are defined in
 * the function that immediately follows. The two functions must therefore
 * stay adjacent and in this order.
 */
func ff_h263_dct_unquantize_intra_rvv
lpad 0
addi a1, a1, 2 /* step over one 16-bit element: start at block[1] */
beqz a4, 1f /* aic == 0: let label 1 derive the additive term from qscale */
slli a3, a3, 1 /* aic != 0: multiplier = 2 * qscale */
mv a4, zero /* aic != 0: additive term = 0 (a4 is reused for it) */
j 2f /* enter the shared vector loop */
endfunc
/*
 * Inter entry point and the loop shared with the intra entry point.
 * Register roles assume the standard RISC-V integer calling convention
 * applied to the C prototype: a1 = block, a2 = len, a3 = qscale.
 *
 * Inside the loop: a1 = current element pointer, a2 = elements remaining,
 * a3 = multiplier (2 * qscale), a4 = additive term.
 * Each non-zero element x becomes x * a3 + a4 when x > 0 and
 * x * a3 - a4 when x < 0; zero elements are stored back unchanged.
 */
func ff_h263_dct_unquantize_inter_rvv, zve32x, zba
lpad 0
addi a2, a2, 1 /* inter processes len + 1 elements, starting at block[0] */
1: /* also reached from the intra entry point when aic == 0 */
addi a4, a3, -1
slli a3, a3, 1 /* multiplier = 2 * qscale */
ori a4, a4, 1 /* additive term = (qscale - 1) | 1 */
2: /* shared loop; also reached from the intra entry point when aic != 0 */
vsetvli t0, a2, e16, m8, ta, mu /* t0 = elements handled this pass; mask-undisturbed is relied on below */
vle16.v v8, (a1)
sub a2, a2, t0
vmv.v.x v16, a4 /* additive term, positive in every lane */
vmslt.vi v0, v8, 0 /* mask = negative elements */
vneg.v v16, v16, v0.t /* negate the additive term in those lanes only */
vmsne.vi v0, v8, 0 /* mask = non-zero elements */
vmadd.vx v8, a3, v16, v0.t /* masked lanes: v8 = v8 * a3 + v16; zero lanes keep their value */
vse16.v v8, (a1)
sh1add a1, t0, a1 /* advance pointer by t0 elements of 2 bytes */
bnez a2, 2b
ret
endfunc