avfilter/vf_convolution: add x86 SIMD for filter_3x3()

Tested using a simple command (apply edge enhance):
./ffmpeg_g -i ~/Downloads/bbb_sunflower_1080p_30fps_normal.mp4 \
 -vf convolution="0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:5:1:1:1:0:128:128:128" \
 -an -vframes 1000 -f null /dev/null

The fps increased from 151 to 270 on my local machine.

Signed-off-by: Ruiling Song <ruiling.song@intel.com>
This commit is contained in:
Ruiling Song
2019-06-27 10:07:21 +08:00
parent 6c67c8ca9a
commit 98e419cbf5
5 changed files with 271 additions and 38 deletions

View File

@@ -25,48 +25,11 @@
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "convolution.h"
#include "formats.h"
#include "internal.h"
#include "video.h"
/*
 * Matrix mode selector.
 * NOTE(review): the names suggest square / single-row / single-column
 * kernels -- confirm against the option table and config_input().
 * MATRIX_NBMODES is not a mode; it equals the number of modes above it.
 */
enum MatrixMode {
    MATRIX_SQUARE  = 0,
    MATRIX_ROW     = 1,
    MATRIX_COLUMN  = 2,
    MATRIX_NBMODES = 3,
};
/*
 * Private state of the convolution filter.
 *
 * Every [4] array below has one slot per plane slot; config_input() indexes
 * filter[] with a plane counter p that runs up to nb_planes.
 * NOTE(review): the remaining [4] arrays are presumably indexed by plane the
 * same way -- confirm against the code that fills them.
 */
typedef struct ConvolutionContext {
const AVClass *class;
/* NOTE(review): presumably the user-supplied matrix strings, one per plane
 * (the command line carries four ':'-separated matrices) -- confirm. */
char *matrix_str[4];
/* The filter callbacks below take parameters with the same names (rdiv,
 * bias); presumably these per-slot values are what gets passed -- confirm. */
float rdiv[4];
float bias[4];
/* NOTE(review): presumably holds an enum MatrixMode value per slot -- confirm. */
int mode[4];
float scale;
float delta;
int planes;
int size[4];
int depth;
int max;
int bpc;
int nb_planes;
int nb_threads;
int planewidth[4];
int planeheight[4];
/* Up to 49 integer coefficients per slot.
 * NOTE(review): 49 looks like 7x7, the largest kernel seen in
 * config_input() (filter16_7x7) -- confirm. */
int matrix[4][49];
int matrix_length[4];
int copy[4];
/* Per-slot callback receiving a radius, an array of source pointers c[],
 * the source buffer with its stride, a position (x, y), the dimensions
 * (width, height) and bpc. What it writes into c[] is not visible here. */
void (*setup[4])(int radius, const uint8_t *c[], const uint8_t *src, int stride,
int x, int width, int y, int height, int bpc);
/* Per-slot filtering callback, selected in config_input() (for example
 * filter16_7x7). It receives the destination, rdiv/bias, the coefficient
 * matrix and the c[] pointer array. */
void (*filter[4])(uint8_t *dst, int width,
float rdiv, float bias, const int *const matrix,
const uint8_t *c[], int peak, int radius,
int dstride, int stride);
} ConvolutionContext;
/* Byte offset of member x within ConvolutionContext. */
#define OFFSET(x) offsetof(ConvolutionContext, x)
/*
 * Option flags combined with bitwise OR. The expansion is parenthesized so
 * it stays a single operand wherever it is used; without the parentheses an
 * expression such as `FLAGS & mask` would bind as `A | (B & mask)`.
 */
#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM)
@@ -625,6 +588,8 @@ static int config_input(AVFilterLink *inlink)
s->filter[p] = filter16_7x7;
}
}
if (ARCH_X86_64)
ff_convolution_init_x86(s);
} else if (!strcmp(ctx->filter->name, "prewitt")) {
if (s->depth > 8)
for (p = 0; p < s->nb_planes; p++)