From 531ce713a0e848069d18311e38977b5bf8b9499d Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 29 Oct 2025 15:27:47 +0100 Subject: [PATCH] dpxdec: add a Vulkan hwaccel --- configure | 2 + libavcodec/Makefile | 1 + libavcodec/dpx.c | 5 + libavcodec/hwaccels.h | 1 + libavcodec/vulkan/Makefile | 4 + libavcodec/vulkan/dpx_copy.comp | 51 ++++ libavcodec/vulkan/dpx_unpack.comp | 83 ++++++ libavcodec/vulkan_decode.c | 16 + libavcodec/vulkan_dpx.c | 475 ++++++++++++++++++++++++++++++ 9 files changed, 638 insertions(+) create mode 100644 libavcodec/vulkan/dpx_copy.comp create mode 100644 libavcodec/vulkan/dpx_unpack.comp create mode 100644 libavcodec/vulkan_dpx.c diff --git a/configure b/configure index 7ef50095a3..fd6f602e1d 100755 --- a/configure +++ b/configure @@ -3262,6 +3262,8 @@ av1_videotoolbox_hwaccel_deps="videotoolbox" av1_videotoolbox_hwaccel_select="av1_decoder" av1_vulkan_hwaccel_deps="vulkan" av1_vulkan_hwaccel_select="av1_decoder" +dpx_vulkan_hwaccel_deps="vulkan spirv_compiler" +dpx_vulkan_hwaccel_select="dpx_decoder" ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler" ffv1_vulkan_hwaccel_select="ffv1_decoder" h263_vaapi_hwaccel_deps="vaapi" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index fba9f0aff0..45c8237181 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1050,6 +1050,7 @@ OBJS-$(CONFIG_AV1_VAAPI_HWACCEL) += vaapi_av1.o OBJS-$(CONFIG_AV1_VDPAU_HWACCEL) += vdpau_av1.o OBJS-$(CONFIG_AV1_VIDEOTOOLBOX_HWACCEL) += videotoolbox_av1.o OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_decode.o vulkan_av1.o +OBJS-$(CONFIG_DPX_VULKAN_HWACCEL) += vulkan_decode.o vulkan_dpx.o OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan_decode.o ffv1_vulkan.o vulkan_ffv1.o OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c index 47efcb7572..7355b50f7a 100644 --- a/libavcodec/dpx.c +++ b/libavcodec/dpx.c @@ -837,7 +837,12 @@ const FFCodec ff_dpx_decoder = { 
.close = decode_end, UPDATE_THREAD_CONTEXT(update_thread_context), .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | + FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM, .hw_configs = (const AVCodecHWConfigInternal *const []) { +#if CONFIG_DPX_VULKAN_HWACCEL + HWACCEL_VULKAN(dpx), +#endif NULL }, }; diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index 638a7bfb1d..3de191288a 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -28,6 +28,7 @@ extern const struct FFHWAccel ff_av1_vaapi_hwaccel; extern const struct FFHWAccel ff_av1_vdpau_hwaccel; extern const struct FFHWAccel ff_av1_videotoolbox_hwaccel; extern const struct FFHWAccel ff_av1_vulkan_hwaccel; +extern const struct FFHWAccel ff_dpx_vulkan_hwaccel; extern const struct FFHWAccel ff_ffv1_vulkan_hwaccel; extern const struct FFHWAccel ff_h263_vaapi_hwaccel; extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel; diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index 16a4116ef1..26e8e147c2 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -23,6 +23,10 @@ OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o \ vulkan/prores_vld.o \ vulkan/prores_idct.o +OBJS-$(CONFIG_DPX_VULKAN_HWACCEL) += vulkan/common.o \ + vulkan/dpx_unpack.o \ + vulkan/dpx_copy.o + VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp)) .SECONDARY: $(VULKAN:.comp=.c) libavcodec/vulkan/%.c: TAG = VULKAN diff --git a/libavcodec/vulkan/dpx_copy.comp b/libavcodec/vulkan/dpx_copy.comp new file mode 100644 index 0000000000..da0a11db93 --- /dev/null +++ b/libavcodec/vulkan/dpx_copy.comp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2025 Lynne + * + * This file is part of FFmpeg. 
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+TYPE read_data(uint off) /* data[off], passed through TYPE_REVERSE if BIG_ENDIAN is set */
+{
+#ifdef BIG_ENDIAN
+    return TYPE_REVERSE(data[off]);
+#else
+    return data[off];
+#endif
+}
+
+void main(void) /* each invocation handles the pixel at its global xy id */
+{
+    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
+    if (!IS_WITHIN(pos, imageSize(dst[0]))) /* IS_WITHIN is defined elsewhere; presumably a bounds check */
+        return;
+
+    uint offs = (pos.y*imageSize(dst[0]).x + pos.x)*COMPONENTS; /* index of this pixel's first component in data[] */
+#if NB_IMAGES == 1
+    TYPE_VEC val; /* only the first COMPONENTS entries are assigned below */
+    for (int i = 0; i < COMPONENTS; i++)
+        val[i] = read_data(offs + i);
+    val >>= SHIFT;
+    imageStore(dst[0], pos, val);
+#else
+    const ivec4 fmt_lut = ivec4(2, 0, 1, 3); /* component i is stored to dst[fmt_lut[i]] */
+    for (int i = 0; i < COMPONENTS; i++) {
+        TYPE val = read_data(offs + i);
+        val >>= SHIFT;
+        imageStore(dst[fmt_lut[i]], pos, TYPE_VEC(val));
+    }
+#endif
+}
diff --git a/libavcodec/vulkan/dpx_unpack.comp b/libavcodec/vulkan/dpx_unpack.comp
new file mode 100644
index 0000000000..5a44de87bf
--- /dev/null
+++ b/libavcodec/vulkan/dpx_unpack.comp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2025 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+uint32_t read_data(uint off) /* data[off], passed through reverse4() if BIG_ENDIAN is set */
+{
+#ifdef BIG_ENDIAN
+    return reverse4(data[off]);
+#else
+    return data[off];
+#endif
+}
+
+#ifdef PACKED_10BIT
+i16vec4 parse_packed_in_32(ivec2 pos, int stride) /* one 32-bit word per pixel, three 10-bit fields taken from the top */
+{
+    uint32_t d = read_data(pos.y*stride + pos.x);
+    i16vec4 v;
+    d = d << 10 | d >> 22 & 0x3FFFFF; /* rotate left by 10 bits: the topmost field lands in the low 10 bits */
+    v[0] = int16_t(d & 0x3FF);
+    d = d << 10 | d >> 22 & 0x3FFFFF;
+    v[1] = int16_t(d & 0x3FF);
+    d = d << 10 | d >> 22 & 0x3FFFFF;
+    v[2] = int16_t(d & 0x3FF);
+    v[3] = int16_t(0);
+    return v;
+}
+#else
+i16vec4 parse_packed_in_32(ivec2 pos, int stride) /* stride is this parameter (main passes the image width), not the push constant of the same name */
+{
+    uint line_off = pos.y*(stride*BITS_PER_COMP*COMPONENTS + /* bit offset of the row; need_align << 3 presumably converts bytes to bits */
+                           (need_align << 3));
+    uint pix_off = pos.x*BITS_PER_COMP*COMPONENTS; /* bit offset of the pixel inside its row */
+
+    uint off = (line_off + pix_off >> 5); /* 32-bit word index; the addition is evaluated before the shift */
+    uint bit = pix_off & 0x1f; /* NOTE(review): line_off is not included, which is only right if rows start 32-bit aligned - confirm */
+
+    uint32_t d0 = read_data(off + 0);
+    uint32_t d1 = read_data(off + 1); /* following word, so values crossing a word boundary can be extracted */
+
+    uint64_t combined = (uint64_t(d1) << 32) | d0;
+    combined >>= bit;
+
+    return i16vec4(combined,
+                   combined >> (BITS_PER_COMP*1),
+                   combined >> (BITS_PER_COMP*2),
+                   combined >> (BITS_PER_COMP*3)) &
+           int16_t((1 << BITS_PER_COMP) - 1);
+}
+#endif
+
+void main(void) /* each invocation handles the pixel at its global xy id */
+{
+    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
+    if (!IS_WITHIN(pos, imageSize(dst[0]))) /* IS_WITHIN is defined elsewhere; presumably a bounds check */
+        return;
+
+    i16vec4 p = parse_packed_in_32(pos, imageSize(dst[0]).x);
+
+#if NB_IMAGES == 1
+    imageStore(dst[0], pos, p);
+#else
+    const ivec4 fmt_lut = COMPONENTS == 1 ? ivec4(0) : ivec4(2, 0, 1, 3); /* component i is stored to dst[fmt_lut[i]] */
+    for (uint i = 0; i < COMPONENTS; i++)
+        imageStore(dst[fmt_lut[i]], pos, i16vec4(p[i]));
+#endif
+}
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index d22ccc21aa..d6f6ec8c3b 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -26,6 +26,7 @@
 
 #define DECODER_IS_SDR(codec_id) \
     (((codec_id) == AV_CODEC_ID_FFV1) || \
+     ((codec_id) == AV_CODEC_ID_DPX) || \
      ((codec_id) == AV_CODEC_ID_PRORES_RAW) || \
      ((codec_id) == AV_CODEC_ID_PRORES))
 
@@ -50,6 +51,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc;
 #if CONFIG_PRORES_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc;
 #endif
+#if CONFIG_DPX_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_dpx_desc;
+#endif
 
 static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_H264_VULKAN_HWACCEL
@@ -73,6 +77,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_PRORES_VULKAN_HWACCEL
     &ff_vk_dec_prores_desc,
 #endif
+#if CONFIG_DPX_VULKAN_HWACCEL
+    &ff_vk_dec_dpx_desc,
+#endif
 };
 
 typedef struct FFVulkanDecodeProfileData {
@@ -1117,10 +1124,19 @@ int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
             /* This should be more efficient for downloading and using */
             frames_ctx->sw_format = AV_PIX_FMT_RGBA64;
             break;
+        case AV_PIX_FMT_RGB48LE:
+        case AV_PIX_FMT_RGB48BE: /* DPX outputs RGB48BE, so we need both */
+            /* Almost nothing supports native 3-component RGB */
+            frames_ctx->sw_format = AV_PIX_FMT_GBRP16;
+            break;
+        case AV_PIX_FMT_RGBA64BE: /* DPX again, fix for little-endian systems */
+            frames_ctx->sw_format = AV_PIX_FMT_RGBA64;
+            break;
         case AV_PIX_FMT_GBRP10:
             /* This saves memory bandwidth when downloading */
             frames_ctx->sw_format = AV_PIX_FMT_X2BGR10;
             break;
+        case AV_PIX_FMT_RGB24:
         case AV_PIX_FMT_BGR0:
             /* mpv has issues with bgr0 mapping, so just remap it */
             frames_ctx->sw_format = AV_PIX_FMT_RGB0;
diff --git a/libavcodec/vulkan_dpx.c
b/libavcodec/vulkan_dpx.c
new file mode 100644
index 0000000000..1af417fdf7
--- /dev/null
+++ b/libavcodec/vulkan_dpx.c
@@ -0,0 +1,565 @@
+/*
+ * Copyright (c) 2025 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_decode.h"
+#include "hwaccel_internal.h"
+
+#include "dpx.h"
+#include "libavutil/vulkan_spirv.h"
+#include "libavutil/mem.h"
+
+extern const char *ff_source_common_comp;
+extern const char *ff_source_dpx_unpack_comp;
+extern const char *ff_source_dpx_copy_comp;
+
+/* Registered in the dec_descs[] table of vulkan_decode.c */
+const FFVulkanDecodeDescriptor ff_vk_dec_dpx_desc = {
+    .codec_id         = AV_CODEC_ID_DPX,
+    .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR,
+    .queue_flags      = VK_QUEUE_COMPUTE_BIT,
+};
+
+/* Per-frame hwaccel private data (see frame_priv_data_size below) */
+typedef struct DPXVulkanDecodePicture {
+    FFVulkanDecodePicture vp;
+} DPXVulkanDecodePicture;
+
+/* Per-decoder state, stored in FFVulkanDecodeShared.sd_ctx */
+typedef struct DPXVulkanDecodeContext {
+    FFVulkanShader shader;
+    AVBufferPool *frame_data_pool; /* buffers for host_upload_image() */
+} DPXVulkanDecodeContext;
+
+/* Must stay in sync with the pushConstants block written by init_shader() */
+typedef struct DecodePushData {
+    int stride;
+    int need_align;
+    int padded_10bit;
+} DecodePushData;
+
+/**
+ * Try to place the frame payload in device memory through
+ * VK_EXT_host_image_copy.
+ *
+ * A temporary linear image is created on top of a pooled buffer's memory,
+ * the payload is copied into it from the host, and the image is destroyed
+ * again; the buffer is left in vp->slices_buf for the shader to read.
+ *
+ * This is an optimization only. When it does not apply (payloads that need
+ * bit unpacking, or dimensions above the device's 2D image limit) 0 is
+ * returned and vp->slices_buf is not touched. On failure a negative error
+ * code is returned and vp->slices_buf is left unset, so that the caller can
+ * fall back to another way of providing the data.
+ *
+ * NOTE(review): the image is bound to a buffer of `size` bytes without
+ * querying the image's memory requirements - confirm this is sufficient.
+ */
+static int host_upload_image(AVCodecContext *avctx,
+                             FFVulkanDecodeContext *dec, DPXDecContext *dpx,
+                             const uint8_t *src, uint32_t size)
+{
+    int err;
+    VkResult ret;
+    VkImage temp;
+
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    DPXVulkanDecodeContext *dxv = ctx->sd_ctx;
+    VkPhysicalDeviceLimits *limits = &ctx->s.props.properties.limits;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    /* Same condition init_shader() uses to select the unpack shader */
+    int unpack = (avctx->bits_per_raw_sample == 12 && !dpx->packing) ||
+                 avctx->bits_per_raw_sample == 10;
+    if (unpack)
+        return 0;
+
+    /* One texel per component, hence the width multiplied by components */
+    VkImageCreateInfo create_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .imageType = VK_IMAGE_TYPE_2D,
+        .format = avctx->bits_per_raw_sample == 8 ? VK_FORMAT_R8_UINT :
+                  avctx->bits_per_raw_sample == 32 ? VK_FORMAT_R32_UINT :
+                                                     VK_FORMAT_R16_UINT,
+        .extent.width = dpx->frame->width*dpx->components,
+        .extent.height = dpx->frame->height,
+        .extent.depth = 1,
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .tiling = VK_IMAGE_TILING_LINEAR,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        .usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .pQueueFamilyIndices = &ctx->qf[0].idx,
+        .queueFamilyIndexCount = 1,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+    };
+
+    /* maxImageDimension2D itself is still a valid dimension */
+    if (create_info.extent.width > limits->maxImageDimension2D ||
+        create_info.extent.height > limits->maxImageDimension2D)
+        return 0;
+
+    ret = vk->CreateImage(ctx->s.hwctx->act_dev, &create_info,
+                          ctx->s.hwctx->alloc, &temp);
+    if (ret != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
+    err = ff_vk_get_pooled_buffer(&ctx->s, &dxv->frame_data_pool,
+                                  &vp->slices_buf,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, size,
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    if (err < 0)
+        goto end;
+
+    FFVkBuffer *vkb = (FFVkBuffer *)vp->slices_buf->data;
+    VkBindImageMemoryInfo bind_info = {
+        .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
+        .image = temp,
+        .memory = vkb->mem,
+    };
+    ret = vk->BindImageMemory2(ctx->s.hwctx->act_dev, 1, &bind_info);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto end;
+    }
+
+    VkHostImageLayoutTransitionInfo layout_change = {
+        .sType = VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO,
+        .image = temp,
+        .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+        .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+        .subresourceRange.layerCount = 1,
+        .subresourceRange.levelCount = 1,
+    };
+    ret = vk->TransitionImageLayoutEXT(ctx->s.hwctx->act_dev, 1,
+                                       &layout_change);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto end;
+    }
+
+    VkMemoryToImageCopy copy_region = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY,
+        .pHostPointer = src,
+        .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+        .imageSubresource.layerCount = 1,
+        .imageExtent = (VkExtent3D){ dpx->frame->width*dpx->components,
+                                     dpx->frame->height,
+                                     1 },
+    };
+    VkCopyMemoryToImageInfo copy_info = {
+        .sType = VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO,
+        .flags = VK_HOST_IMAGE_COPY_MEMCPY_BIT_EXT,
+        .dstImage = temp,
+        .dstImageLayout = VK_IMAGE_LAYOUT_GENERAL,
+        .regionCount = 1,
+        .pRegions = &copy_region,
+    };
+    ret = vk->CopyMemoryToImageEXT(ctx->s.hwctx->act_dev, &copy_info);
+    if (ret != VK_SUCCESS)
+        err = AVERROR_EXTERNAL;
+
+end:
+    /* The image is only needed for the copy, on every path */
+    vk->DestroyImage(ctx->s.hwctx->act_dev, temp, ctx->s.hwctx->alloc);
+    if (err < 0)
+        av_buffer_unref(&vp->slices_buf);
+
+    return err;
+}
+
+/**
+ * Start a frame: try the direct ways of making the payload visible to the
+ * GPU, then prepare the output frame.
+ *
+ * Both the host image upload and the host mapping are best-effort; when
+ * neither leaves a buffer in vp->slices_buf, vk_dpx_decode_slice() adds the
+ * data through ff_vk_decode_add_slice() instead.
+ */
+static int vk_dpx_start_frame(AVCodecContext *avctx,
+                              const AVBufferRef *buffer_ref,
+                              const uint8_t *buffer,
+                              uint32_t size)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    DPXDecContext *dpx = avctx->priv_data;
+
+    DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    if (ctx->s.extensions & FF_VK_EXT_HOST_IMAGE_COPY) {
+        err = host_upload_image(avctx, dec, dpx, buffer, size);
+        if (err < 0)
+            av_log(avctx, AV_LOG_VERBOSE,
+                   "Host image upload failed, falling back\n");
+    }
+
+    /* Host map the frame data if supported */
+    if (!vp->slices_buf &&
+        ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
+        ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, (uint8_t *)buffer,
+                              buffer_ref,
+                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                              VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+
+    /* Prepare frame to be used */
+    err = ff_vk_decode_prepare_frame_sdr(dec, dpx->frame, vp, 1,
+                                         FF_VK_REP_NATIVE, 0);
+    if (err < 0)
+        return err;
+
+    return 0;
+}
+
+/**
+ * Add the frame data as a slice, unless vk_dpx_start_frame() already
+ * provided a buffer holding it.
+ */
+static int vk_dpx_decode_slice(AVCodecContext *avctx,
+                               const uint8_t *data,
+                               uint32_t size)
+{
+    DPXDecContext *dpx = avctx->priv_data;
+
+    DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    if (!vp->slices_buf) {
+        int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
+                                         NULL, NULL);
+        if (err < 0)
+            return err;
+    }
+
+    return 0;
+}
+
+/**
+ * Record and submit the compute dispatch that writes the frame data held in
+ * vp->slices_buf into the output frame's images.
+ *
+ * @return 0 on success, a negative error code otherwise
+ */
+static int vk_dpx_end_frame(AVCodecContext *avctx)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    DPXDecContext *dpx = avctx->priv_data;
+    DPXVulkanDecodeContext *dxv = ctx->sd_ctx;
+
+    DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    /* No buffer with frame data means nothing to bind or dispatch */
+    if (!vp->slices_buf)
+        return AVERROR(EINVAL);
+
+    FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
+
+    VkImageMemoryBarrier2 img_bar[8];
+    int nb_img_bar = 0;
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+    RET(ff_vk_exec_start(&ctx->s, exec));
+
+    /* Prepare deps */
+    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, dpx->frame,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
+                                    dpx->frame));
+
+    /* Our pointer is dropped right after this call; presumably the execution
+     * context then owns the reference (last argument is 0) - TODO confirm */
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
+    vp->slices_buf = NULL;
+
+    /* Reset the tracked state so the barrier below starts from UNDEFINED */
+    AVVkFrame *vkf = (AVVkFrame *)dpx->frame->data[0];
+    for (int i = 0; i < 4; i++) {
+        vkf->layout[i] = VK_IMAGE_LAYOUT_UNDEFINED;
+        vkf->access[i] = VK_ACCESS_2_NONE;
+    }
+
+    ff_vk_frame_barrier(&ctx->s, exec, dpx->frame, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+    });
+    nb_img_bar = 0;
+
+    FFVulkanShader *shd = &dxv->shader;
+    ff_vk_shader_update_img_array(&ctx->s, exec, shd,
+                                  dpx->frame, vp->view.out,
+                                  0, 0,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, shd,
+                                    0, 1, 0,
+                                    slices_buf,
+                                    0, slices_buf->size,
+                                    VK_FORMAT_UNDEFINED);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, shd);
+
+    /* Update push data */
+    DecodePushData pd = (DecodePushData) {
+        .stride = dpx->stride,
+        .need_align = dpx->need_align,
+        .padded_10bit = !dpx->unpadded_10bit,
+    };
+
+    ff_vk_shader_update_push_const(&ctx->s, exec, shd,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    /* One invocation per pixel, rounded up to whole workgroups */
+    vk->CmdDispatch(exec->buf,
+                    FFALIGN(dpx->frame->width, shd->lg_size[0])/shd->lg_size[0],
+                    FFALIGN(dpx->frame->height, shd->lg_size[1])/shd->lg_size[1],
+                    1);
+
+    RET(ff_vk_exec_submit(&ctx->s, exec));
+    err = 0;
+
+fail:
+    return err;
+}
+
+/**
+ * Assemble, compile and register the compute shader for the stream.
+ *
+ * Depending on the sample layout either dpx_unpack.comp (10-bit, or 12-bit
+ * with dpx->packing unset) or dpx_copy.comp (everything else) is used,
+ * configured through preprocessor defines.
+ *
+ * @param bits value emitted as BITS_PER_COMP; the caller passes
+ *             avctx->bits_per_raw_sample
+ */
+static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
+                       FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                       FFVulkanShader *shd, int bits)
+{
+    int err;
+    DPXDecContext *dpx = avctx->priv_data;
+    FFVulkanDescriptorSetBinding *desc_set;
+    AVHWFramesContext *dec_frames_ctx;
+    dec_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+    int planes = av_pix_fmt_count_planes(dec_frames_ctx->sw_format);
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+
+    RET(ff_vk_shader_init(s, shd, "dpx",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          512, 1, 1,
+                          0));
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    /* Field order has to match DecodePushData */
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+    GLSLC(1, int stride; );
+    GLSLC(1, int need_align; );
+    GLSLC(1, int padded_10bit; );
+    GLSLC(0, }; );
+    GLSLC(0, );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
+    /* Same condition as in host_upload_image() */
+    int unpack = (avctx->bits_per_raw_sample == 12 && !dpx->packing) ||
+                 avctx->bits_per_raw_sample == 10;
+
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name = "dst",
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions = 2,
+            .mem_quali = "writeonly",
+            .mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format,
+                                               FF_VK_REP_NATIVE),
+            .elems = planes,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+        {
+            .name = "data_buf",
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_quali = "readonly",
+            .buf_content = (unpack || bits == 32) ? "uint32_t data[];" :
+                           bits == 8 ? "uint8_t data[];" : "uint16_t data[];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0));
+
+    if (dpx->endian)
+        GLSLC(0, #define BIG_ENDIAN );
+    GLSLF(0, #define COMPONENTS (%i) ,dpx->components);
+    GLSLF(0, #define BITS_PER_COMP (%i) ,bits);
+    GLSLF(0, #define NB_IMAGES (%i) ,planes);
+    if (unpack) {
+        if (bits == 10)
+            GLSLC(0, #define PACKED_10BIT );
+        GLSLD(ff_source_dpx_unpack_comp);
+    } else {
+        GLSLF(0, #define SHIFT (%i) ,FFALIGN(bits, 8) - bits);
+        GLSLF(0, #define TYPE uint%i_t ,FFALIGN(bits, 8));
+        GLSLF(0, #define TYPE_VEC u%ivec4 ,FFALIGN(bits, 8));
+        GLSLF(0, #define TYPE_REVERSE(x) (reverse%i(x)), FFALIGN(bits, 8)/8);
+        GLSLD(ff_source_dpx_copy_comp);
+    }
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+/* Free the DPX-specific state attached to the shared decode context */
+static void vk_decode_dpx_uninit(FFVulkanDecodeShared *ctx)
+{
+    DPXVulkanDecodeContext *fv = ctx->sd_ctx;
+
+    ff_vk_shader_free(&ctx->s, &fv->shader);
+
+    av_buffer_pool_uninit(&fv->frame_data_pool);
+
+    /* Clear the owner's pointer as well, not just a local copy of it */
+    av_freep(&ctx->sd_ctx);
+}
+
+/**
+ * Initialize the hwaccel: reject the pixel formats listed below with
+ * ENOTSUP, set up the common Vulkan decode state and build the shader.
+ *
+ * @return 0 on success, a negative error code otherwise
+ */
+static int vk_decode_dpx_init(AVCodecContext *avctx)
+{
+    int err;
+    DPXDecContext *dpx = avctx->priv_data;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+
+    switch (dpx->pix_fmt) {
+    case AV_PIX_FMT_GRAY10:
+    case AV_PIX_FMT_GRAY12:
+    case AV_PIX_FMT_GBRAP10:
+    case AV_PIX_FMT_GBRAP12:
+    case AV_PIX_FMT_UYVY422:
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUVA444P:
+        return AVERROR(ENOTSUP);
+    case AV_PIX_FMT_GBRP10:
+        if (dpx->unpadded_10bit)
+            return AVERROR(ENOTSUP);
+        /* fallthrough */
+    default:
+        break;
+    }
+
+    FFVkSPIRVCompiler *spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* From here on every exit has to release the compiler */
+    err = ff_vk_decode_init(avctx);
+    if (err < 0)
+        goto fail;
+
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    DPXVulkanDecodeContext *dxv = ctx->sd_ctx = av_mallocz(sizeof(*dxv));
+    if (!dxv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    ctx->sd_ctx_free = &vk_decode_dpx_uninit;
+
+    RET(init_shader(avctx, &ctx->s, &ctx->exec_pool,
+                    spv, &dxv->shader, avctx->bits_per_raw_sample));
+
+fail:
+    spv->uninit(&spv);
+
+    return err;
+}
+
+/* Hand a frame's private data to ff_vk_decode_free_frame() */
+static void vk_dpx_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    AVHWDeviceContext *dev_ctx = _hwctx.nc;
+
+    DPXVulkanDecodePicture *pp = data;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    ff_vk_decode_free_frame(dev_ctx, vp);
+}
+
+const FFHWAccel ff_dpx_vulkan_hwaccel = {
+    .p.name                = "dpx_vulkan",
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_DPX,
+    .p.pix_fmt             = AV_PIX_FMT_VULKAN,
+    .start_frame           = &vk_dpx_start_frame,
+    .decode_slice          = &vk_dpx_decode_slice,
+    .end_frame             = &vk_dpx_end_frame,
+    .free_frame_priv       = &vk_dpx_free_frame_priv,
+    .frame_priv_data_size  = sizeof(DPXVulkanDecodePicture),
+    .init                  = &vk_decode_dpx_init,
+    .update_thread_context = &ff_vk_update_thread_context,
+    .decode_params         = &ff_vk_params_invalidate,
+    .flush                 = &ff_vk_decode_flush,
+    .uninit                = &ff_vk_decode_uninit,
+    .frame_params          = &ff_vk_frame_params,
+    .priv_data_size        = sizeof(FFVulkanDecodeContext),
+    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};