Commit a092ab40 authored by Manuel Stoeckl's avatar Manuel Stoeckl
Browse files

Use VAAPI postprocessing to copy video output to DMABUF

A filterless VAAPI postprocessing pipeline is used to do a format
conversion to a VASurface created from the target DMABUF.

This method keeps image data on the GPU, and can be significantly
faster than using sws_scale and transferring data to/from the GPU.

This change adds a new dependency on libva.
parent fc3d789f
Pipeline #47937 passed with stages
in 46 seconds
......@@ -49,11 +49,11 @@ Requirements:
* libgbm (optional, to support programs using OpenGL via DMABUFs)
* libdrm (optional, same as for libgbm)
* ffmpeg (optional, >=3.1, needs avcodec/avutil/swscale for lossy video encoding)
* libva (optional, for hardware video encoding and decoding)
* scdoc (optional, to generate a man page)
* sys/sdt.h (optional, to provide static tracepoints for profiling)
* ssh (runtime, OpenSSH >= 6.7, for Unix domain socket forwarding)
* libx264 (ffmpeg runtime, for software video decoding and encoding)
* a VAAPI driver (ffmpeg runtime, for hardware video decoding and encoding)
[0] [https://mesonbuild.com/](https://mesonbuild.com/)
[1] [https://git.sr.ht/~sircmpwn/scdoc](https://git.sr.ht/~sircmpwn/scdoc)
......
......@@ -61,6 +61,10 @@ libswscale = dependency('libswscale', required: get_option('with_video'))
if libavcodec.found() and libavutil.found() and libswscale.found()
add_project_arguments('-DHAS_VIDEO=1', language: 'c')
endif
libva = dependency('libva', required: get_option('with_vaapi'))
if libva.found()
add_project_arguments('-DHAS_VAAPI=1', language: 'c')
endif
subdir('protocols')
......@@ -73,6 +77,7 @@ waypipe_dependencies = [
pthreads, # To run expensive computations in parallel
protos, # Wayland protocol data
rt, # For shared memory
libva, # For NV12->RGB conversions
]
waypipe_includes = []
......
......@@ -3,3 +3,4 @@ option('with_video', type : 'feature', value : 'auto', description : 'Link with
option('with_dmabuf', type : 'feature', value : 'auto', description : 'Support DMABUFs, the file descriptors used to exchange data for e.g. OpenGL applications')
option('with_lz4', type : 'feature', value : 'auto', description : 'Support LZ4 as a compression mechanism')
option('with_zstd', type : 'feature', value : 'auto', description : 'Support ZStandard as a compression mechanism')
option('with_vaapi', type : 'feature', value : 'auto', description : 'Link with libva and use VAAPI to perform hardware video output color space conversions on GPU')
......@@ -1409,7 +1409,7 @@ void create_from_update(struct fd_translation_map *map,
// Apply first frame, if available
if (block->size > sizeof(struct dmabuf_slice_data)) {
apply_video_packet(sfd,
apply_video_packet(sfd, render,
block->size - sizeof(struct dmabuf_slice_data),
block->data + sizeof(struct dmabuf_slice_data));
} else {
......@@ -1657,7 +1657,7 @@ void apply_update(struct fd_translation_map *map, struct render_data *render,
return;
}
apply_video_packet(sfd, block->size, block->data);
apply_video_packet(sfd, render, block->size, block->data);
} else if (sfd->type == FDC_DMAVID_IR) {
wp_error("Did not expect any messages updating a read-only video channel, rid=%d",
sfd->remote_id);
......
......@@ -123,6 +123,9 @@ struct bytebuf_stack {
int size, count;
};
typedef void *VADisplay;
typedef unsigned int VAGenericID;
typedef VAGenericID VAConfigID;
struct render_data {
bool disabled;
int drm_fd;
......@@ -132,6 +135,8 @@ struct render_data {
bool av_disabled;
struct AVBufferRef *av_hwdevice_ref;
struct AVBufferRef *av_drmdevice_ref;
VADisplay av_vadisplay;
VAConfigID av_copy_config;
};
enum thread_task {
......@@ -296,6 +301,10 @@ void reset_damage(struct damage *base);
/** Expand damage to cover everything */
void damage_everything(struct damage *base);
typedef VAGenericID VAContextID;
typedef VAGenericID VASurfaceID;
typedef VAGenericID VABufferID;
struct shadow_fd {
struct shadow_fd *next; // singly-linked list
fdcat_t type;
......@@ -357,6 +366,10 @@ struct shadow_fd {
struct SwsContext *video_color_context;
char *video_buffer;
int64_t video_frameno;
VASurfaceID video_va_surface;
VAContextID video_va_context;
VABufferID video_va_pipeline;
};
#define FILE_SIZE_EXTEND_FLAG (1u << 31)
......@@ -617,7 +630,8 @@ void setup_video_decode(struct shadow_fd *sfd, struct render_data *rd);
void collect_video_from_mirror(struct shadow_fd *sfd,
struct transfer_stack *transfers, struct bytebuf_stack *blocks,
bool first);
void apply_video_packet(struct shadow_fd *sfd, size_t size, const char *data);
void apply_video_packet(struct shadow_fd *sfd, struct render_data *rd,
size_t size, const char *data);
/** All return pointers can be NULL. Determines how much extra space or
* padded width/height is needed for a video frame */
void pad_video_mirror_size(int width, int height, int stride, int *new_width,
......
......@@ -52,8 +52,16 @@ void pad_video_mirror_size(int width, int height, int stride, int *new_width,
(void)new_min_size;
}
void destroy_video_data(struct shadow_fd *sfd) { (void)sfd; }
void setup_video_encode(struct shadow_fd *sfd) { (void)sfd; }
void setup_video_decode(struct shadow_fd *sfd) { (void)sfd; }
/* No-op stub used when waypipe is built without video support. */
void setup_video_encode(struct shadow_fd *sfd, struct render_data *rd)
{
	(void)rd;
	(void)sfd;
}
/* No-op stub used when waypipe is built without video support. */
void setup_video_decode(struct shadow_fd *sfd, struct render_data *rd)
{
	(void)rd;
	(void)sfd;
}
void collect_video_from_mirror(struct shadow_fd *sfd,
struct transfer_stack *transfers, struct bytebuf_stack *blocks,
bool first)
......@@ -63,8 +71,10 @@ void collect_video_from_mirror(struct shadow_fd *sfd,
(void)blocks;
(void)first;
}
void apply_video_packet(struct shadow_fd *sfd, size_t size, const char *data)
void apply_video_packet(struct shadow_fd *sfd, struct render_data *rd,
size_t size, const char *data)
{
(void)rd;
(void)sfd;
(void)size;
(void)data;
......@@ -82,6 +92,12 @@ void apply_video_packet(struct shadow_fd *sfd, size_t size, const char *data)
#include <libswscale/swscale.h>
#include <unistd.h>
#ifdef HAS_VAAPI
#include <libavutil/hwcontext_vaapi.h>
#include <va/va_drmcommon.h>
#include <va/va_vpp.h>
#endif
/* these are equivalent to the GBM formats */
#include <libdrm/drm_fourcc.h>
......@@ -186,9 +202,180 @@ void setup_video_logging()
av_log_set_callback(video_log_callback);
}
#ifdef HAS_VAAPI
/** Create the per-shadow-fd VAAPI objects used to copy decoded video frames
 * into the target DMABUF: a VASurface wrapping the DMABUF, a video-processing
 * context targeting that surface, and a pipeline parameter buffer.
 *
 * Returns 0 on success, -1 on failure; on failure all three handles in `sfd`
 * are reset to 0. */
static int setup_vaapi_pipeline(struct shadow_fd *sfd, struct render_data *rd)
{
	VADisplay vadisp = rd->av_vadisplay;
	unsigned long buffer_val = (unsigned long)sfd->fd_local;

	/* Zero-initialize: VASurfaceAttribExternalBuffers has fields beyond
	 * those set below (e.g. private_data), and the driver must not read
	 * stack garbage from them. */
	VASurfaceAttribExternalBuffers buffer_desc = {0};

	/* We create a VASurface from the existing DMABUF, hence we do not
	 * allocate any new image storage here. All additional surfaces
	 * written into will also be DMABUF surfaces; disjointness is
	 * guaranteed because the client only updates a surface after it is
	 * released. */
	buffer_desc.num_buffers = 1;
	buffer_desc.buffers = &buffer_val;
	/* only very few surface pixel formats are supported */
	buffer_desc.pixel_format = VA_FOURCC_BGRX;
	buffer_desc.flags = 0;
	buffer_desc.width = sfd->video_context->width;
	buffer_desc.height = sfd->video_context->height;
	buffer_desc.data_size = sfd->buffer_size;
	buffer_desc.num_planes = 1;
	buffer_desc.offsets[0] = sfd->dmabuf_info.offsets[0];
	buffer_desc.pitches[0] = sfd->dmabuf_info.strides[0];

	/* NOTE(review): pixel_format above is BGRX while the surface attrib
	 * below requests RGBX — confirm this channel-order pairing is what
	 * the target driver expects. */
	VASurfaceAttrib attribs[3];
	attribs[0].type = VASurfaceAttribPixelFormat;
	attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
	attribs[0].value.type = VAGenericValueTypeInteger;
	attribs[0].value.value.i = VA_FOURCC_RGBX;
	attribs[1].type = VASurfaceAttribMemoryType;
	attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
	attribs[1].value.type = VAGenericValueTypeInteger;
	attribs[1].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;
	attribs[2].type = VASurfaceAttribExternalBufferDescriptor;
	attribs[2].flags = VA_SURFACE_ATTRIB_SETTABLE;
	attribs[2].value.type = VAGenericValueTypePointer;
	attribs[2].value.value.p = &buffer_desc;

	sfd->video_va_surface = 0;
	sfd->video_va_context = 0;
	sfd->video_va_pipeline = 0;

	VAStatus stat = vaCreateSurfaces(vadisp, VA_RT_FORMAT_RGB32,
			buffer_desc.width, buffer_desc.height,
			&sfd->video_va_surface, 1, attribs, 3);
	if (stat != VA_STATUS_SUCCESS) {
		wp_error("Create surface failed: %s", vaErrorStr(stat));
		sfd->video_va_surface = 0;
		return -1;
	}

	stat = vaCreateContext(vadisp, rd->av_copy_config, buffer_desc.width,
			buffer_desc.height, 0, &sfd->video_va_surface, 1,
			&sfd->video_va_context);
	if (stat != VA_STATUS_SUCCESS) {
		wp_error("Create context failed %s", vaErrorStr(stat));
		vaDestroySurfaces(vadisp, &sfd->video_va_surface, 1);
		sfd->video_va_surface = 0;
		sfd->video_va_context = 0;
		return -1;
	}

	stat = vaCreateBuffer(vadisp, sfd->video_va_context,
			VAProcPipelineParameterBufferType,
			sizeof(VAProcPipelineParameterBuffer), 1, NULL,
			&sfd->video_va_pipeline);
	if (stat != VA_STATUS_SUCCESS) {
		wp_error("Failed to create pipeline buffer: %s",
				vaErrorStr(stat));
		vaDestroySurfaces(vadisp, &sfd->video_va_surface, 1);
		vaDestroyContext(vadisp, sfd->video_va_context);
		sfd->video_va_surface = 0;
		sfd->video_va_context = 0;
		sfd->video_va_pipeline = 0;
		return -1;
	}
	return 0;
}
/** Tear down any VAAPI surface/context/pipeline handles attached to `sfd`,
 * recovering the VADisplay from the codec's hardware device context. Safe to
 * call when no VAAPI resources were ever created. */
static void cleanup_vaapi_pipeline(struct shadow_fd *sfd)
{
	/* No codec context, or a codec without a hw device, means there is
	 * nothing VAAPI-related to free. */
	if (!sfd->video_context || !sfd->video_context->hw_device_ctx) {
		return;
	}

	AVHWDeviceContext *dev_ctx =
			(AVHWDeviceContext *)
					sfd->video_context->hw_device_ctx->data;
	if (dev_ctx->type != AV_HWDEVICE_TYPE_VAAPI) {
		return;
	}
	AVVAAPIDeviceContext *va_ctx = (AVVAAPIDeviceContext *)dev_ctx->hwctx;
	VADisplay display = va_ctx->display;

	if (sfd->video_va_surface) {
		vaDestroySurfaces(display, &sfd->video_va_surface, 1);
		sfd->video_va_surface = 0;
	}
	if (sfd->video_va_context) {
		vaDestroyContext(display, sfd->video_va_context);
		sfd->video_va_context = 0;
	}
	if (sfd->video_va_pipeline) {
		vaDestroyBuffer(display, sfd->video_va_pipeline);
		sfd->video_va_pipeline = 0;
	}
}
static void run_vaapi_conversion(struct shadow_fd *sfd, struct render_data *rd,
struct AVFrame *va_frame)
{
VADisplay vadisp = rd->av_vadisplay;
if (va_frame->format != AV_PIX_FMT_VAAPI) {
wp_error("Non-vaapi pixel format: %s",
av_get_pix_fmt_name(va_frame->format));
}
VASurfaceID src_surf = (VASurfaceID)(ptrdiff_t)va_frame->data[3];
int stat = vaBeginPicture(
vadisp, sfd->video_va_context, sfd->video_va_surface);
if (stat != VA_STATUS_SUCCESS) {
wp_error("Begin picture config failed: %s", vaErrorStr(stat));
}
VAProcPipelineParameterBuffer *pipeline_param;
stat = vaMapBuffer(vadisp, sfd->video_va_pipeline,
(void **)&pipeline_param);
if (stat != VA_STATUS_SUCCESS) {
wp_error("Failed to map pipeline buffer: %s", vaErrorStr(stat));
}
pipeline_param->surface = src_surf;
pipeline_param->surface_region = NULL;
pipeline_param->output_region = NULL;
pipeline_param->output_background_color = 0;
pipeline_param->filter_flags = VA_FILTER_SCALING_FAST;
pipeline_param->filters = NULL;
pipeline_param->filters = 0;
stat = vaUnmapBuffer(vadisp, sfd->video_va_pipeline);
if (stat != VA_STATUS_SUCCESS) {
wp_error("Failed to unmap pipeline buffer: %s",
vaErrorStr(stat));
}
stat = vaRenderPicture(vadisp, sfd->video_va_context,
&sfd->video_va_pipeline, 1);
if (stat != VA_STATUS_SUCCESS) {
wp_error("Failed to render picture: %s", vaErrorStr(stat));
}
stat = vaEndPicture(vadisp, sfd->video_va_context);
if (stat != VA_STATUS_SUCCESS) {
wp_error("End picture failed: %s", vaErrorStr(stat));
}
stat = vaSyncSurface(vadisp, sfd->video_va_surface);
if (stat != VA_STATUS_SUCCESS) {
wp_error("Sync surface failed: %s", vaErrorStr(stat));
}
}
#endif
void destroy_video_data(struct shadow_fd *sfd)
{
if (sfd->video_context) {
#ifdef HAS_VAAPI
cleanup_vaapi_pipeline(sfd);
#endif
/* free contexts (which, theoretically, could have hooks into
* frames/packets) first */
avcodec_free_context(&sfd->video_context);
......@@ -256,6 +443,10 @@ int init_hwcontext(struct render_data *rd)
return -1;
}
rd->av_vadisplay = 0;
rd->av_copy_config = 0;
rd->av_drmdevice_ref = NULL;
// Allocate an (uninitialized) AV_HWDEVICE_TYPE_DRM device context; its
// fd is filled in and the context initialized just below.
rd->av_drmdevice_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM);
if (!rd->av_drmdevice_ref) {
......@@ -269,6 +460,8 @@ int init_hwcontext(struct render_data *rd)
dctx->fd = rd->drm_fd;
if (av_hwdevice_ctx_init(rd->av_drmdevice_ref)) {
wp_error("Failed to initialize AV DRM device context");
rd->av_disabled = true;
return -1;
}
/* We create a derived context here, to ensure that the drm fd matches
......@@ -279,14 +472,43 @@ int init_hwcontext(struct render_data *rd)
AV_HWDEVICE_TYPE_VAAPI, rd->av_drmdevice_ref,
0) < 0) {
wp_error("Failed to create VAAPI hardware device");
rd->av_disabled = true;
return -1;
}
#ifdef HAS_VAAPI
AVHWDeviceContext *vwdc =
(AVHWDeviceContext *)rd->av_hwdevice_ref->data;
AVVAAPIDeviceContext *vdctx = (AVVAAPIDeviceContext *)vwdc->hwctx;
if (!vdctx) {
wp_error("No vaapi device context");
rd->av_disabled = true;
return -1;
}
rd->av_vadisplay = vdctx->display;
int stat = vaCreateConfig(rd->av_vadisplay, VAProfileNone,
VAEntrypointVideoProc, NULL, 0, &rd->av_copy_config);
if (stat != VA_STATUS_SUCCESS) {
wp_error("Create config failed: %s", vaErrorStr(stat));
rd->av_disabled = true;
return -1;
}
#endif
return 0;
}
void cleanup_hwcontext(struct render_data *rd)
{
rd->av_disabled = true;
#if HAS_VAAPI
if (rd->av_vadisplay && rd->av_copy_config) {
vaDestroyConfig(rd->av_vadisplay, rd->av_copy_config);
}
#endif
if (rd->av_hwdevice_ref) {
av_buffer_unref(&rd->av_hwdevice_ref);
}
......@@ -807,7 +1029,8 @@ static void setup_color_conv(struct shadow_fd *sfd, struct AVFrame *cpu_frame)
sfd->video_color_context = sws;
}
void apply_video_packet(struct shadow_fd *sfd, size_t size, const char *data)
void apply_video_packet(struct shadow_fd *sfd, struct render_data *rd,
size_t size, const char *data)
{
// padding: requires zeroed overflow space past the end for reads
sfd->video_packet->data = (uint8_t *)data;
......@@ -825,6 +1048,24 @@ void apply_video_packet(struct shadow_fd *sfd, size_t size, const char *data)
sfd->video_context, sfd->video_yuv_frame);
if (recvstat == 0) {
struct AVFrame *cpu_frame = sfd->video_yuv_frame;
#if HAS_VAAPI
if (rd->av_vadisplay &&
sfd->video_yuv_frame->format ==
AV_PIX_FMT_VAAPI) {
if (!sfd->video_va_surface) {
setup_vaapi_pipeline(sfd, rd);
}
/* if setup successful, run conversion */
if (sfd->video_va_surface) {
run_vaapi_conversion(sfd, rd,
sfd->video_yuv_frame);
continue;
}
}
#else
(void)rd;
#endif
if (sfd->video_yuv_frame->format == AV_PIX_FMT_VAAPI) {
if (!sfd->video_tmp_frame) {
sfd->video_tmp_frame = av_frame_alloc();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment