Commit 7ab59e35 authored by Seungha Yang's avatar Seungha Yang 🐑
Browse files

nvcodec: Add CUDA specific memory and bufferpool

parent 6a6aae24
......@@ -4,7 +4,9 @@ libgstnvcodec_la_SOURCES = \
plugin.c \
gstcudaloader.c
gstcudacontext.c \
gstcudautils.c
gstcudautils.c \
gstcudamemory.c \
gstcudabufferpool.c
if USE_NVDEC
libgstnvcodec_la_SOURCES += \
......@@ -29,7 +31,9 @@ noinst_HEADERS = \
gstcudaloader.h \
gstcuvidloader.h
gstcudacontext.h \
gstcudautils.h
gstcudautils.h \
gstcudamemory.h \
gstcudabufferpool.h
libgstnvcodec_la_CFLAGS = \
$(GST_PLUGINS_BAD_CFLAGS) \
......
/* GStreamer
* Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstcudabufferpool.h"
#include "gstcudacontext.h"
#include "gstcudamemory.h"
GST_DEBUG_CATEGORY_STATIC (gst_cuda_buffer_pool_debug);
#define GST_CAT_DEFAULT gst_cuda_buffer_pool_debug
/* Private, per-instance state of a GstCudaBufferPool. */
struct _GstCudaBufferPoolPrivate
{
/* CUDA context handed to gst_cuda_buffer_pool_new(); the pool holds its own
 * reference, released in dispose */
GstCudaContext *context;
/* Allocator used by alloc_buffer; chosen in set_config (either the one found
 * in the config or a freshly created CUDA allocator) */
GstAllocator *allocator;
/* Video info parsed from the caps of the most recent set_config call */
GstVideoInfo info;
/* TRUE when the config carried GST_BUFFER_POOL_OPTION_VIDEO_META, in which
 * case every allocated buffer gets a GstVideoMeta attached */
gboolean add_videometa;
/* Memory target given at construction time; forwarded to
 * gst_cuda_allocator_new() when the pool creates its own allocator */
GstCudaMemoryTarget target;
};
#define gst_cuda_buffer_pool_parent_class parent_class
G_DEFINE_TYPE_WITH_PRIVATE (GstCudaBufferPool, gst_cuda_buffer_pool,
GST_TYPE_BUFFER_POOL);
/* GstBufferPool::get_options implementation.
 *
 * Returns a NULL-terminated array of the option names this pool advertises.
 * The array has static storage duration, so the caller must not free it.
 *
 * NOTE(review): GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT is advertised here,
 * but set_config only looks at GST_BUFFER_POOL_OPTION_VIDEO_META (see its
 * FIXME) -- confirm whether alignment is really meant to be supported.
 */
static const gchar **
gst_cuda_buffer_pool_get_options (GstBufferPool * pool)
{
  static const gchar *supported_options[] = {
    GST_BUFFER_POOL_OPTION_VIDEO_META,
    GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT,
    NULL
  };

  return supported_options;
}
/* GstBufferPool::set_config implementation.
 *
 * Validates @config (caps, sizes and optional allocator), derives the video
 * info from the caps, selects the allocator to use and finally rewrites the
 * config size to the size required by the parsed video info before chaining
 * up to the parent class.
 *
 * The pool's private state (allocator, video info, videometa flag) is only
 * modified once every check has passed.  A rejected config therefore leaves
 * the previously applied configuration fully intact; in particular the old
 * allocator is not dropped, which would otherwise make alloc_buffer fall
 * back to the default system allocator.
 *
 * Returns: FALSE if the config is invalid, has no caps, has caps that cannot
 * be parsed as video, carries a non-CUDA allocator, or if a CUDA allocator
 * could not be created; otherwise the result of the parent set_config.
 */
static gboolean
gst_cuda_buffer_pool_set_config (GstBufferPool * pool, GstStructure * config)
{
  GstCudaBufferPool *cuda_pool = GST_CUDA_BUFFER_POOL_CAST (pool);
  GstCudaBufferPoolPrivate *priv = cuda_pool->priv;
  GstCaps *caps = NULL;
  guint size, min_buffers, max_buffers;
  GstAllocator *allocator = NULL;
  GstAllocator *new_allocator = NULL;
  GstAllocationParams params;
  GstVideoInfo info;

  if (!gst_buffer_pool_config_get_params (config, &caps, &size, &min_buffers,
          &max_buffers))
    goto wrong_config;

  if (caps == NULL)
    goto no_caps;

  if (!gst_buffer_pool_config_get_allocator (config, &allocator, &params))
    goto wrong_config;

  /* parse the caps into a local first so that a failure cannot clobber the
   * video info of the currently applied configuration */
  if (!gst_video_info_from_caps (&info, caps))
    goto wrong_caps;

  GST_LOG_OBJECT (pool, "%dx%d, caps %" GST_PTR_FORMAT,
      GST_VIDEO_INFO_WIDTH (&info), GST_VIDEO_INFO_HEIGHT (&info), caps);

  /* pick the allocator before touching priv->allocator; taking the new
   * reference first also keeps things safe when the config hands back the
   * very allocator we already hold */
  if (allocator) {
    if (!GST_IS_CUDA_ALLOCATOR (allocator))
      goto wrong_allocator;

    new_allocator = gst_object_ref (allocator);
  } else {
    new_allocator = gst_cuda_allocator_new (priv->context, priv->target);
    if (G_UNLIKELY (new_allocator == NULL))
      goto no_allocator;
  }

  /* everything validated: commit the new state */
  gst_clear_object (&priv->allocator);
  priv->allocator = new_allocator;
  priv->info = info;
  priv->add_videometa = gst_buffer_pool_config_has_option (config,
      GST_BUFFER_POOL_OPTION_VIDEO_META);

  /* FIXME: add align, videometa */

  /* the buffer size is dictated by the video info, not by the caller */
  gst_buffer_pool_config_set_params (config, caps,
      GST_VIDEO_INFO_SIZE (&priv->info), min_buffers, max_buffers);

  return GST_BUFFER_POOL_CLASS (parent_class)->set_config (pool, config);

  /* ERRORS */
wrong_config:
  {
    GST_WARNING_OBJECT (pool, "invalid config");
    return FALSE;
  }
no_caps:
  {
    GST_WARNING_OBJECT (pool, "no caps in config");
    return FALSE;
  }
wrong_caps:
  {
    GST_WARNING_OBJECT (pool,
        "failed getting geometry from caps %" GST_PTR_FORMAT, caps);
    return FALSE;
  }
no_allocator:
  {
    GST_WARNING_OBJECT (pool, "Could not create new CUDA allocator");
    return FALSE;
  }
wrong_allocator:
  {
    GST_WARNING_OBJECT (pool, "Incorrect allocator type for this pool");
    return FALSE;
  }
}
/* GstBufferPool::alloc_buffer implementation.
 *
 * Creates a new buffer backed by a single memory obtained from the pool's
 * allocator, sized according to the configured video info.  When the pool
 * was configured with the video-meta option, a GstVideoMeta describing the
 * format, dimensions, plane offsets and strides is attached as well.
 *
 * On success the new buffer is stored in @buffer and GST_FLOW_OK is
 * returned; if the memory cannot be allocated, GST_FLOW_ERROR is returned
 * and @buffer is left untouched.
 */
static GstFlowReturn
gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer,
    GstBufferPoolAcquireParams * params)
{
  GstCudaBufferPoolPrivate *priv = GST_CUDA_BUFFER_POOL_CAST (pool)->priv;
  GstVideoInfo *vinfo = &priv->info;
  GstBuffer *outbuf;
  GstMemory *cuda_mem;

  outbuf = gst_buffer_new ();

  cuda_mem = gst_allocator_alloc (GST_ALLOCATOR_CAST (priv->allocator),
      GST_VIDEO_INFO_SIZE (vinfo), NULL);
  if (!cuda_mem) {
    /* nothing to hand out: drop the still-empty buffer again */
    gst_buffer_unref (outbuf);
    GST_WARNING_OBJECT (pool, "Cannot create CUDA memory");
    return GST_FLOW_ERROR;
  }

  /* the buffer takes ownership of the memory */
  gst_buffer_append_memory (outbuf, cuda_mem);

  if (priv->add_videometa) {
    GST_DEBUG_OBJECT (pool, "adding GstVideoMeta");
    gst_buffer_add_video_meta_full (outbuf, GST_VIDEO_FRAME_FLAG_NONE,
        GST_VIDEO_INFO_FORMAT (vinfo), GST_VIDEO_INFO_WIDTH (vinfo),
        GST_VIDEO_INFO_HEIGHT (vinfo), GST_VIDEO_INFO_N_PLANES (vinfo),
        vinfo->offset, vinfo->stride);
  }

  *buffer = outbuf;

  return GST_FLOW_OK;
}
/* gst_cuda_buffer_pool_new:
 * @context: the CUDA context the pool allocates from; must not be NULL
 * @target: the kind of CUDA memory the pool's own allocator should produce
 *
 * Creates a new CUDA buffer pool.  The pool takes its own reference on
 * @context and remembers @target for the allocator it creates in
 * set_config when the configuration does not supply one.
 *
 * The floating reference of the new object is sunk before returning, so the
 * caller owns a regular reference.
 *
 * Returns: the new pool, or NULL if @context is NULL.
 */
GstBufferPool *
gst_cuda_buffer_pool_new (GstCudaContext * context, GstCudaMemoryTarget target)
{
  GstCudaBufferPool *pool;

  /* without a context the pool could never create an allocator; refuse early
   * instead of handing out a pool that cannot be configured */
  g_return_val_if_fail (context != NULL, NULL);

  pool = g_object_new (GST_TYPE_CUDA_BUFFER_POOL, NULL);
  gst_object_ref_sink (pool);

  pool->priv->context = gst_object_ref (context);
  pool->priv->target = target;

  GST_LOG_OBJECT (pool, "new CUDA buffer pool %p", pool);

  return GST_BUFFER_POOL_CAST (pool);
}
/* GObject::dispose implementation.
 *
 * Drops the references the pool holds on its allocator and on its CUDA
 * context, then chains up to the parent class.  gst_clear_object() resets
 * each pointer to NULL after unreffing it.
 */
static void
gst_cuda_buffer_pool_dispose (GObject * object)
{
  GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL_CAST (object);
  GstCudaBufferPoolPrivate *state = self->priv;

  GST_LOG_OBJECT (self, "finalize CUDA buffer pool %p", self);

  /* the allocator is released before the context */
  gst_clear_object (&state->allocator);
  gst_clear_object (&state->context);

  G_OBJECT_CLASS (parent_class)->dispose (object);
}
/* Class initializer: registers the debug category and installs the GObject
 * and GstBufferPool virtual methods implemented in this file. */
static void
gst_cuda_buffer_pool_class_init (GstCudaBufferPoolClass * klass)
{
  GObjectClass *object_class = (GObjectClass *) klass;
  GstBufferPoolClass *pool_class = (GstBufferPoolClass *) klass;

  GST_DEBUG_CATEGORY_INIT (gst_cuda_buffer_pool_debug, "cudabufferpool", 0,
      "CUDA Buffer Pool");

  /* GObject vfuncs */
  object_class->dispose = gst_cuda_buffer_pool_dispose;

  /* GstBufferPool vfuncs */
  pool_class->get_options = gst_cuda_buffer_pool_get_options;
  pool_class->set_config = gst_cuda_buffer_pool_set_config;
  pool_class->alloc_buffer = gst_cuda_buffer_pool_alloc;
}
/* Instance initializer: caches the pointer to the instance-private data so
 * the rest of the file can reach it through pool->priv. */
static void
gst_cuda_buffer_pool_init (GstCudaBufferPool * pool)
{
  GstCudaBufferPoolPrivate *priv;

  priv = gst_cuda_buffer_pool_get_instance_private (pool);
  pool->priv = priv;
}
/* GStreamer
* Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_CUDA_BUFFER_POOL_H__
#define __GST_CUDA_BUFFER_POOL_H__
#include <gst/video/gstvideometa.h>
#include <gst/video/gstvideopool.h>
#include "gstcudamemory.h"
G_BEGIN_DECLS
/* GObject type macros for GstCudaBufferPool: type id, checked instance and
 * class casts, class lookup and type checks.
 * GST_CUDA_BUFFER_POOL_CAST is a plain C cast without any type check. */
#define GST_TYPE_CUDA_BUFFER_POOL (gst_cuda_buffer_pool_get_type ())
#define GST_CUDA_BUFFER_POOL(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj),GST_TYPE_CUDA_BUFFER_POOL,GstCudaBufferPool))
#define GST_CUDA_BUFFER_POOL_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_CUDA_BUFFER_POOL,GstCudaBufferPoolClass))
#define GST_CUDA_BUFFER_POOL_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS((obj), GST_TYPE_CUDA_BUFFER_POOL,GstCudaBufferPoolClass))
#define GST_IS_CUDA_BUFFER_POOL(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj),GST_TYPE_CUDA_BUFFER_POOL))
#define GST_IS_CUDA_BUFFER_POOL_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_CUDA_BUFFER_POOL))
#define GST_CUDA_BUFFER_POOL_CAST(obj) ((GstCudaBufferPool*)(obj))
typedef struct _GstCudaBufferPool GstCudaBufferPool;
typedef struct _GstCudaBufferPoolClass GstCudaBufferPoolClass;
typedef struct _GstCudaBufferPoolPrivate GstCudaBufferPoolPrivate;
/*
 * GstCudaBufferPool:
 *
 * Instance structure of the CUDA buffer pool, a GstBufferPool subclass.
 * All state specific to the subclass is kept behind the opaque @priv
 * pointer.
 */
struct _GstCudaBufferPool
{
/* parent instance; must stay the first member */
GstBufferPool parent;
/* private state, declared in the implementation file */
GstCudaBufferPoolPrivate *priv;
};
/*
 * GstCudaBufferPoolClass:
 *
 * Class structure of the CUDA buffer pool.  It adds no virtual methods or
 * fields of its own on top of GstBufferPoolClass.
 */
struct _GstCudaBufferPoolClass
{
/* parent class; must stay the first member */
GstBufferPoolClass parent_class;
};
/* Returns the GType registered for GstCudaBufferPool. */
GType gst_cuda_buffer_pool_get_type (void);
/* Creates a CUDA buffer pool bound to @context that produces memory of the
 * given @target.  The pool keeps its own reference on @context.  The
 * returned pool's floating reference has already been sunk, so the caller
 * owns a regular reference. */
GstBufferPool * gst_cuda_buffer_pool_new (GstCudaContext * context,
GstCudaMemoryTarget target);
G_END_DECLS
#endif /* __GST_CUDA_BUFFER_POOL_H__ */
......@@ -58,9 +58,14 @@ typedef struct _GstNvCodecCudaVTable
CUresult (*CuMemAlloc) (CUdeviceptr * dptr, unsigned int bytesize);
CUresult (*CuMemAllocPitch) (CUdeviceptr * dptr, size_t * pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
CUresult (*CuMemAllocHost) (void **pp, unsigned int bytesize);
CUresult (*CuMemcpy2D) (const CUDA_MEMCPY2D * pCopy);
CUresult (*CuMemcpy2DAsync) (const CUDA_MEMCPY2D *pCopy, CUstream hStream);
CUresult (*CuMemcpyHtoD) (CUdeviceptr dstDevice, const void *srcHost, unsigned int ByteCount);
CUresult (*CuMemcpyDtoH) (void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount);
CUresult (*CuMemcpyDtoD) (CUdeviceptr dstDevice, CUdeviceptr srcDevice, unsigned int ByteCount);
CUresult (*CuMemFree) (CUdeviceptr dptr);
CUresult (*CuMemFreeHost) (void *p);
CUresult (*CuStreamSynchronize) (CUstream hStream);
......@@ -113,9 +118,14 @@ gst_cuda_load_library (void)
LOAD_SYMBOL (cuMemAlloc, CuMemAlloc);
LOAD_SYMBOL (cuMemAllocPitch, CuMemAllocPitch);
LOAD_SYMBOL (cuMemAllocHost, CuMemAllocHost);
LOAD_SYMBOL (cuMemcpy2D, CuMemcpy2D);
LOAD_SYMBOL (cuMemcpy2DAsync, CuMemcpy2DAsync);
LOAD_SYMBOL (cuMemcpyHtoD, CuMemcpyHtoD);
LOAD_SYMBOL (cuMemcpyDtoH, CuMemcpyDtoH);
LOAD_SYMBOL (cuMemcpyDtoD, CuMemcpyDtoD);
LOAD_SYMBOL (cuMemFree, CuMemFree);
LOAD_SYMBOL (cuMemFreeHost, CuMemFreeHost);
LOAD_SYMBOL (cuStreamSynchronize, CuStreamSynchronize);
......@@ -274,6 +284,15 @@ CuMemAllocPitch (CUdeviceptr * dptr, size_t * pPitch, size_t WidthInBytes,
ElementSizeBytes);
}
/* Forwards to the CuMemAllocHost entry of the loaded CUDA vtable.
 *
 * Asserts that the vtable and the entry are present, then passes both
 * arguments through unchanged and returns the callee's CUresult. */
CUresult
CuMemAllocHost (void **pp, unsigned int bytesize)
{
  CUresult ret;

  g_assert (gst_cuda_vtable != NULL);
  g_assert (gst_cuda_vtable->CuMemAllocHost != NULL);

  ret = gst_cuda_vtable->CuMemAllocHost (pp, bytesize);

  return ret;
}
CUresult
CuMemcpy2D (const CUDA_MEMCPY2D * pCopy)
{
......@@ -292,6 +311,35 @@ CuMemcpy2DAsync (const CUDA_MEMCPY2D * pCopy, CUstream hStream)
return gst_cuda_vtable->CuMemcpy2DAsync (pCopy, hStream);
}
/* Forwards to the CuMemcpyHtoD entry of the loaded CUDA vtable.
 *
 * Asserts that the vtable and the entry are present, then passes all
 * arguments through unchanged and returns the callee's CUresult. */
CUresult
CuMemcpyHtoD (CUdeviceptr dstDevice, const void *srcHost,
    unsigned int ByteCount)
{
  CUresult ret;

  g_assert (gst_cuda_vtable != NULL);
  g_assert (gst_cuda_vtable->CuMemcpyHtoD != NULL);

  ret = gst_cuda_vtable->CuMemcpyHtoD (dstDevice, srcHost, ByteCount);

  return ret;
}
/* Forwards to the CuMemcpyDtoH entry of the loaded CUDA vtable.
 *
 * Asserts that the vtable and the entry are present, then passes all
 * arguments through unchanged and returns the callee's CUresult. */
CUresult
CuMemcpyDtoH (void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount)
{
  CUresult ret;

  g_assert (gst_cuda_vtable != NULL);
  g_assert (gst_cuda_vtable->CuMemcpyDtoH != NULL);

  ret = gst_cuda_vtable->CuMemcpyDtoH (dstHost, srcDevice, ByteCount);

  return ret;
}
/* Forwards to the CuMemcpyDtoD entry of the loaded CUDA vtable.
 *
 * Asserts that the vtable and the entry are present, then passes all
 * arguments through unchanged and returns the callee's CUresult. */
CUresult
CuMemcpyDtoD (CUdeviceptr dstDevice, CUdeviceptr srcDevice,
    unsigned int ByteCount)
{
  CUresult ret;

  g_assert (gst_cuda_vtable != NULL);
  g_assert (gst_cuda_vtable->CuMemcpyDtoD != NULL);

  ret = gst_cuda_vtable->CuMemcpyDtoD (dstDevice, srcDevice, ByteCount);

  return ret;
}
CUresult
CuMemFree (CUdeviceptr dptr)
{
......@@ -301,6 +349,15 @@ CuMemFree (CUdeviceptr dptr)
return gst_cuda_vtable->CuMemFree (dptr);
}
/* Forwards to the CuMemFreeHost entry of the loaded CUDA vtable.
 *
 * Asserts that the vtable and the entry are present, then passes the
 * pointer through unchanged and returns the callee's CUresult. */
CUresult
CuMemFreeHost (void *p)
{
  CUresult ret;

  g_assert (gst_cuda_vtable != NULL);
  g_assert (gst_cuda_vtable->CuMemFreeHost != NULL);

  ret = gst_cuda_vtable->CuMemFreeHost (p);

  return ret;
}
CUresult
CuStreamSynchronize (CUstream hStream)
{
......
......@@ -44,9 +44,14 @@ CUresult CuGraphicsUnregisterResource (CUgraphicsResource resource);
/* Memory allocation, copy and release wrappers.  Each one is expected to
 * forward to the same-named entry of the dynamically loaded CUDA vtable
 * (the wrappers visible in the loader source do exactly that).
 * Note that the linear alloc/copy variants take their byte count as
 * unsigned int, so callers must not pass sizes above UINT_MAX. */
CUresult CuMemAlloc (CUdeviceptr *dptr, unsigned int bytesize);
CUresult CuMemAllocPitch (CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
CUresult CuMemAllocHost (void **pp, unsigned int bytesize);
CUresult CuMemcpy2D (const CUDA_MEMCPY2D *pCopy);
CUresult CuMemcpy2DAsync (const CUDA_MEMCPY2D *pCopy, CUstream hStream);
CUresult CuMemcpyHtoD (CUdeviceptr dstDevice, const void *srcHost, unsigned int ByteCount);
CUresult CuMemcpyDtoH (void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount);
CUresult CuMemcpyDtoD (CUdeviceptr dstDevice, CUdeviceptr srcDevice, unsigned int ByteCount);
CUresult CuMemFree (CUdeviceptr dptr);
CUresult CuMemFreeHost (void *p);
CUresult CuStreamSynchronize (CUstream hStream);
......
/* GStreamer
* Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstcudamemory.h"
#include "gstcudautils.h"
#include <string.h>
GST_DEBUG_CATEGORY_STATIC (cudaallocator_debug);
#define GST_CAT_DEFAULT cudaallocator_debug
GST_DEBUG_CATEGORY_STATIC (GST_CAT_MEMORY);
#define gst_cuda_allocator_parent_class parent_class
G_DEFINE_TYPE (GstCudaAllocator, gst_cuda_allocator, GST_TYPE_ALLOCATOR);
static void gst_cuda_allocator_dispose (GObject * object);
static GstMemory *gst_cuda_allocator_alloc (GstAllocator * allocator,
gsize size, GstAllocationParams * params);
static void gst_cuda_allocator_free (GstAllocator * allocator,
GstMemory * memory);
static gpointer cuda_mem_map (GstCudaMemory * mem, gsize maxsize,
GstMapFlags flags);
static void cuda_mem_unmap (GstCudaMemory * mem);
static GstMemory *cuda_mem_share (GstCudaMemory * mem, gssize offset,
gssize size);
static gboolean cuda_mem_is_span (GstCudaMemory * mem1, GstCudaMemory * mem2,
gsize * offset);
/* Class initializer: sets up the debug categories used in this file and
 * installs the GObject and GstAllocator virtual methods. */
static void
gst_cuda_allocator_class_init (GstCudaAllocatorClass * klass)
{
  GstAllocatorClass *alloc_class = GST_ALLOCATOR_CLASS (klass);
  GObjectClass *object_class = G_OBJECT_CLASS (klass);

  /* own category plus a handle on the existing "GST_MEMORY" one */
  GST_DEBUG_CATEGORY_INIT (cudaallocator_debug, "cudaallocator", 0,
      "CUDA Allocator");
  GST_DEBUG_CATEGORY_GET (GST_CAT_MEMORY, "GST_MEMORY");

  object_class->dispose = gst_cuda_allocator_dispose;

  alloc_class->alloc = GST_DEBUG_FUNCPTR (gst_cuda_allocator_alloc);
  alloc_class->free = GST_DEBUG_FUNCPTR (gst_cuda_allocator_free);
}
/* Instance initializer: declares the memory type name, installs the
 * map/unmap/share/is_span handlers for CUDA memory and flags the allocator
 * as a custom allocator. */
static void
gst_cuda_allocator_init (GstCudaAllocator * allocator)
{
  GstAllocator *base = GST_ALLOCATOR_CAST (allocator);

  GST_DEBUG_OBJECT (allocator, "init");

  base->mem_type = GST_CUDA_MEMORY_TYPE_NAME;

  /* mem_copy is deliberately not set: the default, fallback copy function
   * is used */
  base->mem_map = (GstMemoryMapFunction) cuda_mem_map;
  base->mem_unmap = (GstMemoryUnmapFunction) cuda_mem_unmap;
  base->mem_share = (GstMemoryShareFunction) cuda_mem_share;
  base->mem_is_span = (GstMemoryIsSpanFunction) cuda_mem_is_span;

  GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC);
}
/* GObject::dispose implementation: releases the allocator's reference on its
 * CUDA context and chains up to the parent class. */
static void
gst_cuda_allocator_dispose (GObject * object)
{
  GstCudaAllocator *cuda_alloc = GST_CUDA_ALLOCATOR_CAST (object);

  GST_DEBUG_OBJECT (cuda_alloc, "dispose");

  /* gst_clear_object() also resets the pointer to NULL */
  gst_clear_object (&cuda_alloc->context);

  G_OBJECT_CLASS (parent_class)->dispose (object);
}
/* gst_cuda_allocator_memory_new:
 * @self: the allocator that will own the memory
 * @flags: GstMemory flags; the ZERO_PREFIXED / ZERO_PADDED flags are only
 *   honoured for host memory
 * @maxsize: number of bytes to allocate, before alignment compensation
 * @align: alignment mask requested by the caller
 * @offset: offset of the valid region inside the allocation
 * @size: size of the valid region
 * @target: whether to allocate host or device memory
 *
 * Allocates a block of CUDA host or device memory with the allocator's
 * context pushed, and wraps it in a newly allocated GstCudaMemory.
 *
 * The requested alignment is OR-ed with the global gst_memory_alignment and
 * the allocation is enlarged by that mask.  For host memory the data
 * pointer is then moved up to the next aligned address (shrinking the
 * usable maxsize accordingly); device pointers are used as returned.
 *
 * The context is popped again on every path after a successful push,
 * including the error paths.  Requests that do not fit the unsigned int
 * byte count taken by CuMemAlloc()/CuMemAllocHost() are rejected rather
 * than silently truncated.
 *
 * Returns: the new memory, or NULL if the size is too large, the context
 * could not be pushed, @target is unknown or the CUDA allocation failed.
 */
static GstCudaMemory *
gst_cuda_allocator_memory_new (GstCudaAllocator * self,
    GstMemoryFlags flags, gsize maxsize, gsize align, gsize offset, gsize size,
    GstCudaMemoryTarget target)
{
  gpointer data = NULL;
  CUdeviceptr dev_ptr = 0;
  gboolean ret = FALSE;
  GstCudaMemory *mem;

  /* ensure configured alignment */
  align |= gst_memory_alignment;

  /* maxsize + align must neither wrap around nor exceed what the CUDA
   * allocation wrappers can express in their unsigned int size argument;
   * a truncated size would yield a block smaller than advertised */
  if (G_UNLIKELY (maxsize > G_MAXUINT || align > G_MAXUINT - maxsize)) {
    GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, self,
        "requested size %" G_GSIZE_FORMAT " (alignment mask %" G_GSIZE_FORMAT
        ") exceeds the CUDA allocation limit", maxsize, align);
    return NULL;
  }

  /* allocate more to compensate for alignment */
  maxsize += align;

  if (!gst_cuda_context_push (self->context))
    return NULL;

  GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, self,
      "allocate new cuda memory with target %d", target);

  switch (target) {
    case GST_CUDA_MEMORY_TARGET_HOST:
      ret = gst_cuda_result (CuMemAllocHost (&data, maxsize));
      break;
    case GST_CUDA_MEMORY_TARGET_DEVICE:
      /* go through a real CUdeviceptr instead of type-punning the gpointer */
      ret = gst_cuda_result (CuMemAlloc (&dev_ptr, maxsize));
      data = (gpointer) (guintptr) dev_ptr;
      break;
    default:
      GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, self,
          "unknown CUDA memory type %d", target);
      /* do not leave the context pushed on this thread */
      gst_cuda_context_pop ();
      return NULL;
  }

  if (G_UNLIKELY (!ret)) {
    GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, self,
        "CUDA allocation failure for target %d", target);
    /* do not leave the context pushed on this thread */
    gst_cuda_context_pop ();
    return NULL;
  }

  mem = g_slice_new0 (GstCudaMemory);
  /* alloc_data keeps the unaligned pointer as returned by CUDA */
  mem->alloc_data = data;
  mem->target = target;
  g_mutex_init (&mem->lock);

  /* alignment makes sense only for host memory */
  if (target == GST_CUDA_MEMORY_TARGET_HOST) {
    guint8 *align_data = data;
    gsize aoffset;
    gsize padding;

    /* move up to the next address that satisfies the alignment mask */
    if ((aoffset = ((guintptr) align_data & align))) {
      aoffset = (align + 1) - aoffset;
      align_data += aoffset;
      maxsize -= aoffset;
    }

    if (offset && (flags & GST_MEMORY_FLAG_ZERO_PREFIXED))
      memset (align_data, 0, offset);

    padding = maxsize - (offset + size);
    if (padding && (flags & GST_MEMORY_FLAG_ZERO_PADDED))
      memset (align_data + offset + size, 0, padding);

    mem->data = align_data;
  } else {
    mem->data = data;
  }

  gst_cuda_context_pop ();

  gst_memory_init (GST_MEMORY_CAST (mem),
      flags, GST_ALLOCATOR_CAST (self), NULL, maxsize, align, offset, size);

  return mem;
}
/* GstAllocator::alloc implementation.
 *
 * Requests a memory of @size usable bytes, surrounded by the prefix and
 * padding given in @params, using the allocator's default target.
 * @params is dereferenced unconditionally.
 *
 * Returns: the new memory, or NULL if it could not be created.
 */
static GstMemory *
gst_cuda_allocator_alloc (GstAllocator * allocator, gsize size,
    GstAllocationParams * params)
{
  GstCudaAllocator *cuda_alloc = GST_CUDA_ALLOCATOR_CAST (allocator);
  GstCudaMemory *cuda_mem;
  gsize total_size;

  /* the allocation has to hold prefix + payload + padding */
  total_size = size + params->prefix + params->padding;

  cuda_mem = gst_cuda_allocator_memory_new (cuda_alloc, params->flags,
      total_size, params->align, params->prefix, size,
      cuda_alloc->default_target);

  return (GstMemory *) cuda_mem;
}
static void
gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory)
{
GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator);
GstCudaMemory *mem = GST_CUDA_MEMORY_CAST (memory);
GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, allocator, "free cuda memory");