qtmux (mp4mux) deadlock in 1.14.4 and 1.14.5
I have a problem with using gstreamer plugin named 'qtmux' in my high-loaded recording application. It seems that in some cases this plugin causes a deadlock:
pipeline: appsrc name=video_in ! videoscale ! video/x-raw,width=1280,height=360 ! vaapih264enc rate-control=cbr keyframe-period=300 bitrate=1000 max-bframes=0 name=hrencoder ! h264parse config-interval=-1 ! muxer.video_0 appsrc name=audio_in ! audioresample ! audioconvert ! voaacenc ! muxer.audio_0 qtmux name=muxer ! filesink name=recorder_out
gdb thread info:
...
383 Thread 0x7fffb11b0700 (LWP 380) "freeswitch" __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:103
...
429 Thread 0x7fff7747c700 (LWP 426) "audio_in:src" syscall () at ../sysdeps/unix/sysv/linux/x86_64
/syscall.S:38
...
thr 383:
#0 __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:103
#1 0x00007ffff79de2a3 in __GI___pthread_mutex_lock (mutex=0x7fffdc462760) at ../nptl/pthread_mutex_lock.c:115
#2 0x00007fffe5e3b0f8 in g_rec_mutex_lock (mutex=<optimized out>) at /usr/src/debug/dev-libs/glib-2.58.3/glib-
2.58.3/glib/gthread-posix.c:308
#3 0x00007fffe58d586d in gst_base_src_send_event (element=0x7fffdc0d8d10, event=0x7fffd8334420) at /usr/src/debug/media-libs/gstreamer-1.14.5/gstreamer-1.14.5/libs/gst/base/gstbasesrc.c:1859
#4 0x00007fffe5cc389f in gst_element_send_event (element=element@entry=0x7fffdc0d8d10, event=event@entry=0x7fffd8334420) at /usr/src/debug/media-libs/gstreamer-1.14.5/gstreamer-1.14.5
/gst/gstelement.c:1857
I got owner of the mutex:
(gdb) f 1
#1 0x00007ffff79de2a3 in __GI___pthread_mutex_lock (mutex=0x7fffdc462760) at ../nptl/pthread_mutex_lock.c:115
115 LLL_MUTEX_LOCK (mutex);
(gdb) p *mutex
$3418 = {__data = {__lock = 2, __count = 1, __owner = 426, __nusers = 1, __kind = 1, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = "\002\000\000\000\001\000\000\000\252\001\000\000\001\000\000\000\001", '\000' <repeats 22 times>, __align = 4294967298}
As you can see, the owner of this mutex __owner == 426 (audio_in:src)
thr 429 (LWP 426):
#0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
#1 0x00007fffe5e3b95f in g_cond_wait (cond=0x7fffe031e220, mutex=0x7fffe031e218) at /usr/src/debug/dev-libs/glib-2.58.3/glib-2.58.3/glib/gthread-posix.c:1402
#2 0x00007fffe58e7843 in gst_collect_pads_chain (pad=0x7fffc009e8d0, parent=<optimized out>, buffer=0x7fffdc0b4d40) at /usr/src/debug/media-libs/gstreamer-1.14.5/gstreamer-1.14.5/libs/gst/base/gstcollectpads.c:2252
#3 0x00007fffe5cdd87a in gst_pad_chain_data_unchecked (data=0x7fffdc0b4d40, type=4112, pad=0x7fffc009e8d0) at /usr/src/debug/media-libs/gstreamer-1.14.5/gstreamer-1.14.5/gst/gstpad.c:4322
#4 gst_pad_push_data (pad=pad@entry=0x7fffd45fb430, type=type@entry=4112, data=data@entry=0x7fffdc0b4d40) at /usr/src/debug/media-libs/gstreamer-1.14.5/gstreamer-1.14.5/gst/gstpad.c:4578
#5 0x00007fffe5ce5b62 in gst_pad_push (pad=0x7fffd45fb430, buffer=0x7fffdc0b4d40) at /usr/src/debug/media-libs/gstreamer-1.14.5/gstreamer-1.14.5/gst/gstpad.c:4697
I found this place in gstreamer code:
/* wait to be collected, this must happen from another thread triggered
* by the _chain function of another pad. We release the lock so we
* can get stopped or flushed as well. We can however not get EOS
* because we still hold the STREAM_LOCK.
*/
GST_COLLECT_PADS_STREAM_UNLOCK (pads);
**GST_COLLECT_PADS_EVT_WAIT (pads, cookie);**
GST_COLLECT_PADS_STREAM_LOCK (pads);
`
And we hang in the muxer's chain:
`
(gdb) f 3
#3 0x00007fffe5cdd87a in gst_pad_chain_data_unchecked (data=0x7fffdc0b4d40, type=4112, pad=0x7fffc009e8d0) at /usr/src/debug/media-libs/gstreamer-1.14.5/gstreamer-1.14.5/gst/gstpad.c:4322
4322 ret = chainfunc (pad, parent, GST_BUFFER_CAST (data));
(gdb) p *pad->object->parent
$3422 = {object = {g_type_instance = {g_class = 0x7fffd43164f0}, ref_count = 5, qdata = 0x7fffdc0da1d0}, lock = {p = 0x0, i = {0, 0}}, name = 0x7fffdc4995e0 "muxer", parent = 0x7fffdc09cda0, flags = 0, control_bindings = 0x0, control_rate = 100000000, last_sync = 18446744073709551615, _gst_reserved = 0x0
GST_COLLECT_PADS_EVT_WAIT (pads, cookie) never released in some cases when it waits for the muxer. It seems that it depends on cpu or thread load (incorrect sending of signal by using gst_collectpads_... function?) Here is code to reproduce problem (reproduces very rarely, some latency to g_cond_broadcast with correct cond, e.g. in gdb, may help)
#include <stdio.h>
#include <gst/gst.h>
#include <gst/gstparse.h>
#include <glib.h>
#include <glib/gerror.h>
#include <pthread.h>
#include <unistd.h>
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
void *start_new_pipeline(void *data) {
GError * error = NULL;
pthread_t pself = pthread_self();
char pipe_str[4096] = {0};
sprintf(pipe_str, "videotestsrc do-timestamp=TRUE is-live=TRUE num-buffers=-1 !"
"videoscale !"
" video/x-raw,width=1280,height=360 !"
" vaapih264enc rate-control=cbr keyframe-period=300 bitrate=1000 max-bframes=0 name=hrencoder !"
" h264parse config-interval=-1 !"
" muxer.video_0 "
//
" audiotestsrc do-timestamp=TRUE is-live=TRUE num-buffers=-1 ! "
" audio/x-raw, format=S16LE, rate=8000, channels=1 !"
" audioresample !"
" audioconvert !"
" voaacenc !"
" muxer.audio_0"
//
" qtmux name=muxer ! filesink location=%zu.mp4", pself);
GstElement * pipeline = gst_parse_launch(pipe_str, &error);
if(!pipeline || error) {
printf("Cannot create pipeline (%d): %s\n", error->code, error->message);
exit(1);
}
GstBus * bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline));
if(!bus) {
printf("Cannot get bus\n");
exit(2);
}
GstStateChangeReturn state = gst_element_set_state(GST_ELEMENT(pipeline), GST_STATE_PLAYING);
if(state == GST_STATE_CHANGE_FAILURE) {
printf("State change failure\n");
exit(3);
}
pthread_mutex_lock(&mutex);
int ret = pthread_cond_wait(&cond, &mutex);
//printf("condwait ret = %d\n", ret);
pthread_mutex_unlock(&mutex);
// problem here >>>> eos stucks sometimes due to collect pads (muxer)
gboolean eret = gst_element_send_event(pipeline, gst_event_new_eos());
if(!eret) {
printf("Cannot send eos message\n");
exit(5);
}
GstMessage * message = gst_bus_poll(bus, GST_MESSAGE_EOS, GST_SECOND * 5);
if(!message) {
printf("Cannot get EOS message\n");
exit(6);
} else {
printf("eos succesfully received, all ok with %lu\n", pthread_self());
}
gst_element_set_state(GST_ELEMENT(pipeline), GST_STATE_NULL);
sleep(1);
gst_message_unref(message);
gst_object_unref(bus);
gst_object_unref(pipeline);
return NULL;
}
int main()
{
gst_init(0, 0);
const size_t thrs = 30;
const size_t timeout = 2;
pthread_t thr[thrs];
// make 5 threads 5 times
size_t t = 0;
while(1) {
printf("Try #%zu\n", t);
for(size_t i = 0; i < thrs; ++i) {
pthread_create(&thr[i], NULL, start_new_pipeline, NULL);
}
printf("Some work %zu sec\n", timeout);
sleep(timeout);
printf("Signaling threads to raise eos\n");
pthread_mutex_lock(&mutex);
pthread_cond_broadcast(&cond);
pthread_mutex_unlock(&mutex);
printf("Awaiting threads done eos\n");
int * tret;
for(size_t j = 0; j < thrs; ++j) {
pthread_join(thr[j], (void **)&tret);
}
memset(thr, 0, sizeof(pthread_t) * thrs);
++t;
}
printf("This time problem not occured, try one more time\n");
}
very rarely, this code hangs in sending eos to the pipeline.