NetworkManager coredump in connect_ready
Hello, we are using NetworkManager 1.12.4 in production, and we have had many coredumps with faulty 3G dongles. It seems that when NetworkManager tries to connect over 3G but the dongle disappears before the connect_ready callback is called, NetworkManager coredumps. It seems to be because the self variable is not protected (i.e. no reference is held on it) when calling some asynchronous functions whose callbacks use it (connect_ready being one of them).
Backtrace:
(gdb) bt
#0 0x00007f6fd8f8cba6 in connect_ready (simple_iface=<optimized out>, res=<optimized out>, self=0x55e637aa6340)
at /usr/src/debug/networkmanager/1.12.4-r1/NetworkManager-1.12.4/src/devices/wwan/nm-modem-broadband.c:386
#1 0x00007f6fdbc618e3 in g_task_return_now (task=0x55e637aa2d40) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/gio/gtask.c:1121
#2 0x00007f6fdbc61f96 in g_task_return (task=0x55e637aa2d40, type=<optimized out>) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/gio/gtask.c:1179
#3 0x00007f6fd8eeefe5 in simple_connect_ready (self=0x55e637a804b0, res=0x55e637adf460, task=0x55e637aa2d40) at /usr/src/debug/modemmanager/1.8.0-r1/ModemManager-1.8.0/libmm-glib/mm-modem-simple.c:138
#4 0x00007f6fdbc618e3 in g_task_return_now (task=0x55e637adf460) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/gio/gtask.c:1121
#5 0x00007f6fdbc61f96 in g_task_return (task=0x55e637adf460, type=<optimized out>) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/gio/gtask.c:1179
#6 0x00007f6fdbc9ff49 in reply_cb (connection=<optimized out>, res=<optimized out>, user_data=0x55e637adf460) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/gio/gdbusproxy.c:2573
#7 0x00007f6fdbc618e3 in g_task_return_now (task=0x55e637a95920) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/gio/gtask.c:1121
#8 0x00007f6fdbc61f96 in g_task_return (task=0x55e637a95920, type=<optimized out>) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/gio/gtask.c:1179
#9 0x00007f6fdbc949ef in g_dbus_connection_call_done (source=<optimized out>, result=0x55e637adf120, user_data=0x55e637a95920) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/gio/gdbusconnection.c:5706
#10 0x00007f6fdbc618e3 in g_task_return_now (task=0x55e637adf120) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/gio/gtask.c:1121
#11 0x00007f6fdbc61919 in complete_in_idle_cb (task=0x55e637adf120) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/gio/gtask.c:1135
#12 0x00007f6fdbab84fa in g_main_dispatch (context=0x55e637a6de60) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/glib/gmain.c:3203
#13 g_main_context_dispatch (context=context@entry=0x55e637a6de60) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/glib/gmain.c:3856
#14 0x00007f6fdbab88b0 in g_main_context_iterate (context=0x55e637a6de60, block=block@entry=1, dispatch=dispatch@entry=1, self=<optimized out>)
at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/glib/gmain.c:3929
#15 0x00007f6fdbab8bd2 in g_main_loop_run (loop=0x55e637a4fe70) at /usr/src/debug/glib-2.0/1_2.50.3-r0/glib-2.50.3/glib/gmain.c:4125
#16 0x000055e6373d1646 in main (argc=<optimized out>, argv=<optimized out>) at /usr/src/debug/networkmanager/1.12.4-r1/NetworkManager-1.12.4/src/main.c:438
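Here is an extract of the sources (with the original line numbers), so you can easily find the lines matching the coredump; frame #0 of the backtrace points at line 386 of nm-modem-broadband.c: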
354 static void
355 connect_ready (MMModemSimple *simple_iface,
356 GAsyncResult *res,
357 NMModemBroadband *self)
358 {
359 ConnectContext *ctx = self->_priv.ctx;
360 GError *error = NULL;
361 NMModemIPMethod ip4_method = NM_MODEM_IP_METHOD_UNKNOWN;
362 NMModemIPMethod ip6_method = NM_MODEM_IP_METHOD_UNKNOWN;
363
364 self->_priv.bearer = mm_modem_simple_connect_finish (simple_iface, res, &error);
365
366 if (!ctx)
367 return;
368
369 if (!self->_priv.bearer) {
370 if (g_error_matches (error, MM_MOBILE_EQUIPMENT_ERROR, MM_MOBILE_EQUIPMENT_ERROR_SIM_PIN) ||
371 (g_error_matches (error, MM_CORE_ERROR, MM_CORE_ERROR_UNAUTHORIZED) &&
372 mm_modem_get_unlock_required (self->_priv.modem_iface) == MM_MODEM_LOCK_SIM_PIN)) {
373 g_error_free (error);
374
375 /* Request PIN */
376 ask_for_pin (self);
377 connect_context_clear (self);
378 return;
379 }
380
381 /* Save the error, if it's the first one */
382 if (!ctx->first_error) {
383 /* Strip remote error info before saving it */
384 if (g_dbus_error_is_remote_error (error))
385 g_dbus_error_strip_remote_error (error);
386 ctx->first_error = error;
387 } else
388 g_clear_error (&error);
389
390 if ( ctx->ip_type_tries == 0
391 && g_error_matches (error, MM_CORE_ERROR, MM_CORE_ERROR_RETRY)) {
392 /* Try one more time */
393 ctx->ip_type_tries++;
394 } else {
395 /* If the modem/provider lies and the IP type we tried isn't supported,
396 * retry with the next one, if any.
397 */
398 ctx->ip_types_i++;
399 ctx->ip_type_tries = 0;
400 }
401 connect_context_step (self);
402 return;
403 }
404
405 /* Grab IP configurations */
406 self->_priv.ipv4_config = mm_bearer_get_ipv4_config (self->_priv.bearer);
407 if (self->_priv.ipv4_config)
408 ip4_method = get_bearer_ip_method (self->_priv.ipv4_config);
409
410 self->_priv.ipv6_config = mm_bearer_get_ipv6_config (self->_priv.bearer);
411 if (self->_priv.ipv6_config)
412 ip6_method = get_bearer_ip_method (self->_priv.ipv6_config);
413
414 if (!nm_modem_set_data_port (NM_MODEM (self),
415 NM_PLATFORM_GET,
416 mm_bearer_get_interface (self->_priv.bearer),
417 ip4_method,
418 ip6_method,
419 mm_bearer_get_ip_timeout (self->_priv.bearer),
420 &error)) {
421 _LOGW ("failed to connect modem: %s", error->message);
422 g_error_free (error);
423 nm_modem_emit_prepare_result (NM_MODEM (self), FALSE, NM_DEVICE_STATE_REASON_CONFIG_FAILED);
424 connect_context_clear (self);
425 return;
426 }
427
428 ctx->step++;
429 connect_context_step (self);
430 }
[...]
459 static void
460 connect_context_step (NMModemBroadband *self)
461 {
462 ConnectContext *ctx = self->_priv.ctx;
463
464 switch (ctx->step) {
465 case CONNECT_STEP_FIRST:
466 ctx->step++;
467 /* fall through */
468
469 case CONNECT_STEP_WAIT_FOR_SIM:
470 if (MODEM_CAPS_3GPP (ctx->caps) && !self->_priv.sim_iface) {
471 /* Have to wait for the SIM to show up */
472 break;
473 }
474 ctx->step++;
475 /* fall through */
476
477 case CONNECT_STEP_UNLOCK:
478 if ( MODEM_CAPS_3GPP (ctx->caps)
479 && mm_modem_get_unlock_required (self->_priv.modem_iface) == MM_MODEM_LOCK_SIM_PIN) {
480 NMSettingGsm *s_gsm = nm_connection_get_setting_gsm (ctx->connection);
481 const char *pin = nm_setting_gsm_get_pin (s_gsm);
482
483 /* If we have a PIN already, send it. If we don't, get it. */
484 if (pin) {
485 mm_sim_send_pin (self->_priv.sim_iface,
486 pin,
487 ctx->cancellable,
488 (GAsyncReadyCallback) send_pin_ready,
489 self);
490 } else {
491 ask_for_pin (self);
492 }
493 break;
494 }
495 ctx->step++;
496 /* fall through */
497 case CONNECT_STEP_WAIT_FOR_READY: {
498 GError *error = NULL;
499
500 if (mm_modem_get_state (self->_priv.modem_iface) <= MM_MODEM_STATE_LOCKED)
501 break;
502
503 /* Create core connect properties based on the modem capabilities */
504 g_assert (!ctx->connect_properties);
505
506 if (MODEM_CAPS_3GPP (ctx->caps))
507 ctx->connect_properties = create_gsm_connect_properties (ctx->connection);
508 else if (MODEM_CAPS_3GPP2 (ctx->caps))
509 ctx->connect_properties = create_cdma_connect_properties (ctx->connection);
510 else {
511 _LOGW ("failed to connect '%s': not a mobile broadband modem",
512 nm_connection_get_id (ctx->connection));
513
514 nm_modem_emit_prepare_result (NM_MODEM (self), FALSE, NM_DEVICE_STATE_REASON_MODEM_INIT_FAILED);
515 connect_context_clear (self);
516 break;
517 }
518 g_assert (ctx->connect_properties);
519
520 /* Build up list of IP types that we need to use in the retries */
521 ctx->ip_types = nm_modem_get_connection_ip_type (NM_MODEM (self), ctx->connection, &error);
522 if (!ctx->ip_types) {
523 _LOGW ("failed to connect '%s': %s",
524 nm_connection_get_id (ctx->connection),
525 error->message);
526 g_clear_error (&error);
527
528 nm_modem_emit_prepare_result (NM_MODEM (self), FALSE, NM_DEVICE_STATE_REASON_MODEM_INIT_FAILED);
529 connect_context_clear (self);
530 break;
531 }
532
533 ctx->step++;
534 }
535 /* fall through */
536 case CONNECT_STEP_CONNECT:
537 if (ctx->ip_types_i < ctx->ip_types->len) {
538 NMModemIPType current;
539
540 current = g_array_index (ctx->ip_types, NMModemIPType, ctx->ip_types_i);
541
542 if (current == NM_MODEM_IP_TYPE_IPV4)
543 mm_simple_connect_properties_set_ip_type (ctx->connect_properties, MM_BEARER_IP_FAMILY_IPV4);
544 else if (current == NM_MODEM_IP_TYPE_IPV6)
545 mm_simple_connect_properties_set_ip_type (ctx->connect_properties, MM_BEARER_IP_FAMILY_IPV6);
546 else if (current == NM_MODEM_IP_TYPE_IPV4V6)
547 mm_simple_connect_properties_set_ip_type (ctx->connect_properties, MM_BEARER_IP_FAMILY_IPV4V6);
548 else
549 g_assert_not_reached ();
550
551 _LOGD ("launching connection with ip type '%s' (try %d)",
552 nm_modem_ip_type_to_string (current),
553 ctx->ip_type_tries + 1);
554
555 mm_modem_simple_connect (self->_priv.simple_iface,
556 ctx->connect_properties,
557 NULL,
558 (GAsyncReadyCallback) connect_ready,
559 self);
560 break;
561 }
562
563 ctx->step++;
564 /* fall through */
565
566 case CONNECT_STEP_LAST:
567 if (self->_priv.ipv4_config || self->_priv.ipv6_config)
568 nm_modem_emit_prepare_result (NM_MODEM (self), TRUE, NM_DEVICE_STATE_REASON_NONE);
569 else {
570 /* If we have a saved error from a previous attempt, use it */
571 if (!ctx->first_error)
572 ctx->first_error = g_error_new_literal (NM_DEVICE_ERROR,
573 NM_DEVICE_ERROR_INVALID_CONNECTION,
574 "invalid bearer IP configuration");
575
576 _LOGW ("failed to connect modem: %s",
577 ctx->first_error->message);
578 nm_modem_emit_prepare_result (NM_MODEM (self), FALSE, translate_mm_error (self, ctx->first_error));
579 }
580
581 connect_context_clear (self);
582 break;
583 }
584 }
Some logs are also attached (logs.txt).
I will open a pull request in which I protect the self variable. I am not sure that this fixes the issue, because we could not reproduce the coredumps on our development devices and we do not want to test in production.