/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- */
/* dbus-pollable-set-epoll.c - a pollable set implemented via Linux epoll(4)
 *
 * Copyright © 2011 Nokia Corporation
 *
 * Licensed under the Academic Free License version 2.1
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA  02110-1301  USA
 *
 */

#include <config.h>
26
#include "dbus-pollable-set.h"
27 28 29 30 31 32 33 34 35

#include <dbus/dbus-internals.h>
#include <dbus/dbus-sysdeps.h>

#ifndef __linux__
# error This file is for Linux epoll(4)
#endif

#include <errno.h>
36
#include <fcntl.h>
37 38 39 40 41 42
#include <sys/epoll.h>
#include <unistd.h>

#ifndef DOXYGEN_SHOULD_SKIP_THIS

typedef struct {
43
    DBusPollableSet parent;
44
    int epfd;
45
} DBusPollableSetEpoll;
46

47 48
/* Downcast a generic pollable set to the epoll implementation, asserting
 * that the set's class really is the epoll class. */
static inline DBusPollableSetEpoll *
socket_set_epoll_cast (DBusPollableSet *set)
{
  DBusPollableSetEpoll *self = (DBusPollableSetEpoll *) set;

  _dbus_assert (set->cls == &_dbus_pollable_set_epoll_class);
  return self;
}

/* this is safe to call on a partially-allocated socket set */
static void
56
socket_set_epoll_free (DBusPollableSet *set)
57
{
58
  DBusPollableSetEpoll *self = socket_set_epoll_cast (set);
59 60 61 62 63 64 65 66 67 68

  if (self == NULL)
    return;

  if (self->epfd != -1)
    close (self->epfd);

  dbus_free (self);
}

69 70
DBusPollableSet *
_dbus_pollable_set_epoll_new (void)
71
{
72
  DBusPollableSetEpoll *self;
73

74
  self = dbus_new0 (DBusPollableSetEpoll, 1);
75 76 77 78

  if (self == NULL)
    return NULL;

79
  self->parent.cls = &_dbus_pollable_set_epoll_class;
80 81 82

  self->epfd = epoll_create1 (EPOLL_CLOEXEC);

83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
  if (self->epfd == -1)
    {
      int flags;

      /* the size hint is ignored unless you have a rather old kernel,
       * but must be positive on some versions, so just pick something
       * arbitrary; it's a hint, not a limit */
      self->epfd = epoll_create (42);

      flags = fcntl (self->epfd, F_GETFD, 0);

      if (flags != -1)
        fcntl (self->epfd, F_SETFD, flags | FD_CLOEXEC);
    }

98 99
  if (self->epfd == -1)
    {
100
      socket_set_epoll_free ((DBusPollableSet *) self);
101 102 103
      return NULL;
    }

104
  return (DBusPollableSet *) self;
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
}

/* Translate DBUS_WATCH_READABLE / DBUS_WATCH_WRITABLE bits into the
 * corresponding EPOLLIN / EPOLLOUT bits. Any other bits in flags are
 * ignored. */
static uint32_t
watch_flags_to_epoll_events (unsigned int flags)
{
  const uint32_t readable = (flags & DBUS_WATCH_READABLE) ? EPOLLIN : 0;
  const uint32_t writable = (flags & DBUS_WATCH_WRITABLE) ? EPOLLOUT : 0;

  return readable | writable;
}

/* Translate the event bits reported by epoll into DBUS_WATCH_* flags:
 * EPOLLIN -> READABLE, EPOLLOUT -> WRITABLE, EPOLLHUP -> HANGUP and
 * EPOLLERR -> ERROR. Any other event bits are ignored. */
static unsigned int
epoll_events_to_watch_flags (uint32_t events)
{
  /* Same type as the return value, so no signed/narrowing conversion
   * happens on the way out. */
  unsigned int flags = 0;

  if (events & EPOLLIN)
    flags |= DBUS_WATCH_READABLE;
  if (events & EPOLLOUT)
    flags |= DBUS_WATCH_WRITABLE;
  if (events & EPOLLHUP)
    flags |= DBUS_WATCH_HANGUP;
  if (events & EPOLLERR)
    flags |= DBUS_WATCH_ERROR;

  return flags;
}

static dbus_bool_t
138 139 140 141
socket_set_epoll_add (DBusPollableSet  *set,
                      DBusPollable      fd,
                      unsigned int      flags,
                      dbus_bool_t       enabled)
142
{
143
  DBusPollableSetEpoll *self = socket_set_epoll_cast (set);
144 145 146
  struct epoll_event event;
  int err;

147
  _DBUS_ZERO (event);
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
  event.data.fd = fd;

  if (enabled)
    {
      event.events = watch_flags_to_epoll_events (flags);
    }
  else
    {
      /* We need to add *something* to reserve space in the kernel's data
       * structures: see socket_set_epoll_disable for more details */
      event.events = EPOLLET;
    }

  if (epoll_ctl (self->epfd, EPOLL_CTL_ADD, fd, &event) == 0)
    return TRUE;

  /* Anything except ENOMEM, ENOSPC means we have an internal error. */
  err = errno;
  switch (err)
    {
      case ENOMEM:
      case ENOSPC:
        /* be silent: this is basically OOM, which our callers are expected
         * to cope with */
        break;

      case EBADF:
175
        _dbus_warn ("Bad fd %d", fd);
176 177 178
        break;

      case EEXIST:
179
        _dbus_warn ("fd %d added and then added again", fd);
180 181 182
        break;

      default:
183
        _dbus_warn ("Misc error when trying to watch fd %d: %s", fd,
184 185 186 187 188 189 190 191
                    strerror (err));
        break;
    }

  return FALSE;
}

static void
192 193 194
socket_set_epoll_enable (DBusPollableSet  *set,
                         DBusPollable      fd,
                         unsigned int      flags)
195
{
196
  DBusPollableSetEpoll *self = socket_set_epoll_cast (set);
197 198 199
  struct epoll_event event;
  int err;

200
  _DBUS_ZERO (event);
201 202 203 204 205 206 207 208 209 210 211 212 213
  event.data.fd = fd;
  event.events = watch_flags_to_epoll_events (flags);

  if (epoll_ctl (self->epfd, EPOLL_CTL_MOD, fd, &event) == 0)
    return;

  err = errno;

  /* Enabling a file descriptor isn't allowed to fail, even for OOM, so we
   * do our best to avoid all of these. */
  switch (err)
    {
      case EBADF:
214
        _dbus_warn ("Bad fd %d", fd);
215 216 217
        break;

      case ENOENT:
218
        _dbus_warn ("fd %d enabled before it was added", fd);
219 220 221
        break;

      case ENOMEM:
222
        _dbus_warn ("Insufficient memory to change watch for fd %d", fd);
223 224 225
        break;

      default:
226
        _dbus_warn ("Misc error when trying to watch fd %d: %s", fd,
227 228 229 230 231 232
                    strerror (err));
        break;
    }
}

static void
233 234
socket_set_epoll_disable (DBusPollableSet  *set,
                          DBusPollable      fd)
235
{
236
  DBusPollableSetEpoll *self = socket_set_epoll_cast (set);
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
  struct epoll_event event;
  int err;

  /* The naive thing to do would be EPOLL_CTL_DEL, but that'll probably
   * free resources in the kernel. When we come to do socket_set_epoll_enable,
   * there might not be enough resources to bring it back!
   *
   * The next idea you might have is to set the flags to 0. However, events
   * always trigger on EPOLLERR and EPOLLHUP, even if libdbus isn't actually
   * delivering them to a DBusWatch. Because epoll is level-triggered by
   * default, we'll busy-loop on an unhandled error or hangup; not good.
   *
   * So, let's set it to be edge-triggered: then the worst case is that
   * we return from poll immediately on one iteration, ignore it because no
   * watch is enabled, then go back to normal. When we re-enable a watch
   * we'll switch back to level-triggered and be notified again (verified to
   * work on 2.6.32). Compile this file with -DTEST_BEHAVIOUR_OF_EPOLLET for
   * test code.
   */
256
  _DBUS_ZERO (event);
257 258 259 260 261 262 263
  event.data.fd = fd;
  event.events = EPOLLET;

  if (epoll_ctl (self->epfd, EPOLL_CTL_MOD, fd, &event) == 0)
    return;

  err = errno;
264
  _dbus_warn ("Error when trying to watch fd %d: %s", fd,
265 266 267 268
              strerror (err));
}

static void
269 270
socket_set_epoll_remove (DBusPollableSet  *set,
                         DBusPollable      fd)
271
{
272
  DBusPollableSetEpoll *self = socket_set_epoll_cast (set);
273
  int err;
274 275
  /* Kernels < 2.6.9 require a non-NULL struct pointer, even though its
   * contents are ignored */
276 277
  struct epoll_event dummy;
  _DBUS_ZERO (dummy);
278

279
  if (epoll_ctl (self->epfd, EPOLL_CTL_DEL, fd, &dummy) == 0)
280 281 282
    return;

  err = errno;
283
  _dbus_warn ("Error when trying to remove fd %d: %s", fd, strerror (err));
284 285 286 287 288 289 290 291
}

/* Optimally, this should be the same as in DBusLoop: we use it to translate
 * between struct epoll_event and DBusPollableEvent without allocating heap
 * memory. */
#define N_STACK_DESCRIPTORS 64

static int
292 293 294 295
socket_set_epoll_poll (DBusPollableSet   *set,
                       DBusPollableEvent *revents,
                       int                max_events,
                       int                timeout_ms)
296
{
297
  DBusPollableSetEpoll *self = socket_set_epoll_cast (set);
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
  struct epoll_event events[N_STACK_DESCRIPTORS];
  int n_ready;
  int i;

  _dbus_assert (max_events > 0);

  n_ready = epoll_wait (self->epfd, events,
                        MIN (_DBUS_N_ELEMENTS (events), max_events),
                        timeout_ms);

  if (n_ready <= 0)
    return n_ready;

  for (i = 0; i < n_ready; i++)
    {
      revents[i].fd = events[i].data.fd;
      revents[i].flags = epoll_events_to_watch_flags (events[i].events);
    }

  return n_ready;
}

320
DBusPollableSetClass _dbus_pollable_set_epoll_class = {
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
    socket_set_epoll_free,
    socket_set_epoll_add,
    socket_set_epoll_remove,
    socket_set_epoll_enable,
    socket_set_epoll_disable,
    socket_set_epoll_poll
};

#ifdef TEST_BEHAVIOUR_OF_EPOLLET
/* usage: cat /dev/null | ./epoll
 *
 * desired output:
 * ctl ADD: 0
 * wait for HUP, edge-triggered: 1
 * wait for HUP again: 0
 * ctl MOD: 0
 * wait for HUP: 1
 */

#include <sys/epoll.h>

#include <stdio.h>

/* Stand-alone check of the EPOLLET behaviour that socket_set_epoll_disable
 * relies on: watch stdin edge-triggered, wait twice, switch the watch back
 * to level-triggered and wait once more. The return value of every
 * epoll_ctl / epoll_wait call is printed for comparison with the desired
 * output listed above. */
int
main (void)
{
  const int fd = 0; /* stdin */
  struct epoll_event request;
  struct epoll_event result;
  int epfd;
  int rc;

  epfd = epoll_create1 (EPOLL_CLOEXEC);

  _DBUS_ZERO (request);

  /* Edge-triggered: the hangup should be reported once, then not again. */
  request.events = EPOLLHUP | EPOLLET;
  rc = epoll_ctl (epfd, EPOLL_CTL_ADD, fd, &request);
  printf ("ctl ADD: %d\n", rc);

  rc = epoll_wait (epfd, &result, 1, -1);
  printf ("wait for HUP, edge-triggered: %d\n", rc);

  rc = epoll_wait (epfd, &result, 1, 1);
  printf ("wait for HUP again: %d\n", rc);

  /* Back to level-triggered: the hangup should be reported again. */
  request.events = EPOLLHUP;
  rc = epoll_ctl (epfd, EPOLL_CTL_MOD, fd, &request);
  printf ("ctl MOD: %d\n", rc);

  rc = epoll_wait (epfd, &result, 1, -1);
  printf ("wait for HUP: %d\n", rc);

  return 0;
}

#endif /* TEST_BEHAVIOUR_OF_EPOLLET */

#endif /* !DOXYGEN_SHOULD_SKIP_THIS */