Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 :
4 : main select loop and event handling - epoll implementation
5 :
6 : Copyright (C) Andrew Tridgell 2003-2005
7 : Copyright (C) Stefan Metzmacher 2005-2013
8 : Copyright (C) Jeremy Allison 2013
9 :
10 : ** NOTE! The following LGPL license applies to the tevent
11 : ** library. This does NOT imply that all of Samba is released
12 : ** under the LGPL
13 :
14 : This library is free software; you can redistribute it and/or
15 : modify it under the terms of the GNU Lesser General Public
16 : License as published by the Free Software Foundation; either
17 : version 3 of the License, or (at your option) any later version.
18 :
19 : This library is distributed in the hope that it will be useful,
20 : but WITHOUT ANY WARRANTY; without even the implied warranty of
21 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 : Lesser General Public License for more details.
23 :
24 : You should have received a copy of the GNU Lesser General Public
25 : License along with this library; if not, see <http://www.gnu.org/licenses/>.
26 : */
27 :
#include "replace.h"
#include <limits.h>
#include "system/filesys.h"
#include "system/select.h"
#include "tevent.h"
#include "tevent_internal.h"
#include "tevent_util.h"
34 :
/*
  private state of the epoll backend, stored in
  tevent_context->additional_data
*/
struct epoll_event_context {
	/* the generic tevent context this backend state belongs to */
	struct tevent_context *ev;

	/* descriptor obtained from epoll_create(), -1 while not open */
	int epoll_fd;

	/* pid that created epoll_fd, compared against the current pid */
	pid_t pid;

	/* when true, epoll_panic() always asks the fallback to replay */
	bool panic_force_replay;

	/* if not NULL, epoll_panic() stores true through this pointer */
	bool *panic_state;

	/* handler that epoll_panic() hands control to */
	bool (*panic_fallback)(struct tevent_context *ev, bool replay);
};
48 :
/* backend private bits stored in tevent_fd->additional_flags */

/* the fde currently has an epoll registration */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1 << 0)
/* the fde waits for TEVENT_FD_READ, so errors are passed to its handler */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1 << 1)
/* EPOLLHUP or EPOLLERR was seen for the fde */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1 << 2)
/* the fde is paired with a second fde on the same file descriptor */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX	(1 << 3)
53 :
54 : #ifdef TEST_PANIC_FALLBACK
55 :
56 : static int epoll_create_panic_fallback(struct epoll_event_context *epoll_ev,
57 : int size)
58 : {
59 : if (epoll_ev->panic_fallback == NULL) {
60 : return epoll_create(size);
61 : }
62 :
63 : /* 50% of the time, fail... */
64 : if ((random() % 2) == 0) {
65 : errno = EINVAL;
66 : return -1;
67 : }
68 :
69 : return epoll_create(size);
70 : }
71 :
72 : static int epoll_ctl_panic_fallback(struct epoll_event_context *epoll_ev,
73 : int epfd, int op, int fd,
74 : struct epoll_event *event)
75 : {
76 : if (epoll_ev->panic_fallback == NULL) {
77 : return epoll_ctl(epfd, op, fd, event);
78 : }
79 :
80 : /* 50% of the time, fail... */
81 : if ((random() % 2) == 0) {
82 : errno = EINVAL;
83 : return -1;
84 : }
85 :
86 : return epoll_ctl(epfd, op, fd, event);
87 : }
88 :
89 : static int epoll_wait_panic_fallback(struct epoll_event_context *epoll_ev,
90 : int epfd,
91 : struct epoll_event *events,
92 : int maxevents,
93 : int timeout)
94 : {
95 : if (epoll_ev->panic_fallback == NULL) {
96 : return epoll_wait(epfd, events, maxevents, timeout);
97 : }
98 :
99 : /* 50% of the time, fail... */
100 : if ((random() % 2) == 0) {
101 : errno = EINVAL;
102 : return -1;
103 : }
104 :
105 : return epoll_wait(epfd, events, maxevents, timeout);
106 : }
107 :
/*
 * From here on every epoll_create/epoll_ctl/epoll_wait call in this
 * file goes through the failure injecting wrappers. Each expansion
 * uses a variable named epoll_ev from the calling scope.
 */
#define epoll_create(_size) epoll_create_panic_fallback(epoll_ev, _size)
#define epoll_ctl(_epfd, _op, _fd, _event) epoll_ctl_panic_fallback(epoll_ev,_epfd, _op, _fd, _event)
#define epoll_wait(_epfd, _events, _maxevents, _timeout) epoll_wait_panic_fallback(epoll_ev, _epfd, _events, _maxevents, _timeout)
114 : #endif
115 :
116 : /*
117 : called to set the panic fallback function.
118 : */
119 54585301 : _PRIVATE_ void tevent_epoll_set_panic_fallback(struct tevent_context *ev,
120 : bool (*panic_fallback)(struct tevent_context *ev,
121 : bool replay))
122 : {
123 47888253 : struct epoll_event_context *epoll_ev =
124 54585301 : talloc_get_type_abort(ev->additional_data,
125 : struct epoll_event_context);
126 :
127 54585301 : epoll_ev->panic_fallback = panic_fallback;
128 54585301 : }
129 :
130 : /*
131 : called when a epoll call fails
132 : */
133 5 : static void epoll_panic(struct epoll_event_context *epoll_ev,
134 : const char *reason, bool replay)
135 : {
136 5 : struct tevent_context *ev = epoll_ev->ev;
137 : bool (*panic_fallback)(struct tevent_context *ev, bool replay);
138 :
139 5 : panic_fallback = epoll_ev->panic_fallback;
140 :
141 5 : if (epoll_ev->panic_state != NULL) {
142 0 : *epoll_ev->panic_state = true;
143 : }
144 :
145 5 : if (epoll_ev->panic_force_replay) {
146 0 : replay = true;
147 : }
148 :
149 5 : TALLOC_FREE(ev->additional_data);
150 :
151 5 : if (panic_fallback == NULL) {
152 0 : tevent_debug(ev, TEVENT_DEBUG_FATAL,
153 : "%s (%s) replay[%u] - calling abort()\n",
154 0 : reason, strerror(errno), (unsigned)replay);
155 0 : abort();
156 : }
157 :
158 10 : tevent_debug(ev, TEVENT_DEBUG_ERROR,
159 : "%s (%s) replay[%u] - calling panic_fallback\n",
160 5 : reason, strerror(errno), (unsigned)replay);
161 :
162 5 : if (!panic_fallback(ev, replay)) {
163 : /* Fallback failed. */
164 0 : tevent_debug(ev, TEVENT_DEBUG_FATAL,
165 : "%s (%s) replay[%u] - calling abort()\n",
166 0 : reason, strerror(errno), (unsigned)replay);
167 0 : abort();
168 : }
169 5 : }
170 :
171 : /*
172 : map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
173 : */
174 13347781 : static uint32_t epoll_map_flags(uint16_t flags)
175 : {
176 13347781 : uint32_t ret = 0;
177 13347781 : if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
178 13347781 : if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
179 13347781 : return ret;
180 : }
181 :
182 : /*
183 : free the epoll fd
184 : */
185 54575761 : static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
186 : {
187 54575761 : close(epoll_ev->epoll_fd);
188 54575761 : epoll_ev->epoll_fd = -1;
189 54575761 : return 0;
190 : }
191 :
192 : /*
193 : init the epoll fd
194 : */
195 54585301 : static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
196 : {
197 54585301 : epoll_ev->epoll_fd = epoll_create(64);
198 54585301 : if (epoll_ev->epoll_fd == -1) {
199 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
200 : "Failed to create epoll handle.\n");
201 0 : return -1;
202 : }
203 :
204 54585301 : if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
205 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
206 : "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
207 : }
208 :
209 54585301 : epoll_ev->pid = tevent_cached_getpid();
210 54585301 : talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
211 :
212 54585301 : return 0;
213 : }
214 :
215 : static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde);
216 :
217 : /*
218 : reopen the epoll handle when our pid changes
219 : see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
220 : demonstration of why this is needed
221 : */
222 69392238 : static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
223 : {
224 : struct tevent_fd *fde;
225 69392238 : bool *caller_panic_state = epoll_ev->panic_state;
226 69392238 : bool panic_triggered = false;
227 69392238 : pid_t pid = tevent_cached_getpid();
228 :
229 69392238 : if (epoll_ev->pid == pid) {
230 124448057 : return;
231 : }
232 :
233 34093 : close(epoll_ev->epoll_fd);
234 34093 : epoll_ev->epoll_fd = epoll_create(64);
235 34093 : if (epoll_ev->epoll_fd == -1) {
236 0 : epoll_panic(epoll_ev, "epoll_create() failed", false);
237 0 : return;
238 : }
239 :
240 34093 : if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
241 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
242 : "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
243 : }
244 :
245 34093 : epoll_ev->pid = pid;
246 34093 : epoll_ev->panic_state = &panic_triggered;
247 362538 : for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
248 328445 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
249 328445 : epoll_update_event(epoll_ev, fde);
250 :
251 328445 : if (panic_triggered) {
252 0 : if (caller_panic_state != NULL) {
253 0 : *caller_panic_state = true;
254 : }
255 0 : return;
256 : }
257 : }
258 34093 : epoll_ev->panic_state = NULL;
259 : }
260 :
261 : /*
262 : epoll cannot add the same file descriptor twice, once
263 : with read, once with write which is allowed by the
264 : tevent backend. Multiplex the existing fde, flag it
265 : as such so we can search for the correct fde on
266 : event triggering.
267 : */
268 :
269 15450 : static int epoll_add_multiplex_fd(struct epoll_event_context *epoll_ev,
270 : struct tevent_fd *add_fde)
271 : {
272 : struct epoll_event event;
273 : struct tevent_fd *mpx_fde;
274 : int ret;
275 :
276 : /* Find the existing fde that caused the EEXIST error. */
277 30911 : for (mpx_fde = epoll_ev->ev->fd_events; mpx_fde; mpx_fde = mpx_fde->next) {
278 30911 : if (mpx_fde->fd != add_fde->fd) {
279 11 : continue;
280 : }
281 :
282 30900 : if (mpx_fde == add_fde) {
283 15450 : continue;
284 : }
285 :
286 15450 : break;
287 : }
288 15450 : if (mpx_fde == NULL) {
289 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
290 : "can't find multiplex fde for fd[%d]",
291 : add_fde->fd);
292 0 : return -1;
293 : }
294 :
295 15450 : if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
296 : /* Logic error. Can't have more than 2 multiplexed fde's. */
297 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
298 : "multiplex fde for fd[%d] is already multiplexed\n",
299 : mpx_fde->fd);
300 0 : return -1;
301 : }
302 :
303 : /*
304 : * The multiplex fde must have the same fd, and also
305 : * already have an epoll event attached.
306 : */
307 15450 : if (!(mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) {
308 : /* Logic error. Can't have more than 2 multiplexed fde's. */
309 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
310 : "multiplex fde for fd[%d] has no event\n",
311 : mpx_fde->fd);
312 0 : return -1;
313 : }
314 :
315 : /* Modify the mpx_fde to add in the new flags. */
316 15450 : ZERO_STRUCT(event);
317 15450 : event.events = epoll_map_flags(mpx_fde->flags);
318 15450 : event.events |= epoll_map_flags(add_fde->flags);
319 15450 : event.data.ptr = mpx_fde;
320 15450 : ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, mpx_fde->fd, &event);
321 15450 : if (ret != 0 && errno == EBADF) {
322 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
323 : "EPOLL_CTL_MOD EBADF for "
324 : "add_fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
325 : add_fde, mpx_fde, add_fde->fd);
326 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
327 0 : mpx_fde->wrapper = NULL;
328 0 : mpx_fde->event_ctx = NULL;
329 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, add_fde);
330 0 : add_fde->wrapper = NULL;
331 0 : add_fde->event_ctx = NULL;
332 0 : return 0;
333 15450 : } else if (ret != 0) {
334 0 : return ret;
335 : }
336 :
337 : /*
338 : * Make each fde->additional_data pointers point at each other
339 : * so we can look them up from each other. They are now paired.
340 : */
341 15450 : mpx_fde->additional_data = (struct tevent_fd *)add_fde;
342 15450 : add_fde->additional_data = (struct tevent_fd *)mpx_fde;
343 :
344 : /* Now flag both fde's as being multiplexed. */
345 15450 : mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
346 15450 : add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
347 :
348 : /* we need to keep the GOT_ERROR flag */
349 15450 : if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR) {
350 0 : add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
351 : }
352 :
353 15450 : return 0;
354 : }
355 :
356 : /*
357 : add the epoll event to the given fd_event
358 : */
359 9209637 : static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
360 : {
361 : struct epoll_event event;
362 : int ret;
363 9209637 : struct tevent_fd *mpx_fde = NULL;
364 :
365 9209637 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
366 9209637 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
367 :
368 9209637 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
369 : /*
370 : * This is a multiplexed fde, we need to include both
371 : * flags in the modified event.
372 : */
373 0 : mpx_fde = talloc_get_type_abort(fde->additional_data,
374 : struct tevent_fd);
375 :
376 0 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
377 0 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
378 : }
379 :
380 9209637 : ZERO_STRUCT(event);
381 9209637 : event.events = epoll_map_flags(fde->flags);
382 9209637 : if (mpx_fde != NULL) {
383 0 : event.events |= epoll_map_flags(mpx_fde->flags);
384 : }
385 9209637 : event.data.ptr = fde;
386 9209637 : ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event);
387 9209637 : if (ret != 0 && errno == EBADF) {
388 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
389 : "EPOLL_CTL_ADD EBADF for "
390 : "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
391 : fde, mpx_fde, fde->fd);
392 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
393 0 : fde->wrapper = NULL;
394 0 : fde->event_ctx = NULL;
395 0 : if (mpx_fde != NULL) {
396 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
397 0 : mpx_fde->wrapper = NULL;
398 0 : mpx_fde->event_ctx = NULL;
399 : }
400 9209637 : return;
401 9209637 : } else if (ret != 0 && errno == EEXIST && mpx_fde == NULL) {
402 15450 : ret = epoll_add_multiplex_fd(epoll_ev, fde);
403 26796 : if (ret != 0) {
404 0 : epoll_panic(epoll_ev, "epoll_add_multiplex_fd failed",
405 : false);
406 0 : return;
407 : }
408 9194187 : } else if (ret != 0) {
409 5 : epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed", false);
410 5 : return;
411 : }
412 :
413 9209632 : fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
414 : /* only if we want to read we want to tell the event handler about errors */
415 9209632 : if (fde->flags & TEVENT_FD_READ) {
416 9202130 : fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
417 : }
418 :
419 9209632 : if (mpx_fde == NULL) {
420 9209632 : return;
421 : }
422 :
423 0 : mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
424 : /* only if we want to read we want to tell the event handler about errors */
425 0 : if (mpx_fde->flags & TEVENT_FD_READ) {
426 0 : mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
427 : }
428 : }
429 :
430 : /*
431 : delete the epoll event for given fd_event
432 : */
433 8520069 : static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
434 : {
435 : struct epoll_event event;
436 : int ret;
437 8520069 : struct tevent_fd *mpx_fde = NULL;
438 :
439 8520069 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
440 8520069 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
441 :
442 8520069 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
443 : /*
444 : * This is a multiplexed fde, we need to modify both events.
445 : */
446 0 : mpx_fde = talloc_get_type_abort(fde->additional_data,
447 : struct tevent_fd);
448 :
449 0 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
450 0 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
451 : }
452 :
453 8520069 : ZERO_STRUCT(event);
454 8520069 : ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
455 8520069 : if (ret != 0 && errno == ENOENT) {
456 : /*
457 : * This can happen after a epoll_check_reopen
458 : * within epoll_event_fd_destructor.
459 : */
460 32258 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_TRACE,
461 : "EPOLL_CTL_DEL ignoring ENOENT for fd[%d]\n",
462 : fde->fd);
463 32258 : return;
464 8487811 : } else if (ret != 0 && errno == EBADF) {
465 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
466 : "EPOLL_CTL_DEL EBADF for "
467 : "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
468 : fde, mpx_fde, fde->fd);
469 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
470 0 : fde->wrapper = NULL;
471 0 : fde->event_ctx = NULL;
472 0 : if (mpx_fde != NULL) {
473 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
474 0 : mpx_fde->wrapper = NULL;
475 0 : mpx_fde->event_ctx = NULL;
476 : }
477 0 : return;
478 8487811 : } else if (ret != 0) {
479 0 : epoll_panic(epoll_ev, "EPOLL_CTL_DEL failed", false);
480 0 : return;
481 : }
482 : }
483 :
484 : /*
485 : change the epoll event to the given fd_event
486 : */
487 4107218 : static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
488 : {
489 4107218 : struct tevent_fd *mpx_fde = NULL;
490 : struct epoll_event event;
491 : int ret;
492 :
493 4107218 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
494 4107218 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
495 :
496 4107218 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
497 : /*
498 : * This is a multiplexed fde, we need to include both
499 : * flags in the modified event.
500 : */
501 26 : mpx_fde = talloc_get_type_abort(fde->additional_data,
502 : struct tevent_fd);
503 :
504 26 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
505 26 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
506 : }
507 :
508 4107218 : ZERO_STRUCT(event);
509 4107218 : event.events = epoll_map_flags(fde->flags);
510 4107218 : if (mpx_fde != NULL) {
511 26 : event.events |= epoll_map_flags(mpx_fde->flags);
512 : }
513 4107218 : event.data.ptr = fde;
514 4107218 : ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event);
515 4107218 : if (ret != 0 && errno == EBADF) {
516 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
517 : "EPOLL_CTL_MOD EBADF for "
518 : "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
519 : fde, mpx_fde, fde->fd);
520 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
521 0 : fde->wrapper = NULL;
522 0 : fde->event_ctx = NULL;
523 0 : if (mpx_fde != NULL) {
524 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
525 0 : mpx_fde->wrapper = NULL;
526 0 : mpx_fde->event_ctx = NULL;
527 : }
528 4107192 : return;
529 4107218 : } else if (ret != 0) {
530 0 : epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed", false);
531 0 : return;
532 : }
533 :
534 4107218 : fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
535 : /* only if we want to read we want to tell the event handler about errors */
536 4107218 : if (fde->flags & TEVENT_FD_READ) {
537 4107161 : fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
538 : }
539 :
540 4107218 : if (mpx_fde == NULL) {
541 4107192 : return;
542 : }
543 :
544 26 : mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
545 : /* only if we want to read we want to tell the event handler about errors */
546 26 : if (mpx_fde->flags & TEVENT_FD_READ) {
547 26 : mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
548 : }
549 : }
550 :
551 22090452 : static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
552 : {
553 22090452 : bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
554 22090452 : bool want_read = (fde->flags & TEVENT_FD_READ);
555 22090452 : bool want_write= (fde->flags & TEVENT_FD_WRITE);
556 22090452 : struct tevent_fd *mpx_fde = NULL;
557 :
558 22090452 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
559 : /*
560 : * work out what the multiplexed fde wants.
561 : */
562 26 : mpx_fde = talloc_get_type_abort(fde->additional_data,
563 : struct tevent_fd);
564 :
565 26 : if (mpx_fde->flags & TEVENT_FD_READ) {
566 26 : want_read = true;
567 : }
568 :
569 26 : if (mpx_fde->flags & TEVENT_FD_WRITE) {
570 0 : want_write = true;
571 : }
572 : }
573 :
574 : /* there's already an event */
575 22090452 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
576 12627287 : if (want_read || (want_write && !got_error)) {
577 4107218 : epoll_mod_event(epoll_ev, fde);
578 4107218 : return;
579 : }
580 : /*
581 : * if we want to match the select behavior, we need to remove the epoll_event
582 : * when the caller isn't interested in events.
583 : *
584 : * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
585 : */
586 8520069 : epoll_del_event(epoll_ev, fde);
587 8520069 : return;
588 : }
589 :
590 : /* there's no epoll_event attached to the fde */
591 9463165 : if (want_read || (want_write && !got_error)) {
592 9209637 : epoll_add_event(epoll_ev, fde);
593 9209637 : return;
594 : }
595 : }
596 :
597 : /*
598 : Cope with epoll returning EPOLLHUP|EPOLLERR on an event.
599 : Return true if there's nothing else to do, false if
600 : this event needs further handling.
601 : */
602 13556644 : static bool epoll_handle_hup_or_err(struct epoll_event_context *epoll_ev,
603 : struct tevent_fd *fde)
604 : {
605 13556644 : if (fde == NULL) {
606 : /* Nothing to do if no event. */
607 6778322 : return true;
608 : }
609 :
610 6778322 : fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
611 : /*
612 : * if we only wait for TEVENT_FD_WRITE, we should not tell the
613 : * event handler about it, and remove the epoll_event,
614 : * as we only report errors when waiting for read events,
615 : * to match the select() behavior
616 : */
617 6778322 : if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
618 : /*
619 : * Do the same as the poll backend and
620 : * remove the writeable flag.
621 : */
622 0 : fde->flags &= ~TEVENT_FD_WRITE;
623 0 : return true;
624 : }
625 : /* This has TEVENT_FD_READ set, we're not finished. */
626 6778322 : return false;
627 : }
628 :
629 : /*
630 : event loop handling using epoll
631 : */
632 47645681 : static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
633 : {
634 : int ret, i;
635 : #define MAXEVENTS 1
636 : struct epoll_event events[MAXEVENTS];
637 47645681 : int timeout = -1;
638 : int wait_errno;
639 :
640 47645681 : if (tvalp) {
641 : /* it's better to trigger timed events a bit later than too early */
642 47645681 : timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
643 : }
644 :
645 79499927 : if (epoll_ev->ev->signal_events &&
646 31854246 : tevent_common_check_signal(epoll_ev->ev)) {
647 0 : return 0;
648 : }
649 :
650 47645681 : tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_BEFORE_WAIT);
651 47645681 : ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
652 47645681 : wait_errno = errno;
653 47645681 : tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_AFTER_WAIT);
654 :
655 47645681 : if (ret == -1 && wait_errno == EINTR && epoll_ev->ev->signal_events) {
656 911180 : if (tevent_common_check_signal(epoll_ev->ev)) {
657 911139 : return 0;
658 : }
659 : }
660 :
661 46734501 : if (ret == -1 && wait_errno != EINTR) {
662 0 : epoll_panic(epoll_ev, "epoll_wait() failed", true);
663 0 : return -1;
664 : }
665 :
666 46734501 : if (ret == 0 && tvalp) {
667 : /* we don't care about a possible delay here */
668 363981 : tevent_common_loop_timer_delay(epoll_ev->ev);
669 363949 : return 0;
670 : }
671 :
672 46370520 : for (i=0;i<ret;i++) {
673 46370508 : struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
674 : struct tevent_fd);
675 46370508 : uint16_t flags = 0;
676 46370508 : struct tevent_fd *mpx_fde = NULL;
677 :
678 46370508 : if (fde == NULL) {
679 0 : epoll_panic(epoll_ev, "epoll_wait() gave bad data", true);
680 0 : return -1;
681 : }
682 46370508 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
683 : /*
684 : * Save off the multiplexed event in case we need
685 : * to use it to call the handler function.
686 : */
687 150866 : mpx_fde = talloc_get_type_abort(fde->additional_data,
688 : struct tevent_fd);
689 : }
690 46370508 : if (events[i].events & (EPOLLHUP|EPOLLERR)) {
691 6778322 : bool handled_fde = epoll_handle_hup_or_err(epoll_ev, fde);
692 6778322 : bool handled_mpx = epoll_handle_hup_or_err(epoll_ev, mpx_fde);
693 :
694 6778322 : if (handled_fde && handled_mpx) {
695 0 : epoll_update_event(epoll_ev, fde);
696 0 : continue;
697 : }
698 :
699 6778322 : if (!handled_mpx) {
700 : /*
701 : * If the mpx event was the one that needs
702 : * further handling, it's the TEVENT_FD_READ
703 : * event so switch over and call that handler.
704 : */
705 0 : fde = mpx_fde;
706 0 : mpx_fde = NULL;
707 : }
708 6778322 : flags |= TEVENT_FD_READ;
709 : }
710 46370508 : if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
711 46370508 : if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
712 :
713 46370508 : if (flags & TEVENT_FD_WRITE) {
714 15345321 : if (fde->flags & TEVENT_FD_WRITE) {
715 15236374 : mpx_fde = NULL;
716 : }
717 15345321 : if (mpx_fde && mpx_fde->flags & TEVENT_FD_WRITE) {
718 108947 : fde = mpx_fde;
719 108947 : mpx_fde = NULL;
720 : }
721 : }
722 :
723 46370508 : if (mpx_fde) {
724 : /* Ensure we got the right fde. */
725 62 : if ((flags & fde->flags) == 0) {
726 57 : fde = mpx_fde;
727 57 : mpx_fde = NULL;
728 : }
729 : }
730 :
731 : /*
732 : * make sure we only pass the flags
733 : * the handler is expecting.
734 : */
735 46370508 : flags &= fde->flags;
736 46370508 : if (flags) {
737 46370508 : return tevent_common_invoke_fd_handler(fde, flags, NULL);
738 : }
739 : }
740 :
741 12 : return 0;
742 : }
743 :
744 : /*
745 : create a epoll_event_context structure.
746 : */
747 54585301 : static int epoll_event_context_init(struct tevent_context *ev)
748 : {
749 : int ret;
750 : struct epoll_event_context *epoll_ev;
751 :
752 : /*
753 : * We might be called during tevent_re_initialise()
754 : * which means we need to free our old additional_data.
755 : */
756 54585301 : TALLOC_FREE(ev->additional_data);
757 :
758 54585301 : epoll_ev = talloc_zero(ev, struct epoll_event_context);
759 54585301 : if (!epoll_ev) return -1;
760 54585301 : epoll_ev->ev = ev;
761 54585301 : epoll_ev->epoll_fd = -1;
762 :
763 54585301 : ret = epoll_init_ctx(epoll_ev);
764 54585301 : if (ret != 0) {
765 0 : talloc_free(epoll_ev);
766 0 : return ret;
767 : }
768 :
769 54585301 : ev->additional_data = epoll_ev;
770 54585301 : return 0;
771 : }
772 :
773 : /*
774 : destroy an fd_event
775 : */
776 11906542 : static int epoll_event_fd_destructor(struct tevent_fd *fde)
777 : {
778 11906542 : struct tevent_context *ev = fde->event_ctx;
779 11906542 : struct epoll_event_context *epoll_ev = NULL;
780 11906542 : bool panic_triggered = false;
781 11906542 : struct tevent_fd *mpx_fde = NULL;
782 11906542 : int flags = fde->flags;
783 :
784 11906542 : if (ev == NULL) {
785 3378180 : return tevent_common_fd_destructor(fde);
786 : }
787 :
788 8528362 : epoll_ev = talloc_get_type_abort(ev->additional_data,
789 : struct epoll_event_context);
790 :
791 : /*
792 : * we must remove the event from the list
793 : * otherwise a panic fallback handler may
794 : * reuse invalid memory
795 : */
796 8528362 : DLIST_REMOVE(ev->fd_events, fde);
797 :
798 8528362 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
799 15450 : mpx_fde = talloc_get_type_abort(fde->additional_data,
800 : struct tevent_fd);
801 :
802 15450 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
803 15450 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
804 :
805 15450 : fde->additional_data = NULL;
806 15450 : mpx_fde->additional_data = NULL;
807 :
808 15450 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
809 : }
810 :
811 8528362 : epoll_ev->panic_state = &panic_triggered;
812 8528362 : epoll_check_reopen(epoll_ev);
813 8528362 : if (panic_triggered) {
814 0 : return tevent_common_fd_destructor(fde);
815 : }
816 :
817 8528362 : if (mpx_fde != NULL) {
818 15450 : epoll_update_event(epoll_ev, mpx_fde);
819 15450 : if (panic_triggered) {
820 0 : return tevent_common_fd_destructor(fde);
821 : }
822 : }
823 :
824 8528362 : fde->flags = 0;
825 8528362 : epoll_update_event(epoll_ev, fde);
826 8528362 : fde->flags = flags;
827 8528362 : if (panic_triggered) {
828 0 : return tevent_common_fd_destructor(fde);
829 : }
830 8528362 : epoll_ev->panic_state = NULL;
831 :
832 8528362 : return tevent_common_fd_destructor(fde);
833 : }
834 :
835 : /*
836 : add a fd based event
837 : return NULL on failure (memory allocation error)
838 : */
839 8875895 : static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
840 : int fd, uint16_t flags,
841 : tevent_fd_handler_t handler,
842 : void *private_data,
843 : const char *handler_name,
844 : const char *location)
845 : {
846 5995577 : struct epoll_event_context *epoll_ev =
847 8875895 : talloc_get_type_abort(ev->additional_data,
848 : struct epoll_event_context);
849 : struct tevent_fd *fde;
850 8875895 : bool panic_triggered = false;
851 :
852 8875895 : fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
853 : handler, private_data,
854 : handler_name, location);
855 8875895 : if (!fde) return NULL;
856 :
857 8875895 : talloc_set_destructor(fde, epoll_event_fd_destructor);
858 :
859 8875895 : epoll_ev->panic_state = &panic_triggered;
860 8875895 : epoll_check_reopen(epoll_ev);
861 8875895 : if (panic_triggered) {
862 0 : return fde;
863 : }
864 8875895 : epoll_ev->panic_state = NULL;
865 :
866 8875895 : epoll_update_event(epoll_ev, fde);
867 :
868 8875895 : return fde;
869 : }
870 :
871 : /*
872 : set the fd event flags
873 : */
874 18670431 : static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
875 : {
876 : struct tevent_context *ev;
877 : struct epoll_event_context *epoll_ev;
878 18670431 : bool panic_triggered = false;
879 :
880 31545625 : if (fde->flags == flags) return;
881 :
882 4342300 : ev = fde->event_ctx;
883 4342300 : epoll_ev = talloc_get_type_abort(ev->additional_data,
884 : struct epoll_event_context);
885 :
886 4342300 : fde->flags = flags;
887 :
888 4342300 : epoll_ev->panic_state = &panic_triggered;
889 4342300 : epoll_check_reopen(epoll_ev);
890 4342300 : if (panic_triggered) {
891 0 : return;
892 : }
893 4342300 : epoll_ev->panic_state = NULL;
894 :
895 4342300 : epoll_update_event(epoll_ev, fde);
896 : }
897 :
898 : /*
899 : do a single event loop using the events defined in ev
900 : */
901 200473792 : static int epoll_event_loop_once(struct tevent_context *ev, const char *location)
902 : {
903 174441963 : struct epoll_event_context *epoll_ev =
904 200473792 : talloc_get_type_abort(ev->additional_data,
905 : struct epoll_event_context);
906 : struct timeval tval;
907 200473792 : bool panic_triggered = false;
908 :
909 243506051 : if (ev->signal_events &&
910 43032269 : tevent_common_check_signal(ev)) {
911 261362 : return 0;
912 : }
913 :
914 200212420 : if (ev->threaded_contexts != NULL) {
915 177822 : tevent_common_threaded_activate_immediate(ev);
916 : }
917 :
918 216331710 : if (ev->immediate_events &&
919 16131866 : tevent_common_loop_immediate(ev)) {
920 16119290 : return 0;
921 : }
922 :
923 184080554 : tval = tevent_common_loop_timer_delay(ev);
924 184080513 : if (tevent_timeval_is_zero(&tval)) {
925 136434832 : return 0;
926 : }
927 :
928 47645681 : epoll_ev->panic_state = &panic_triggered;
929 47645681 : epoll_ev->panic_force_replay = true;
930 47645681 : epoll_check_reopen(epoll_ev);
931 47645681 : if (panic_triggered) {
932 0 : errno = EINVAL;
933 0 : return -1;
934 : }
935 47645681 : epoll_ev->panic_force_replay = false;
936 47645681 : epoll_ev->panic_state = NULL;
937 :
938 47645681 : return epoll_event_loop(epoll_ev, &tval);
939 : }
940 :
941 : static const struct tevent_ops epoll_event_ops = {
942 : .context_init = epoll_event_context_init,
943 : .add_fd = epoll_event_add_fd,
944 : .set_fd_close_fn = tevent_common_fd_set_close_fn,
945 : .get_fd_flags = tevent_common_fd_get_flags,
946 : .set_fd_flags = epoll_event_set_fd_flags,
947 : .add_timer = tevent_common_add_timer_v2,
948 : .schedule_immediate = tevent_common_schedule_immediate,
949 : .add_signal = tevent_common_add_signal,
950 : .loop_once = epoll_event_loop_once,
951 : .loop_wait = tevent_common_loop_wait,
952 : };
953 :
954 28196 : _PRIVATE_ bool tevent_epoll_init(void)
955 : {
956 28196 : return tevent_register_backend("epoll", &epoll_event_ops);
957 : }
|