| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Hypersonic::Event::IOUring; |
|
2
|
|
|
|
|
|
|
|
|
3
|
27
|
|
|
27
|
|
207371
|
use strict; |
|
|
27
|
|
|
|
|
52
|
|
|
|
27
|
|
|
|
|
1024
|
|
|
4
|
27
|
|
|
27
|
|
122
|
use warnings; |
|
|
27
|
|
|
|
|
75
|
|
|
|
27
|
|
|
|
|
1449
|
|
|
5
|
27
|
|
|
27
|
|
417
|
use 5.010; |
|
|
27
|
|
|
|
|
77
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
27
|
|
|
27
|
|
195
|
use parent 'Hypersonic::Event::Role'; |
|
|
27
|
|
|
|
|
55
|
|
|
|
27
|
|
|
|
|
642
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.15'; |
|
10
|
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
1
|
1398
|
# Short identifier string for this event backend.
sub name {
    return 'io_uring';
}
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Report whether the io_uring backend can be used on this host.
#
# Returns 0 as soon as any cheap precondition fails:
#   * the OS is not Linux;
#   * the running kernel is older than 5.1;
#   * no liburing.h is found in the usual include directories;
#   * /proc/sys/kernel/io_uring_disabled is readable and holds anything
#     other than "0".
# Otherwise returns whatever Hypersonic::JIT::Util->can_run() returns for a
# tiny program that opens and closes a ring.
sub available {
    return 0 unless $^O eq 'linux';

    # Check kernel version >= 5.1. POSIX::uname() hands back the release
    # string from uname(2) directly, so no shell or `uname` binary on PATH
    # is needed.
    require POSIX;
    my $release = (POSIX::uname())[2] // '';
    my ($major, $minor) = $release =~ /^(\d+)\.(\d+)/;
    return 0 unless $major && ($major > 5 || ($major == 5 && $minor >= 1));

    # Check for liburing headers. The multiarch directory is globbed rather
    # than hard-coded to x86_64; the run probe below remains the final word
    # on whether the library is actually usable.
    my $has_header = grep { -f $_ } (
        '/usr/include/liburing.h',
        '/usr/local/include/liburing.h',
        glob('/usr/include/*-linux-gnu*/liburing.h'),
    );
    return 0 unless $has_header;

    # io_uring may be disabled at the kernel level. RHEL9 ships with
    # kernel.io_uring_disabled=2 by default; a value of 1 or 2 means
    # the syscall returns EINVAL/EPERM regardless of liburing being
    # linkable. Bail before we sink time into a compile+link probe.
    # A missing sysctl file (open fails) is not treated as "disabled".
    if (open my $fh, '<', '/proc/sys/kernel/io_uring_disabled') {
        my $disabled = <$fh>;
        close $fh;
        if (defined $disabled) {
            chomp $disabled;
            return 0 if $disabled ne '0';
        }
    }

    # Compile-link-and-RUN probe. A pure link check passes on systems
    # that have liburing installed but where io_uring_setup() will
    # nevertheless fail at runtime (kernel disabled, sandboxing, missing
    # liburing.so at exec time). Actually open and close a ring.
    require Hypersonic::JIT::Util;
    return Hypersonic::JIT::Util->can_run(
        '',
        '-luring',
        'struct io_uring ring; int rc = io_uring_queue_init(8, &ring, 0); '
            . 'if (rc < 0) return 1; io_uring_queue_exit(&ring); return 0;',
        '#include <liburing.h>',
    );
}
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# C #include directives needed by the code this backend generates.
#
# <liburing.h> is for the server loop. <sys/epoll.h> is needed for the
# UA::Async slot-tracking helpers (gen_create_loop / gen_add_with_slot /
# gen_get_slot) - io_uring is Linux 5.1+ which always has epoll.
#
# Returns the two directives separated by a newline, with no trailing
# newline.
sub includes {
    return "#include <liburing.h>\n#include <sys/epoll.h>";
}
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# C preprocessor definitions for this backend, returned as one
# newline-terminated string.
#
# URING_ENTRIES and MAX_EVENTS are wrapped in #ifndef so a value defined
# earlier in the translation unit takes precedence. The UD_* constants are
# the tags and mask used when packing an operation type and an fd into a
# completion's user data.
sub defines {
    my @directives = (
        '#define EV_BACKEND_IO_URING 1',
        '#ifndef URING_ENTRIES',
        '#define URING_ENTRIES 256',
        '#endif',
        '#ifndef MAX_EVENTS',
        '#define MAX_EVENTS 1024',
        '#endif',
        '',
        '/* User data encoding: type in high bits, fd in low bits */',
        '#define UD_ACCEPT 0x10000000',
        '#define UD_READ 0x20000000',
        '#define UD_WRITE 0x30000000',
        '#define UD_FD_MASK 0x0FFFFFFF',
    );

    return join("\n", @directives) . "\n";
}
|
76
|
|
|
|
|
|
|
|
|
77
|
0
|
|
|
0
|
1
|
|
# C struct tag of the completion entries handled by gen_wait / gen_get_fd.
sub event_struct {
    return 'io_uring_cqe';
}
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# UA::Async slot-tracking helpers below use a private epoll instance |
|
80
|
|
|
|
|
|
|
# (io_uring's own slot tracking would mean weaving user_data through |
|
81
|
|
|
|
|
|
|
# the shared submission ring, which is a lot more invasive). So when |
|
82
|
|
|
|
|
|
|
# UA::Async asks for a buffer to pass to gen_wait_once it must be |
|
83
|
|
|
|
|
|
|
# struct epoll_event[], NOT io_uring_cqe[]. See the Fedora 43 / perl |
|
84
|
|
|
|
|
|
|
# 5.38.5 smoker report (5ce1e632) for what happens when the wrong |
|
85
|
|
|
|
|
|
|
# type is declared - epoll_wait() argument-type mismatch + a missing |
|
86
|
|
|
|
|
|
|
# `data` member access. |
|
87
|
0
|
|
|
0
|
0
|
|
# C struct tag for the event buffer used by the epoll-based slot helpers
# (gen_wait_once hands it to epoll_wait; gen_get_slot reads .data.u32).
sub slot_event_struct {
    return 'epoll_event';
}
|
88
|
|
|
|
|
|
|
|
|
89
|
0
|
|
|
0
|
1
|
|
# Extra compiler flags for this backend: none (empty string).
sub extra_cflags {
    return '';
}
|
90
|
0
|
|
|
0
|
1
|
|
# Extra linker flags for this backend: link against liburing.
sub extra_ldflags {
    return '-luring';
}
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# io_uring is fundamentally different - uses submission/completion queues |
|
93
|
|
|
|
|
|
|
# Emit the C that initialises the static io_uring instance (once) and
# queues the first accept on the listening socket.
#
# Parameters:
#   $class         - invocant (unused).
#   $builder       - code builder; output is appended through its chained
#                    comment/line/blank/if/endif methods.
#   $listen_fd_var - name of the C variable holding the listening socket fd.
#
# Returns the result of the last chained builder call.
sub gen_create {
    my ($class, $builder, $listen_fd_var) = @_;

    return $builder->comment('io_uring backend - high performance Linux I/O')
        ->line('static struct io_uring ring;')
        ->line('static int ring_initialized = 0;')
        ->blank
        ->if('!ring_initialized')
            # io_uring_queue_init() reports failure as a negative errno
            # *return value*; errno itself is not a reliable source for the
            # cause, so keep the return code and format that instead.
            ->line('int ring_rc = io_uring_queue_init(URING_ENTRIES, &ring, 0);')
            ->if('ring_rc < 0')
                # gen_create is inlined into hypersonic_run_event_loop, which
                # is a void XS function. A `return` that carries a value there
                # triggers GCC 14+ -Wreturn-mismatch (now an error). croak from
                # XS instead - it longjmps out cleanly and surfaces a
                # Perl-level error.
                ->line('croak("io_uring_queue_init() failed: %s", strerror(-ring_rc));')
            ->endif
            ->line('ring_initialized = 1;')
        ->endif
        ->blank
        ->comment('Submit initial accept')
        ->line('struct io_uring_sqe* sqe = io_uring_get_sqe(&ring);')
        ->if('sqe')
            ->line("io_uring_prep_accept(sqe, $listen_fd_var, NULL, NULL, 0);")
            ->line("io_uring_sqe_set_data(sqe, (void*)(uintptr_t)(UD_ACCEPT | $listen_fd_var));")
            ->line('io_uring_submit(&ring);')
        ->endif
        ->line('int ev_fd = 0;')    # Dummy - io_uring uses ring structure
        ->blank;
}
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# Submit read operation |
|
123
|
|
|
|
|
|
|
# Emit the C that queues a recv on $fd_var and submits it to the ring.
#
# The emitted code assigns to an existing `sqe` variable, receives into
# `recv_buf` (RECV_BUF_SIZE bytes) and tags the request with UD_READ | fd.
# Nothing is queued when io_uring_get_sqe() yields NULL. $loop_var is
# accepted but not used.
#
# Returns the result of the last chained builder call.
sub gen_add {
    my ($class, $builder, $loop_var, $fd_var) = @_;

    my $prep_recv = sprintf 'io_uring_prep_recv(sqe, %s, recv_buf, RECV_BUF_SIZE, 0);',
        $fd_var;
    my $tag_read = sprintf 'io_uring_sqe_set_data(sqe, (void*)(uintptr_t)(UD_READ | %s));',
        $fd_var;

    return $builder->line('sqe = io_uring_get_sqe(&ring);')
        ->if('sqe')
            ->line($prep_recv)
            ->line($tag_read)
            ->line('io_uring_submit(&ring);')
        ->endif;
}
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# Cancel pending operations |
|
135
|
|
|
|
|
|
|
# Emit the C that drops a connection: a comment followed by close() on
# $fd_var. No cancel request is submitted to the ring. $loop_var is
# accepted but not used.
#
# Returns the result of the last chained builder call.
sub gen_del {
    my ($class, $builder, $loop_var, $fd_var) = @_;

    my $close_call = sprintf 'close(%s);', $fd_var;

    return $builder
        ->comment('io_uring: close fd (pending ops will complete with error)')
        ->line($close_call);
}
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Wait for completions |
|
143
|
|
|
|
|
|
|
# Emit the C that waits for completions and gathers them into an array.
#
# The emitted code:
#   1. converts $timeout_var (milliseconds) into a __kernel_timespec;
#   2. blocks in io_uring_wait_cqe_timeout(); -ETIME and -EINTR `continue`,
#      any other negative result does `break` (so the code is expected to
#      sit inside a loop);
#   3. walks the ready completions, storing up to MAX_EVENTS pointers in a
#      static array, declares $count_var as the number stored, and points
#      $events_var at that array.
# $loop_var is accepted but not used.
#
# Returns the result of the last chained builder call.
sub gen_wait {
    my ($class, $builder, $loop_var, $events_var, $count_var, $timeout_var) = @_;

    my $set_seconds = sprintf 'ts.tv_sec = %s / 1000;', $timeout_var;
    my $set_nanos   = sprintf 'ts.tv_nsec = (%s %% 1000) * 1000000;', $timeout_var;
    my $decl_count  = sprintf 'int %s = 0;', $count_var;
    my $collect     = sprintf ' if (%1$s < MAX_EVENTS) cqes[%1$s++] = cqe;', $count_var;
    my $publish     = sprintf '%s = cqes;', $events_var;

    return $builder->line('struct io_uring_cqe* cqe;')
        ->line('struct __kernel_timespec ts;')
        ->line($set_seconds)
        ->line($set_nanos)
        ->blank
        ->comment('Wait for at least one completion')
        ->line('int wait_result = io_uring_wait_cqe_timeout(&ring, &cqe, &ts);')
        ->if('wait_result < 0')
            ->if('wait_result == -ETIME')
                ->line('continue;')    # Timeout is normal
            ->endif
            ->if('wait_result == -EINTR')
                ->line('continue;')
            ->endif
            ->line('break;')
        ->endif
        ->blank
        ->comment('Process all available completions')
        ->line('unsigned head;')
        ->line($decl_count)
        ->line('static struct io_uring_cqe* cqes[MAX_EVENTS];')
        ->line('io_uring_for_each_cqe(&ring, head, cqe) {')
        ->line($collect)
        ->line('}')
        ->line($publish);    # Point to our array
}
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# Extract operation type and fd from completion |
|
174
|
|
|
|
|
|
|
# Emit the C that decodes one completion from the array built by gen_wait.
#
# The emitted code reads entry $index_var of $events_var, splits its user
# data into an operation tag (high bits) and an fd (declared as $fd_var),
# captures the result code, and marks the completion as seen. Then:
#   * UD_ACCEPT: on success $fd_var is overwritten with `listen_fd` so the
#     caller can recognise an accept; on failure the code `continue`s.
#   * UD_READ:   a result <= 0 closes the fd, decrements
#                g_active_connections and `continue`s; otherwise the bytes
#                are already in recv_buf.
# Other tags fall through untouched.
#
# NOTE(review): `client_fd` is declared inside the emitted accept branch;
# whether later generated code can see it depends on how the builder's
# if/endif scopes C blocks - confirm against the builder.
#
# Returns the result of the last chained builder call.
sub gen_get_fd {
    my ($class, $builder, $events_var, $index_var, $fd_var) = @_;

    my $pick_entry  = sprintf 'struct io_uring_cqe* completion = %s[%s];',
        $events_var, $index_var;
    my $decl_fd     = sprintf 'int %s = user_data & UD_FD_MASK;', $fd_var;
    my $mark_accept = sprintf '%s = listen_fd;', $fd_var;
    my $close_fd    = sprintf 'close(%s);', $fd_var;

    return $builder->line($pick_entry)
        ->line('uintptr_t user_data = (uintptr_t)io_uring_cqe_get_data(completion);')
        ->line('int op_type = user_data & 0xF0000000;')
        ->line($decl_fd)
        ->line('int result = completion->res;')
        ->blank
        ->comment('Mark completion as seen')
        ->line('io_uring_cqe_seen(&ring, completion);')
        ->blank
        ->comment('Handle based on operation type')
        ->if('op_type == UD_ACCEPT')
            ->if('result >= 0')
                ->comment('result is the new client fd')
                ->line('int client_fd = result;')
                ->line($mark_accept)    # Signal this was accept
            ->else
                ->line('continue;')     # Accept failed
            ->endif
        ->elsif('op_type == UD_READ')
            ->if('result <= 0')
                ->comment('Connection closed or error')
                ->line($close_fd)
                ->line('g_active_connections--;')
                ->line('continue;')
            ->endif
            ->comment('result is bytes read - already in recv_buf')
        ->endif;
}
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# Cleanup io_uring resources |
|
207
|
|
|
|
|
|
|
# Emit the C that tears the ring down: when `ring_initialized` is set, call
# io_uring_queue_exit() and clear the flag again.
#
# Returns the result of the last chained builder call.
sub gen_cleanup {
    my ($class, $builder) = @_;

    return $builder
        ->if('ring_initialized')
            ->line('io_uring_queue_exit(&ring);')
            ->line('ring_initialized = 0;')
        ->endif;
}
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# ============================================================ |
|
217
|
|
|
|
|
|
|
# Async Slot Integration Methods (UA Async) |
|
218
|
|
|
|
|
|
|
# |
|
219
|
|
|
|
|
|
|
# UA::Async tracks per-slot fd readiness independently of the server |
|
220
|
|
|
|
|
|
|
# loop's io_uring ring. We use epoll under the hood since io_uring |
|
221
|
|
|
|
|
|
|
# requires Linux 5.1+ which always has epoll, and a separate epoll |
|
222
|
|
|
|
|
|
|
# instance is much simpler than weaving slot tracking through the |
|
223
|
|
|
|
|
|
|
# server's submission ring. |
|
224
|
|
|
|
|
|
|
# ============================================================ |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# Emit the C for one epoll_wait() on the private slot-tracking epoll
# instance: $count_var receives the return value, and an EINTR failure is
# normalised to a count of 0. At most MAX_EVENTS events are requested into
# $events_var; $timeout_ms is passed through verbatim.
#
# Returns the result of the last chained builder call.
sub gen_wait_once {
    my ($class, $builder, $loop_var, $events_var, $count_var, $timeout_ms) = @_;

    my $wait_call = sprintf '%s = epoll_wait(%s, %s, MAX_EVENTS, %s);',
        $count_var, $loop_var, $events_var, $timeout_ms;
    my $eintr_reset = sprintf 'if (%1$s < 0 && errno == EINTR) %1$s = 0;',
        $count_var;

    return $builder->line($wait_call)->line($eintr_reset);
}
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# Emit the C that creates the private epoll instance used for slot
# tracking and stores its fd in $loop_var.
#
# On failure the emitted code croaks; the message now carries
# strerror(errno) so the cause (EMFILE, ENOMEM, ...) is visible, matching
# the error style gen_create uses for io_uring_queue_init().
#
# Returns the result of the last chained builder call.
sub gen_create_loop {
    my ($class, $builder, $loop_var) = @_;

    return $builder->line("$loop_var = epoll_create1(0);")
        ->if("$loop_var < 0")
            ->line('croak("epoll_create1() failed: %s", strerror(errno));')
        ->endif;
}
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
# Emit the C that registers $fd_var with the slot-tracking epoll instance
# and stores $slot_var in the event's data.u32.
#
# Parameters:
#   $events - 'write' selects EPOLLOUT; 'read', any other value, or undef
#             selects EPOLLIN. Registration is always edge-triggered and
#             one-shot (EPOLLET | EPOLLONESHOT).
#
# NOTE(review): the emitted epoll_ctl() result is not checked, and
# EPOLL_CTL_ADD is used unconditionally - confirm callers never register
# the same fd twice (a one-shot re-arm would need EPOLL_CTL_MOD).
#
# Returns the result of the last chained builder call.
sub gen_add_with_slot {
    my ($class, $builder, $loop_var, $fd_var, $slot_var, $events) = @_;

    # Guard with defined() so an omitted $events does not raise an
    # "uninitialized value" warning before falling back to the read mask.
    my $direction = (defined $events && $events eq 'write') ? 'EPOLLOUT' : 'EPOLLIN';
    my $ev_flags  = "$direction | EPOLLET | EPOLLONESHOT";

    return $builder->line('{')
        ->line(' struct epoll_event _ev;')
        ->line(" _ev.events = $ev_flags;")
        ->line(" _ev.data.u32 = (uint32_t)$slot_var;")
        ->line(" epoll_ctl($loop_var, EPOLL_CTL_ADD, $fd_var, &_ev);")
        ->line('}');
}
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# Emit the C that declares $slot_var as an int and loads it from the
# data.u32 field of entry $index_var in the epoll event array $events_var
# (the value stored there by gen_add_with_slot).
#
# Returns the result of the last chained builder call.
sub gen_get_slot {
    my ($class, $builder, $events_var, $index_var, $slot_var) = @_;

    my $declare = sprintf 'int %s;', $slot_var;
    my $load    = sprintf '%s = (int)%s[%s].data.u32;',
        $slot_var, $events_var, $index_var;

    return $builder->line($declare)->line($load);
}
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
# When async-slot helpers above are emitted, the io_uring includes |
|
265
|
|
|
|
|
|
|
# already pull in <sys/epoll.h> via the generated Hypersonic includes. |
|
266
|
|
|
|
|
|
|
# We don't need to add it here. |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
# Future/Pool integration - add pool notify fd via poll on the fd |
|
269
|
|
|
|
|
|
|
# Emit the C that watches the pool's notify fd through the ring: a POLLIN
# poll request on $notify_fd_var, tagged with 0x40000000 | fd in its user
# data, submitted immediately. The emitted code assigns to an existing
# `sqe` variable and queues nothing when io_uring_get_sqe() yields NULL.
# $loop_var is accepted but not used.
#
# Returns the result of the last chained builder call.
sub gen_add_pool_notify {
    my ($class, $builder, $loop_var, $notify_fd_var) = @_;

    my $prep_poll = sprintf 'io_uring_prep_poll_add(sqe, %s, POLLIN);',
        $notify_fd_var;
    my $tag_poll = sprintf 'io_uring_sqe_set_data(sqe, (void*)(uintptr_t)(0x40000000 | %s));',
        $notify_fd_var;

    return $builder->line('/* Add pool notify fd to io_uring via poll */')
        ->line('sqe = io_uring_get_sqe(&ring);')
        ->if('sqe')
            ->line($prep_poll)
            ->line($tag_poll)
            ->line('io_uring_submit(&ring);')
        ->endif;
}
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
1; |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
__END__ |