More consistent API
The io_uring module provides access to Linux's io_uring subsystem. The documentation for this module is somewhat scarce: users are presumed to be familiar with io_uring. Thus, it is recommended that a reading of this module is paired with the Linux documentation, which may be available from your local liburing package under the io_uring_setup, io_uring_enter, and io_uring_register man pages. This module may be considered to be roughly equivalent to liburing's abstraction layer.
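A rough sketch of the intended flow, using only functions defined in this
module (error handling abbreviated; the no-op operation stands in for real
I/O):

	let params = ring_params { ... };
	let ring = ring_init(32, &params)!;
	defer ring_exit(&ring);

	const sqe = must_get_sqe(&ring);
	op_nop(sqe);
	ring_submit(&ring)!;

	const cqe = cqe_wait(&ring)!;
	cqe_seen(&ring, cqe);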
// License: MPL-2.0
// (c) 2021 Alexey Yerin <yyp@disroot.org>
// (c) 2021 Drew DeVault <sir@cmpwn.com>
// (c) 2021 Eyal Sawady <ecs@d2evs.net>
use errors;
use rt;
// Advances the completion queue by N items.
export fn cq_advance(ring: *io_uring, n: uint) void = {
	*ring.cq.khead = *ring.cq.khead + n;
};
// Call after processing a [[cqe]]. The cqe is returned to the pool and cannot
// be used by the application again.
export fn cqe_seen(ring: *io_uring, cqe: *cqe) void = cq_advance(ring, 1);
// Waits until a CQE is available, then returns it. The caller must pass the
// returned CQE to [[cqe_seen]] to advance the queue.
export fn cqe_wait(ring: *io_uring) (*cqe | error) = {
	match (get_cqe(ring, 0, 1)) {
	case let err: error =>
		return err;
	case let cq: nullable *cqe =>
		return cq as *cqe; // non-null because wait == 1
	};
};
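// A sketch of a typical completion loop pairing [[cqe_wait]] with
// [[cqe_seen]] (handle() is a hypothetical callback):
//
//	for (true) {
//		const cqe = cqe_wait(ring)!;
//		handle(cqe_result(cqe)!, cqe_get_data(cqe));
//		cqe_seen(ring, cqe);
//	};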
// Peeks the next CQE from the queue and returns it, or null if none are
// pending. The caller must pass the returned CQE to [[cqe_seen]] to advance the
// queue.
export fn cqe_peek(ring: *io_uring) (nullable *cqe | error) = get_cqe(ring, 0, 0);
// Returns the result of a [[cqe]], or an error if unsuccessful.
export fn cqe_result(cqe: *cqe) (int | error) = {
	if (cqe.res >= 0) {
		return cqe.res;
	};
	switch (-cqe.res) {
	case rt::ENOBUFS =>
		return nobuffers;
	case =>
		return errors::errno(-cqe.res: rt::errno);
	};
};
// Gets the user data field of a [[cqe]]. See [[sqe_set_data]] for the
// corresponding SQE function.
export fn cqe_get_data(cqe: *cqe) nullable *opaque =
	cqe.user_data: uintptr: nullable *opaque;
// Returns the buffer ID used for this [[cqe]] in combination with
// [[sqe_set_buffer_select]]. Aborts the program if this CQE was not configured
// to use a buffer pool.
export fn cqe_get_buffer_id(cqe: *cqe) u16 = {
	// TODO: Handle ENOBUFS
	assert(cqe.flags & cqe_flags::F_BUFFER > 0,
		"cqe_get_buffer_id called for CQE without buffer");
	return (cqe.flags: u32 >> CQE_BUFFER_SHIFT): u16;
};
fn peek_cqe_(ring: *io_uring) (nullable *cqe, uint) = {
	let head = *ring.cq.khead;
	let tail = *ring.cq.ktail;
	let mask = *ring.cq.kring_mask;
	let avail = tail - head;
	if (avail == 0) {
		return (null, 0);
	};
	return (&ring.cq.cqes[head & mask], avail);
};
fn get_cqe(
	ring: *io_uring,
	submit: uint,
	wait: uint,
) (nullable *cqe | error) = {
	let cq: nullable *cqe = null;
	for (cq == null) {
		let enter = false, overflow = false;
		let flags = enter_flags::NONE;
		// TODO: tuple destructuring
		let tup = peek_cqe_(ring);
		let avail = tup.1;
		cq = tup.0;
		if (cq == null && wait == 0 && submit == 0) {
			if (!needs_flush(ring)) {
				return null;
			};
			overflow = true;
		};
		if (wait > avail || overflow) {
			flags |= enter_flags::GETEVENTS;
			enter = true;
		};
		if (submit > 0) {
			needs_enter(ring, &flags);
			enter = true;
		};
		if (!enter) {
			break;
		};
		match (rt::io_uring_enter(ring.fd,
			submit, wait, flags: uint, null)) {
		case let err: rt::errno =>
			return errors::errno(err);
		case let n: uint =>
			submit -= n;
		};
	};
	return cq;
};
// License: MPL-2.0
// (c) 2021 Alexey Yerin <yyp@disroot.org>
// (c) 2021 Drew DeVault <sir@cmpwn.com>
// (c) 2021 Eyal Sawady <ecs@d2evs.net>
use errors;
use rt;

// TODO: Atomics

// Returns the next available [[sqe]] for this [[io_uring]], or null if the
// queue is full.
export fn ring_get_sqe(ring: *io_uring) nullable *sqe = {
	const sq = &ring.sq;
	const head = *sq.khead, next = sq.sqe_tail + 1;
	if (next - head <= *sq.kring_entries) {
		let sqe = &sq.sqes[sq.sqe_tail & *sq.kring_mask];
		sq.sqe_tail = next;
		return sqe;
	};
	return null;
};
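// One possible way to handle a full queue is to submit pending entries and
// retry (a sketch; submit-on-full is a policy choice, not a requirement of
// this API):
//
//	const sqe = match (ring_get_sqe(ring)) {
//	case null =>
//		ring_submit(ring)!;
//		yield must_get_sqe(ring);
//	case let sqe: *sqe =>
//		yield sqe;
//	};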
// Returns the next available [[sqe]] for this [[io_uring]], or aborts the
// program if the queue is full.
export fn must_get_sqe(ring: *io_uring) *sqe = {
	match (ring_get_sqe(ring)) {
	case null =>
		abort("I/O queue full");
	case let sq: *sqe =>
		return sq;
	};
};
fn needs_enter(ring: *io_uring, flags: *enter_flags) bool = {
	if (ring.flags & ring_setup_flags::IOPOLL != ring_setup_flags::IOPOLL) {
		return true;
	};
	if (*ring.sq.kflags & sq_flags::NEED_WAKEUP == sq_flags::NEED_WAKEUP) {
		*flags |= enter_flags::SQ_WAKEUP;
		return true;
	};
	return false;
};

fn needs_flush(ring: *io_uring) bool =
	*ring.sq.kflags & sq_flags::CQ_OVERFLOW == sq_flags::CQ_OVERFLOW;
// Submits queued I/O asynchronously. Returns the number of submissions
// accepted by the kernel.
export fn ring_submit(ring: *io_uring) (uint | error) =
	do_submit(ring, flush_sq(ring), 0u);

// Submits queued I/O asynchronously and blocks until at least "wait" events
// are complete. If ring_setup_flags::IOPOLL was configured for this ring, the
// meaning of the "wait" parameter is different: a non-zero value will block
// until at least one event is completed.
//
// Returns the number of submissions accepted by the kernel.
export fn ring_submit_and_wait(ring: *io_uring, wait: uint) (uint | error) =
	do_submit(ring, flush_sq(ring), wait);
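// A sketch of batching several prepared SQEs and waiting for all of their
// completions (assumes three SQEs were already filled in):
//
//	ring_submit_and_wait(ring, 3)!;
//	for (let i = 0; i < 3; i += 1) {
//		const cqe = cqe_wait(ring)!;
//		cqe_seen(ring, cqe);
//	};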
fn flush_sq(ring: *io_uring) uint = {
	let sq = &ring.sq;
	let ktail = *sq.ktail;
	const mask = *sq.kring_mask;
	if (sq.sqe_head == sq.sqe_tail) {
		return ktail - *sq.khead;
	};
	for (let n = sq.sqe_tail - sq.sqe_head; n > 0; n -= 1u) {
		sq.array[ktail & mask] = sq.sqe_head & mask;
		ktail += 1u;
		sq.sqe_head += 1u;
	};
	*sq.ktail = ktail;
	return ktail - *sq.khead;
};
fn do_submit(
	ring: *io_uring,
	submitted: uint,
	wait: uint,
) (uint | error) = {
	let flags = enter_flags::GETEVENTS;
	if (needs_enter(ring, &flags) || wait != 0) {
		match (rt::io_uring_enter(ring.fd,
				submitted, wait, flags: uint, null)) {
		case let err: rt::errno =>
			return errors::errno(err);
		case let n: uint =>
			return n;
		};
	} else {
		return submitted;
	};
};
// License: MPL-2.0
// (c) 2021 Drew DeVault <sir@cmpwn.com>
// (c) 2021 Eyal Sawady <ecs@d2evs.net>
use errors;
use rt;
use types;

// Registers a set of fixed buffers with an [[io_uring]]. Note that you must
// call [[ring_unregister_buffers]] before registering a new set of buffers
// (even if some of them have similar addresses to the old set). The buffers
// must be anonymous, non-file-backed memory (e.g. the kind returned by alloc
// or rt::mmap).
export fn ring_register_buffers(ring: *io_uring, iov: []rt::iovec) (void | error) = {
	assert(len(iov) <= types::UINT_MAX);
	match (rt::io_uring_register(ring.fd, ring_register_op::REGISTER_BUFFERS,
			iov: *[*]rt::iovec, len(iov): uint)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case int => void;
	};
};
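// For example (a sketch; the buffer must be anonymous memory, as noted above):
//
//	let buf: []u8 = alloc([0...], 4096);
//	const iov: []rt::iovec = [rt::iovec {
//		iov_base = buf: *[*]u8: *opaque,
//		iov_len = len(buf),
//	}];
//	ring_register_buffers(ring, iov)!;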
// Unregisters all fixed buffers associated with an [[io_uring]].
export fn ring_unregister_buffers(ring: *io_uring) (void | error) = {
	match (rt::io_uring_register(ring.fd,
			ring_register_op::UNREGISTER_BUFFERS, null, 0)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case int => void;
	};
};

// Registers a set of file descriptors with an [[io_uring]]. The set of files
// may be sparse, meaning that some are set to -1, to be updated later using
// [[ring_register_files_update]].
export fn ring_register_files(ring: *io_uring, files: []int) (void | error) = {
	assert(len(files) <= types::UINT_MAX);
	match (rt::io_uring_register(ring.fd, ring_register_op::REGISTER_FILES,
			files: *[*]int, len(files): uint)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case int => void;
	};
};
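// For example, reserving four sparse slots and filling the first in later
// (a sketch; fd is an assumed open file descriptor):
//
//	const files: []int = [-1, -1, -1, -1];
//	ring_register_files(ring, files)!;
//	const update = ring_files_update {
//		offs = 0,
//		resv = 0,
//		fds = &fd,
//	};
//	const updates: []ring_files_update = [update];
//	ring_register_files_update(ring, updates)!;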
// Applies a set of [[ring_files_update]]s to the set of files registered with
// an [[io_uring]].
export fn ring_register_files_update(
	ring: *io_uring,
	updates: []ring_files_update,
) (void | error) = {
	assert(len(updates) <= types::UINT_MAX);
	match (rt::io_uring_register(ring.fd,
			ring_register_op::REGISTER_FILES_UPDATE,
			updates: *[*]ring_files_update, len(updates): uint)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case int => void;
	};
};

// Unregisters all files associated with an [[io_uring]].
export fn ring_unregister_files(ring: *io_uring) (void | error) = {
	match (rt::io_uring_register(ring.fd,
			ring_register_op::UNREGISTER_FILES, null, 0)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case int => void;
	};
};

// Registers an eventfd(2) with this [[io_uring]] to be notified of completion
// events.
export fn ring_register_eventfd(ring: *io_uring, fd: int) (void | error) = {
	match (rt::io_uring_register(ring.fd,
			ring_register_op::REGISTER_EVENTFD, &fd, 1)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case int => void;
	};
};

// Similar to [[ring_register_eventfd]], but only notifies of events which
// complete asynchronously.
export fn ring_register_eventfd_async(ring: *io_uring, fd: int) (void | error) = {
	match (rt::io_uring_register(ring.fd,
			ring_register_op::REGISTER_EVENTFD_ASYNC, &fd, 1)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case int => void;
	};
};

// Unregisters the eventfd(2) associated with this [[io_uring]].
export fn ring_unregister_eventfd(ring: *io_uring) (void | error) = {
	match (rt::io_uring_register(ring.fd,
			ring_register_op::UNREGISTER_EVENTFD, null, 0)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case int => void;
	};
};
// XXX: This interface is pretty bad. It would be nice to improve on it
// a bit before making it part of the API.
//export fn register_probe(ring: *io_uring, out: *probe, nops: size) void = {
//	assert(nops * size(probe_op) <= types::UINT_MAX);
//	match (rt::io_uring_register(ring.fd, ring_register_op::REGISTER_PROBE,
//			out, (nops * size(probe)): uint)) {
//		rt::errno => abort("Unexpected io_uring REGISTER_PROBE error"),
//		void => void,
//	};
//};
// Registers the current process's credentials as a personality with an
// [[io_uring]], returning an ID. Set the personality field of an [[sqe]] to use
// that personality for an I/O submission.
export fn ring_register_personality(ring: *io_uring) int = {
	match (rt::io_uring_register(ring.fd,
			ring_register_op::REGISTER_PERSONALITY, null, 0)) {
	case rt::errno =>
		abort("Unexpected io_uring REGISTER_PERSONALITY error");
	case let i: int =>
		return i;
	};
};
// Unregisters a personality previously configured with
// [[ring_register_personality]].
export fn ring_unregister_personality(ring: *io_uring, id: int) (void | error) = {
	match (rt::io_uring_register(ring.fd,
			ring_register_op::UNREGISTER_PERSONALITY, null, id: uint)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case int => void;
	};
};

// Enables submissions for an [[io_uring]] which was started in the disabled
// state via [[ring_setup_flags::R_DISABLED]]. Future access to this io_uring
// is subject to any configured restrictions.
export fn ring_register_enable(ring: *io_uring) (void | error) = {
	match (rt::io_uring_register(ring.fd,
			ring_register_op::REGISTER_ENABLE_RINGS, null, 0)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case int => void;
	};
};

// Registers a restriction for this [[io_uring]], limiting the kinds of future
// registrations and I/O submissions which are permitted for it. This is only
// accepted if the [[io_uring]] was set up in a disabled state via
// [[ring_setup_flags::R_DISABLED]].
export fn ring_register_restrictions(
	ring: *io_uring,
	res: []ring_register_restriction_details,
) (void | error) = {
	assert(len(res) < types::UINT_MAX);
	match (rt::io_uring_register(ring.fd,
			ring_register_op::REGISTER_RESTRICTIONS,
			res: *[*]ring_register_restriction_details,
			len(res): uint)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case int => void;
	};
};
// License: MPL-2.0
// (c) 2021 Drew DeVault <sir@cmpwn.com>
// (c) 2021 Eyal Sawady <ecs@d2evs.net>
use errors;
use rt;

// Sets up an io_uring. The params parameter must be initialized with the
// desired flags, sq_thread_cpu, and sq_thread_idle parameters; the remaining
// fields are initialized by the kernel.
export fn ring_init(entries: u32, params: *ring_params) (io_uring | error) = {
	const fd = match (rt::io_uring_setup(entries, params: *rt::io_uring_params)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case let fd: int =>
		yield fd;
	};
	let uring = io_uring {
		sq = sq {
			khead = null: *uint,
			ktail = null: *uint,
			kring_mask = null: *uint,
			kring_entries = null: *uint,
			kflags = null: *sq_flags,
			kdropped = null: *uint,
			array = null: *[*]uint,
			sqes = null: *[*]sqe,
			sqe_head = 0: uint,
			sqe_tail = 0: uint,
			ring_sz = 0: size,
			ring_ptr = null: *opaque,
		},
		cq = cq {
			khead = null: *uint,
			ktail = null: *uint,
			kring_mask = null: *uint,
			kring_entries = null: *uint,
			kflags = null: *cq_flags,
			koverflow = null: *uint,
			cqes = null: *[*]cqe,
			ring_sz = 0: size,
			ring_ptr = null: *opaque,
		},
		fd = fd,
		flags = params.flags,
		features = params.features,
	};
	let sq = &uring.sq, cq = &uring.cq;
	sq.ring_sz = params.sq_off.array + params.sq_entries * size(uint);
	cq.ring_sz = params.cq_off.cqes + params.cq_entries * size(cqe);
	if (uring.features & ring_features::SINGLE_MMAP == ring_features::SINGLE_MMAP) {
		if (cq.ring_sz > sq.ring_sz) {
			sq.ring_sz = cq.ring_sz;
		};
		cq.ring_sz = sq.ring_sz;
	};
	sq.ring_ptr = match (rt::mmap(null, sq.ring_sz,
			rt::PROT_READ | rt::PROT_WRITE,
			rt::MAP_SHARED | rt::MAP_POPULATE,
			fd, OFF_SQ_RING)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case let ptr: *opaque =>
		yield ptr;
	};
	cq.ring_ptr = if (uring.features & ring_features::SINGLE_MMAP == ring_features::SINGLE_MMAP) {
		yield sq.ring_ptr;
	} else match (rt::mmap(null, cq.ring_sz,
			rt::PROT_READ | rt::PROT_WRITE,
			rt::MAP_SHARED | rt::MAP_POPULATE,
			fd, OFF_CQ_RING)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case let ptr: *opaque =>
		yield ptr;
	};

	const ring_ptr = sq.ring_ptr: uintptr;
	sq.khead = (ring_ptr + params.sq_off.head: uintptr): *uint;
	sq.ktail = (ring_ptr + params.sq_off.tail: uintptr): *uint;
	sq.kring_mask = (ring_ptr + params.sq_off.ring_mask: uintptr): *uint;
	sq.kring_entries = (ring_ptr + params.sq_off.ring_entries: uintptr): *uint;
	sq.kflags = (ring_ptr + params.sq_off.flags: uintptr): *sq_flags;
	sq.kdropped = (ring_ptr + params.sq_off.dropped: uintptr): *uint;
	sq.array = (ring_ptr + params.sq_off.array: uintptr): *[*]uint;
	sq.sqes = match (rt::mmap(null,
			params.sq_entries * size(sqe),
			rt::PROT_READ | rt::PROT_WRITE,
			rt::MAP_SHARED | rt::MAP_POPULATE,
			fd, OFF_SQES)) {
	case let err: rt::errno =>
		return errors::errno(err);
	case let ptr: *opaque =>
		yield ptr: *[*]sqe;
	};
	const ring_ptr = cq.ring_ptr: uintptr;
	cq.khead = (ring_ptr + params.cq_off.head: uintptr): *uint;
	cq.ktail = (ring_ptr + params.cq_off.tail: uintptr): *uint;
	cq.kring_mask = (ring_ptr + params.cq_off.ring_mask: uintptr): *uint;
	cq.kring_entries = (ring_ptr + params.cq_off.ring_entries: uintptr): *uint;
	cq.koverflow = (ring_ptr + params.cq_off.overflow: uintptr): *uint;
	cq.cqes = (ring_ptr + params.cq_off.cqes: uintptr): *[*]cqe;
	if (params.cq_off.flags != 0) {
		cq.kflags = (ring_ptr + params.cq_off.flags: uintptr): *cq_flags;
	};
	return uring;
};

// Frees state associated with an [[io_uring]].
export fn ring_exit(ring: *io_uring) void = {
	let sq = &ring.sq, cq = &ring.cq;
	rt::munmap(sq.ring_ptr, sq.ring_sz): void;
	if (cq.ring_ptr != null: *opaque && cq.ring_ptr != sq.ring_ptr) {
		rt::munmap(cq.ring_ptr, cq.ring_sz): void;
	};
	rt::close(ring.fd): void;
};
// License: MPL-2.0
// (c) 2021 Alexey Yerin <yyp@disroot.org>
// (c) 2021 Drew DeVault <sir@cmpwn.com>
// (c) 2022 Eyal Sawady <ecs@d2evs.net>
use endian;
use rt;
use types;
use types::c;
fn prep(sq: *sqe, op: sqe_op, flags: sqe_flags...) void = {
	rt::memset(sq, 0, size(sqe));
	sq.opcode = op;
	for (let i = 0z; i < len(flags); i += 1) {
		sq.flags |= flags[i];
	};
};
fn preprw(
	sqe: *sqe,
	op: sqe_op,
	fd: int,
	addr: nullable *opaque,
	length: uint,
	offs: u64,
	flags: sqe_flags...
) void = {
	prep(sqe, op, flags...);
	sqe.fd = fd;
	sqe.addr = addr;
	sqe.length = length;
	sqe.off = offs;
};
// Sets the user data field of an [[sqe]]. This is copied to the [[cqe]] and can
// be used to correlate a completion event with the original SQE.
export fn sqe_set_data(sqe: *sqe, user_data: *opaque) void = {
	static assert(size(uintptr) <= size(u64));
	sqe.user_data = user_data: uintptr: u64;
};

// Sets the BUFFER_SELECT flag and sets the desired buffer group. See
// [[op_provide_buffers]] for configuring buffer groups, and
// [[cqe_get_buffer_id]] to retrieve the buffer used from the corresponding
// [[cqe]].
export fn sqe_set_buffer_select(sqe: *sqe, group: u16) void = {
	sqe.flags |= sqe_flags::BUFFER_SELECT;
	sqe.buf_group = group;
};

// Prepares a no-op "operation" for an [[sqe]].
export fn op_nop(sqe: *sqe, flags: sqe_flags...) void = {
	prep(sqe, sqe_op::NOP, flags...);
};

// Prepares a vectored read operation for an [[sqe]].
export fn op_readv(
	sqe: *sqe,
	fd: int,
	iov: []rt::iovec,
	offs: size,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::READV, fd,
		iov: *[*]rt::iovec, len(iov): uint, offs, flags...);
};
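// For example, queueing a vectored read into a stack buffer (a sketch; fd is
// an assumed open file descriptor):
//
//	let buf: [512]u8 = [0...];
//	const iov: []rt::iovec = [rt::iovec {
//		iov_base = &buf: *opaque,
//		iov_len = len(buf),
//	}];
//	op_readv(must_get_sqe(ring), fd, iov, 0);
//	ring_submit(ring)!;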
// Prepares a vectored write operation for an [[sqe]].
export fn op_writev(
	sqe: *sqe,
	fd: int,
	iov: []rt::iovec,
	offs: size,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::WRITEV, fd,
		iov: *[*]rt::iovec, len(iov): uint, offs, flags...);
};

// Prepares a read operation for an [[sqe]].
export fn op_read(
	sqe: *sqe,
	fd: int,
	buf: *opaque,
	count: size,
	offs: u64,
	flags: sqe_flags...
) void = {
	assert(count <= types::U32_MAX);
	preprw(sqe, sqe_op::READ, fd, buf, count: u32, offs, flags...);
};

// Prepares a write operation for an [[sqe]].
export fn op_write(
	sqe: *sqe,
	fd: int,
	buf: *opaque,
	count: size,
	offs: u64,
	flags: sqe_flags...
) void = {
	assert(count <= types::U32_MAX);
	preprw(sqe, sqe_op::WRITE, fd, buf, count: u32, offs, flags...);
};

// Prepares a read for a fixed buffer previously registered with
// [[ring_register_buffers]]. The buf and count parameters must refer to an
// address which falls within the buffer referenced by the index parameter.
export fn op_read_fixed(
	sqe: *sqe,
	fd: int,
	buf: *opaque,
	count: size,
	index: u16,
	flags: sqe_flags...
) void = {
	assert(count <= types::U32_MAX);
	preprw(sqe, sqe_op::READ_FIXED, fd, buf, count: u32, 0, flags...);
	sqe.buf_index = index;
};

// Prepares a write for a fixed buffer previously registered with
// [[ring_register_buffers]]. The buf and count parameters must refer to an
// address which falls within the buffer referenced by the index parameter.
export fn op_write_fixed(
	sqe: *sqe,
	fd: int,
	buf: *opaque,
	count: size,
	index: u16,
	flags: sqe_flags...
) void = {
	assert(count <= types::U32_MAX);
	preprw(sqe, sqe_op::WRITE_FIXED, fd, buf, count: u32, 0, flags...);
	sqe.buf_index = index;
};

// Prepares an fsync operation for an [[sqe]]. Note that operations are
// executed in parallel and are not completed in submission order, so an fsync
// submitted after a write may not cause the write to be accounted for by the
// fsync unless [[sqe_flags::IO_LINK]] is used.
export fn op_fsync(
	sqe: *sqe,
	fd: int,
	fsync_flags: op_fsync_flags,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::FSYNC, fd, null, 0, 0, flags...);
	sqe.fsync_flags = fsync_flags;
};
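// A sketch of linking a write to a subsequent fsync via
// [[sqe_flags::IO_LINK]] (fd, buf, and count are assumed):
//
//	op_write(must_get_sqe(ring), fd, buf, count, 0, sqe_flags::IO_LINK);
//	op_fsync(must_get_sqe(ring), fd, op_fsync_flags::NONE);
//	ring_submit(ring)!;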
// Adds a request to poll a file descriptor for the given set of poll events.
// This will only happen once; the poll request must be submitted with a new
// SQE to re-poll the file descriptor later. The caller must call
// [[sqe_set_data]] to provide a user data field in order to use
// [[op_poll_remove]] to remove this poll request later.
export fn op_poll_add(
	sqe: *sqe,
	fd: int,
	poll_mask: uint,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::POLL_ADD, fd, null, 0, 0, flags...);
	assert(endian::host == &endian::little); // TODO?
	sqe.poll32_events = poll_mask: u32;
};

// Removes an existing poll request by matching the SQE's user_data field. See
// [[sqe_set_data]].
export fn op_poll_remove(sqe: *sqe, user_data: *opaque, flags: sqe_flags...) void = {
	preprw(sqe, sqe_op::POLL_REMOVE, -1, null, 0, 0, flags...);
	sqe_set_data(sqe, user_data);
};

// Prepares a sendmsg operation for an [[sqe]], equivalent to the sendmsg(2)
// system call.
export fn op_sendmsg(
	sqe: *sqe,
	fd: int,
	msghdr: *rt::msghdr,
	sendmsg_flags: int,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::SENDMSG, fd, msghdr, 0, 0, flags...);
	sqe.msg_flags = sendmsg_flags;
};

// Prepares a recvmsg operation for an [[sqe]], equivalent to the recvmsg(2)
// system call.
export fn op_recvmsg(
	sqe: *sqe,
	fd: int,
	msghdr: *rt::msghdr,
	recvmsg_flags: int,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::RECVMSG, fd, msghdr, 0, 0, flags...);
	sqe.msg_flags = recvmsg_flags;
};

// Prepares a send operation for an [[sqe]], equivalent to the send(2) system
// call.
export fn op_send(
	sqe: *sqe,
	fd: int,
	buf: *opaque,
	count: size,
	send_flags: int,
	flags: sqe_flags...
) void = {
	assert(count <= types::U32_MAX);
	preprw(sqe, sqe_op::SEND, fd, buf, count: u32, 0, flags...);
	sqe.msg_flags = send_flags;
};

// Prepares a recv operation for an [[sqe]], equivalent to the recv(2) system
// call.
export fn op_recv(
	sqe: *sqe,
	fd: int,
	buf: *opaque,
	count: size,
	recv_flags: int,
	flags: sqe_flags...
) void = {
	assert(count <= types::U32_MAX);
	preprw(sqe, sqe_op::RECV, fd, buf, count: u32, 0, flags...);
	sqe.msg_flags = recv_flags;
};

// Prepares a timeout operation for an [[sqe]]. "ts" should be a timespec
// describing the desired timeout, and "events" may optionally be used to
// define a number of completion events to wake after (or zero to wake only
// after the timeout expires). The caller must call [[sqe_set_data]] to provide
// a user data field in order to use [[op_timeout_remove]] to cancel this
// timeout later.
export fn op_timeout(
	sqe: *sqe,
	ts: *rt::timespec,
	events: uint,
	to_flags: op_timeout_flags,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::TIMEOUT, 0, ts, 1, events, flags...);
	sqe.timeout_flags = to_flags;
};

// Removes an existing timeout request by matching the SQE's user_data field.
// See [[sqe_set_data]].
export fn op_timeout_remove(
	sqe: *sqe,
	user_data: *opaque,
	to_flags: op_timeout_flags,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::TIMEOUT_REMOVE, 0, user_data, 0, 0, flags...);
	sqe.timeout_flags = to_flags;
};

// Updates an existing timeout request by matching the SQE's user_data field.
// See [[sqe_set_data]].
export fn op_timeout_update(
	sqe: *sqe,
	user_data: *opaque,
	ts: *rt::timespec,
	events: uint,
	to_flags: op_timeout_flags,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::TIMEOUT_REMOVE, 0, user_data, 0, events, flags...);
	sqe.timeout_flags = to_flags | op_timeout_flags::UPDATE;
	sqe.addr2 = ts;
};

// Prepares a timeout operation for an [[sqe]] which is linked to the previous
// SQE, effectively setting an upper limit on how long that SQE can take to
// complete. "ts" should be a timespec describing the desired timeout. The
// caller must call [[sqe_set_data]] to provide a user data field in order to
// use [[op_timeout_remove]] to cancel this timeout later.
export fn op_link_timeout(
	sqe: *sqe,
	ts: *rt::timespec,
	to_flags: op_timeout_flags,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::LINK_TIMEOUT, 0, ts, 1, 0, flags...);
	sqe.timeout_flags = to_flags;
};

// Prepares a socket accept operation for an [[sqe]]. Equivalent to accept4(2).
export fn op_accept(
	sqe: *sqe,
	fd: int,
	addr: nullable *rt::sockaddr,
	addrlen: nullable *uint,
	aflags: uint,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::ACCEPT, fd, addr, 0, 0, flags...);
	sqe.accept_flags = aflags;
	sqe.addr2 = addrlen;
};
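// For example, accepting one connection and reading the client fd from the
// completion (a sketch; sock is an assumed listening socket):
//
//	op_accept(must_get_sqe(ring), sock, null, null, 0);
//	ring_submit(ring)!;
//	const cqe = cqe_wait(ring)!;
//	const client = cqe_result(cqe)!; // the accepted fd on success
//	cqe_seen(ring, cqe);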
// Prepares an [[sqe]] operation which opens a file. The path must be a C
// string, i.e. NUL terminated; see [[strings::to_c]].
export fn op_openat(
	sqe: *sqe,
	dirfd: int,
	path: *const c::char,
	oflags: int,
	mode: uint,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::OPENAT, dirfd, path, mode, 0, flags...);
	sqe.open_flags = oflags: u32;
};

// Prepares an [[sqe]] operation which closes a file descriptor.
export fn op_close(sqe: *sqe, fd: int, flags: sqe_flags...) void = {
	preprw(sqe, sqe_op::CLOSE, fd, null, 0, 0, flags...);
};

// Prepares an [[sqe]] operation which provides a buffer pool to the kernel.
// len(pool) must be equal to nbuf * bufsz. See [[sqe_set_buffer_select]] to
// use the buffer pool for a subsequent request.
export fn op_provide_buffers(
	sqe: *sqe,
	group: u16,
	pool: []u8,
	nbuf: size,
	bufsz: size,
	bufid: u16,
	flags: sqe_flags...
) void = {
	assert(len(pool) == nbuf * bufsz);
	preprw(sqe, sqe_op::PROVIDE_BUFFERS, nbuf: int, pool: *[*]u8,
		bufsz: uint, bufid: uint, flags...);
	sqe.buf_group = group;
};

// Removes buffers previously registered with [[op_provide_buffers]].
export fn op_remove_buffers(
	sqe: *sqe,
	nbuf: size,
	group: u16,
	flags: sqe_flags...
) void = {
	preprw(sqe, sqe_op::REMOVE_BUFFERS, nbuf: int, null, 0, 0, flags...);
	sqe.buf_group = group;
};
// License: MPL-2.0
// (c) 2021 Alexey Yerin <yyp@disroot.org>
// (c) 2021 Drew DeVault <sir@cmpwn.com>
// (c) 2021-2022 Eyal Sawady <ecs@d2evs.net>
use errors;
// Returned when buffer pool use was configured for an [[sqe]], but there are no
// buffers available.
export type nobuffers = !void;
// All errors which may be returned by this module.
export type error = !(errors::error | nobuffers);
// Converts an [[error]] into a human-readable string.
export fn strerror(err: error) const str = {
	match (err) {
	case nobuffers =>
		return "Buffer pool exhausted";
	case let err: errors::error =>
		return errors::strerror(err);
	};
};
// The maximum value for the first parameter of [[ring_init]].
export def MAX_ENTRIES: uint = 4096;

def CQE_BUFFER_SHIFT: u32 = 16;
def OFF_SQ_RING: u64 = 0;
def OFF_CQ_RING: u64 = 0x8000000;
def OFF_SQES: u64 = 0x10000000;

// An io_uring [[sqe]] operation.
export type sqe_op = enum u8 {
	NOP,
	READV,
	WRITEV,
	FSYNC,
	READ_FIXED,
	WRITE_FIXED,
	POLL_ADD,
	POLL_REMOVE,
	SYNC_FILE_RANGE,
	SENDMSG,
	RECVMSG,
	TIMEOUT,
	TIMEOUT_REMOVE,
	ACCEPT,
	ASYNC_CANCEL,
	LINK_TIMEOUT,
	CONNECT,
	FALLOCATE,
	OPENAT,
	CLOSE,
	FILES_UPDATE,
	STATX,
	READ,
	WRITE,
	FADVISE,
	MADVISE,
	SEND,
	RECV,
	OPENAT2,
	EPOLL_CTL,
	SPLICE,
	PROVIDE_BUFFERS,
	REMOVE_BUFFERS,
	TEE,
};

// Flags for an [[sqe]].
export type sqe_flags = enum u8 {
	NONE = 0,
	// Use fixed fileset
	FIXED_FILE = 1 << 0,
	// Issue after inflight IO
	IO_DRAIN = 1 << 1,
	// Links next sqe
	IO_LINK = 1 << 2,
	// Like LINK, but stronger
	IO_HARDLINK = 1 << 3,
	// Always go async
	ASYNC = 1 << 4,
	// Select buffer from sqe.buf_group
	BUFFER_SELECT = 1 << 5,
};

// Flags for an fsync operation.
export type op_fsync_flags = enum u32 {
	NONE = 0,
	DATASYNC = 1 << 0,
};

// Flags for a timeout operation.
export type op_timeout_flags = enum u32 {
	NONE = 0,
	// If set, the timeout will be "absolute", waiting until CLOCK_MONOTONIC
	// reaches the time defined by the timespec. If unset, it will be
	// interpreted as a duration relative to the I/O submission.
	ABS = 1 << 0,
	// When combined with [[sqe_op::TIMEOUT_REMOVE]], causes the submission
	// to update the timer rather than remove it.
	UPDATE = 1 << 1,
};

// Flags for a splice operation.
export type op_splice_flags = enum u32 {
	NONE = 0,
	F_FD_IN_FIXED = 1 << 31,
};
// Flags for a [[cqe]].
export type cqe_flags = enum u32 {
	NONE = 0,
	F_BUFFER = 1 << 0,
	F_MORE = 1 << 1,
};
// A submission queue entry.
export type sqe = struct {
	opcode: sqe_op,
	flags: sqe_flags,
	ioprio: u16,
	fd: i32,
	union {
		off: u64,
		addr2: nullable *opaque,
	},
	union {
		addr: nullable *opaque,
		splice_off_in: u64,
	},
	length: u32,
	union {
		rw_flags: int,
		fsync_flags: op_fsync_flags,
		poll_events: u16,
		poll32_events: u32,
		sync_range_flags: u32,
		msg_flags: int,
		timeout_flags: op_timeout_flags,
		accept_flags: u32,
		cancel_flags: u32,
		open_flags: u32,
		statx_flags: u32,
		fadvise_advice: u32,
		splice_flags: op_splice_flags,
	},
	user_data: u64,
	union {
		struct {
			union {
				buf_index: u16,
				buf_group: u16,
			},
			personality: u16,
			splice_fd_in: i32,
		},
		pad2: [3]u64,
	},
};
// A completion queue entry.
export type cqe = struct {
	user_data: u64,
	// Consider using [[cqe_result]] instead.
	res: i32,
	flags: cqe_flags,
};

// Filled with the offsets for mmap(2).
export type sq_offsets = struct {
	head: u32,
	tail: u32,
	ring_mask: u32,
	ring_entries: u32,
	flags: u32,
	dropped: u32,
	array: u32,
	resv1: u32,
	resv2: u64,
};

// Flags for the sq ring.
export type sq_flags = enum u32 {
	NONE = 0,
	// Needs io_uring_enter wakeup
	NEED_WAKEUP = 1 << 0,
	// CQ ring is overflown
	CQ_OVERFLOW = 1 << 1,
};

// Filled with the offsets for mmap(2).
export type cq_offsets = struct {
	head: u32,
	tail: u32,
	ring_mask: u32,
	ring_entries: u32,
	overflow: u32,
	cqes: u32,
	flags: u32,
	resv1: u32,
	resv2: u64,
};

// Flags for the cq ring.
export type cq_flags = enum u32 {
	NONE = 0,
	EVENTFD_DISABLED = 1 << 0,
};

// Flags for the setup operation.
export type ring_setup_flags = enum u32 {
	NONE = 0,
	// io_context is polled
	IOPOLL = 1 << 0,
	// SQ poll thread
	SQPOLL = 1 << 1,
	// sq_thread_cpu is valid
	SQ_AFF = 1 << 2,
	// App defines CQ size
	CQSIZE = 1 << 3,
	// Clamp SQ/CQ ring sizes
	CLAMP = 1 << 4,
	// Attach to existing wq
	ATTACH_WQ = 1 << 5,
	// Start with ring disabled
	R_DISABLED = 1 << 6,
};

// Parameters for [[ring_init]]. Partially completed by the kernel.
export type ring_params = struct {
	sq_entries: u32,
	cq_entries: u32,
	flags: ring_setup_flags,
	sq_thread_cpu: u32,
	sq_thread_idle: u32,
	features: ring_features,
	wq_fd: u32,
	resv: [3]u32,
	sq_off: sq_offsets,
	cq_off: cq_offsets,
};

// Features supported by the kernel.
export type ring_features = enum u32 {
	NONE = 0,
	SINGLE_MMAP = 1 << 0,
	NODROP = 1 << 1,
	SUBMIT_STABLE = 1 << 2,
	RW_CUR_POS = 1 << 3,
	CUR_PERSONALITY = 1 << 4,
	FAST_POLL = 1 << 5,
	POLL_32BITS = 1 << 6,
};

// Flags for the enter operation.
type enter_flags = enum uint {
	NONE = 0,
	GETEVENTS = 1 << 0,
	SQ_WAKEUP = 1 << 1,
	SQ_WAIT = 1 << 2,
};

// Register operations.
export type ring_register_op = enum uint {
	REGISTER_BUFFERS,
	UNREGISTER_BUFFERS,
	REGISTER_FILES,
	UNREGISTER_FILES,
	REGISTER_EVENTFD,
	UNREGISTER_EVENTFD,
	REGISTER_FILES_UPDATE,
	REGISTER_EVENTFD_ASYNC,
	REGISTER_PROBE,
	REGISTER_PERSONALITY,
	UNREGISTER_PERSONALITY,
	REGISTER_RESTRICTIONS,
	REGISTER_ENABLE_RINGS,
};

// Information for a REGISTER_FILES_UPDATE operation.
export type ring_files_update = struct {
	offs: u32,
	resv: u32,
	fds: *int,
};
// // Flags for a probe operation.
// export type op_probe_flags = enum u16 {
// 	NONE = 0,
// 	SUPPORTED = 1 << 0,
// };
// 
// // REGISTER_PROBE operation details.
// export type probe_op = struct {
// 	op: u8,
// 	resv: u8,
// 	flags: op_probe_flags,
// 	resv2: u32,
// };
// 
// // Summary of REGISTER_PROBE results.
// export type probe = struct {
// 	last_op: u8,
// 	ops_len: u8,
// 	resv: u16,
// 	resv2: [3]u32,
// 	ops: [*]probe_op,
// };
// Details for a REGISTER_RESTRICTIONS operation.
export type ring_register_restriction_details = struct {
	opcode: ring_register_restriction_op,
	union {
		register_op: ring_register_op,
		sqe_op: sqe_op,
		flags: sqe_flags,
	},
	resv: u8,
	resv2: [3]u32,
};

// Opcode for a [[ring_register_restriction_details]].
export type ring_register_restriction_op = enum u16 {
	NONE = 0,
	// Allow an io_uring_register(2) opcode
	REGISTER_OP = 0,
	// Allow an sqe opcode
	SQE_OP = 1,
	// Allow sqe flags
	SQE_FLAGS_ALLOWED = 2,
	// Require sqe flags (these flags must be set on each submission)
	SQE_FLAGS_REQUIRED = 3,
};
// State for an io_uring.
export type io_uring = struct {
	sq: sq,
	cq: cq,
	fd: int,
	flags: ring_setup_flags,
	features: ring_features,
};
// Submission queue state.
export type sq = struct {
	khead: *uint,
	ktail: *uint,
	kring_mask: *uint,
	kring_entries: *uint,
	kflags: *sq_flags,
	kdropped: *uint,
	array: *[*]uint,
	sqes: *[*]sqe,
	sqe_head: uint,
	sqe_tail: uint,
	ring_sz: size,
	ring_ptr: *opaque,
};
// Completion queue state.
export type cq = struct {
	khead: *uint,
	ktail: *uint,
	kring_mask: *uint,
	kring_entries: *uint,
	kflags: *cq_flags,
	koverflow: *uint,
	cqes: *[*]cqe,
	ring_sz: size,
	ring_ptr: *opaque,
};