Lindenii Project Forge
Login

hare-aio

Asynchronous I/O event loops for Hare

Warning: Due to various recent migrations, viewing non-HEAD refs may be broken.

/linux/io_uring/uring.ha (raw)

// SPDX-License-Identifier: MPL-2.0
// (c) 2021 Alexey Yerin <yyp@disroot.org>
// (c) 2021 Drew DeVault <sir@cmpwn.com>
// (c) 2021-2022 Eyal Sawady <ecs@d2evs.net>
// (c) 2025 Runxi Yu <me@runxiyu.org>
use errors;

// Returned when buffer pool use was configured for an [[sqe]], but there are no
// buffers available. This error carries no payload; it is one of the members
// of [[error]], and [[strerror]] describes it as "Buffer pool exhausted".
export type nobuffers = !void;

// All errors which may be returned by this module: either an
// [[errors::error]] or [[nobuffers]]. Use [[strerror]] to obtain a
// human-readable description.
export type error = !(errors::error | nobuffers);

// Returns a human-readable description of an [[error]]. [[nobuffers]] is
// described by a fixed message; every other error is delegated to
// [[errors::strerror]].
export fn strerror(err: error) const str = {
	return match (err) {
	case let e: errors::error =>
		yield errors::strerror(e);
	case nobuffers =>
		yield "Buffer pool exhausted";
	};
};

// The maximum value for the first parameter of [[ring_init]].
//
// NOTE(review): newer Linux kernels are believed to accept more entries than
// this (IORING_MAX_ENTRIES); confirm whether 4096 is still the intended limit.
export def MAX_ENTRIES: uint = 4096;

// Private constants. Their users are not in this part of the file, so the
// descriptions below are inferred from the names and need confirmation.
//
// NOTE(review): CQE_BUFFER_SHIFT is presumably the bit position of the buffer
// ID within the flags of a [[cqe]]; confirm against its users.
def CQE_BUFFER_SHIFT: u32 = 16;
// NOTE(review): the OFF_* values are presumably the mmap(2) offsets used to map
// the submission ring, the completion ring and the [[sqe]] array respectively;
// confirm against the code which maps the rings.
def OFF_SQ_RING: u64 = 0;
def OFF_CQ_RING: u64 = 0x8000000;
def OFF_SQES: u64 = 0x10000000;

// An io_uring [[sqe]] operation. Stored as a single byte in the opcode field
// of an [[sqe]].
//
// No member has an explicit value, so the values are assigned sequentially
// from zero in declaration order (NOP = 0, READV = 1, ... TEE = 33). Do not
// reorder or insert members: doing so renumbers every following operation.
// NOTE(review): the numbering presumably mirrors the kernel's IORING_OP_*
// constants; verify against <linux/io_uring.h> before extending.
export type sqe_op = enum u8 {
	NOP,
	READV,
	WRITEV,
	FSYNC,
	READ_FIXED,
	WRITE_FIXED,
	POLL_ADD,
	POLL_REMOVE,
	SYNC_FILE_RANGE,
	SENDMSG,
	RECVMSG,
	TIMEOUT,
	TIMEOUT_REMOVE,
	ACCEPT,
	ASYNC_CANCEL,
	LINK_TIMEOUT,
	CONNECT,
	FALLOCATE,
	OPENAT,
	CLOSE,
	FILES_UPDATE,
	STATX,
	READ,
	WRITE,
	FADVISE,
	MADVISE,
	SEND,
	RECV,
	OPENAT2,
	EPOLL_CTL,
	SPLICE,
	PROVIDE_BUFFERS,
	REMOVE_BUFFERS,
	TEE,
};

// Flags for an [[sqe]]. Each member other than NONE is a distinct bit, so
// members may be combined with bitwise OR. Stored as a single byte in the
// flags field of an [[sqe]].
export type sqe_flags = enum u8 {
	// No flags set.
	NONE = 0,
	// Use fixed fileset
	FIXED_FILE = 1 << 0,
	// Issue after inflight IO
	IO_DRAIN = 1 << 1,
	// Links next sqe
	IO_LINK = 1 << 2,
	// Like IO_LINK, but stronger
	IO_HARDLINK = 1 << 3,
	// Always go async
	ASYNC = 1 << 4,
	// Select buffer from sqe.buf_group
	BUFFER_SELECT = 1 << 5,
};

// Flags for an fsync operation. Stored in the fsync_flags member of an
// [[sqe]].
export type op_fsync_flags = enum u32 {
	// No flags set.
	NONE = 0,
	// NOTE(review): presumably requests fdatasync(2)-like semantics; confirm
	// against the kernel documentation.
	DATASYNC = 1 << 0,
};

// Flags for a timeout operation. Each member other than NONE is a distinct
// bit. Stored in the timeout_flags member of an [[sqe]].
export type op_timeout_flags = enum u32 {
	// No flags set.
	NONE = 0,
	// If set, the timeout will be "absolute", waiting until CLOCK_MONOTONIC
	// reaches the time defined by the timespec. If unset, it will be
	// interpreted as a duration relative to the I/O submission.
	ABS = 1 << 0,
	// When combined with [[sqe_op::TIMEOUT_REMOVE]], causes the submission to
	// update the timer rather than remove it.
	UPDATE = 1 << 1,
	// Use CLOCK_BOOTTIME instead of CLOCK_MONOTONIC when ABS is set.
	// Does not make sense when ABS is unset.
	BOOTTIME = 1 << 2,
	// Use CLOCK_REALTIME instead of CLOCK_MONOTONIC when ABS is set.
	// Does not make sense when ABS is unset.
	REALTIME = 1 << 3,
	// Targets a linked timeout when used with [[op_timeout_flags::UPDATE]].
	LINK_TIMEOUT_UPDATE = 1 << 4,
	// Treat -ETIME completions as success.
	ETIME_SUCCESS = 1 << 5,
	// Make the timeout generate multiple completions until cancelled.
	MULTISHOT = 1 << 6,
};

// Flags for a splice operation. Stored in the splice_flags member of an
// [[sqe]].
export type op_splice_flags = enum u32 {
	// No flags set.
	NONE = 0,
	// The most significant bit of the 32-bit value.
	// NOTE(review): presumably marks the splice_fd_in field of the [[sqe]] as
	// an index into the fixed fileset rather than a file descriptor; confirm.
	F_FD_IN_FIXED = 1 << 31,
};

// Flags for a [[cqe]]. Stored in the flags field of a [[cqe]].
export type cqe_flags = enum u32 {
	// No flags set.
	NONE = 0,
	// NOTE(review): presumably indicates that the completion carries a buffer
	// ID selected from a buffer pool; confirm against the code reading it.
	F_BUFFER = 1 << 0,
	// NOTE(review): presumably indicates that further completions will
	// follow for the same submission; confirm.
	F_MORE = 1 << 1,
};

// A submission queue entry, describing a single operation.
//
// The anonymous unions overlay several fields on the same storage. Which
// member is meaningful presumably depends on the [[sqe_op]] in use (the member
// names follow the operations they belong to); confirm against the code which
// prepares entries. Field order and types define the memory layout and must
// not be changed.
export type sqe = struct {
	// The operation to perform.
	opcode: sqe_op,
	// Modifier flags for this entry.
	flags: sqe_flags,
	ioprio: u16,
	fd: i32,
	// A 64-bit offset, or a second pointer argument sharing the same
	// storage.
	union {
		off: u64,
		addr2: nullable *opaque,
	},
	// A pointer argument, or the splice input offset sharing the same
	// storage.
	union {
		addr: nullable *opaque,
		splice_off_in: u64,
	},
	length: u32,
	// Operation-specific flags; all members share the same storage.
	union {
		rw_flags: int,
		fsync_flags: op_fsync_flags,
		poll_events: u16,
		poll32_events: u32,
		sync_range_flags: u32,
		msg_flags: int,
		timeout_flags: op_timeout_flags,
		accept_flags: u32,
		cancel_flags: u32,
		open_flags: u32,
		statx_flags: u32,
		fadvise_advice: u32,
		splice_flags: op_splice_flags,
	},
	// NOTE(review): presumably echoed back in the user_data field of the
	// matching [[cqe]], which has the same name and type; confirm.
	user_data: u64,
	union {
		struct {
			// Both members share the same 16 bits. buf_group is the
			// one consulted when [[sqe_flags::BUFFER_SELECT]] is set.
			union {
				buf_index: u16,
				buf_group: u16,
			},
			personality: u16,
			splice_fd_in: i32,
		},
		// Sizes this union to three 64-bit words.
		pad2: [3]u64,
	},
};

// A completion queue entry.
export type cqe = struct {
	// NOTE(review): presumably the user_data value of the [[sqe]] which
	// produced this completion; confirm.
	user_data: u64,

	// The raw signed result of the operation.
	// Consider using [[cqe_result]] instead.
	res: i32,

	// Additional information about this completion.
	flags: cqe_flags,
};

// Offsets describing the submission queue ring. Filled with the offset for
// mmap(2). Embedded in [[ring_params]] as its sq_off field.
//
// NOTE(review): each field is presumably the byte offset, within the mapped
// ring, of the value with the same name; confirm against the code which maps
// the ring. Field order defines the memory layout and must not be changed.
export type sq_offsets = struct {
	head: u32,
	tail: u32,
	ring_mask: u32,
	ring_entries: u32,
	flags: u32,
	dropped: u32,
	array: u32,
	resv1: u32,
	resv2: u64,
};

// Flags for the sq ring. The kflags field of [[sq]] points to a value of this
// type.
export type sq_flags = enum u32 {
	// No flags set.
	NONE = 0,
	// Needs io_uring_enter wakeup
	NEED_WAKEUP = 1 << 0,
	// CQ ring is overflown
	CQ_OVERFLOW = 1 << 1,
};

// Offsets describing the completion queue ring. Filled with the offset for
// mmap(2). Embedded in [[ring_params]] as its cq_off field.
//
// NOTE(review): each field is presumably the byte offset, within the mapped
// ring, of the value with the same name; confirm against the code which maps
// the ring. Field order defines the memory layout and must not be changed.
export type cq_offsets = struct {
	head: u32,
	tail: u32,
	ring_mask: u32,
	ring_entries: u32,
	overflow: u32,
	cqes: u32,
	flags: u32,
	resv1: u32,
	resv2: u64,
};

// Flags for the cq ring. The kflags field of [[cq]] points to a value of this
// type.
export type cq_flags = enum u32 {
	// No flags set.
	NONE = 0,
	// NOTE(review): presumably disables eventfd notifications for
	// completions; confirm against the kernel documentation.
	EVENTFD_DISABLED = 1 << 0,
};

// Flags for setup operation. Each member other than NONE is a distinct bit.
// Used as the flags field of both [[ring_params]] and [[io_uring]].
export type ring_setup_flags = enum u32 {
	// No flags set.
	NONE = 0,
	// io_context is polled
	IOPOLL = 1 << 0,
	// SQ poll thread
	SQPOLL = 1 << 1,
	// sq_thread_cpu is valid
	SQ_AFF = 1 << 2,
	// App defines CQ size
	CQSIZE = 1 << 3,
	// Clamp SQ/CQ ring sizes
	CLAMP = 1 << 4,
	// Attach to existing wq
	ATTACH_WQ = 1 << 5,
	// Start with ring disabled
	R_DISABLED = 1 << 6,
};

// Parameters for [[ring_init]]. Partially completed by the kernel.
//
// NOTE(review): which fields are inputs and which are filled in by the kernel
// is not visible here; see io_uring_setup(2). Field order defines the memory
// layout and must not be changed.
export type ring_params = struct {
	sq_entries: u32,
	cq_entries: u32,
	// Setup flags for the ring.
	flags: ring_setup_flags,
	// See [[ring_setup_flags::SQ_AFF]].
	sq_thread_cpu: u32,
	sq_thread_idle: u32,
	// Features supported by the kernel.
	features: ring_features,
	// See [[ring_setup_flags::ATTACH_WQ]].
	wq_fd: u32,
	resv: [3]u32,
	// Submission queue ring offsets.
	sq_off: sq_offsets,
	// Completion queue ring offsets.
	cq_off: cq_offsets,
};

// Features supported by the kernel. Each member other than NONE is a distinct
// bit. Used as the features field of both [[ring_params]] and [[io_uring]].
//
// NOTE(review): the meaning of each feature bit is not described in this file;
// see io_uring_setup(2).
export type ring_features = enum u32 {
	// No features.
	NONE = 0,
	SINGLE_MMAP = 1 << 0,
	NODROP = 1 << 1,
	SUBMIT_STABLE = 1 << 2,
	RW_CUR_POS = 1 << 3,
	CUR_PERSONALITY = 1 << 4,
	FAST_POLL = 1 << 5,
	POLL_32BITS = 1 << 6,
};

// Flags for enter operation. Each member other than NONE is a distinct bit.
// Unlike the other flag types declared alongside it, this type is not
// exported.
//
// NOTE(review): the meaning of each bit is not described in this file; see
// io_uring_enter(2).
type enter_flags = enum uint {
	// No flags set.
	NONE = 0,
	GETEVENTS = 1 << 0,
	SQ_WAKEUP = 1 << 1,
	SQ_WAIT = 1 << 2,
};

// Register operations.
//
// No member has an explicit value, so the values are assigned sequentially
// from zero in declaration order (REGISTER_BUFFERS = 0, ...
// REGISTER_ENABLE_RINGS = 12). Do not reorder or insert members: doing so
// renumbers every following operation.
// NOTE(review): the numbering presumably mirrors the opcodes of
// io_uring_register(2); verify against <linux/io_uring.h> before extending.
export type ring_register_op = enum uint {
	REGISTER_BUFFERS,
	UNREGISTER_BUFFERS,
	REGISTER_FILES,
	UNREGISTER_FILES,
	REGISTER_EVENTFD,
	UNREGISTER_EVENTFD,
	REGISTER_FILES_UPDATE,
	REGISTER_EVENTFD_ASYNC,
	REGISTER_PROBE,
	REGISTER_PERSONALITY,
	UNREGISTER_PERSONALITY,
	REGISTER_RESTRICTIONS,
	REGISTER_ENABLE_RINGS,
};

// Information for a REGISTER_FILES_UPDATE operation.
export type ring_files_update = struct {
	// NOTE(review): presumably the index of the first registered file to
	// update; confirm against io_uring_register(2).
	offs: u32,
	resv: u32,
	// NOTE(review): typed as a pointer to a single int, but presumably
	// addresses the first element of an array of file descriptors whose
	// length is supplied separately; confirm against the caller.
	fds: *int,
};

// // Flags for a probe operation.
// export type op_probe_flags = enum u16 {
// 	NONE = 0,
// 	SUPPORTED = 1 << 0,
// };
// 
// // REGISTER_PROBE operation details.
// export type probe_op = struct {
// 	op: u8,
// 	resv: u8,
// 	flags: op_probe_flags,
// 	resv2: u32,
// };
// 
// // Summary of REGISTER_PROBE results.
// export type probe = struct {
// 	last_op: u8,
// 	ops_len: u8,
// 	resv: u16,
// 	resv2: [3]u32,
// 	ops: [*]probe_op,
// };

// Details for a REGISTER_RESTRICTIONS operation.
//
// NOTE(review): [[ring_register_op]] is declared as "enum uint", whereas
// [[sqe_op]] and [[sqe_flags]] are "enum u8", so the union below is as large
// and as strictly aligned as a uint. The kernel's struct io_uring_restriction
// is believed to use one-byte union members (16 bytes in total); if so, this
// layout does not match it. Verify against <linux/io_uring.h> before relying
// on this type.
export type ring_register_restriction_details = struct {
	// Selects the kind of restriction, and thereby presumably which union
	// member below is meaningful (the names correspond); confirm.
	opcode: ring_register_restriction_op,
	union {
		register_op: ring_register_op,
		sqe_op: sqe_op,
		flags: sqe_flags,
	},
	resv: u8,
	resv2: [3]u32,
};

// Opcode for a [[ring_register_restriction_details]].
//
// Note that NONE and REGISTER_OP are both defined as zero, so the two cannot
// be told apart by value.
export type ring_register_restriction_op = enum u16 {
	// Same value as REGISTER_OP.
	NONE = 0,
	// Allow an io_uring_register(2) opcode
	REGISTER_OP = 0,
	// Allow an sqe opcode
	SQE_OP = 1,
	// Allow sqe flags
	SQE_FLAGS_ALLOWED = 2,
	// Require sqe flags (these flags must be set on each submission)
	SQE_FLAGS_REQUIRED = 3,
};

// State for an io_uring.
export type io_uring = struct {
	// Submission queue state.
	sq: sq,
	// Completion queue state.
	cq: cq,
	// NOTE(review): presumably the file descriptor of the ring itself;
	// confirm against [[ring_init]].
	fd: int,
	// Flags for setup operation; same type as the flags field of
	// [[ring_params]].
	flags: ring_setup_flags,
	// Features supported by the kernel; same type as the features field of
	// [[ring_params]].
	features: ring_features,
};

// Submission queue state.
//
// NOTE(review): the k-prefixed pointers presumably point into the ring memory
// shared with the kernel, at the positions given by [[sq_offsets]] (the names
// correspond); confirm against the code which maps the ring.
export type sq = struct {
	khead: *uint,
	ktail: *uint,
	kring_mask: *uint,
	kring_entries: *uint,
	kflags: *sq_flags,
	kdropped: *uint,
	// Pointer to an unbounded array of uint.
	array: *[*]uint,
	// Pointer to an unbounded array of submission queue entries.
	sqes: *[*]sqe,
	// NOTE(review): sqe_head and sqe_tail are plain values rather than
	// pointers, so they are presumably local bookkeeping; confirm against
	// the submission code.
	sqe_head: uint,
	sqe_tail: uint,
	// NOTE(review): presumably the size and base address of the ring
	// mapping; confirm.
	ring_sz: size,
	ring_ptr: *opaque,
};

// Completion queue state.
//
// NOTE(review): the k-prefixed pointers presumably point into the ring memory
// shared with the kernel, at the positions given by [[cq_offsets]] (the names
// correspond); confirm against the code which maps the ring.
export type cq = struct {
	khead: *uint,
	ktail: *uint,
	kring_mask: *uint,
	kring_entries: *uint,
	kflags: *cq_flags,
	koverflow: *uint,
	// Pointer to an unbounded array of completion queue entries.
	cqes: *[*]cqe,
	// NOTE(review): presumably the size and base address of the ring
	// mapping; confirm.
	ring_sz: size,
	ring_ptr: *opaque,
};