zig/lib/std/os/linux/bpf.zig

const std = @import("../../std.zig");
const errno = getErrno;
const unexpectedErrno = std.os.unexpectedErrno;
const expectEqual = std.testing.expectEqual;
const expectError = std.testing.expectError;
const expect = std.testing.expect;

const linux = std.os.linux;
const fd_t = linux.fd_t;
const pid_t = linux.pid_t;
const getErrno = linux.getErrno;

btf

bpf/btf.zig
pub const btf = @import("bpf/btf.zig");

kern

bpf/kern.zig
pub const kern = @import("bpf/kern.zig");

// instruction classes

LD

pub const LD = 0x00;

LDX

pub const LDX = 0x01;

ST

pub const ST = 0x02;

STX

pub const STX = 0x03;

ALU

pub const ALU = 0x04;

JMP

pub const JMP = 0x05;

RET

pub const RET = 0x06;

MISC

pub const MISC = 0x07;

W

32-bit

pub const W = 0x00;

H

16-bit

pub const H = 0x08;

B

8-bit

pub const B = 0x10;

DW

64-bit

pub const DW = 0x18;

IMM

pub const IMM = 0x00;

ABS

pub const ABS = 0x20;

IND

pub const IND = 0x40;

MEM

pub const MEM = 0x60;

LEN

pub const LEN = 0x80;

MSH

pub const MSH = 0xa0;
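
An opcode byte is composed by OR-ing an instruction class with a size and a mode. A minimal sketch, using only the constants above, of the word-sized register load from memory that the tests at the bottom of this file check as 0x61:

// class LDX, mode MEM, 32-bit width W (illustrative only)
const ldx_word_opcode: u8 = LDX | MEM | W; // 0x01 | 0x60 | 0x00 == 0x61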

// alu fields

ADD

pub const ADD = 0x00;

SUB

pub const SUB = 0x10;

MUL

pub const MUL = 0x20;

DIV

pub const DIV = 0x30;

OR

pub const OR = 0x40;

AND

pub const AND = 0x50;

LSH

pub const LSH = 0x60;

RSH

pub const RSH = 0x70;

NEG

pub const NEG = 0x80;

MOD

pub const MOD = 0x90;

XOR

pub const XOR = 0xa0;

// jmp fields

JA

pub const JA = 0x00;

JEQ

pub const JEQ = 0x10;

JGT

pub const JGT = 0x20;

JGE

pub const JGE = 0x30;

JSET

pub const JSET = 0x40;

//#define BPF_SRC(code)   ((code) & 0x08)

K

pub const K = 0x00;

X

pub const X = 0x08;
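
The source modifier selects between an immediate and a register operand: OR-ing K encodes "use the imm field" and OR-ing X encodes "use the src register", which is what Insn.imm_reg() below relies on. A small sketch using the jump constants:

// conditional jump on equality against an immediate vs. against a register
const jeq_imm_opcode: u8 = JMP | JEQ | K; // 0x05 | 0x10 | 0x00 == 0x15
const jeq_reg_opcode: u8 = JMP | JEQ | X; // 0x05 | 0x10 | 0x08 == 0x1d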

MAXINSNS

pub const MAXINSNS = 4096;

// instruction classes

JMP32

jmp mode in word width

pub const JMP32 = 0x06;

ALU64

alu mode in double word width

pub const ALU64 = 0x07;

// ld/ldx fields

XADD

exclusive add

pub const XADD = 0xc0;

// alu/jmp fields

MOV

mov reg to reg

pub const MOV = 0xb0;

ARSH

sign extending arithmetic shift right

pub const ARSH = 0xc0;

// change endianness of a register

END

flags for endianness conversion:

pub const END = 0xd0;

TO_LE

convert to little-endian

pub const TO_LE = 0x00;

TO_BE

convert to big-endian

pub const TO_BE = 0x08;

FROM_LE

pub const FROM_LE = TO_LE;

FROM_BE

pub const FROM_BE = TO_BE;
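
These flags combine with the ALU class and the END operation; a brief sketch that reproduces the two byte-swap opcodes hard-coded in endian_swap() further down:

// "convert to little-endian" and "convert to big-endian" opcodes (illustrative only)
const to_le_opcode: u8 = ALU | END | TO_LE; // 0x04 | 0xd0 | 0x00 == 0xd4
const to_be_opcode: u8 = ALU | END | TO_BE; // 0x04 | 0xd0 | 0x08 == 0xdc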

// jmp encodings

JNE

jump !=

pub const JNE = 0x50;

JLT

LT is unsigned, '<'

pub const JLT = 0xa0;

JLE

LE is unsigned, '<='

pub const JLE = 0xb0;

JSGT

SGT is signed '>', GT in x86

pub const JSGT = 0x60;

JSGE

SGE is signed '>=', GE in x86

pub const JSGE = 0x70;

JSLT

SLT is signed, '<'

pub const JSLT = 0xc0;

JSLE

SLE is signed, '<='

pub const JSLE = 0xd0;

CALL

function call

pub const CALL = 0x80;

EXIT

function return

pub const EXIT = 0x90;

F_ALLOW_OVERRIDE

Flag for prog_attach command. If a sub-cgroup installs some bpf program, the program in this cgroup yields to sub-cgroup program.

pub const F_ALLOW_OVERRIDE = 0x1;

F_ALLOW_MULTI

Flag for prog_attach command. If a sub-cgroup installs some bpf program, that cgroup program gets run in addition to the program in this cgroup.

pub const F_ALLOW_MULTI = 0x2;

F_REPLACE

Flag for prog_attach command.

pub const F_REPLACE = 0x4;

F_STRICT_ALIGNMENT

If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the verifier will perform strict alignment checking as if the kernel has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, and NET_IP_ALIGN defined to 2.

pub const F_STRICT_ALIGNMENT = 0x1;

F_ANY_ALIGNMENT

If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the verifier will allow any alignment whatsoever. On platforms with strict alignment requirements for loads and stores (such as sparc and mips) the verifier validates that all loads and stores provably follow this requirement. This flag turns that checking and enforcement off.

It is mostly used for testing when we want to validate the context and memory access aspects of the verifier, but because of an unaligned access the alignment check would trigger before the one we are interested in.

pub const F_ANY_ALIGNMENT = 0x2;

F_TEST_RND_HI32

BPF_F_TEST_RND_HI32 is used in the BPF_PROG_LOAD command for testing purposes. The verifier does sub-register def/use analysis and identifies instructions whose def only matters for the low 32 bits and whose high 32 bits are never referenced later through implicit zero extension. The verifier therefore notifies JIT back-ends that it is safe to skip clearing the high 32 bits for these instructions, which saves some back-ends a lot of code-gen. However, such an optimization is not necessary on some arches, for example x86_64 and arm64, whose JIT back-ends hence don't use the verifier's analysis result. But we still want a way to verify the correctness of the described optimization on x86_64, on which the testsuites are frequently exercised.

So this flag is introduced. Once it is set, the verifier will randomize the high 32 bits for those instructions that have been identified as safe to ignore. Then, if the verifier's analysis is incorrect, such randomization will regress tests and expose the bug.

pub const F_TEST_RND_HI32 = 0x4;

F_SLEEPABLE

If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will restrict map and helper usage for such programs. Sleepable BPF programs can only be attached to hooks where kernel execution context allows sleeping. Such programs are allowed to use helpers that may sleep like bpf_copy_from_user().

pub const F_SLEEPABLE = 0x10;

PSEUDO_MAP_FD

When BPF ldimm64's insn[0].src_reg != 0 then this can have two extensions:

insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
insn[0].imm:      map fd              map fd
insn[1].imm:      0                   offset into value
insn[0].off:      0                   0
insn[1].off:      0                   0
ldimm64 rewrite:  address of map      address of map[0]+offset
verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE

pub const PSEUDO_MAP_FD = 1;

PSEUDO_MAP_VALUE

pub const PSEUDO_MAP_VALUE = 2;
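
As a sketch of the table above, loading a map file descriptor takes two consecutive instructions because imm is only 32 bits wide; the Insn.ld_map_fd1()/ld_map_fd2() helpers defined later in this file set src_reg to PSEUDO_MAP_FD so the verifier rewrites the pair into the address of the map. some_map_fd here is hypothetical, e.g. a value returned by map_create().

// two-instruction 64-bit immediate load carrying a map fd (illustrative only)
const some_map_fd: fd_t = 3;
const load_map_into_r1 = [_]Insn{
    Insn.ld_map_fd1(.r1, some_map_fd),
    Insn.ld_map_fd2(some_map_fd),
};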

PSEUDO_CALL

when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative offset to another bpf function

pub const PSEUDO_CALL = 1;

ANY

flag for BPF_MAP_UPDATE_ELEM command. create new element or update existing

pub const ANY = 0;

NOEXIST

flag for BPF_MAP_UPDATE_ELEM command. create new element if it didn't exist

pub const NOEXIST = 1;

EXIST

flag for BPF_MAP_UPDATE_ELEM command. update existing element

pub const EXIST = 2;

F_LOCK

flag for BPF_MAP_UPDATE_ELEM command. spin_lock-ed map_lookup/map_update

pub const F_LOCK = 4;

BPF_F_NO_PREALLOC

flag for BPF_MAP_CREATE command

pub const BPF_F_NO_PREALLOC = 0x1;

BPF_F_NO_COMMON_LRU

flag for BPF_MAP_CREATE command. Instead of having one common LRU list in the BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list which can scale and perform better. Note, the LRU nodes (including free nodes) cannot be moved across different LRU lists.

pub const BPF_F_NO_COMMON_LRU = 0x2;

BPF_F_NUMA_NODE

flag for BPF_MAP_CREATE command. Specify numa node during map creation

pub const BPF_F_NUMA_NODE = 0x4;

BPF_F_RDONLY

flag for BPF_MAP_CREATE command. Flags for BPF object read access from syscall side

pub const BPF_F_RDONLY = 0x8;

BPF_F_WRONLY

flag for BPF_MAP_CREATE command. Flags for BPF object write access from syscall side

pub const BPF_F_WRONLY = 0x10;

BPF_F_STACK_BUILD_ID

flag for BPF_MAP_CREATE command. Flag for stack_map, store build_id+offset instead of pointer

pub const BPF_F_STACK_BUILD_ID = 0x20;

BPF_F_ZERO_SEED

flag for BPF_MAP_CREATE command. Zero-initialize hash function seed. This should only be used for testing.

pub const BPF_F_ZERO_SEED = 0x40;

BPF_F_RDONLY_PROG

flag for BPF_MAP_CREATE command. Flags for accessing BPF object from program side.

pub const BPF_F_RDONLY_PROG = 0x80;

BPF_F_WRONLY_PROG

flag for BPF_MAP_CREATE command. Flags for accessing BPF object from program side.

pub const BPF_F_WRONLY_PROG = 0x100;

BPF_F_CLONE

flag for BPF_MAP_CREATE command. Clone map from listener for newly accepted socket

pub const BPF_F_CLONE = 0x200;

BPF_F_MMAPABLE

flag for BPF_MAP_CREATE command. Enable memory-mapping BPF map

pub const BPF_F_MMAPABLE = 0x400;

Helper

These values correspond to "syscalls" within the BPF program's environment; each one is documented in std.os.linux.BPF.kern.

pub const Helper = enum(i32) {
    unspec,
    map_lookup_elem,
    map_update_elem,
    map_delete_elem,
    probe_read,
    ktime_get_ns,
    trace_printk,
    get_prandom_u32,
    get_smp_processor_id,
    skb_store_bytes,
    l3_csum_replace,
    l4_csum_replace,
    tail_call,
    clone_redirect,
    get_current_pid_tgid,
    get_current_uid_gid,
    get_current_comm,
    get_cgroup_classid,
    skb_vlan_push,
    skb_vlan_pop,
    skb_get_tunnel_key,
    skb_set_tunnel_key,
    perf_event_read,
    redirect,
    get_route_realm,
    perf_event_output,
    skb_load_bytes,
    get_stackid,
    csum_diff,
    skb_get_tunnel_opt,
    skb_set_tunnel_opt,
    skb_change_proto,
    skb_change_type,
    skb_under_cgroup,
    get_hash_recalc,
    get_current_task,
    probe_write_user,
    current_task_under_cgroup,
    skb_change_tail,
    skb_pull_data,
    csum_update,
    set_hash_invalid,
    get_numa_node_id,
    skb_change_head,
    xdp_adjust_head,
    probe_read_str,
    get_socket_cookie,
    get_socket_uid,
    set_hash,
    setsockopt,
    skb_adjust_room,
    redirect_map,
    sk_redirect_map,
    sock_map_update,
    xdp_adjust_meta,
    perf_event_read_value,
    perf_prog_read_value,
    getsockopt,
    override_return,
    sock_ops_cb_flags_set,
    msg_redirect_map,
    msg_apply_bytes,
    msg_cork_bytes,
    msg_pull_data,
    bind,
    xdp_adjust_tail,
    skb_get_xfrm_state,
    get_stack,
    skb_load_bytes_relative,
    fib_lookup,
    sock_hash_update,
    msg_redirect_hash,
    sk_redirect_hash,
    lwt_push_encap,
    lwt_seg6_store_bytes,
    lwt_seg6_adjust_srh,
    lwt_seg6_action,
    rc_repeat,
    rc_keydown,
    skb_cgroup_id,
    get_current_cgroup_id,
    get_local_storage,
    sk_select_reuseport,
    skb_ancestor_cgroup_id,
    sk_lookup_tcp,
    sk_lookup_udp,
    sk_release,
    map_push_elem,
    map_pop_elem,
    map_peek_elem,
    msg_push_data,
    msg_pop_data,
    rc_pointer_rel,
    spin_lock,
    spin_unlock,
    sk_fullsock,
    tcp_sock,
    skb_ecn_set_ce,
    get_listener_sock,
    skc_lookup_tcp,
    tcp_check_syncookie,
    sysctl_get_name,
    sysctl_get_current_value,
    sysctl_get_new_value,
    sysctl_set_new_value,
    strtol,
    strtoul,
    sk_storage_get,
    sk_storage_delete,
    send_signal,
    tcp_gen_syncookie,
    skb_output,
    probe_read_user,
    probe_read_kernel,
    probe_read_user_str,
    probe_read_kernel_str,
    tcp_send_ack,
    send_signal_thread,
    jiffies64,
    read_branch_records,
    get_ns_current_pid_tgid,
    xdp_output,
    get_netns_cookie,
    get_current_ancestor_cgroup_id,
    sk_assign,
    ktime_get_boot_ns,
    seq_printf,
    seq_write,
    sk_cgroup_id,
    sk_ancestor_cgroup_id,
    ringbuf_output,
    ringbuf_reserve,
    ringbuf_submit,
    ringbuf_discard,
    ringbuf_query,
    csum_level,
    skc_to_tcp6_sock,
    skc_to_tcp_sock,
    skc_to_tcp_timewait_sock,
    skc_to_tcp_request_sock,
    skc_to_udp6_sock,
    get_task_stack,
    _,
};
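
The enum's integer value is what ends up in the imm field of a call instruction, as done by Insn.call() below; a minimal sketch:

// emit a call to the ktime_get_ns helper (illustrative only)
const get_time_call = Insn.call(Helper.ktime_get_ns);
// get_time_call.imm == @intFromEnum(Helper.ktime_get_ns)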

// TODO: determine that this is the expected bit layout for both little and big
// endian systems

Insn

a single BPF instruction

pub const Insn = packed struct {
    code: u8,
    dst: u4,
    src: u4,
    off: i16,
    imm: i32,

    pub const Reg = enum(u4) { r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10 };
    const Source = enum(u1) { reg, imm };

    const Mode = enum(u8) {
        imm = IMM,
        abs = ABS,
        ind = IND,
        mem = MEM,
        len = LEN,
        msh = MSH,
    };

    pub const AluOp = enum(u8) {
        add = ADD,
        sub = SUB,
        mul = MUL,
        div = DIV,
        alu_or = OR,
        alu_and = AND,
        lsh = LSH,
        rsh = RSH,
        neg = NEG,
        mod = MOD,
        xor = XOR,
        mov = MOV,
        arsh = ARSH,
    };

    pub const Size = enum(u8) {
        byte = B,
        half_word = H,
        word = W,
        double_word = DW,
    };

    pub const JmpOp = enum(u8) {
        ja = JA,
        jeq = JEQ,
        jgt = JGT,
        jge = JGE,
        jset = JSET,
        jlt = JLT,
        jle = JLE,
        jne = JNE,
        jsgt = JSGT,
        jsge = JSGE,
        jslt = JSLT,
        jsle = JSLE,
    };

    const ImmOrReg = union(Source) {
        reg: Reg,
        imm: i32,
    };

    fn imm_reg(code: u8, dst: Reg, src: anytype, off: i16) Insn {
        const imm_or_reg = if (@TypeOf(src) == Reg or @typeInfo(@TypeOf(src)) == .EnumLiteral)
            ImmOrReg{ .reg = @as(Reg, src) }
        else
            ImmOrReg{ .imm = src };

        const src_type: u8 = switch (imm_or_reg) {
            .imm => K,
            .reg => X,
        };

        return Insn{
            .code = code | src_type,
            .dst = @intFromEnum(dst),
            .src = switch (imm_or_reg) {
                .imm => 0,
                .reg => |r| @intFromEnum(r),
            },
            .off = off,
            .imm = switch (imm_or_reg) {
                .imm => |i| i,
                .reg => 0,
            },
        };
    }

alu()

r0 - r9 are general purpose 64-bit registers, r10 points to the stack frame

    pub fn alu(comptime width: comptime_int, op: AluOp, dst: Reg, src: anytype) Insn {
        const width_bitfield = switch (width) {
            32 => ALU,
            64 => ALU64,
            else => @compileError("width must be 32 or 64"),
        };

        return imm_reg(width_bitfield | @intFromEnum(op), dst, src, 0);
    }
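
    // For example, alu(32, .add, .r1, 1) encodes the 32-bit "r1 += 1" form
    // (class ALU, source K), while alu(64, .add, .r1, Reg.r2) encodes the
    // 64-bit register form (class ALU64, source X); the wrappers below always
    // use the 64-bit class.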

mov()

    pub fn mov(dst: Reg, src: anytype) Insn {
        return alu(64, .mov, dst, src);
    }

add()

    pub fn add(dst: Reg, src: anytype) Insn {
        return alu(64, .add, dst, src);
    }

sub()

    pub fn sub(dst: Reg, src: anytype) Insn {
        return alu(64, .sub, dst, src);
    }

mul()

    pub fn mul(dst: Reg, src: anytype) Insn {
        return alu(64, .mul, dst, src);
    }

div()

    pub fn div(dst: Reg, src: anytype) Insn {
        return alu(64, .div, dst, src);
    }

alu_or()

    pub fn alu_or(dst: Reg, src: anytype) Insn {
        return alu(64, .alu_or, dst, src);
    }

alu_and()

    pub fn alu_and(dst: Reg, src: anytype) Insn {
        return alu(64, .alu_and, dst, src);
    }

lsh()

    pub fn lsh(dst: Reg, src: anytype) Insn {
        return alu(64, .lsh, dst, src);
    }

rsh()

    pub fn rsh(dst: Reg, src: anytype) Insn {
        return alu(64, .rsh, dst, src);
    }

neg()

    pub fn neg(dst: Reg) Insn {
        return alu(64, .neg, dst, 0);
    }

mod()

    pub fn mod(dst: Reg, src: anytype) Insn {
        return alu(64, .mod, dst, src);
    }

xor()

    pub fn xor(dst: Reg, src: anytype) Insn {
        return alu(64, .xor, dst, src);
    }

arsh()

    pub fn arsh(dst: Reg, src: anytype) Insn {
        return alu(64, .arsh, dst, src);
    }

jmp()

    pub fn jmp(op: JmpOp, dst: Reg, src: anytype, off: i16) Insn {
        return imm_reg(JMP | @intFromEnum(op), dst, src, off);
    }

ja()

    pub fn ja(off: i16) Insn {
        return jmp(.ja, .r0, 0, off);
    }

jeq()

    pub fn jeq(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jeq, dst, src, off);
    }

jgt()

    pub fn jgt(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jgt, dst, src, off);
    }

jge()

    pub fn jge(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jge, dst, src, off);
    }

jlt()

    pub fn jlt(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jlt, dst, src, off);
    }

jle()

    pub fn jle(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jle, dst, src, off);
    }

jset()

    pub fn jset(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jset, dst, src, off);
    }

jne()

    pub fn jne(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jne, dst, src, off);
    }

jsgt()

    pub fn jsgt(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jsgt, dst, src, off);
    }

jsge()

    pub fn jsge(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jsge, dst, src, off);
    }

jslt()

    pub fn jslt(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jslt, dst, src, off);
    }

jsle()

    pub fn jsle(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jsle, dst, src, off);
    }

xadd()

    pub fn xadd(dst: Reg, src: Reg) Insn {
        return Insn{
            .code = STX | XADD | DW,
            .dst = @intFromEnum(dst),
            .src = @intFromEnum(src),
            .off = 0,
            .imm = 0,
        };
    }

    fn ld(mode: Mode, size: Size, dst: Reg, src: Reg, imm: i32) Insn {
        return Insn{
            .code = @intFromEnum(mode) | @intFromEnum(size) | LD,
            .dst = @intFromEnum(dst),
            .src = @intFromEnum(src),
            .off = 0,
            .imm = imm,
        };
    }

ld_abs()

    pub fn ld_abs(size: Size, dst: Reg, src: Reg, imm: i32) Insn {
        return ld(.abs, size, dst, src, imm);
    }

ld_ind()

    pub fn ld_ind(size: Size, dst: Reg, src: Reg, imm: i32) Insn {
        return ld(.ind, size, dst, src, imm);
    }

ldx()

    pub fn ldx(size: Size, dst: Reg, src: Reg, off: i16) Insn {
        return Insn{
            .code = MEM | @intFromEnum(size) | LDX,
            .dst = @intFromEnum(dst),
            .src = @intFromEnum(src),
            .off = off,
            .imm = 0,
        };
    }

    fn ld_imm_impl1(dst: Reg, src: Reg, imm: u64) Insn {
        return Insn{
            .code = LD | DW | IMM,
            .dst = @intFromEnum(dst),
            .src = @intFromEnum(src),
            .off = 0,
            .imm = @as(i32, @intCast(@as(u32, @truncate(imm)))),
        };
    }

    fn ld_imm_impl2(imm: u64) Insn {
        return Insn{
            .code = 0,
            .dst = 0,
            .src = 0,
            .off = 0,
            .imm = @as(i32, @intCast(@as(u32, @truncate(imm >> 32)))),
        };
    }

ld_dw1()

    pub fn ld_dw1(dst: Reg, imm: u64) Insn {
        return ld_imm_impl1(dst, .r0, imm);
    }

ld_dw2()

    pub fn ld_dw2(imm: u64) Insn {
        return ld_imm_impl2(imm);
    }

ld_map_fd1()

    pub fn ld_map_fd1(dst: Reg, map_fd: fd_t) Insn {
        return ld_imm_impl1(dst, @as(Reg, @enumFromInt(PSEUDO_MAP_FD)), @as(u64, @intCast(map_fd)));
    }

ld_map_fd2()

    pub fn ld_map_fd2(map_fd: fd_t) Insn {
        return ld_imm_impl2(@as(u64, @intCast(map_fd)));
    }

st()

    pub fn st(size: Size, dst: Reg, off: i16, imm: i32) Insn {
        return Insn{
            .code = MEM | @intFromEnum(size) | ST,
            .dst = @intFromEnum(dst),
            .src = 0,
            .off = off,
            .imm = imm,
        };
    }

stx()

    pub fn stx(size: Size, dst: Reg, off: i16, src: Reg) Insn {
        return Insn{
            .code = MEM | @intFromEnum(size) | STX,
            .dst = @intFromEnum(dst),
            .src = @intFromEnum(src),
            .off = off,
            .imm = 0,
        };
    }

    fn endian_swap(endian: std.builtin.Endian, comptime size: Size, dst: Reg) Insn {
        return Insn{
            .code = switch (endian) {
                .big => 0xdc,
                .little => 0xd4,
            },
            .dst = @intFromEnum(dst),
            .src = 0,
            .off = 0,
            .imm = switch (size) {
                .byte => @compileError("can't swap a single byte"),
                .half_word => 16,
                .word => 32,
                .double_word => 64,
            },
        };
    }

le()

    pub fn le(comptime size: Size, dst: Reg) Insn {
        return endian_swap(.little, size, dst);
    }

be()

    pub fn be(comptime size: Size, dst: Reg) Insn {
        return endian_swap(.big, size, dst);
    }

call()

    pub fn call(helper: Helper) Insn {
        return Insn{
            .code = JMP | CALL,
            .dst = 0,
            .src = 0,
            .off = 0,
            .imm = @intFromEnum(helper),
        };
    }

exit()

exit BPF program

    pub fn exit() Insn {
        return Insn{
            .code = JMP | EXIT,
            .dst = 0,
            .src = 0,
            .off = 0,
            .imm = 0,
        };
    }
};
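
A short sketch (using only the builders above) of how instructions compose into a program; this one returns 1 when r1 is zero and 0 otherwise:

const example_prog = [_]Insn{
    Insn.mov(.r0, 0), // r0 = 0
    Insn.jne(.r1, 0, 1), // if r1 != 0, skip the next instruction
    Insn.mov(.r0, 1), // r0 = 1
    Insn.exit(), // return r0
};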

Test:

insn bitsize

test "insn bitsize" {
    try expectEqual(@bitSizeOf(Insn), 64);
}

fn expect_opcode(code: u8, insn: Insn) !void {
    try expectEqual(code, insn.code);
}

// The opcodes were grabbed from https://github.com/iovisor/bpf-docs/blob/master/eBPF.md

Test:

opcodes

test "opcodes" {
    // instructions whose names end with 1 or 2 are consecutive for loading
    // 64-bit immediates (imm is only 32 bits wide)

    // alu instructions
    try expect_opcode(0x07, Insn.add(.r1, 0));
    try expect_opcode(0x0f, Insn.add(.r1, .r2));
    try expect_opcode(0x17, Insn.sub(.r1, 0));
    try expect_opcode(0x1f, Insn.sub(.r1, .r2));
    try expect_opcode(0x27, Insn.mul(.r1, 0));
    try expect_opcode(0x2f, Insn.mul(.r1, .r2));
    try expect_opcode(0x37, Insn.div(.r1, 0));
    try expect_opcode(0x3f, Insn.div(.r1, .r2));
    try expect_opcode(0x47, Insn.alu_or(.r1, 0));
    try expect_opcode(0x4f, Insn.alu_or(.r1, .r2));
    try expect_opcode(0x57, Insn.alu_and(.r1, 0));
    try expect_opcode(0x5f, Insn.alu_and(.r1, .r2));
    try expect_opcode(0x67, Insn.lsh(.r1, 0));
    try expect_opcode(0x6f, Insn.lsh(.r1, .r2));
    try expect_opcode(0x77, Insn.rsh(.r1, 0));
    try expect_opcode(0x7f, Insn.rsh(.r1, .r2));
    try expect_opcode(0x87, Insn.neg(.r1));
    try expect_opcode(0x97, Insn.mod(.r1, 0));
    try expect_opcode(0x9f, Insn.mod(.r1, .r2));
    try expect_opcode(0xa7, Insn.xor(.r1, 0));
    try expect_opcode(0xaf, Insn.xor(.r1, .r2));
    try expect_opcode(0xb7, Insn.mov(.r1, 0));
    try expect_opcode(0xbf, Insn.mov(.r1, .r2));
    try expect_opcode(0xc7, Insn.arsh(.r1, 0));
    try expect_opcode(0xcf, Insn.arsh(.r1, .r2));

    // atomic instructions: might be more of these not documented in the wild
    try expect_opcode(0xdb, Insn.xadd(.r1, .r2));

    // TODO: byteswap instructions
    try expect_opcode(0xd4, Insn.le(.half_word, .r1));
    try expectEqual(@as(i32, @intCast(16)), Insn.le(.half_word, .r1).imm);
    try expect_opcode(0xd4, Insn.le(.word, .r1));
    try expectEqual(@as(i32, @intCast(32)), Insn.le(.word, .r1).imm);
    try expect_opcode(0xd4, Insn.le(.double_word, .r1));
    try expectEqual(@as(i32, @intCast(64)), Insn.le(.double_word, .r1).imm);
    try expect_opcode(0xdc, Insn.be(.half_word, .r1));
    try expectEqual(@as(i32, @intCast(16)), Insn.be(.half_word, .r1).imm);
    try expect_opcode(0xdc, Insn.be(.word, .r1));
    try expectEqual(@as(i32, @intCast(32)), Insn.be(.word, .r1).imm);
    try expect_opcode(0xdc, Insn.be(.double_word, .r1));
    try expectEqual(@as(i32, @intCast(64)), Insn.be(.double_word, .r1).imm);

    // memory instructions
    try expect_opcode(0x18, Insn.ld_dw1(.r1, 0));
    try expect_opcode(0x00, Insn.ld_dw2(0));

    //   loading a map fd
    try expect_opcode(0x18, Insn.ld_map_fd1(.r1, 0));
    try expectEqual(@as(u4, @intCast(PSEUDO_MAP_FD)), Insn.ld_map_fd1(.r1, 0).src);
    try expect_opcode(0x00, Insn.ld_map_fd2(0));

    try expect_opcode(0x38, Insn.ld_abs(.double_word, .r1, .r2, 0));
    try expect_opcode(0x20, Insn.ld_abs(.word, .r1, .r2, 0));
    try expect_opcode(0x28, Insn.ld_abs(.half_word, .r1, .r2, 0));
    try expect_opcode(0x30, Insn.ld_abs(.byte, .r1, .r2, 0));

    try expect_opcode(0x58, Insn.ld_ind(.double_word, .r1, .r2, 0));
    try expect_opcode(0x40, Insn.ld_ind(.word, .r1, .r2, 0));
    try expect_opcode(0x48, Insn.ld_ind(.half_word, .r1, .r2, 0));
    try expect_opcode(0x50, Insn.ld_ind(.byte, .r1, .r2, 0));

    try expect_opcode(0x79, Insn.ldx(.double_word, .r1, .r2, 0));
    try expect_opcode(0x61, Insn.ldx(.word, .r1, .r2, 0));
    try expect_opcode(0x69, Insn.ldx(.half_word, .r1, .r2, 0));
    try expect_opcode(0x71, Insn.ldx(.byte, .r1, .r2, 0));

    try expect_opcode(0x62, Insn.st(.word, .r1, 0, 0));
    try expect_opcode(0x6a, Insn.st(.half_word, .r1, 0, 0));
    try expect_opcode(0x72, Insn.st(.byte, .r1, 0, 0));

    try expect_opcode(0x63, Insn.stx(.word, .r1, 0, .r2));
    try expect_opcode(0x6b, Insn.stx(.half_word, .r1, 0, .r2));
    try expect_opcode(0x73, Insn.stx(.byte, .r1, 0, .r2));
    try expect_opcode(0x7b, Insn.stx(.double_word, .r1, 0, .r2));

    // branch instructions
    try expect_opcode(0x05, Insn.ja(0));
    try expect_opcode(0x15, Insn.jeq(.r1, 0, 0));
    try expect_opcode(0x1d, Insn.jeq(.r1, .r2, 0));
    try expect_opcode(0x25, Insn.jgt(.r1, 0, 0));
    try expect_opcode(0x2d, Insn.jgt(.r1, .r2, 0));
    try expect_opcode(0x35, Insn.jge(.r1, 0, 0));
    try expect_opcode(0x3d, Insn.jge(.r1, .r2, 0));
    try expect_opcode(0xa5, Insn.jlt(.r1, 0, 0));
    try expect_opcode(0xad, Insn.jlt(.r1, .r2, 0));
    try expect_opcode(0xb5, Insn.jle(.r1, 0, 0));
    try expect_opcode(0xbd, Insn.jle(.r1, .r2, 0));
    try expect_opcode(0x45, Insn.jset(.r1, 0, 0));
    try expect_opcode(0x4d, Insn.jset(.r1, .r2, 0));
    try expect_opcode(0x55, Insn.jne(.r1, 0, 0));
    try expect_opcode(0x5d, Insn.jne(.r1, .r2, 0));
    try expect_opcode(0x65, Insn.jsgt(.r1, 0, 0));
    try expect_opcode(0x6d, Insn.jsgt(.r1, .r2, 0));
    try expect_opcode(0x75, Insn.jsge(.r1, 0, 0));
    try expect_opcode(0x7d, Insn.jsge(.r1, .r2, 0));
    try expect_opcode(0xc5, Insn.jslt(.r1, 0, 0));
    try expect_opcode(0xcd, Insn.jslt(.r1, .r2, 0));
    try expect_opcode(0xd5, Insn.jsle(.r1, 0, 0));
    try expect_opcode(0xdd, Insn.jsle(.r1, .r2, 0));
    try expect_opcode(0x85, Insn.call(.unspec));
    try expect_opcode(0x95, Insn.exit());
}

Cmd

map_create: Create a map and return a file descriptor that refers to the map. The close-on-exec file descriptor flag is automatically enabled for the new file descriptor; uses MapCreateAttr
map_lookup_elem: Look up an element by key in a specified map and return its value; uses MapElemAttr
map_update_elem: Create or update an element (key/value pair) in a specified map; uses MapElemAttr
map_delete_elem: Look up and delete an element by key in a specified map; uses MapElemAttr
map_get_next_key: Look up an element by key in a specified map and return the key of the next element
prog_load: Verify and load an eBPF program, returning a new file descriptor associated with the program. The close-on-exec file descriptor flag is automatically enabled for the new file descriptor; uses ProgLoadAttr
obj_pin: Pin a map or eBPF program to a path within the minimal BPF filesystem; uses ObjAttr
obj_get: Get the file descriptor of a BPF object pinned to a certain path; uses ObjAttr
prog_attach: uses ProgAttachAttr
prog_detach: uses ProgAttachAttr
prog_test_run: uses TestRunAttr
prog_get_next_id: uses GetIdAttr
map_get_next_id: uses GetIdAttr
prog_get_fd_by_id: uses GetIdAttr
map_get_fd_by_id: uses GetIdAttr
obj_get_info_by_fd: uses InfoAttr
prog_query: uses QueryAttr
raw_tracepoint_open: uses RawTracepointAttr
btf_load: uses BtfLoadAttr
btf_get_fd_by_id: uses GetIdAttr
task_fd_query: uses TaskFdQueryAttr
map_lookup_and_delete_elem: uses MapElemAttr
btf_get_next_id: uses GetIdAttr
map_lookup_batch: uses MapBatchAttr
map_lookup_and_delete_batch: uses MapBatchAttr
map_update_batch: uses MapBatchAttr
map_delete_batch: uses MapBatchAttr
link_create: uses LinkCreateAttr
link_update: uses LinkUpdateAttr
link_get_fd_by_id: uses GetIdAttr
link_get_next_id: uses GetIdAttr
enable_stats: uses EnableStatsAttr
iter_create: uses IterCreateAttr

pub const Cmd = enum(usize) {
    map_create,

    map_lookup_elem,

    map_update_elem,

    map_delete_elem,

    map_get_next_key,

    prog_load,

    obj_pin,

    obj_get,

    prog_attach,

    prog_detach,

    prog_test_run,

    prog_get_next_id,

    map_get_next_id,

    prog_get_fd_by_id,

    map_get_fd_by_id,

    obj_get_info_by_fd,

    prog_query,

    raw_tracepoint_open,

    btf_load,

    btf_get_fd_by_id,

    task_fd_query,

    map_lookup_and_delete_elem,
    map_freeze,

    btf_get_next_id,

    map_lookup_batch,

    map_lookup_and_delete_batch,

    map_update_batch,

    map_delete_batch,

    link_create,

    link_update,

    link_get_fd_by_id,

    link_get_next_id,

    enable_stats,

    iter_create,
    link_detach,
    _,
};

MapType

Notes on MapType.ringbuf: an ordered and shared CPU version of perf_event_array. They have similar semantics:
- variable length records
- no blocking: when full, reservation fails
- memory mappable for ease and speed
- epoll notifications for new data, but can busy poll

Ringbufs give BPF programs two sets of APIs:
- ringbuf_output() allows copying data from one place to a ring buffer, similar to bpf_perf_event_output()
- ringbuf_reserve()/ringbuf_commit()/ringbuf_discard() split the process into two steps. First a fixed amount of space is reserved; if that is successful, the program gets a pointer to a chunk of memory that can be submitted with commit() or discarded with discard()

ringbuf_output() incurs an extra memory copy, but allows submitting records whose length is not known beforehand, and is an easy replacement for perf_event_output().

ringbuf_reserve() avoids the extra memory copy but requires the size of the memory to be known beforehand.

ringbuf_query() allows querying properties of the map; 4 are currently supported:
- BPF_RB_AVAIL_DATA: amount of unconsumed data in ringbuf
- BPF_RB_RING_SIZE: returns size of ringbuf
- BPF_RB_CONS_POS/BPF_RB_PROD_POS: returns current logical position of consumer and producer respectively

key size: 0
value size: 0
max entries: size of ringbuf, must be power of 2

pub const MapType = enum(u32) {
    unspec,
    hash,
    array,
    prog_array,
    perf_event_array,
    percpu_hash,
    percpu_array,
    stack_trace,
    cgroup_array,
    lru_hash,
    lru_percpu_hash,
    lpm_trie,
    array_of_maps,
    hash_of_maps,
    devmap,
    sockmap,
    cpumap,
    xskmap,
    sockhash,
    cgroup_storage,
    reuseport_sockarray,
    percpu_cgroup_storage,
    queue,
    stack,
    sk_storage,
    devmap_hash,
    struct_ops,

    ringbuf,

    _,
};
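
Following the ringbuf notes above, a sketch of creating such a map with map_create() from later in this file; key and value sizes are zero and max_entries is the ring size in bytes (a power of two). This assumes a kernel with BPF ring buffer support and the appropriate privileges.

const ring_fd = try map_create(.ringbuf, 0, 0, 4096);
defer std.os.close(ring_fd);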

ProgType

Context type passed to each program type:

socket_filter: __sk_buff
kprobe: bpf_user_pt_regs_t
sched_cls: __sk_buff
sched_act: __sk_buff
tracepoint: u64
xdp: xdp_md
perf_event: bpf_perf_event_data
cgroup_skb: __sk_buff
cgroup_sock: bpf_sock
lwt_in: __sk_buff
lwt_out: __sk_buff
lwt_xmit: __sk_buff
sock_ops: bpf_sock_ops
sk_skb: __sk_buff
cgroup_device: bpf_cgroup_dev_ctx
sk_msg: sk_msg_md
raw_tracepoint: bpf_raw_tracepoint_args
cgroup_sock_addr: bpf_sock_addr
lwt_seg6local: __sk_buff
lirc_mode2: u32
sk_reuseport: sk_reuseport_md
flow_dissector: __sk_buff
cgroup_sysctl: bpf_sysctl
raw_tracepoint_writable: bpf_raw_tracepoint_args
cgroup_sockopt: bpf_sockopt
tracing: void *
struct_ops: void *
ext: void *
lsm: void *
sk_lookup: bpf_sk_lookup
syscall: void *

pub const ProgType = enum(u32) {
    unspec,

    socket_filter,

    kprobe,

    sched_cls,

    sched_act,

    tracepoint,

    xdp,

    perf_event,

    cgroup_skb,

    cgroup_sock,

    lwt_in,

    lwt_out,

    lwt_xmit,

    sock_ops,

    sk_skb,

    cgroup_device,

    sk_msg,

    raw_tracepoint,

    cgroup_sock_addr,

    lwt_seg6local,

    lirc_mode2,

    sk_reuseport,

    flow_dissector,

    cgroup_sysctl,

    raw_tracepoint_writable,

    cgroup_sockopt,

    tracing,

    struct_ops,

    ext,

    lsm,

    sk_lookup,

    syscall,

    _,
};

AttachType

pub const AttachType = enum(u32) {
    cgroup_inet_ingress,
    cgroup_inet_egress,
    cgroup_inet_sock_create,
    cgroup_sock_ops,
    sk_skb_stream_parser,
    sk_skb_stream_verdict,
    cgroup_device,
    sk_msg_verdict,
    cgroup_inet4_bind,
    cgroup_inet6_bind,
    cgroup_inet4_connect,
    cgroup_inet6_connect,
    cgroup_inet4_post_bind,
    cgroup_inet6_post_bind,
    cgroup_udp4_sendmsg,
    cgroup_udp6_sendmsg,
    lirc_mode2,
    flow_dissector,
    cgroup_sysctl,
    cgroup_udp4_recvmsg,
    cgroup_udp6_recvmsg,
    cgroup_getsockopt,
    cgroup_setsockopt,
    trace_raw_tp,
    trace_fentry,
    trace_fexit,
    modify_return,
    lsm_mac,
    trace_iter,
    cgroup_inet4_getpeername,
    cgroup_inet6_getpeername,
    cgroup_inet4_getsockname,
    cgroup_inet6_getsockname,
    xdp_devmap,
    cgroup_inet_sock_release,
    xdp_cpumap,
    sk_lookup,
    xdp,
    _,
};
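
This file defines no prog_attach wrapper, so as a sketch of how AttachType is used, the raw syscall can be driven the same way map_create() below drives it; cgroup_fd and prog_fd are hypothetical (an opened cgroup directory and a loaded cgroup_skb program):

var attach_attr = Attr{ .prog_attach = std.mem.zeroes(ProgAttachAttr) };
attach_attr.prog_attach.target_fd = cgroup_fd;
attach_attr.prog_attach.attach_bpf_fd = prog_fd;
attach_attr.prog_attach.attach_type = @intFromEnum(AttachType.cgroup_inet_ingress);
const rc = linux.bpf(.prog_attach, &attach_attr, @sizeOf(ProgAttachAttr));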

const obj_name_len = 16;

MapCreateAttr

struct used by Cmd.map_create command

map_type: one of MapType
key_size: size of key in bytes
value_size: size of value in bytes
max_entries: max number of entries in a map
map_flags: .map_create related flags
inner_map_fd: fd pointing to the inner map
numa_node: numa node (effective only if MapCreateFlags.numa_node is set)
map_ifindex: ifindex of netdev to create on
btf_fd: fd pointing to a BTF type data
btf_key_type_id: BTF type_id of the key
bpf_value_type_id: BTF type_id of the value
btf_vmlinux_value_type_id: BTF type_id of a kernel struct stored as the map value

pub const MapCreateAttr = extern struct {
    map_type: u32,

    key_size: u32,

    value_size: u32,

    max_entries: u32,

    map_flags: u32,

    inner_map_fd: fd_t,

    numa_node: u32,
    map_name: [obj_name_len]u8,

    map_ifindex: u32,

    btf_fd: fd_t,

    btf_key_type_id: u32,

    bpf_value_type_id: u32,

    btf_vmlinux_value_type_id: u32,
};

MapElemAttr

struct used by Cmd.map_*_elem commands

pub const MapElemAttr = extern struct {
    map_fd: fd_t,
    key: u64,
    result: extern union {
        value: u64,
        next_key: u64,
    },
    flags: u64,
};

MapBatchAttr

struct used by Cmd.map_*_batch commands

in_batch: start batch, NULL to start from beginning
out_batch: output: next start batch
count: input/output: input: # of key/value elements; output: # of filled elements

pub const MapBatchAttr = extern struct {
    in_batch: u64,

    out_batch: u64,
    keys: u64,
    values: u64,

    count: u32,
    map_fd: fd_t,
    elem_flags: u64,
    flags: u64,
};

ProgLoadAttr

struct used by Cmd.prog_load command

prog_type: one of ProgType
log_level: verbosity level of verifier
log_size: size of user buffer
log_buf: user supplied buffer
kern_version: not used
prog_ifindex: ifindex of netdev to prep for
expected_attach_type: For some prog types expected attach type must be known at load time to verify attach type specific parts of prog (context accesses, allowed helpers, etc).
prog_btf_fd: fd pointing to BTF type data
func_info_rec_size: userspace bpf_func_info size
func_info_cnt: number of bpf_func_info records
line_info_rec_size: userspace bpf_line_info size
line_info_cnt: number of bpf_line_info records
attact_btf_id: in-kernel BTF type id to attach to
attach_prog_id: 0 to attach to vmlinux

pub const ProgLoadAttr = extern struct {
    prog_type: u32,
    insn_cnt: u32,
    insns: u64,
    license: u64,

    log_level: u32,

    log_size: u32,

    log_buf: u64,

    kern_version: u32,
    prog_flags: u32,
    prog_name: [obj_name_len]u8,

    prog_ifindex: u32,

    expected_attach_type: u32,

    prog_btf_fd: fd_t,

    func_info_rec_size: u32,
    func_info: u64,

    func_info_cnt: u32,

    line_info_rec_size: u32,
    line_info: u64,

    line_info_cnt: u32,

    attact_btf_id: u32,

    attach_prog_id: u32,
};

ObjAttr

struct used by Cmd.obj_* commands

pub const ObjAttr = extern struct {
    pathname: u64,
    bpf_fd: fd_t,
    file_flags: u32,
};

ProgAttachAttr

struct used by Cmd.prog_attach/detach commands

target_fd: container object to attach to
attach_bpf_fd: eBPF program to attach
replace_bpf_fd: previously attached eBPF program to replace if .replace is used

pub const ProgAttachAttr = extern struct {
    target_fd: fd_t,

    attach_bpf_fd: fd_t,

    attach_type: u32,
    attach_flags: u32,

    // TODO: BPF_F_REPLACE flags
    replace_bpf_fd: fd_t,
};

TestRunAttr

struct used by Cmd.prog_test_run command

data_size_in: input: len of data_in
data_size_out: input/output: len of data_out; returns ENOSPC if data_out is too small
ctx_size_in: input: len of ctx_in
ctx_size_out: input/output: len of ctx_out; returns ENOSPC if ctx_out is too small

pub const TestRunAttr = extern struct {
    prog_fd: fd_t,
    retval: u32,

    data_size_in: u32,

    data_size_out: u32,
    data_in: u64,
    data_out: u64,
    repeat: u32,
    duration: u32,

    ctx_size_in: u32,

    ctx_size_out: u32,
    ctx_in: u64,
    ctx_out: u64,
};

GetIdAttr

struct used by Cmd.*_get_*_id commands

pub const GetIdAttr = extern struct {
    id: extern union {
        start_id: u32,
        prog_id: u32,
        map_id: u32,
        btf_id: u32,
        link_id: u32,
    },
    next_id: u32,
    open_flags: u32,
};

InfoAttr

struct used by Cmd.obj_get_info_by_fd command

pub const InfoAttr = extern struct {
    bpf_fd: fd_t,
    info_len: u32,
    info: u64,
};

QueryAttr

struct used by Cmd.prog_query command

target_fd: container object to query

pub const QueryAttr = extern struct {
    target_fd: fd_t,
    attach_type: u32,
    query_flags: u32,
    attach_flags: u32,
    prog_ids: u64,
    prog_cnt: u32,
};

RawTracepointAttr

struct used by Cmd.raw_tracepoint_open command

pub const RawTracepointAttr = extern struct {
    name: u64,
    prog_fd: fd_t,
};

BtfLoadAttr

struct used by Cmd.btf_load command

pub const BtfLoadAttr = extern struct {
    btf: u64,
    btf_log_buf: u64,
    btf_size: u32,
    btf_log_size: u32,
    btf_log_level: u32,
};

TaskFdQueryAttr

struct used by Cmd.task_fd_query

pid: input: pid
fd: input: fd
flags: input: flags
buf_len: input/output: buf len
buf: input/output: tp_name for tracepoint, symbol for kprobe, filename for uprobe
prog_id: output: prog_id
fd_type: output: BPF_FD_TYPE
probe_offset: output: probe_offset
probe_addr: output: probe_addr

pub const TaskFdQueryAttr = extern struct {
    pid: pid_t,

    fd: fd_t,

    flags: u32,

    buf_len: u32,

    buf: u64,

    prog_id: u32,

    fd_type: u32,

    probe_offset: u64,

    probe_addr: u64,
};

LinkCreateAttr

struct used by Cmd.link_create command

prog_fd: eBPF program to attach
target_fd: object to attach to
flags: extra flags

pub const LinkCreateAttr = extern struct {
    prog_fd: fd_t,

    target_fd: fd_t,
    attach_type: u32,

    flags: u32,
};

LinkUpdateAttr

struct used by Cmd.link_update command

new_prog_fd: new program to update link with
flags: extra flags
old_prog_fd: expected link's program fd; it is specified only if BPF_F_REPLACE is set in flags

pub const LinkUpdateAttr = extern struct {
    link_fd: fd_t,

    new_prog_fd: fd_t,

    flags: u32,

    old_prog_fd: fd_t,
};

EnableStatsAttr

struct used by Cmd.enable_stats command

pub const EnableStatsAttr = extern struct {
    type: u32,
};

IterCreateAttr

struct used by Cmd.iter_create command

pub const IterCreateAttr = extern struct {
    link_fd: fd_t,
    flags: u32,
};

Attr

Mega struct that is passed to the bpf() syscall

pub const Attr = extern union {
    map_create: MapCreateAttr,
    map_elem: MapElemAttr,
    map_batch: MapBatchAttr,
    prog_load: ProgLoadAttr,
    obj: ObjAttr,
    prog_attach: ProgAttachAttr,
    test_run: TestRunAttr,
    get_id: GetIdAttr,
    info: InfoAttr,
    query: QueryAttr,
    raw_tracepoint: RawTracepointAttr,
    btf_load: BtfLoadAttr,
    task_fd_query: TaskFdQueryAttr,
    link_create: LinkCreateAttr,
    link_update: LinkUpdateAttr,
    enable_stats: EnableStatsAttr,
    iter_create: IterCreateAttr,
};

Log

pub const Log = struct {
    level: u32,
    buf: []u8,
};
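
A sketch of passing a Log to prog_load() (defined below): the verifier writes its messages into buf at the requested verbosity level. some_prog stands in for a slice of Insn such as the ones built in the tests.

var verifier_log_buf = std.mem.zeroes([4096]u8);
var log = Log{ .level = 1, .buf = &verifier_log_buf };
const prog_fd = try prog_load(.socket_filter, some_prog, &log, "MIT", 0, 0);
defer std.os.close(prog_fd);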

map_create()

pub fn map_create(map_type: MapType, key_size: u32, value_size: u32, max_entries: u32) !fd_t {
    var attr = Attr{
        .map_create = std.mem.zeroes(MapCreateAttr),
    };

    attr.map_create.map_type = @intFromEnum(map_type);
    attr.map_create.key_size = key_size;
    attr.map_create.value_size = value_size;
    attr.map_create.max_entries = max_entries;

    const rc = linux.bpf(.map_create, &attr, @sizeOf(MapCreateAttr));
    switch (errno(rc)) {
        .SUCCESS => return @as(fd_t, @intCast(rc)),
        .INVAL => return error.MapTypeOrAttrInvalid,
        .NOMEM => return error.SystemResources,
        .PERM => return error.AccessDenied,
        else => |err| return unexpectedErrno(err),
    }
}

Test:

map_create

test "map_create" {
    const map = try map_create(.hash, 4, 4, 32);
    defer std.os.close(map);
}

map_lookup_elem()

pub fn map_lookup_elem(fd: fd_t, key: []const u8, value: []u8) !void {
    var attr = Attr{
        .map_elem = std.mem.zeroes(MapElemAttr),
    };

    attr.map_elem.map_fd = fd;
    attr.map_elem.key = @intFromPtr(key.ptr);
    attr.map_elem.result.value = @intFromPtr(value.ptr);

    const rc = linux.bpf(.map_lookup_elem, &attr, @sizeOf(MapElemAttr));
    switch (errno(rc)) {
        .SUCCESS => return,
        .BADF => return error.BadFd,
        .FAULT => unreachable,
        .INVAL => return error.FieldInAttrNeedsZeroing,
        .NOENT => return error.NotFound,
        .PERM => return error.AccessDenied,
        else => |err| return unexpectedErrno(err),
    }
}

map_update_elem()

pub fn map_update_elem(fd: fd_t, key: []const u8, value: []const u8, flags: u64) !void {
    var attr = Attr{
        .map_elem = std.mem.zeroes(MapElemAttr),
    };

    attr.map_elem.map_fd = fd;
    attr.map_elem.key = @intFromPtr(key.ptr);
    attr.map_elem.result = .{ .value = @intFromPtr(value.ptr) };
    attr.map_elem.flags = flags;

    const rc = linux.bpf(.map_update_elem, &attr, @sizeOf(MapElemAttr));
    switch (errno(rc)) {
        .SUCCESS => return,
        .@"2BIG" => return error.ReachedMaxEntries,
        .BADF => return error.BadFd,
        .FAULT => unreachable,
        .INVAL => return error.FieldInAttrNeedsZeroing,
        .NOMEM => return error.SystemResources,
        .PERM => return error.AccessDenied,
        else => |err| return unexpectedErrno(err),
    }
}

map_delete_elem()

pub fn map_delete_elem(fd: fd_t, key: []const u8) !void {
    var attr = Attr{
        .map_elem = std.mem.zeroes(MapElemAttr),
    };

    attr.map_elem.map_fd = fd;
    attr.map_elem.key = @intFromPtr(key.ptr);

    const rc = linux.bpf(.map_delete_elem, &attr, @sizeOf(MapElemAttr));
    switch (errno(rc)) {
        .SUCCESS => return,
        .BADF => return error.BadFd,
        .FAULT => unreachable,
        .INVAL => return error.FieldInAttrNeedsZeroing,
        .NOENT => return error.NotFound,
        .PERM => return error.AccessDenied,
        else => |err| return unexpectedErrno(err),
    }
}

map_get_next_key()

pub fn map_get_next_key(fd: fd_t, key: []const u8, next_key: []u8) !bool {
    var attr = Attr{
        .map_elem = std.mem.zeroes(MapElemAttr),
    };

    attr.map_elem.map_fd = fd;
    attr.map_elem.key = @intFromPtr(key.ptr);
    attr.map_elem.result.next_key = @intFromPtr(next_key.ptr);

    const rc = linux.bpf(.map_get_next_key, &attr, @sizeOf(MapElemAttr));
    switch (errno(rc)) {
        .SUCCESS => return true,
        .BADF => return error.BadFd,
        .FAULT => unreachable,
        .INVAL => return error.FieldInAttrNeedsZeroing,
        .NOENT => return false,
        .PERM => return error.AccessDenied,
        else => |err| return unexpectedErrno(err),
    }
}

Test:

map lookup, update, and delete

test "map lookup, update, and delete" {
    const key_size = 4;
    const value_size = 4;
    const map = try map_create(.hash, key_size, value_size, 1);
    defer std.os.close(map);

    const key = std.mem.zeroes([key_size]u8);
    var value = std.mem.zeroes([value_size]u8);

    // fails looking up value that doesn't exist
    try expectError(error.NotFound, map_lookup_elem(map, &key, &value));

    // succeed at updating and looking up element
    try map_update_elem(map, &key, &value, 0);
    try map_lookup_elem(map, &key, &value);

    // fails inserting more than max entries
    const second_key = [key_size]u8{ 0, 0, 0, 1 };
    try expectError(error.ReachedMaxEntries, map_update_elem(map, &second_key, &value, 0));

    // succeed at iterating all keys of map
    var lookup_key = [_]u8{ 1, 0, 0, 0 };
    var next_key = [_]u8{ 2, 3, 4, 5 }; // garbage value
    const status = try map_get_next_key(map, &lookup_key, &next_key);
    try expectEqual(status, true);
    try expectEqual(next_key, key);
    lookup_key = next_key;
    const status2 = try map_get_next_key(map, &lookup_key, &next_key);
    try expectEqual(status2, false);

    // succeed at deleting an existing elem
    try map_delete_elem(map, &key);
    try expectError(error.NotFound, map_lookup_elem(map, &key, &value));

    // fail at deleting a non-existing elem
    try expectError(error.NotFound, map_delete_elem(map, &key));
}

prog_load()

pub fn prog_load(
    prog_type: ProgType,
    insns: []const Insn,
    log: ?*Log,
    license: []const u8,
    kern_version: u32,
    flags: u32,
) !fd_t {
    var attr = Attr{
        .prog_load = std.mem.zeroes(ProgLoadAttr),
    };

    attr.prog_load.prog_type = @intFromEnum(prog_type);
    attr.prog_load.insns = @intFromPtr(insns.ptr);
    attr.prog_load.insn_cnt = @as(u32, @intCast(insns.len));
    attr.prog_load.license = @intFromPtr(license.ptr);
    attr.prog_load.kern_version = kern_version;
    attr.prog_load.prog_flags = flags;

    if (log) |l| {
        attr.prog_load.log_buf = @intFromPtr(l.buf.ptr);
        attr.prog_load.log_size = @as(u32, @intCast(l.buf.len));
        attr.prog_load.log_level = l.level;
    }

    const rc = linux.bpf(.prog_load, &attr, @sizeOf(ProgLoadAttr));
    return switch (errno(rc)) {
        .SUCCESS => @as(fd_t, @intCast(rc)),
        .ACCES => error.UnsafeProgram,
        .FAULT => unreachable,
        .INVAL => error.InvalidProgram,
        .PERM => error.AccessDenied,
        else => |err| unexpectedErrno(err),
    };
}

Test:

prog_load

test "prog_load" {
    // this should fail because it does not set r0 before exiting
    const bad_prog = [_]Insn{
        Insn.exit(),
    };

    const good_prog = [_]Insn{
        Insn.mov(.r0, 0),
        Insn.exit(),
    };

    const prog = try prog_load(.socket_filter, &good_prog, null, "MIT", 0, 0);
    defer std.os.close(prog);

    try expectError(error.UnsafeProgram, prog_load(.socket_filter, &bad_prog, null, "MIT", 0, 0));
}