zig/lib/std / coff.zig

const std = @import("std.zig");
const assert = std.debug.assert;
const mem = std.mem;

CoffHeaderFlags

/// Characteristics bits stored in `CoffHeader.flags`.
///
/// The explicit `u16` backing integer makes the compiler verify that the
/// sixteen one-bit fields fill the flag word exactly. The first field is the
/// least significant bit.
pub const CoffHeaderFlags = packed struct(u16) {
    /// Image only, Windows CE, and Microsoft Windows NT and later.
    /// The file does not contain base relocations and must therefore be loaded
    /// at its preferred base address. If the base address is not available,
    /// the loader reports an error. The default behavior of the linker is to
    /// strip base relocations from executable (EXE) files.
    RELOCS_STRIPPED: u1 = 0,

    /// Image only. The image file is valid and can be run.
    /// If this flag is not set, it indicates a linker error.
    EXECUTABLE_IMAGE: u1 = 0,

    /// COFF line numbers have been removed.
    /// This flag is deprecated and should be zero.
    LINE_NUMS_STRIPPED: u1 = 0,

    /// COFF symbol table entries for local symbols have been removed.
    /// This flag is deprecated and should be zero.
    LOCAL_SYMS_STRIPPED: u1 = 0,

    /// Obsolete. Aggressively trim working set.
    /// This flag is deprecated for Windows 2000 and later and must be zero.
    AGGRESSIVE_WS_TRIM: u1 = 0,

    /// Application can handle > 2-GB addresses.
    LARGE_ADDRESS_AWARE: u1 = 0,

    /// This flag is reserved for future use.
    RESERVED: u1 = 0,

    /// Little endian: the least significant bit (LSB) precedes the most
    /// significant bit (MSB) in memory.
    /// This flag is deprecated and should be zero.
    BYTES_REVERSED_LO: u1 = 0,

    /// Machine is based on a 32-bit-word architecture.
    @"32BIT_MACHINE": u1 = 0,

    /// Debugging information is removed from the image file.
    DEBUG_STRIPPED: u1 = 0,

    /// If the image is on removable media, fully load it and copy it to the swap file.
    REMOVABLE_RUN_FROM_SWAP: u1 = 0,

    /// If the image is on network media, fully load it and copy it to the swap file.
    NET_RUN_FROM_SWAP: u1 = 0,

    /// The image file is a system file, not a user program.
    SYSTEM: u1 = 0,

    /// The image file is a dynamic-link library (DLL). Such files are
    /// considered executable files for almost all purposes, although they
    /// cannot be directly run.
    DLL: u1 = 0,

    /// The file should be run only on a uniprocessor machine.
    UP_SYSTEM_ONLY: u1 = 0,

    /// Big endian: the MSB precedes the LSB in memory.
    /// This flag is deprecated and should be zero.
    BYTES_REVERSED_HI: u1 = 0,
};

CoffHeader

Image only, Windows CE, and Microsoft Windows NT and later. This indicates that the file does not contain base relocations and must therefore be loaded at its preferred base address. If the base address is not available, the loader reports an error. The default behavior of the linker is to strip base relocations from executable (EXE) files. Image only. This indicates that the image file is valid and can be run. If this flag is not set, it indicates a linker error. COFF line numbers have been removed. This flag is deprecated and should be zero. COFF symbol table entries for local symbols have been removed. This flag is deprecated and should be zero. Obsolete. Aggressively trim working set. This flag is deprecated for Windows 2000 and later and must be zero. Application can handle > 2-GB addresses. This flag is reserved for future use. Little endian: the least significant bit (LSB) precedes the most significant bit (MSB) in memory. This flag is deprecated and should be zero. Machine is based on a 32-bit-word architecture. Debugging information is removed from the image file. If the image is on removable media, fully load it and copy it to the swap file. If the image is on network media, fully load it and copy it to the swap file. The image file is a system file, not a user program. The image file is a dynamic-link library (DLL). Such files are considered executable files for almost all purposes, although they cannot be directly run. The file should be run only on a uniprocessor machine. Big endian: the MSB precedes the LSB in memory. This flag is deprecated and should be zero.

/// The COFF file header. Declared `extern`, so field order and widths are
/// exactly as written here.
pub const CoffHeader = extern struct {
    /// The number that identifies the type of target machine.
    machine: MachineType,

    /// The number of sections. This indicates the size of the section table,
    /// which immediately follows the headers.
    number_of_sections: u16,

    /// The low 32 bits of the number of seconds since 00:00 January 1, 1970
    /// (a C run-time time_t value), which indicates when the file was created.
    time_date_stamp: u32,

    /// The file offset of the COFF symbol table, or zero if no COFF symbol
    /// table is present. This value should be zero for an image because COFF
    /// debugging information is deprecated.
    pointer_to_symbol_table: u32,

    /// The number of entries in the symbol table. This data can be used to
    /// locate the string table, which immediately follows the symbol table.
    /// This value should be zero for an image because COFF debugging
    /// information is deprecated.
    number_of_symbols: u32,

    /// The size of the optional header, which is required for executable
    /// files but not for object files. This value should be zero for an
    /// object file.
    size_of_optional_header: u16,

    /// The flags that indicate the attributes of the file.
    flags: CoffHeaderFlags,
};

// OptionalHeader.magic values
// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms680339(v=vs.85).aspx

IMAGE_NT_OPTIONAL_HDR32_MAGIC

The number that identifies the type of target machine. The number of sections. This indicates the size of the section table, which immediately follows the headers. The low 32 bits of the number of seconds since 00:00 January 1, 1970 (a C run-time time_t value), which indicates when the file was created. The file offset of the COFF symbol table, or zero if no COFF symbol table is present. This value should be zero for an image because COFF debugging information is deprecated. The number of entries in the symbol table. This data can be used to locate the string table, which immediately follows the symbol table. This value should be zero for an image because COFF debugging information is deprecated. The size of the optional header, which is required for executable files but not for object files. This value should be zero for an object file. For a description of the header format, see Optional Header (Image Only). The flags that indicate the attributes of the file.

/// One of the `OptionalHeader.magic` values.
/// NOTE(review): by its name this marks the 32-bit optional header
/// (`OptionalHeaderPE32`) — confirm against the PE specification.
pub const IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b;

IMAGE_NT_OPTIONAL_HDR64_MAGIC

/// One of the `OptionalHeader.magic` values.
/// NOTE(review): by its name this marks the 64-bit optional header
/// (`OptionalHeaderPE64`) — confirm against the PE specification.
pub const IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b;

DllFlags

/// Bit flags stored in the `dll_flags` field of `OptionalHeaderPE32` and
/// `OptionalHeaderPE64`.
///
/// The explicit `u16` backing integer makes the compiler verify that the
/// fields fill the flag word exactly. The first field is the least
/// significant bit.
pub const DllFlags = packed struct(u16) {
    /// The five lowest bits carry no named flag.
    _reserved_0: u5 = 0,

    /// Image can handle a high entropy 64-bit virtual address space.
    HIGH_ENTROPY_VA: u1 = 0,

    /// DLL can be relocated at load time.
    DYNAMIC_BASE: u1 = 0,

    /// Code Integrity checks are enforced.
    FORCE_INTEGRITY: u1 = 0,

    /// Image is NX compatible.
    NX_COMPAT: u1 = 0,

    /// Isolation aware, but do not isolate the image.
    NO_ISOLATION: u1 = 0,

    /// Does not use structured exception (SE) handling.
    /// No SE handler may be called in this image.
    NO_SEH: u1 = 0,

    /// Do not bind the image.
    NO_BIND: u1 = 0,

    /// Image must execute in an AppContainer.
    APPCONTAINER: u1 = 0,

    /// A WDM driver.
    WDM_DRIVER: u1 = 0,

    /// Image supports Control Flow Guard.
    GUARD_CF: u1 = 0,

    /// Terminal Server aware.
    TERMINAL_SERVER_AWARE: u1 = 0,
};

Subsystem

Image can handle a high entropy 64-bit virtual address space. DLL can be relocated at load time. Code Integrity checks are enforced. Image is NX compatible. Isolation aware, but do not isolate the image. Does not use structured exception (SE) handling. No SE handler may be called in this image. Do not bind the image. Image must execute in an AppContainer. A WDM driver. Image supports Control Flow Guard. Terminal Server aware.

/// Values of the `subsystem` field of `OptionalHeaderPE32` and
/// `OptionalHeaderPE64`.
pub const Subsystem = enum(u16) {
    /// An unknown subsystem
    UNKNOWN = 0,

    /// Device drivers and native Windows processes
    NATIVE = 1,

    /// The Windows graphical user interface (GUI) subsystem
    WINDOWS_GUI = 2,

    /// The Windows character subsystem
    WINDOWS_CUI = 3,

    /// The OS/2 character subsystem
    OS2_CUI = 5,

    /// The Posix character subsystem
    POSIX_CUI = 7,

    /// Native Win9x driver
    NATIVE_WINDOWS = 8,

    /// Windows CE
    WINDOWS_CE_GUI = 9,

    /// An Extensible Firmware Interface (EFI) application
    EFI_APPLICATION = 10,

    /// An EFI driver with boot services
    EFI_BOOT_SERVICE_DRIVER = 11,

    /// An EFI driver with run-time services
    EFI_RUNTIME_DRIVER = 12,

    /// An EFI ROM image
    EFI_ROM = 13,

    /// XBOX
    XBOX = 14,

    /// Windows boot application
    WINDOWS_BOOT_APPLICATION = 16,
};

OptionalHeader

An unknown subsystem. Device drivers and native Windows processes. The Windows graphical user interface (GUI) subsystem. The Windows character subsystem. The OS/2 character subsystem. The Posix character subsystem. Native Win9x driver. Windows CE. An Extensible Firmware Interface (EFI) application. An EFI driver with boot services. An EFI driver with run-time services. An EFI ROM image. XBOX. Windows boot application.

/// The eight leading fields that `OptionalHeaderPE32` and `OptionalHeaderPE64`
/// both start with (same names, types and order in all three structs).
/// `magic` is compared against `IMAGE_NT_OPTIONAL_HDR32_MAGIC` /
/// `IMAGE_NT_OPTIONAL_HDR64_MAGIC`.
pub const OptionalHeader = extern struct {
    magic: u16,
    major_linker_version: u8,
    minor_linker_version: u8,
    size_of_code: u32,
    size_of_initialized_data: u32,
    size_of_uninitialized_data: u32,
    address_of_entry_point: u32,
    base_of_code: u32,
};

OptionalHeaderPE32

/// Optional header variant with 32-bit `image_base` and 32-bit stack/heap
/// sizes. It starts with the same fields as `OptionalHeader` and, unlike
/// `OptionalHeaderPE64`, carries a `base_of_data` field.
/// Declared `extern`, so field order and widths are exactly as written.
pub const OptionalHeaderPE32 = extern struct {
    magic: u16,
    major_linker_version: u8,
    minor_linker_version: u8,
    size_of_code: u32,
    size_of_initialized_data: u32,
    size_of_uninitialized_data: u32,
    address_of_entry_point: u32,
    base_of_code: u32,
    // Present only in this variant; OptionalHeaderPE64 has no such field.
    base_of_data: u32,
    image_base: u32,
    section_alignment: u32,
    file_alignment: u32,
    major_operating_system_version: u16,
    minor_operating_system_version: u16,
    major_image_version: u16,
    minor_image_version: u16,
    major_subsystem_version: u16,
    minor_subsystem_version: u16,
    win32_version_value: u32,
    size_of_image: u32,
    size_of_headers: u32,
    checksum: u32,
    subsystem: Subsystem,
    dll_flags: DllFlags,
    size_of_stack_reserve: u32,
    size_of_stack_commit: u32,
    size_of_heap_reserve: u32,
    size_of_heap_commit: u32,
    loader_flags: u32,
    number_of_rva_and_sizes: u32,
};

OptionalHeaderPE64

/// Optional header variant with 64-bit `image_base` and 64-bit stack/heap
/// sizes. It starts with the same fields as `OptionalHeader` and, unlike
/// `OptionalHeaderPE32`, has no `base_of_data` field.
/// Declared `extern`, so field order and widths are exactly as written.
pub const OptionalHeaderPE64 = extern struct {
    magic: u16,
    major_linker_version: u8,
    minor_linker_version: u8,
    size_of_code: u32,
    size_of_initialized_data: u32,
    size_of_uninitialized_data: u32,
    address_of_entry_point: u32,
    base_of_code: u32,
    // 64 bits wide here; 32 bits in OptionalHeaderPE32.
    image_base: u64,
    section_alignment: u32,
    file_alignment: u32,
    major_operating_system_version: u16,
    minor_operating_system_version: u16,
    major_image_version: u16,
    minor_image_version: u16,
    major_subsystem_version: u16,
    minor_subsystem_version: u16,
    win32_version_value: u32,
    size_of_image: u32,
    size_of_headers: u32,
    checksum: u32,
    subsystem: Subsystem,
    dll_flags: DllFlags,
    // The four stack/heap sizes are 64 bits wide here; 32 bits in OptionalHeaderPE32.
    size_of_stack_reserve: u64,
    size_of_stack_commit: u64,
    size_of_heap_reserve: u64,
    size_of_heap_commit: u64,
    loader_flags: u32,
    number_of_rva_and_sizes: u32,
};

IMAGE_NUMBEROF_DIRECTORY_ENTRIES

/// NOTE(review): presumably the number of `ImageDataDirectory` slots that
/// follow the optional header; `DirectoryEntry` names indices 0 through 14
/// of them — confirm against the PE specification.
pub const IMAGE_NUMBEROF_DIRECTORY_ENTRIES = 16;

DirectoryEntry

/// Named indices into the data-directory table (`Coff.getPdbPath` indexes the
/// result of `getDataDirectories()` with `DirectoryEntry.DEBUG`).
pub const DirectoryEntry = enum(u16) {
    /// Export Directory
    EXPORT = 0,

    /// Import Directory
    IMPORT = 1,

    /// Resource Directory
    RESOURCE = 2,

    /// Exception Directory
    EXCEPTION = 3,

    /// Security Directory
    SECURITY = 4,

    /// Base Relocation Table
    BASERELOC = 5,

    /// Debug Directory
    DEBUG = 6,

    /// Architecture Specific Data
    ARCHITECTURE = 7,

    /// RVA of GP
    GLOBALPTR = 8,

    /// TLS Directory
    TLS = 9,

    /// Load Configuration Directory
    LOAD_CONFIG = 10,

    /// Bound Import Directory in headers
    BOUND_IMPORT = 11,

    /// Import Address Table
    IAT = 12,

    /// Delay Load Import Descriptors
    DELAY_IMPORT = 13,

    /// COM Runtime descriptor
    COM_DESCRIPTOR = 14,
};

ImageDataDirectory

Export Directory. Import Directory. Resource Directory. Exception Directory. Security Directory. Base Relocation Table. Debug Directory. Architecture Specific Data. RVA of GP. TLS Directory. Load Configuration Directory. Bound Import Directory in headers. Import Address Table. Delay Load Import Descriptors. COM Runtime descriptor.

/// An address/size pair.
/// NOTE(review): presumably one slot of the data-directory table indexed by
/// `DirectoryEntry`, with `virtual_address` being an RVA — confirm against
/// `Coff.getDataDirectories`, which is not visible here.
pub const ImageDataDirectory = extern struct {
    virtual_address: u32,
    size: u32,
};

BaseRelocationDirectoryEntry

/// Header of one base relocation block (Page RVA + Block Size).
pub const BaseRelocationDirectoryEntry = extern struct {
    /// The image base plus the page RVA is added to each offset to create the
    /// VA where the base relocation must be applied.
    page_rva: u32,

    /// The total number of bytes in the base relocation block, including the
    /// Page RVA and Block Size fields and the Type/Offset fields that follow.
    block_size: u32,
};

BaseRelocation

The image base plus the page RVA is added to each offset to create the VA where the base relocation must be applied. The total number of bytes in the base relocation block, including the Page RVA and Block Size fields and the Type/Offset fields that follow.

/// One Type/Offset entry of a base relocation block, packed into a single
/// 16-bit WORD: `offset` occupies the low 12 bits, `type` the high 4 bits.
///
/// The explicit `u16` backing integer makes the compiler verify that the two
/// fields fill the WORD exactly.
pub const BaseRelocation = packed struct(u16) {
    /// Stored in the remaining 12 bits of the WORD, an offset from the
    /// starting address that was specified in the Page RVA field for the
    /// block. This offset specifies where the base relocation is to be applied.
    offset: u12,

    /// Stored in the high 4 bits of the WORD, a value that indicates the type
    /// of base relocation to be applied.
    type: BaseRelocationType,
};

BaseRelocationType

Stored in the remaining 12 bits of the WORD, an offset from the starting address that was specified in the Page RVA field for the block. This offset specifies where the base relocation is to be applied. Stored in the high 4 bits of the WORD, a value that indicates the type of base relocation to be applied.

/// Values of `BaseRelocation.type`. Several numeric values have
/// machine-specific aliases; only one name per value is declared, and the
/// others are kept as commented-out lines.
pub const BaseRelocationType = enum(u4) {
    /// The base relocation is skipped. This type can be used to pad a block.
    ABSOLUTE = 0,

    /// The base relocation adds the high 16 bits of the difference to the
    /// 16-bit field at offset. The 16-bit field represents the high value of
    /// a 32-bit word.
    HIGH = 1,

    /// The base relocation adds the low 16 bits of the difference to the
    /// 16-bit field at offset. The 16-bit field represents the low half of a
    /// 32-bit word.
    LOW = 2,

    /// The base relocation applies all 32 bits of the difference to the
    /// 32-bit field at offset.
    HIGHLOW = 3,

    /// The base relocation adds the high 16 bits of the difference to the
    /// 16-bit field at offset. The 16-bit field represents the high value of
    /// a 32-bit word. The low 16 bits of the 32-bit value are stored in the
    /// 16-bit word that follows this base relocation. This means that this
    /// base relocation occupies two slots.
    HIGHADJ = 4,

    /// When the machine type is MIPS, the base relocation applies to a MIPS
    /// jump instruction.
    MIPS_JMPADDR = 5,

    // Alias of 5, meaningful only when the machine type is ARM or Thumb: the
    // base relocation applies the 32-bit address of a symbol across a
    // consecutive MOVW/MOVT instruction pair.
    // ARM_MOV32 = 5,

    // Alias of 5, only meaningful when the machine type is RISC-V: the base
    // relocation applies to the high 20 bits of a 32-bit absolute address.
    // RISCV_HIGH20 = 5,

    /// Reserved, must be zero.
    RESERVED = 6,

    /// This relocation is meaningful only when the machine type is Thumb.
    /// The base relocation applies the 32-bit address of a symbol to a
    /// consecutive MOVW/MOVT instruction pair.
    THUMB_MOV32 = 7,

    // Alias of 7, only meaningful when the machine type is RISC-V: the base
    // relocation applies to the low 12 bits of a 32-bit absolute address
    // formed in RISC-V I-type instruction format.
    // RISCV_LOW12I = 7,

    /// This relocation is only meaningful when the machine type is RISC-V.
    /// The base relocation applies to the low 12 bits of a 32-bit absolute
    /// address formed in RISC-V S-type instruction format.
    RISCV_LOW12S = 8,

    // Alias of 8, only meaningful when the machine type is LoongArch 32-bit:
    // the base relocation applies to a 32-bit absolute address formed in two
    // consecutive instructions.
    // LOONGARCH32_MARK_LA = 8,

    // Alias of 8, only meaningful when the machine type is LoongArch 64-bit:
    // the base relocation applies to a 64-bit absolute address formed in four
    // consecutive instructions.
    // LOONGARCH64_MARK_LA = 8,

    /// The relocation is only meaningful when the machine type is MIPS.
    /// The base relocation applies to a MIPS16 jump instruction.
    MIPS_JMPADDR16 = 9,

    /// The base relocation applies the difference to the 64-bit field at offset.
    DIR64 = 10,
};

DebugDirectoryEntry

The base relocation is skipped. This type can be used to pad a block. The base relocation adds the high 16 bits of the difference to the 16-bit field at offset. The 16-bit field represents the high value of a 32-bit word. The base relocation adds the low 16 bits of the difference to the 16-bit field at offset. The 16-bit field represents the low half of a 32-bit word. The base relocation applies all 32 bits of the difference to the 32-bit field at offset. The base relocation adds the high 16 bits of the difference to the 16-bit field at offset. The 16-bit field represents the high value of a 32-bit word. The low 16 bits of the 32-bit value are stored in the 16-bit word that follows this base relocation. This means that this base relocation occupies two slots. When the machine type is MIPS, the base relocation applies to a MIPS jump instruction. This relocation is meaningful only when the machine type is ARM or Thumb. The base relocation applies the 32-bit address of a symbol across a consecutive MOVW/MOVT instruction pair. This relocation is only meaningful when the machine type is RISC-V. The base relocation applies to the high 20 bits of a 32-bit absolute address. Reserved, must be zero. This relocation is meaningful only when the machine type is Thumb. The base relocation applies the 32-bit address of a symbol to a consecutive MOVW/MOVT instruction pair. This relocation is only meaningful when the machine type is RISC-V. The base relocation applies to the low 12 bits of a 32-bit absolute address formed in RISC-V I-type instruction format. This relocation is only meaningful when the machine type is RISC-V. The base relocation applies to the low 12 bits of a 32-bit absolute address formed in RISC-V S-type instruction format. This relocation is only meaningful when the machine type is LoongArch 32-bit. The base relocation applies to a 32-bit absolute address formed in two consecutive instructions. 
This relocation is only meaningful when the machine type is LoongArch 64-bit. The base relocation applies to a 64-bit absolute address formed in four consecutive instructions. The relocation is only meaningful when the machine type is MIPS. The base relocation applies to a MIPS16 jump instruction. The base relocation applies the difference to the 64-bit field at offset.

/// Fixed-layout (`extern`) record whose `type` field is a `DebugType`.
/// NOTE(review): presumably one entry of the table that the
/// `DirectoryEntry.DEBUG` data directory points at — confirm against
/// `Coff.getPdbPath`, which is only partially visible here.
pub const DebugDirectoryEntry = extern struct {
    characteristics: u32,
    time_date_stamp: u32,
    major_version: u16,
    minor_version: u16,
    type: DebugType,
    size_of_data: u32,
    address_of_raw_data: u32,
    pointer_to_raw_data: u32,
};

DebugType

/// Values of `DebugDirectoryEntry.type`.
/// The numbering is not contiguous: 11 and 17 through 19 are not declared.
pub const DebugType = enum(u32) {
    UNKNOWN = 0,
    COFF = 1,
    CODEVIEW = 2,
    FPO = 3,
    MISC = 4,
    EXCEPTION = 5,
    FIXUP = 6,
    OMAP_TO_SRC = 7,
    OMAP_FROM_SRC = 8,
    BORLAND = 9,
    RESERVED10 = 10,
    VC_FEATURE = 12,
    POGO = 13,
    ILTCG = 14,
    MPX = 15,
    REPRO = 16,
    EX_DLLCHARACTERISTICS = 20,
};

ImportDirectoryEntry

/// One entry of the import directory table; each entry names a DLL through
/// `name_rva`.
pub const ImportDirectoryEntry = extern struct {
    /// The RVA of the import lookup table. This table contains a name or
    /// ordinal for each import. (The name "Characteristics" is used in
    /// Winnt.h, but no longer describes this field.)
    import_lookup_table_rva: u32,

    /// The stamp that is set to zero until the image is bound. After the
    /// image is bound, this field is set to the time/date stamp of the DLL.
    time_date_stamp: u32,

    /// The index of the first forwarder reference.
    forwarder_chain: u32,

    /// The address of an ASCII string that contains the name of the DLL.
    /// This address is relative to the image base.
    name_rva: u32,

    /// The RVA of the import address table. The contents of this table are
    /// identical to the contents of the import lookup table until the image
    /// is bound.
    import_address_table_rva: u32,
};

ImportLookupEntry32

The RVA of the import lookup table. This table contains a name or ordinal for each import. (The name "Characteristics" is used in Winnt.h, but no longer describes this field.) The stamp that is set to zero until the image is bound. After the image is bound, this field is set to the time/date stamp of the DLL. The index of the first forwarder reference. The address of an ASCII string that contains the name of the DLL. This address is relative to the image base. The RVA of the import address table. The contents of this table are identical to the contents of the import lookup table until the image is bound.

/// Decoder for a 32-bit import lookup entry. The most significant bit
/// (`mask`) selects the interpretation: clear means import by name, set
/// means import by ordinal.
pub const ImportLookupEntry32 = struct {
    /// View of an entry whose top bit is clear. The explicit `u32` backing
    /// integer guarantees the view is exactly as wide as the raw entry.
    pub const ByName = packed struct(u32) {
        name_table_rva: u31,
        flag: u1 = 0,
    };

    /// View of an entry whose top bit is set. The explicit `u32` backing
    /// integer guarantees the view is exactly as wide as the raw entry.
    pub const ByOrdinal = packed struct(u32) {
        ordinal_number: u16,
        unused: u15 = 0,
        flag: u1 = 1,
    };

    // Bit 31: the ordinal/name selector.
    const mask = 0x80000000;

    /// Reinterprets `raw` as a `ByName` entry, or returns null when the
    /// ordinal bit is set.
    pub fn getImportByName(raw: u32) ?ByName {
        if (mask & raw != 0) return null;
        return @as(ByName, @bitCast(raw));
    }

    /// Reinterprets `raw` as a `ByOrdinal` entry, or returns null when the
    /// ordinal bit is clear.
    pub fn getImportByOrdinal(raw: u32) ?ByOrdinal {
        if (mask & raw == 0) return null;
        return @as(ByOrdinal, @bitCast(raw));
    }
};

ImportLookupEntry64

/// Decoder for a 64-bit import lookup entry. The most significant bit
/// (`mask`) selects the interpretation: clear means import by name, set
/// means import by ordinal.
pub const ImportLookupEntry64 = struct {
    /// View of an entry whose top bit is clear. The explicit `u64` backing
    /// integer guarantees the view is exactly as wide as the raw entry.
    pub const ByName = packed struct(u64) {
        name_table_rva: u31,
        unused: u32 = 0,
        flag: u1 = 0,
    };

    /// View of an entry whose top bit is set. The explicit `u64` backing
    /// integer guarantees the view is exactly as wide as the raw entry.
    pub const ByOrdinal = packed struct(u64) {
        ordinal_number: u16,
        unused: u47 = 0,
        flag: u1 = 1,
    };

    // Bit 63: the ordinal/name selector.
    const mask = 0x8000000000000000;

    /// Reinterprets `raw` as a `ByName` entry, or returns null when the
    /// ordinal bit is set.
    pub fn getImportByName(raw: u64) ?ByName {
        if (mask & raw != 0) return null;
        return @as(ByName, @bitCast(raw));
    }

    /// Reinterprets `raw` as a `ByOrdinal` entry, or returns null when the
    /// ordinal bit is clear.
    pub fn getImportByOrdinal(raw: u64) ?ByOrdinal {
        if (mask & raw == 0) return null;
        return @as(ByOrdinal, @bitCast(raw));
    }
};

ImportHintNameEntry

Every name ends with a NULL byte. If the NULL byte does not fall on a 2-byte boundary, the entry structure is padded to ensure 2-byte alignment.

/// Every name ends with a NULL byte. If the NULL byte does not fall on a
/// 2-byte boundary, the entry structure is padded to ensure 2-byte alignment.
pub const ImportHintNameEntry = extern struct {
    /// An index into the export name pointer table. A match is attempted
    /// first with this value. If it fails, a binary search is performed on
    /// the DLL's export name pointer table.
    hint: u16,

    /// Pointer to NULL terminated ASCII name. Variable length: only the first
    /// byte is declared here.
    name: [1]u8,
};

SectionHeader

An index into the export name pointer table. A match is attempted first with this value. If it fails, a binary search is performed on the DLL's export name pointer table. Pointer to NULL terminated ASCII name. Variable length...

/// One entry of the section table. Declared `extern`, so field order and
/// widths are exactly as written.
pub const SectionHeader = extern struct {
    /// Either the section name itself (NUL-padded, at most 8 bytes) or
    /// "/<decimal>"; see `getName` and `getNameOffset`.
    name: [8]u8,
    virtual_size: u32,
    virtual_address: u32,
    size_of_raw_data: u32,
    pointer_to_raw_data: u32,
    pointer_to_relocations: u32,
    pointer_to_linenumbers: u32,
    number_of_relocations: u16,
    number_of_linenumbers: u16,
    flags: SectionHeaderFlags,

    /// Returns the name stored inline in the header (up to the first NUL, or
    /// all 8 bytes when there is none). Returns null when the field starts
    /// with '/', in which case `getNameOffset` applies instead.
    pub fn getName(self: *align(1) const SectionHeader) ?[]const u8 {
        if (self.name[0] == '/') return null;
        return std.mem.sliceTo(&self.name, 0);
    }

    /// Returns the decimal number that follows a leading '/' in the name
    /// field. Returns null when the name does not start with '/', and also
    /// when the text after the '/' is not a decimal `u32`: the bytes come
    /// straight from the file, so a malformed header must not be treated as
    /// unreachable.
    pub fn getNameOffset(self: SectionHeader) ?u32 {
        if (self.name[0] != '/') return null;
        const digits = std.mem.sliceTo(self.name[1..], 0);
        return std.fmt.parseInt(u32, digits, 10) catch null;
    }

    /// Applicable only to section headers in COFF objects.
    ///
    /// Decodes `flags.ALIGN`: 0 means no alignment is recorded (null);
    /// a value k > 0 means an alignment of 2^(k - 1) bytes.
    pub fn getAlignment(self: SectionHeader) ?u16 {
        if (self.flags.ALIGN == 0) return null;
        return @as(u16, 1) << (self.flags.ALIGN - 1);
    }

    /// Encodes `new_alignment` (1 through 8192 bytes) into `flags.ALIGN`.
    ///
    /// The stored value is log2(new_alignment) + 1, matching both
    /// `getAlignment` and the IMAGE_SCN_ALIGN_* table (1 byte -> 1,
    /// 8192 bytes -> 14). A value that is not a power of two is rounded down
    /// to the nearest one.
    pub fn setAlignment(self: *SectionHeader, new_alignment: u16) void {
        assert(new_alignment > 0 and new_alignment <= 8192);
        // log2_int yields a u4 directly, which is the width of ALIGN.
        const exponent: u4 = std.math.log2_int(u16, new_alignment);
        self.flags.ALIGN = exponent + 1;
    }

    /// True when the CNT_CODE flag is set.
    pub fn isCode(self: SectionHeader) bool {
        return self.flags.CNT_CODE == 0b1;
    }

    /// True when the LNK_COMDAT flag is set.
    pub fn isComdat(self: SectionHeader) bool {
        return self.flags.LNK_COMDAT == 0b1;
    }
};

/// Bit flags stored in `SectionHeader.flags`.
///
/// The explicit `u32` backing integer makes the compiler verify that the
/// fields fill the flag word exactly. The first field is the least
/// significant bit.
pub const SectionHeaderFlags = packed struct(u32) {
    _reserved_0: u3 = 0,

    /// The section should not be padded to the next boundary.
    /// This flag is obsolete and is replaced by IMAGE_SCN_ALIGN_1BYTES.
    /// This is valid only for object files.
    TYPE_NO_PAD: u1 = 0,

    _reserved_1: u1 = 0,

    /// The section contains executable code.
    CNT_CODE: u1 = 0,

    /// The section contains initialized data.
    CNT_INITIALIZED_DATA: u1 = 0,

    /// The section contains uninitialized data.
    CNT_UNINITIALIZED_DATA: u1 = 0,

    /// Reserved for future use.
    LNK_OTHER: u1 = 0,

    /// The section contains comments or other information.
    /// The .drectve section has this type.
    /// This is valid for object files only.
    LNK_INFO: u1 = 0,

    // The field name is misspelled, but it is part of the public interface
    // and therefore kept as is.
    _reserverd_2: u1 = 0,

    /// The section will not become part of the image.
    /// This is valid only for object files.
    LNK_REMOVE: u1 = 0,

    /// The section contains COMDAT data.
    /// For more information, see COMDAT Sections (Object Only).
    /// This is valid only for object files.
    LNK_COMDAT: u1 = 0,

    _reserved_3: u2 = 0,

    /// The section contains data referenced through the global pointer (GP).
    GPREL: u1 = 0,

    /// Reserved for future use.
    MEM_PURGEABLE: u1 = 0,

    /// Reserved for future use.
    MEM_16BIT: u1 = 0,

    /// Reserved for future use.
    MEM_LOCKED: u1 = 0,

    /// Reserved for future use.
    MEM_PRELOAD: u1 = 0,

    /// Takes on multiple values according to flags:
    /// pub const IMAGE_SCN_ALIGN_1BYTES: u32 = 0x100000;
    /// pub const IMAGE_SCN_ALIGN_2BYTES: u32 = 0x200000;
    /// pub const IMAGE_SCN_ALIGN_4BYTES: u32 = 0x300000;
    /// pub const IMAGE_SCN_ALIGN_8BYTES: u32 = 0x400000;
    /// pub const IMAGE_SCN_ALIGN_16BYTES: u32 = 0x500000;
    /// pub const IMAGE_SCN_ALIGN_32BYTES: u32 = 0x600000;
    /// pub const IMAGE_SCN_ALIGN_64BYTES: u32 = 0x700000;
    /// pub const IMAGE_SCN_ALIGN_128BYTES: u32 = 0x800000;
    /// pub const IMAGE_SCN_ALIGN_256BYTES: u32 = 0x900000;
    /// pub const IMAGE_SCN_ALIGN_512BYTES: u32 = 0xA00000;
    /// pub const IMAGE_SCN_ALIGN_1024BYTES: u32 = 0xB00000;
    /// pub const IMAGE_SCN_ALIGN_2048BYTES: u32 = 0xC00000;
    /// pub const IMAGE_SCN_ALIGN_4096BYTES: u32 = 0xD00000;
    /// pub const IMAGE_SCN_ALIGN_8192BYTES: u32 = 0xE00000;
    ALIGN: u4 = 0,

    /// The section contains extended relocations.
    LNK_NRELOC_OVFL: u1 = 0,

    /// The section can be discarded as needed.
    MEM_DISCARDABLE: u1 = 0,

    /// The section cannot be cached.
    MEM_NOT_CACHED: u1 = 0,

    /// The section is not pageable.
    MEM_NOT_PAGED: u1 = 0,

    /// The section can be shared in memory.
    MEM_SHARED: u1 = 0,

    /// The section can be executed as code.
    MEM_EXECUTE: u1 = 0,

    /// The section can be read.
    MEM_READ: u1 = 0,

    /// The section can be written to.
    MEM_WRITE: u1 = 0,
};

Symbol

The section should not be padded to the next boundary. This flag is obsolete and is replaced by IMAGE_SCN_ALIGN_1BYTES. This is valid only for object files. The section contains executable code. The section contains initialized data. The section contains uninitialized data. Reserved for future use. The section contains comments or other information. The .drectve section has this type. This is valid for object files only. The section will not become part of the image. This is valid only for object files. The section contains COMDAT data. For more information, see COMDAT Sections (Object Only). This is valid only for object files. The section contains data referenced through the global pointer (GP). Reserved for future use. Reserved for future use. Reserved for future use. Reserved for future use. Takes on multiple values according to flags: pub const IMAGE_SCN_ALIGN_1BYTES: u32 = 0x100000; pub const IMAGE_SCN_ALIGN_2BYTES: u32 = 0x200000; pub const IMAGE_SCN_ALIGN_4BYTES: u32 = 0x300000; pub const IMAGE_SCN_ALIGN_8BYTES: u32 = 0x400000; pub const IMAGE_SCN_ALIGN_16BYTES: u32 = 0x500000; pub const IMAGE_SCN_ALIGN_32BYTES: u32 = 0x600000; pub const IMAGE_SCN_ALIGN_64BYTES: u32 = 0x700000; pub const IMAGE_SCN_ALIGN_128BYTES: u32 = 0x800000; pub const IMAGE_SCN_ALIGN_256BYTES: u32 = 0x900000; pub const IMAGE_SCN_ALIGN_512BYTES: u32 = 0xA00000; pub const IMAGE_SCN_ALIGN_1024BYTES: u32 = 0xB00000; pub const IMAGE_SCN_ALIGN_2048BYTES: u32 = 0xC00000; pub const IMAGE_SCN_ALIGN_4096BYTES: u32 = 0xD00000; pub const IMAGE_SCN_ALIGN_8192BYTES: u32 = 0xE00000; The section contains extended relocations. The section can be discarded as needed. The section cannot be cached. The section is not pageable. The section can be shared in memory. The section can be executed as code. The section can be read. The section can be written to.

/// In-memory form of a COFF symbol table record.
pub const Symbol = struct {
    /// Either the symbol name itself (NUL-padded, at most 8 bytes) or four
    /// zero bytes followed by a 32-bit little-endian number; see `getName`
    /// and `getNameOffset`.
    name: [8]u8,
    value: u32,
    section_number: SectionNumber,
    type: SymType,
    storage_class: StorageClass,
    number_of_aux_symbols: u8,

    /// Always returns 18, which equals the sum of the field widths declared
    /// above (8 + 4 + 2 + 2 + 1 + 1 bytes).
    pub fn sizeOf() usize {
        return 18;
    }

    /// Returns the name stored inline in the record (up to the first NUL, or
    /// all 8 bytes when there is none). Returns null when the first four
    /// bytes are all zero, in which case `getNameOffset` applies instead.
    pub fn getName(self: *const Symbol) ?[]const u8 {
        const leading = std.mem.readInt(u32, self.name[0..4], .little);
        if (leading == 0) return null;
        return std.mem.sliceTo(&self.name, 0);
    }

    /// Returns the little-endian `u32` held in bytes 4..8 of the name field
    /// when the first four bytes are all zero, null otherwise.
    pub fn getNameOffset(self: Symbol) ?u32 {
        const leading = std.mem.readInt(u32, self.name[0..4], .little);
        return if (leading == 0)
            std.mem.readInt(u32, self.name[4..8], .little)
        else
            null;
    }
};

SectionNumber

/// Type of `Symbol.section_number`. Non-exhaustive: any value other than the
/// three named ones is also representable.
pub const SectionNumber = enum(u16) {
    /// The symbol record is not yet assigned a section. A value of zero
    /// indicates that a reference to an external symbol is defined elsewhere.
    /// A value of non-zero is a common symbol with a size that is specified
    /// by the value.
    UNDEFINED = 0,

    /// The symbol has an absolute (non-relocatable) value and is not an address.
    ABSOLUTE = 0xffff,

    /// The symbol provides general type or debugging information but does
    /// not correspond to a section. Microsoft tools use this setting along
    /// with .file records (storage class FILE).
    DEBUG = 0xfffe,
    _,
};

SymType

The symbol record is not yet assigned a section. A value of zero indicates that a reference to an external symbol is defined elsewhere. A value of non-zero is a common symbol with a size that is specified by the value. The symbol has an absolute (non-relocatable) value and is not an address. The symbol provides general type or debugging information but does not correspond to a section. Microsoft tools use this setting along with .file records (storage class FILE).

/// Type of `Symbol.type`: two 8-bit enums packed into one 16-bit word.
/// `complex_type` occupies the low 8 bits and `base_type` the high 8 bits.
///
/// The explicit `u16` backing integer makes the compiler verify that the two
/// fields fill the word exactly.
pub const SymType = packed struct(u16) {
    complex_type: ComplexType,
    base_type: BaseType,
};

BaseType

/// Type of `SymType.base_type`.
pub const BaseType = enum(u8) {
    /// No type information or unknown base type. Microsoft tools use this setting
    NULL = 0,

    /// No valid type; used with void pointers and functions
    VOID = 1,

    /// A character (signed byte)
    CHAR = 2,

    /// A 2-byte signed integer
    SHORT = 3,

    /// A natural integer type (normally 4 bytes in Windows)
    INT = 4,

    /// A 4-byte signed integer
    LONG = 5,

    /// A 4-byte floating-point number
    FLOAT = 6,

    /// An 8-byte floating-point number
    DOUBLE = 7,

    /// A structure
    STRUCT = 8,

    /// A union
    UNION = 9,

    /// An enumerated type
    ENUM = 10,

    /// A member of enumeration (a specified value)
    MOE = 11,

    /// A byte; unsigned 1-byte integer
    BYTE = 12,

    /// A word; unsigned 2-byte integer
    WORD = 13,

    /// An unsigned integer of natural size (normally, 4 bytes)
    UINT = 14,

    /// An unsigned 4-byte integer
    DWORD = 15,
};

ComplexType

No type information or unknown base type; Microsoft tools use this setting. No valid type; used with void pointers and functions. A character (signed byte). A 2-byte signed integer. A natural integer type (normally 4 bytes in Windows). A 4-byte signed integer. A 4-byte floating-point number. An 8-byte floating-point number. A structure. A union. An enumerated type. A member of enumeration (a specified value). A byte; unsigned 1-byte integer. A word; unsigned 2-byte integer. An unsigned integer of natural size (normally, 4 bytes). An unsigned 4-byte integer.

/// Type of `SymType.complex_type`. The declared values are multiples of 16.
pub const ComplexType = enum(u8) {
    /// No derived type; the symbol is a simple scalar variable.
    NULL = 0,

    /// The symbol is a pointer to base type.
    POINTER = 16,

    /// The symbol is a function that returns a base type.
    FUNCTION = 32,

    /// The symbol is an array of base type.
    ARRAY = 48,
};

StorageClass

No derived type; the symbol is a simple scalar variable. The symbol is a pointer to base type. The symbol is a function that returns a base type. The symbol is an array of base type.

/// Type of `Symbol.storage_class`. "The Value field" in the member
/// descriptions refers to `Symbol.value`.
pub const StorageClass = enum(u8) {
    /// A special symbol that represents the end of function, for debugging purposes.
    END_OF_FUNCTION = 0xff,

    /// No assigned storage class.
    NULL = 0,

    /// The automatic (stack) variable. The Value field specifies the stack frame offset.
    AUTOMATIC = 1,

    /// A value that Microsoft tools use for external symbols.
    /// The Value field indicates the size if the section number is
    /// IMAGE_SYM_UNDEFINED (0). If the section number is not zero, then the
    /// Value field specifies the offset within the section.
    EXTERNAL = 2,

    /// The offset of the symbol within the section.
    /// If the Value field is zero, then the symbol represents a section name.
    STATIC = 3,

    /// A register variable. The Value field specifies the register number.
    REGISTER = 4,

    /// A symbol that is defined externally.
    EXTERNAL_DEF = 5,

    /// A code label that is defined within the module.
    /// The Value field specifies the offset of the symbol within the section.
    LABEL = 6,

    /// A reference to a code label that is not defined.
    UNDEFINED_LABEL = 7,

    /// The structure member. The Value field specifies the nth member.
    MEMBER_OF_STRUCT = 8,

    /// A formal argument (parameter) of a function.
    /// The Value field specifies the nth argument.
    ARGUMENT = 9,

    /// The structure tag-name entry.
    STRUCT_TAG = 10,

    /// A union member. The Value field specifies the nth member.
    MEMBER_OF_UNION = 11,

    /// The Union tag-name entry.
    UNION_TAG = 12,

    /// A Typedef entry.
    TYPE_DEFINITION = 13,

    /// A static data declaration.
    UNDEFINED_STATIC = 14,

    /// An enumerated type tagname entry.
    ENUM_TAG = 15,

    /// A member of an enumeration. The Value field specifies the nth member.
    MEMBER_OF_ENUM = 16,

    /// A register parameter.
    REGISTER_PARAM = 17,

    /// A bit-field reference.
    /// The Value field specifies the nth bit in the bit field.
    BIT_FIELD = 18,

    /// A .bb (beginning of block) or .eb (end of block) record.
    /// The Value field is the relocatable address of the code location.
    BLOCK = 100,

    /// A value that Microsoft tools use for symbol records that define the
    /// extent of a function: begin function (.bf), end function (.ef), and
    /// lines in function (.lf). For .lf records, the Value field gives the
    /// number of source lines in the function. For .ef records, the Value
    /// field gives the size of the function code.
    FUNCTION = 101,

    /// An end-of-structure entry.
    END_OF_STRUCT = 102,

    /// A value that Microsoft tools, as well as traditional COFF format, use
    /// for the source-file symbol record. The symbol is followed by
    /// auxiliary records that name the file.
    FILE = 103,

    /// A definition of a section (Microsoft tools use STATIC storage class instead).
    SECTION = 104,

    /// A weak external. For more information, see Auxiliary Format 3: Weak Externals.
    WEAK_EXTERNAL = 105,

    /// A CLR token symbol. The name is an ASCII string that consists of the
    /// hexadecimal value of the token. For more information, see CLR Token
    /// Definition (Object Only).
    CLR_TOKEN = 107,
};

FunctionDefinition

A special symbol that represents the end of function, for debugging purposes. No assigned storage class. The automatic (stack) variable. The Value field specifies the stack frame offset. A value that Microsoft tools use for external symbols. The Value field indicates the size if the section number is IMAGE_SYM_UNDEFINED (0). If the section number is not zero, then the Value field specifies the offset within the section. The offset of the symbol within the section. If the Value field is zero, then the symbol represents a section name. A register variable. The Value field specifies the register number. A symbol that is defined externally. A code label that is defined within the module. The Value field specifies the offset of the symbol within the section. A reference to a code label that is not defined. The structure member. The Value field specifies the n th member. A formal argument (parameter) of a function. The Value field specifies the n th argument. The structure tag-name entry. A union member. The Value field specifies the n th member. The Union tag-name entry. A Typedef entry. A static data declaration. An enumerated type tagname entry. A member of an enumeration. The Value field specifies the n th member. A register parameter. A bit-field reference. The Value field specifies the n th bit in the bit field. A .bb (beginning of block) or .eb (end of block) record. The Value field is the relocatable address of the code location. A value that Microsoft tools use for symbol records that define the extent of a function: begin function (.bf ), end function ( .ef ), and lines in function ( .lf ). For .lf records, the Value field gives the number of source lines in the function. For .ef records, the Value field gives the size of the function code. An end-of-structure entry. A value that Microsoft tools, as well as traditional COFF format, use for the source-file symbol record. The symbol is followed by auxiliary records that name the file. 
A definition of a section (Microsoft tools use STATIC storage class instead). A weak external. For more information, see Auxiliary Format 3: Weak Externals. A CLR token symbol. The name is an ASCII string that consists of the hexadecimal value of the token. For more information, see CLR Token Definition (Object Only).

/// Auxiliary symbol record describing a function.
pub const FunctionDefinition = struct {
    /// The symbol-table index of the corresponding .bf (begin function)
    /// symbol record.
    tag_index: u32,

    /// The size of the executable code for the function itself. If the
    /// function is in its own section, the SizeOfRawData in the section
    /// header is greater or equal to this field, depending on alignment
    /// considerations.
    total_size: u32,

    /// The file offset of the first COFF line-number entry for the function,
    /// or zero if none exists.
    pointer_to_linenumber: u32,

    /// The symbol-table index of the record for the next function. If the
    /// function is the last in the symbol table, this field is set to zero.
    pointer_to_next_function: u32,

    unused: [2]u8,
};

SectionDefinition

The symbol-table index of the corresponding .bf (begin function) symbol record. The size of the executable code for the function itself. If the function is in its own section, the SizeOfRawData in the section header is greater or equal to this field, depending on alignment considerations. The file offset of the first COFF line-number entry for the function, or zero if none exists. The symbol-table index of the record for the next function. If the function is the last in the symbol table, this field is set to zero.

/// Auxiliary symbol record describing a section.
pub const SectionDefinition = struct {
    /// The size of section data; the same as SizeOfRawData in the section header.
    length: u32,

    /// The number of relocation entries for the section.
    number_of_relocations: u16,

    /// The number of line-number entries for the section.
    number_of_linenumbers: u16,

    /// The checksum for communal data. It is applicable if the
    /// IMAGE_SCN_LNK_COMDAT flag is set in the section header.
    checksum: u32,

    /// One-based index into the section table for the associated section.
    /// This is used when the COMDAT selection setting is 5.
    number: u16,

    /// The COMDAT selection number. This is applicable if the section is a
    /// COMDAT section.
    selection: ComdatSelection,

    unused: [3]u8,
};

FileDefinition

The size of section data; the same as SizeOfRawData in the section header. The number of relocation entries for the section. The number of line-number entries for the section. The checksum for communal data. It is applicable if the IMAGE_SCN_LNK_COMDAT flag is set in the section header. One-based index into the section table for the associated section. This is used when the COMDAT selection setting is 5. The COMDAT selection number. This is applicable if the section is a COMDAT section.

/// Auxiliary symbol record that carries a source file name.
pub const FileDefinition = struct {
    /// An ANSI string that gives the name of the source file.
    /// This is padded with nulls if it is less than the maximum length.
    file_name: [18]u8,

    /// Returns `file_name` without its NUL padding: the bytes before the
    /// first NUL, or all 18 bytes when the field contains none.
    pub fn getFileName(self: *const FileDefinition) []const u8 {
        return std.mem.sliceTo(&self.file_name, 0);
    }
};

WeakExternalDefinition

/// Auxiliary symbol record for a weak external: sym1 is the weak symbol,
/// sym2 the symbol linked in its place when sym1 is not found.
pub const WeakExternalDefinition = struct {
    /// The symbol-table index of sym2, the symbol to be linked if sym1 is not found.
    tag_index: u32,

    /// A value of IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY indicates that no
    /// library search for sym1 should be performed.
    /// A value of IMAGE_WEAK_EXTERN_SEARCH_LIBRARY indicates that a library
    /// search for sym1 should be performed.
    /// A value of IMAGE_WEAK_EXTERN_SEARCH_ALIAS indicates that sym1 is an
    /// alias for sym2.
    flag: WeakExternalFlag,

    unused: [10]u8,
};

// https://github.com/tpn/winsdk-10/blob/master/Include/10.0.16299.0/km/ntimage.h

WeakExternalFlag

The symbol-table index of sym2, the symbol to be linked if sym1 is not found. A value of IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY indicates that no library search for sym1 should be performed. A value of IMAGE_WEAK_EXTERN_SEARCH_LIBRARY indicates that a library search for sym1 should be performed. A value of IMAGE_WEAK_EXTERN_SEARCH_ALIAS indicates that sym1 is an alias for sym2.

/// Type of `WeakExternalDefinition.flag`.
pub const WeakExternalFlag = enum(u32) {
    // No library search for sym1 should be performed.
    SEARCH_NOLIBRARY = 1,
    // A library search for sym1 should be performed.
    SEARCH_LIBRARY = 2,
    // sym1 is an alias for sym2.
    SEARCH_ALIAS = 3,
    // NOTE(review): not described anywhere in this file — see ntimage.h.
    ANTI_DEPENDENCY = 4,
};

ComdatSelection

/// Type of `SectionDefinition.selection`.
pub const ComdatSelection = enum(u8) {
    /// Not a COMDAT section.
    NONE = 0,

    /// If this symbol is already defined, the linker issues a
    /// "multiply defined symbol" error.
    NODUPLICATES = 1,

    /// Any section that defines the same COMDAT symbol can be linked; the
    /// rest are removed.
    ANY = 2,

    /// The linker chooses an arbitrary section among the definitions for
    /// this symbol. If all definitions are not the same size, a
    /// "multiply defined symbol" error is issued.
    SAME_SIZE = 3,

    /// The linker chooses an arbitrary section among the definitions for
    /// this symbol. If all definitions do not match exactly, a
    /// "multiply defined symbol" error is issued.
    EXACT_MATCH = 4,

    /// The section is linked if a certain other COMDAT section is linked.
    /// This other section is indicated by the Number field of the auxiliary
    /// symbol record for the section definition. This setting is useful for
    /// definitions that have components in multiple sections (for example,
    /// code in one and data in another), but where all must be linked or
    /// discarded as a set. The other section this section is associated with
    /// must be a COMDAT section, which can be another associative COMDAT
    /// section. An associative COMDAT section's section association chain
    /// can't form a loop. The section association chain must eventually come
    /// to a COMDAT section that doesn't have IMAGE_COMDAT_SELECT_ASSOCIATIVE set.
    ASSOCIATIVE = 5,

    /// The linker chooses the largest definition from among all of the
    /// definitions for this symbol. If multiple definitions have this size,
    /// the choice between them is arbitrary.
    LARGEST = 6,
};

DebugInfoDefinition

Not a COMDAT section. If this symbol is already defined, the linker issues a "multiply defined symbol" error. Any section that defines the same COMDAT symbol can be linked; the rest are removed. The linker chooses an arbitrary section among the definitions for this symbol. If all definitions are not the same size, a "multiply defined symbol" error is issued. The linker chooses an arbitrary section among the definitions for this symbol. If all definitions do not match exactly, a "multiply defined symbol" error is issued. The section is linked if a certain other COMDAT section is linked. This other section is indicated by the Number field of the auxiliary symbol record for the section definition. This setting is useful for definitions that have components in multiple sections (for example, code in one and data in another), but where all must be linked or discarded as a set. The other section this section is associated with must be a COMDAT section, which can be another associative COMDAT section. An associative COMDAT section's section association chain can't form a loop. The section association chain must eventually come to a COMDAT section that doesn't have IMAGE_COMDAT_SELECT_ASSOCIATIVE set. The linker chooses the largest definition from among all of the definitions for this symbol. If multiple definitions have this size, the choice between them is arbitrary.

/// Auxiliary symbol record attached to .bf and .ef symbol records.
pub const DebugInfoDefinition = struct {
    unused_1: [4]u8,

    /// The actual ordinal line number (1, 2, 3, and so on) within the source
    /// file, corresponding to the .bf or .ef record.
    linenumber: u16,

    unused_2: [6]u8,

    /// The symbol-table index of the next .bf symbol record. If the function
    /// is the last in the symbol table, this field is set to zero.
    /// It is not used for .ef records.
    pointer_to_next_function: u32,

    unused_3: [2]u8,
};

MachineType

The actual ordinal line number (1, 2, 3, and so on) within the source file, corresponding to the .bf or .ef record. The symbol-table index of the next .bf symbol record. If the function is the last in the symbol table, this field is set to zero. It is not used for .ef records.

/// Type of `CoffHeader.machine`: the number that identifies the target machine.
pub const MachineType = enum(u16) {
    Unknown = 0x0,
    /// Matsushita AM33
    AM33 = 0x1d3,
    /// x64
    X64 = 0x8664,
    /// ARM little endian
    ARM = 0x1c0,
    /// ARM64 little endian
    ARM64 = 0xaa64,
    /// ARM Thumb-2 little endian
    ARMNT = 0x1c4,
    /// EFI byte code
    EBC = 0xebc,
    /// Intel 386 or later processors and compatible processors
    I386 = 0x14c,
    /// Intel Itanium processor family
    IA64 = 0x200,
    /// Mitsubishi M32R little endian
    M32R = 0x9041,
    /// MIPS16
    MIPS16 = 0x266,
    /// MIPS with FPU
    MIPSFPU = 0x366,
    /// MIPS16 with FPU
    MIPSFPU16 = 0x466,
    /// Power PC little endian
    POWERPC = 0x1f0,
    /// Power PC with floating point support
    POWERPCFP = 0x1f1,
    /// MIPS little endian
    R4000 = 0x166,
    /// RISC-V 32-bit address space
    RISCV32 = 0x5032,
    /// RISC-V 64-bit address space
    RISCV64 = 0x5064,
    /// RISC-V 128-bit address space
    RISCV128 = 0x5128,
    /// Hitachi SH3
    SH3 = 0x1a2,
    /// Hitachi SH3 DSP
    SH3DSP = 0x1a3,
    /// Hitachi SH4
    SH4 = 0x1a6,
    /// Hitachi SH5
    SH5 = 0x1a8,
    /// Thumb
    Thumb = 0x1c2,
    /// MIPS little-endian WCE v2
    WCEMIPSV2 = 0x169,

    /// Maps a target CPU architecture to its machine type.
    /// Only x86, x86_64, arm, thumb, aarch64, powerpc, riscv32 and riscv64
    /// are handled; any other architecture reaches `unreachable`.
    pub fn fromTargetCpuArch(arch: std.Target.Cpu.Arch) MachineType {
        const machine: MachineType = switch (arch) {
            .x86 => .I386,
            .x86_64 => .X64,
            .arm => .ARM,
            .thumb => .Thumb,
            .aarch64 => .ARM64,
            .powerpc => .POWERPC,
            .riscv32 => .RISCV32,
            .riscv64 => .RISCV64,
            // Not every architecture is handled (yet).
            else => unreachable,
        };
        return machine;
    }

    /// Inverse of `fromTargetCpuArch`: returns the CPU architecture for the
    /// eight handled machine types and null for every other one.
    pub fn toTargetCpuArch(machine_type: MachineType) ?std.Target.Cpu.Arch {
        const arch: ?std.Target.Cpu.Arch = switch (machine_type) {
            .I386 => .x86,
            .X64 => .x86_64,
            .ARM => .arm,
            .Thumb => .thumb,
            .ARM64 => .aarch64,
            .POWERPC => .powerpc,
            .RISCV32 => .riscv32,
            .RISCV64 => .riscv64,
            // Not every machine type is handled (yet).
            else => null,
        };
        return arch;
    }
};

CoffError

/// Error set for COFF/PE handling.
/// NOTE(review): in the part of the file visible here only `MissingPEHeader`
/// is returned (by `Coff.init`, through an inferred error set); where the
/// remaining errors are raised is not shown — confirm against the rest of
/// the file.
pub const CoffError = error{
    InvalidPEMagic,
    InvalidPEHeader,
    InvalidMachine,
    MissingPEHeader,
    MissingCoffSection,
    MissingStringTable,
};

// Official documentation of the format: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format

Coff

pub const Coff = struct {
    /// Raw bytes of the COFF object or PE image; every accessor reads from
    /// this slice.
    data: []const u8,
    /// Set if `data` is backed by the image as loaded by the loader.
    /// When set, accessors use virtual addresses as offsets into `data`
    /// instead of raw file offsets.
    is_loaded: bool,
    /// True when `init` found the "PE\0\0" signature at the offset stored at
    /// 0x3C.
    is_image: bool,
    /// Offset of the COFF header inside `data`. For images `init` stores the
    /// offset just past the 4-byte PE signature.
    coff_header_offset: usize,

    /// GUID read from the CodeView record; only defined after a successful
    /// `getPdbPath` call.
    guid: [16]u8 = undefined,
    /// Age read from the CodeView record; only defined after a successful
    /// `getPdbPath` call.
    age: u32 = undefined,

    // The lifetime of `data` must be longer than the lifetime of the returned Coff

init()

    /// Wraps `data` in a `Coff` and locates its COFF header.
    ///
    /// A little-endian `u32` is read at offset 0x3C and the four bytes it
    /// points at are compared against the "PE\0\0" signature. On a match the
    /// input is treated as an image and `coff_header_offset` is set just past
    /// the signature; otherwise the offset is stored unchanged and `is_image`
    /// is false. `is_loaded` is stored as given.
    ///
    /// Errors:
    /// - the fixed-buffer reader's end-of-stream error when `data` is too
    ///   short to hold the offset field or the signature,
    /// - `error.EndOfStream` when an image is too short to hold the COFF
    ///   header that follows the signature,
    /// - `error.MissingPEHeader` when an image's COFF header reports an
    ///   optional header size of zero.
    pub fn init(data: []const u8, is_loaded: bool) !Coff {
        const pe_pointer_offset = 0x3C;
        const pe_magic = "PE\x00\x00";

        var stream = std.io.fixedBufferStream(data);
        const reader = stream.reader();
        try stream.seekTo(pe_pointer_offset);
        // Never reassigned, so this must be `const` (an unmutated `var` is a compile error).
        const coff_header_offset = try reader.readInt(u32, .little);
        try stream.seekTo(coff_header_offset);
        var buf: [4]u8 = undefined;
        try reader.readNoEof(&buf);
        const is_image = mem.eql(u8, pe_magic, &buf);

        var coff = @This(){
            .data = data,
            .is_image = is_image,
            .is_loaded = is_loaded,
            .coff_header_offset = coff_header_offset,
        };

        // Do some basic validation upfront
        if (is_image) {
            // Skip the 4-byte signature; the COFF header follows it directly.
            coff.coff_header_offset += 4;
            // The signature was read successfully, so the new offset is at
            // most `data.len` and the subtraction cannot underflow. Reject
            // truncated images here instead of slicing out of bounds in
            // `getCoffHeader`.
            if (data.len - coff.coff_header_offset < @sizeOf(CoffHeader)) return error.EndOfStream;
            const coff_header = coff.getCoffHeader();
            if (coff_header.size_of_optional_header == 0) return error.MissingPEHeader;
        }

        // JK: we used to check for architecture here and throw an error if not x86 or derivative.
        // However I am willing to take a leap of faith and let aarch64 have a shot also.

        return coff;
    }

getPdbPath()

    /// Copies the PDB path stored in the image's CodeView debug record into
    /// `buffer` and returns the number of bytes written (the terminating NUL
    /// is not copied).
    ///
    /// As a side effect `self.guid` and `self.age` are filled in from the
    /// same record. Asserts that `self` is an image.
    ///
    /// Returns `null` when the image has no DEBUG data directory or when no
    /// debug directory entry is of type CODEVIEW.
    ///
    /// Errors:
    /// - `error.InvalidDebugDirectory` when the file is not loaded and no
    ///   section contains the debug directory's virtual address,
    /// - `error.InvalidPEMagic` when the CodeView record does not start with
    ///   "RSDS",
    /// - `error.NameTooLong` when the path does not fit in `buffer`,
    /// - any error reported by the underlying reader.
    pub fn getPdbPath(self: *Coff, buffer: []u8) !?usize {
        assert(self.is_image);

        const data_dirs = self.getDataDirectories();
        if (@intFromEnum(DirectoryEntry.DEBUG) >= data_dirs.len) return null;

        const debug_dir = data_dirs[@intFromEnum(DirectoryEntry.DEBUG)];
        var stream = std.io.fixedBufferStream(self.data);
        const reader = stream.reader();

        if (self.is_loaded) {
            try stream.seekTo(debug_dir.virtual_address);
        } else {
            // Find what section the debug_dir is in, in order to convert the RVA to a file offset
            for (self.getSectionHeaders()) |*sect| {
                if (debug_dir.virtual_address < sect.virtual_address) continue;
                // Compare via the distance from the section start rather than
                // `virtual_address + virtual_size`, which can overflow on
                // crafted section headers.
                const delta = debug_dir.virtual_address - sect.virtual_address;
                if (delta >= sect.virtual_size) continue;
                // Widen before adding so the file offset cannot overflow either.
                try stream.seekTo(@as(u64, sect.pointer_to_raw_data) + delta);
                break;
            } else return error.InvalidDebugDirectory;
        }

        // Find the correct DebugDirectoryEntry, and where its data is stored.
        // It can be in any section.
        const debug_dir_entry_count = debug_dir.size / @sizeOf(DebugDirectoryEntry);
        var i: u32 = 0;
        while (i < debug_dir_entry_count) : (i += 1) {
            const debug_dir_entry = try reader.readStruct(DebugDirectoryEntry);
            if (debug_dir_entry.type == .CODEVIEW) {
                const dir_offset = if (self.is_loaded) debug_dir_entry.address_of_raw_data else debug_dir_entry.pointer_to_raw_data;
                try stream.seekTo(dir_offset);
                break;
            }
        } else return null;

        var cv_signature: [4]u8 = undefined; // CodeView signature
        try reader.readNoEof(cv_signature[0..]);
        // 'RSDS' indicates PDB70 format, used by lld.
        if (!mem.eql(u8, &cv_signature, "RSDS"))
            return error.InvalidPEMagic;
        try reader.readNoEof(self.guid[0..]);
        self.age = try reader.readInt(u32, .little);

        // Finally read the null-terminated string.
        var byte = try reader.readByte();
        i = 0;
        while (byte != 0 and i < buffer.len) : (i += 1) {
            buffer[i] = byte;
            byte = try reader.readByte();
        }

        // The buffer filled up before the terminator was seen.
        if (byte != 0 and i == buffer.len)
            return error.NameTooLong;

        return @as(usize, i);
    }

getCoffHeader()

    /// Returns a copy of the COFF header found at `coff_header_offset`.
    /// The bytes are reinterpreted in place through an unaligned pointer.
    pub fn getCoffHeader(self: Coff) CoffHeader {
        const bytes = self.data[self.coff_header_offset..][0..@sizeOf(CoffHeader)];
        const header: *align(1) const CoffHeader = @ptrCast(bytes);
        return header.*;
    }

getOptionalHeader()

    /// Returns a copy of the `OptionalHeader` bytes that directly follow the
    /// COFF header. Asserts that `self` is an image.
    pub fn getOptionalHeader(self: Coff) OptionalHeader {
        assert(self.is_image);
        const start = self.coff_header_offset + @sizeOf(CoffHeader);
        const bytes = self.data[start..][0..@sizeOf(OptionalHeader)];
        const header: *align(1) const OptionalHeader = @ptrCast(bytes);
        return header.*;
    }

getOptionalHeader32()

    /// Returns a copy of the bytes that directly follow the COFF header,
    /// interpreted as an `OptionalHeaderPE32`. Asserts that `self` is an
    /// image; the header magic is not checked here.
    pub fn getOptionalHeader32(self: Coff) OptionalHeaderPE32 {
        assert(self.is_image);
        const start = self.coff_header_offset + @sizeOf(CoffHeader);
        const bytes = self.data[start..][0..@sizeOf(OptionalHeaderPE32)];
        const header: *align(1) const OptionalHeaderPE32 = @ptrCast(bytes);
        return header.*;
    }

getOptionalHeader64()

    /// Returns a copy of the bytes that directly follow the COFF header,
    /// interpreted as an `OptionalHeaderPE64`. Asserts that `self` is an
    /// image; the header magic is not checked here.
    pub fn getOptionalHeader64(self: Coff) OptionalHeaderPE64 {
        assert(self.is_image);
        const start = self.coff_header_offset + @sizeOf(CoffHeader);
        const bytes = self.data[start..][0..@sizeOf(OptionalHeaderPE64)];
        const header: *align(1) const OptionalHeaderPE64 = @ptrCast(bytes);
        return header.*;
    }

getImageBase()

    /// Returns the `image_base` field of the optional header, picking the
    /// PE32 or PE32+ layout from the header magic.
    ///
    /// Any other magic reaches `unreachable`: the header is assumed to have
    /// been validated already.
    pub fn getImageBase(self: Coff) u64 {
        const magic = self.getOptionalHeader().magic;
        if (magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) return self.getOptionalHeader32().image_base;
        if (magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC) return self.getOptionalHeader64().image_base;
        unreachable; // We assume we have validated the header already
    }

getNumberOfDataDirectories()

    /// Returns the `number_of_rva_and_sizes` field of the optional header,
    /// picking the PE32 or PE32+ layout from the header magic.
    ///
    /// Any other magic reaches `unreachable`: the header is assumed to have
    /// been validated already.
    pub fn getNumberOfDataDirectories(self: Coff) u32 {
        const magic = self.getOptionalHeader().magic;
        if (magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) return self.getOptionalHeader32().number_of_rva_and_sizes;
        if (magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC) return self.getOptionalHeader64().number_of_rva_and_sizes;
        unreachable; // We assume we have validated the header already
    }

getDataDirectories()

    /// Returns the data directory table that follows the optional header as
    /// an unaligned view into `self.data`.
    ///
    /// The table's start depends on whether the optional header uses the
    /// PE32 or PE32+ layout; its length is `getNumberOfDataDirectories()`.
    /// Any other header magic reaches `unreachable`.
    pub fn getDataDirectories(self: *const Coff) []align(1) const ImageDataDirectory {
        const optional_header_size: usize = switch (self.getOptionalHeader().magic) {
            IMAGE_NT_OPTIONAL_HDR32_MAGIC => @sizeOf(OptionalHeaderPE32),
            IMAGE_NT_OPTIONAL_HDR64_MAGIC => @sizeOf(OptionalHeaderPE64),
            else => unreachable, // We assume we have validated the header already
        };
        const table_offset = self.coff_header_offset + @sizeOf(CoffHeader) + optional_header_size;
        const table: [*]align(1) const ImageDataDirectory = @ptrCast(self.data[table_offset..]);
        const count = self.getNumberOfDataDirectories();
        return table[0..count];
    }

getSymtab()

    /// Returns a view of the symbol table, or `null` when the COFF header's
    /// `pointer_to_symbol_table` is zero.
    ///
    /// The view covers `number_of_symbols * Symbol.sizeOf()` bytes of
    /// `self.data`, starting at `pointer_to_symbol_table`.
    pub fn getSymtab(self: *const Coff) ?Symtab {
        const header = self.getCoffHeader();
        const table_start = header.pointer_to_symbol_table;
        if (table_start == 0) return null;

        const table_bytes = header.number_of_symbols * Symbol.sizeOf();
        return Symtab{ .buffer = self.data[table_start..][0..table_bytes] };
    }

getStrtab()

    /// Returns a view of the string table, which starts directly after the
    /// symbol table, or `null` when the COFF header's
    /// `pointer_to_symbol_table` is zero.
    ///
    /// The table's first four bytes hold its size as a little-endian `u32`;
    /// the returned buffer spans that many bytes from the start of the table,
    /// size field included.
    ///
    /// Returns `error.InvalidStrtabSize` when the size field itself, or the
    /// table it describes, does not fit inside `self.data`.
    pub fn getStrtab(self: *const Coff) error{InvalidStrtabSize}!?Strtab {
        const coff_header = self.getCoffHeader();
        if (coff_header.pointer_to_symbol_table == 0) return null;

        const offset = coff_header.pointer_to_symbol_table + Symbol.sizeOf() * coff_header.number_of_symbols;
        // Make sure the 4-byte size field is inside `data` before reading it;
        // a truncated file would otherwise index out of bounds.
        if (offset > self.data.len or self.data.len - offset < 4) return error.InvalidStrtabSize;
        const size = mem.readInt(u32, self.data[offset..][0..4], .little);
        // Same check as `offset + size > data.len`, written without the addition.
        if (size > self.data.len - offset) return error.InvalidStrtabSize;

        return Strtab{ .buffer = self.data[offset..][0..size] };
    }

strtabRequired()

    /// Reports whether any section header's `getName()` returns `null`,
    /// i.e. whether at least one section name must be looked up in the
    /// string table (see `getSectionName`).
    pub fn strtabRequired(self: *const Coff) bool {
        for (self.getSectionHeaders()) |*section| {
            const inline_name = section.getName();
            if (inline_name == null) return true;
        }
        return false;
    }

getSectionHeaders()

    /// Returns the section header table as an unaligned view into
    /// `self.data`.
    ///
    /// The table starts after the COFF header plus `size_of_optional_header`
    /// bytes and holds `number_of_sections` entries. No bounds check against
    /// `self.data.len` is performed here.
    pub fn getSectionHeaders(self: *const Coff) []align(1) const SectionHeader {
        const header = self.getCoffHeader();
        const table_offset = self.coff_header_offset + @sizeOf(CoffHeader) + header.size_of_optional_header;
        const table: [*]align(1) const SectionHeader = @ptrCast(self.data.ptr + table_offset);
        return table[0..header.number_of_sections];
    }

getSectionHeadersAlloc()

    /// Copies the section header table into a newly allocated, naturally
    /// aligned slice.
    ///
    /// The slice is allocated with `allocator`; the caller is responsible
    /// for freeing it. Returns the allocator's error if allocation fails.
    pub fn getSectionHeadersAlloc(self: *const Coff, allocator: mem.Allocator) ![]SectionHeader {
        const unaligned = self.getSectionHeaders();
        const copies = try allocator.alloc(SectionHeader, unaligned.len);
        for (copies, unaligned) |*dst, src| dst.* = src;
        return copies;
    }

getSectionName()

    /// Returns the name of `sect_hdr`.
    ///
    /// When the header's `getName()` yields a name it is returned directly.
    /// Otherwise the name is looked up in the string table at the header's
    /// name offset; in that path both the string table and the name offset
    /// are unwrapped with `.?` and so must be present.
    ///
    /// Returns `error.InvalidStrtabSize` when `getStrtab` rejects the table.
    pub fn getSectionName(self: *const Coff, sect_hdr: *align(1) const SectionHeader) error{InvalidStrtabSize}![]const u8 {
        if (sect_hdr.getName()) |short_name| return short_name;

        const strtab = (try self.getStrtab()).?;
        const name_offset = sect_hdr.getNameOffset().?;
        return strtab.get(name_offset);
    }

getSectionByName()

    /// Returns the first section header whose name equals `name`, or `null`
    /// when no section matches.
    ///
    /// Sections whose name cannot be resolved because the string table is
    /// rejected are skipped rather than reported.
    pub fn getSectionByName(self: *const Coff, comptime name: []const u8) ?*align(1) const SectionHeader {
        for (self.getSectionHeaders()) |*sect| {
            if (self.getSectionName(sect)) |section_name| {
                if (mem.eql(u8, section_name, name)) return sect;
            } else |err| switch (err) {
                // Ignore invalid(?) strtab entries - see also GitHub issue #15238
                error.InvalidStrtabSize => {},
            }
        }
        return null;
    }

getSectionData()

    /// Returns the `virtual_size` bytes of `self.data` that belong to `sec`.
    ///
    /// The section starts at its `virtual_address` when the image is loaded
    /// and at its `pointer_to_raw_data` otherwise.
    pub fn getSectionData(self: *const Coff, sec: *align(1) const SectionHeader) []const u8 {
        var start: usize = sec.pointer_to_raw_data;
        if (self.is_loaded) start = sec.virtual_address;

        const tail = self.data[start..];
        return tail[0..sec.virtual_size];
    }

getSectionDataAlloc()

    /// Returns a copy of the section's bytes (see `getSectionData`)
    /// allocated with `allocator`; the caller is responsible for freeing it.
    /// Returns the allocator's error if allocation fails.
    pub fn getSectionDataAlloc(self: *const Coff, sec: *align(1) const SectionHeader, allocator: mem.Allocator) ![]u8 {
        return allocator.dupe(u8, self.getSectionData(sec));
    }
};

Symtab

pub const Symtab = struct {
    buffer: []const u8,

len()

    /// Returns the number of entries in the table. The buffer length must be
    /// an exact multiple of `Symbol.sizeOf()`.
    pub fn len(self: Symtab) usize {
        const entry_size = Symbol.sizeOf();
        return @divExact(self.buffer.len, entry_size);
    }

    /// Selects how `at` interprets the raw bytes of one table entry; it is
    /// also the tag type of `Record`.
    pub const Tag = enum {
        /// Decode the entry as a `Symbol`.
        symbol,
        /// Decode the entry as a `DebugInfoDefinition`.
        debug_info,
        /// Decode the entry as a `FunctionDefinition`.
        func_def,
        /// Decode the entry as a `WeakExternalDefinition`.
        weak_ext,
        /// Decode the entry as a `FileDefinition`.
        file_def,
        /// Decode the entry as a `SectionDefinition`.
        sect_def,
    };

    /// One decoded table entry as returned by `at`; the active field matches
    /// the `Tag` that was passed in.
    pub const Record = union(Tag) {
        symbol: Symbol,
        debug_info: DebugInfoDefinition,
        func_def: FunctionDefinition,
        weak_ext: WeakExternalDefinition,
        file_def: FileDefinition,
        sect_def: SectionDefinition,
    };

at()

Lives as long as Symtab instance.

    /// Decodes the entry at `index`, interpreting its bytes as requested by
    /// `tag`.
    ///
    /// The caller chooses the interpretation; the entry's bytes are not
    /// inspected to decide which kind of record it is.
    pub fn at(self: Symtab, index: usize, tag: Tag) Record {
        const entry_size = Symbol.sizeOf();
        const start = index * entry_size;
        const entry = self.buffer[start..][0..entry_size];
        switch (tag) {
            .symbol => return .{ .symbol = asSymbol(entry) },
            .debug_info => return .{ .debug_info = asDebugInfo(entry) },
            .func_def => return .{ .func_def = asFuncDef(entry) },
            .weak_ext => return .{ .weak_ext = asWeakExtDef(entry) },
            .file_def => return .{ .file_def = asFileDef(entry) },
            .sect_def => return .{ .sect_def = asSectDef(entry) },
        }
    }

    /// Decodes the first 18 bytes of `raw` as a `Symbol`; multi-byte fields
    /// are read as little endian.
    fn asSymbol(raw: []const u8) Symbol {
        const name = raw[0..8].*;
        const value = mem.readInt(u32, raw[8..12], .little);
        const section_number: SectionNumber = @enumFromInt(mem.readInt(u16, raw[12..14], .little));
        const sym_type: SymType = @bitCast(mem.readInt(u16, raw[14..16], .little));
        const storage_class: StorageClass = @enumFromInt(raw[16]);
        const aux_count = raw[17];

        return Symbol{
            .name = name,
            .value = value,
            .section_number = section_number,
            .type = sym_type,
            .storage_class = storage_class,
            .number_of_aux_symbols = aux_count,
        };
    }

    /// Decodes the first 18 bytes of `raw` as a `DebugInfoDefinition`;
    /// multi-byte fields are read as little endian and the unused byte ranges
    /// are copied verbatim.
    fn asDebugInfo(raw: []const u8) DebugInfoDefinition {
        const pad_head = raw[0..4].*;
        const line_number = mem.readInt(u16, raw[4..6], .little);
        const pad_middle = raw[6..12].*;
        const next_function = mem.readInt(u32, raw[12..16], .little);
        const pad_tail = raw[16..18].*;

        return DebugInfoDefinition{
            .unused_1 = pad_head,
            .linenumber = line_number,
            .unused_2 = pad_middle,
            .pointer_to_next_function = next_function,
            .unused_3 = pad_tail,
        };
    }

    /// Decodes the first 18 bytes of `raw` as a `FunctionDefinition`;
    /// multi-byte fields are read as little endian.
    fn asFuncDef(raw: []const u8) FunctionDefinition {
        const tag_index = mem.readInt(u32, raw[0..4], .little);
        const total_size = mem.readInt(u32, raw[4..8], .little);
        const line_number_ptr = mem.readInt(u32, raw[8..12], .little);
        const next_function = mem.readInt(u32, raw[12..16], .little);
        const padding = raw[16..18].*;

        return FunctionDefinition{
            .tag_index = tag_index,
            .total_size = total_size,
            .pointer_to_linenumber = line_number_ptr,
            .pointer_to_next_function = next_function,
            .unused = padding,
        };
    }

    /// Decodes the first 18 bytes of `raw` as a `WeakExternalDefinition`;
    /// multi-byte fields are read as little endian.
    fn asWeakExtDef(raw: []const u8) WeakExternalDefinition {
        const tag_index = mem.readInt(u32, raw[0..4], .little);
        const flag: WeakExternalFlag = @enumFromInt(mem.readInt(u32, raw[4..8], .little));
        const padding = raw[8..18].*;

        return WeakExternalDefinition{
            .tag_index = tag_index,
            .flag = flag,
            .unused = padding,
        };
    }

    /// Decodes the first 18 bytes of `raw` as a `FileDefinition` by copying
    /// them into `file_name`.
    fn asFileDef(raw: []const u8) FileDefinition {
        const name_bytes = raw[0..18].*;
        return FileDefinition{ .file_name = name_bytes };
    }

    /// Decodes the first 18 bytes of `raw` as a `SectionDefinition`;
    /// multi-byte fields are read as little endian.
    fn asSectDef(raw: []const u8) SectionDefinition {
        const length = mem.readInt(u32, raw[0..4], .little);
        const relocation_count = mem.readInt(u16, raw[4..6], .little);
        const line_number_count = mem.readInt(u16, raw[6..8], .little);
        const checksum = mem.readInt(u32, raw[8..12], .little);
        const number = mem.readInt(u16, raw[12..14], .little);
        const selection: ComdatSelection = @enumFromInt(raw[14]);
        const padding = raw[15..18].*;

        return SectionDefinition{
            .length = length,
            .number_of_relocations = relocation_count,
            .number_of_linenumbers = line_number_count,
            .checksum = checksum,
            .number = number,
            .selection = selection,
            .unused = padding,
        };
    }

    pub const Slice = struct {
        buffer: []const u8,
        num: usize,
        count: usize = 0,

next()

Lives as long as Symtab instance.

        /// Decodes the next entry as a `Symbol` and advances the iterator.
        /// Returns `null` once `num` entries have been produced.
        pub fn next(self: *Slice) ?Symbol {
            if (self.count >= self.num) return null;

            const entry_size = Symbol.sizeOf();
            const sym = asSymbol(self.buffer[0..entry_size]);
            self.buffer = self.buffer[entry_size..];
            self.count += 1;
            return sym;
        }
    };

slice()

    /// Returns an iterator over the entries in `[start, end)`, measured in
    /// entries rather than bytes. With `end == null` the range runs to the
    /// end of the table.
    ///
    /// The returned `Slice` views exactly the bytes of the requested range.
    /// `start` must not exceed `end`, and the range must lie inside the
    /// table.
    pub fn slice(self: Symtab, start: usize, end: ?usize) Slice {
        const entry_size = Symbol.sizeOf();
        const start_offset = start * entry_size;
        const end_offset = if (end) |e| e * entry_size else self.buffer.len;
        return Slice{
            // `end_offset` is an absolute byte offset, not a length, so slice
            // with it as the upper bound. Treating it as a length relative to
            // `start_offset` ran past the buffer whenever `start > 0`.
            .buffer = self.buffer[start_offset..end_offset],
            .num = @divExact(end_offset - start_offset, entry_size),
        };
    }
};

Strtab

pub const Strtab = struct {
    buffer: []const u8,

get()

    /// Returns the NUL-terminated string that starts at byte offset `off`,
    /// without the terminator. Asserts that `off` is inside the table.
    ///
    /// The search for the terminator is confined to `buffer`: if no NUL
    /// follows `off`, the remainder of the buffer is returned instead of
    /// reading past its end.
    pub fn get(self: Strtab, off: u32) []const u8 {
        assert(off < self.buffer.len);
        return mem.sliceTo(self.buffer[off..], 0);
    }
};