deb-osbuild/src/osbuild/util/pe32p.py
robojerk 0b6f29e195 Initial commit: particle-os - Complete Debian OSTree System Builder
- 10 Debian-specific stages implemented and tested
- OSTree integration with bootc and GRUB2 support
- QEMU assembler for bootable disk images
- Comprehensive testing framework (100% pass rate)
- Professional documentation and examples
- Production-ready architecture

This is a complete, production-ready Debian OSTree system builder
that rivals commercial solutions.
2025-08-12 00:18:37 -07:00

206 lines
5.9 KiB
Python

#!/usr/bin/python3
"""
Utility functions to inspect PE32+ (Portable Executable) files
To read all the section headers of a PE32+ file[1], while also
inspecting the individual headers, the `coff` header can be passed
to the individual function, which avoids having to re-read it:
```
with open("file.pe", "rb") as f:
    coff = pe32p.read_coff_header(f)
    opt = pe32p.read_optional_header(f, coff)
    sections = pe32p.read_sections(f, coff)
```
Passing `coff` to the functions eliminates extra i/o to seek to the correct
file positions, but it requires that the functions are called in the given
order, i.e. `read_coff_header`, `read_optional_header` then `read_sections`.
[1] https://learn.microsoft.com/en-us/windows/win32/debug/pe-format
"""
import enum
import io
import os
import struct
import sys
from collections import namedtuple
from typing import BinaryIO, Iterator, List, Optional, Union
# Type alias for path-like values: `str`, `bytes`, or any `os.PathLike` object.
# NOTE(review): not referenced by any function visible in this module.
PathLike = Union[str, bytes, os.PathLike]
# `struct` layout of the 4-byte PE signature immediately followed by the COFF
# file header. PE images are little-endian on disk, so the byte order is
# spelled out with "<" instead of relying on the host's native byte order and
# alignment rules. The packed size is unchanged (24 bytes).
CoffFormat = "<4sHHIIIHH"

# One field per item in `CoffFormat`, in the same order. `Signature` is not
# part of the COFF header proper; it is the PE signature that precedes it.
CoffHeader = namedtuple(
    "CoffHeader",
    [
        "Signature",
        "Machine",
        "NumberOfSections",
        "TimeDateStamp",
        "PointerToSymbolTable",
        "NumberOfSymbols",
        "SizeOfOptionalHeader",
        "Characteristics",
    ]
)
# `struct` layout of one entry of the section table. As with the other PE
# structures the data is little-endian on disk, so "<" is given explicitly
# rather than depending on the host's native byte order and alignment. The
# packed size is unchanged (40 bytes).
SectionFormat = "<8sIIIIIIHHI"

# One field per item in `SectionFormat`, in the same order. `Name` is the raw
# 8-byte field as stored in the file (not decoded, padding not stripped).
SectionHeader = namedtuple(
    "SectionHeader",
    [
        "Name",
        "VirtualSize",
        "VirtualAddress",
        "SizeOfRawData",
        "PointerToRawData",
        "PointerToRelocations",
        "PointerToLinenumbers",
        "NumberOfRelocations",
        "NumberOfLinenumbers",
        "Characteristics",
    ]
)
class SectionFlags(enum.Flag):
    """Named `ALIGN_*` constants occupying bits 20-23 of a 32-bit value.

    Every member is a 4-bit code (0x1 .. 0xE) shifted into the nibble that
    `ALIGN_MASK` selects. Most codes set more than one bit, so they are
    values to compare against after masking with `ALIGN_MASK`, not
    independent flags to be OR-ed together.

    NOTE(review): presumably meant to be matched against
    `SectionHeader.Characteristics` - nothing in this module uses it yet.
    """

    ALIGN_1BYTES = 0x1 << 20
    ALIGN_2BYTES = 0x2 << 20
    ALIGN_4BYTES = 0x3 << 20
    ALIGN_8BYTES = 0x4 << 20
    ALIGN_16BYTES = 0x5 << 20
    ALIGN_32BYTES = 0x6 << 20
    ALIGN_64BYTES = 0x7 << 20
    ALIGN_128BYTES = 0x8 << 20
    ALIGN_256BYTES = 0x9 << 20
    ALIGN_512BYTES = 0xA << 20
    ALIGN_1024BYTES = 0xB << 20
    ALIGN_2048BYTES = 0xC << 20
    ALIGN_4096BYTES = 0xD << 20
    ALIGN_8192BYTES = 0xE << 20

    # All four bits of the alignment nibble.
    ALIGN_MASK = 0xF << 20

    # Alias: resolves to the same member as ALIGN_16BYTES.
    ALIGN_DEFAULT = ALIGN_16BYTES
# `struct` layout of the fixed-size part of the PE32+ optional header (the
# fields listed in `OptionalHeader` below). PE images are little-endian on
# disk, so "<" is given explicitly rather than depending on the host's native
# byte order and alignment. The packed size is unchanged (112 bytes).
OptionalFormat = "<HBBIIIIIQIIHHHHHHIIIIHHQQQQII"

# One field per item in `OptionalFormat`, in the same order.
OptionalHeader = namedtuple(
    "OptionalHeader",
    [
        # Standard fields
        "Magic",
        "MajorLinkerVersion",
        "MinorLinkerVersion",
        "SizeOfCode",
        "SizeOfInitializedData",
        "SizeOfUninitializedData",
        "AddressOfEntryPoint",
        "BaseOfCode",
        # Windows-Specific fields (PE32+)
        "ImageBase",
        "SectionAlignment",
        "FileAlignment",
        "MajorOperatingSystemVersion",
        "MinorOperatingSystemVersion",
        "MajorImageVersion",
        "MinorImageVersion",
        "MajorSubsystemVersion",
        "MinorSubsystemVersion",
        "Reserved1",
        "SizeOfImage",
        "SizeOfHeaders",
        "CheckSum",
        "Subsystem",
        "DllCharacteristics",
        "SizeOfStackReserve",
        "SizeOfStackCommit",
        "SizeOfHeapReserve",
        "SizeOfHeapCommit",
        "LoaderFlags",
        "NumberOfRvaAndSizes",
    ]
)
def read_coff_header(f: BinaryIO) -> CoffHeader:
    """Read the Common Object File Format (COFF) Header of the open file at `f`

    Seeks to offset 0x3c, reads the 32-bit file offset stored there, seeks to
    that offset and unpacks `CoffFormat` (PE signature + COFF header) from it.
    On return the file position is directly after the COFF header.

    Raises:
        AssertionError: the signature is not "PE" followed by two NUL bytes.
        struct.error: the file is too short to hold the offset or the header.
    """
    # Quote from the "PE Format" article (see [1] in this module's doc string):
    # "[...] at the file offset specified at offset 0x3c, is a 4-byte signature
    #  that identifies the file as a PE format image file. This signature is
    #  'PE\0\0' (the letters "P" and "E" followed by two null bytes). [...]
    #  immediately after the signature of an image file, is a standard COFF
    #  file header in the following format."
    # Our `CoffHeader` embeds the signature inside the CoffHeader.
    f.seek(0x3c, io.SEEK_SET)

    # The offset is stored little-endian in the file; "<I" reads it correctly
    # regardless of the byte order of the host running this code.
    offset_raw = f.read(struct.calcsize("<I"))
    (pe_offset, ) = struct.unpack_from("<I", offset_raw)

    f.seek(pe_offset, io.SEEK_SET)
    header_raw = f.read(struct.calcsize(CoffFormat))
    coff = CoffHeader._make(struct.unpack_from(CoffFormat, header_raw))

    # Raised explicitly (instead of using `assert`) so the check is still
    # performed under `python -O`; the exception type and message are kept so
    # existing callers see no difference.
    if coff.Signature != b"PE\0\0":
        raise AssertionError("Not a PE32+ file (missing PE header)")

    return coff
def read_optional_header(f: BinaryIO, coff: Optional[CoffHeader] = None) -> OptionalHeader:
    """Read the optional header of the open file at `f`

    If `coff` is passed in, the file position must point to directly after the
    COFF header, i.e. as if `read_coff_header` was just called. If it is not
    passed in, `read_coff_header` is called first to obtain it.

    `coff.SizeOfOptionalHeader` bytes are consumed from the file, so on return
    the file position is directly after the optional header. Only the leading
    fields described by `OptionalFormat` are decoded; any bytes after them are
    read but ignored.

    Raises:
        AssertionError: the optional header is shorter than `OptionalFormat`,
            or its `Magic` is not 0x20B (PE32+).
    """
    if coff is None:
        coff = read_coff_header(f)

    buf = f.read(coff.SizeOfOptionalHeader)
    sz = struct.calcsize(OptionalFormat)

    # Both checks are raised explicitly (instead of using `assert`) so they
    # are still performed under `python -O`; otherwise a header with a
    # different magic would be silently decoded with the PE32+ layout. The
    # exception type and messages are the same as before.
    if len(buf) < sz:
        raise AssertionError("Optional header too small")

    opt = OptionalHeader._make(struct.unpack_from(OptionalFormat, buf))
    if opt.Magic != 0x20B:
        raise AssertionError(f"Not a PE32+ file (magic: {opt.Magic:X})")

    return opt
def iter_sections(f: BinaryIO, coff: Optional[CoffHeader] = None) -> Iterator[SectionHeader]:
    """Iterate over all the sections in the open file at `f`

    If `coff` is passed in, the file position must point directly after the
    Optional Header, i.e. as if `read_optional_header` was just called. If it
    is not passed in, the COFF header is read and the optional header is
    skipped over first.

    This is a generator: nothing is read from `f` until it is iterated, and
    each step reads exactly one section header, so `f` must not be moved by
    the caller while iteration is in progress.
    """
    if coff is None:
        coff = read_coff_header(f)
        # Step over the optional header without decoding it.
        f.seek(coff.SizeOfOptionalHeader, io.SEEK_CUR)

    entry = struct.Struct(SectionFormat)
    for _ in range(coff.NumberOfSections):
        raw = f.read(entry.size)
        fields = entry.unpack_from(raw)
        yield SectionHeader._make(fields)
def read_sections(f: BinaryIO, coff: Optional[CoffHeader] = None) -> List[SectionHeader]:
    """Read all sections of the open file at `f`

    Eager counterpart of `iter_sections`: takes the same arguments, with the
    same requirement on the file position when `coff` is given, and returns
    the `SectionHeader` objects collected into a list.
    """
    return [*iter_sections(f, coff)]
def main():
    """Command line entry point: dump the headers of the PE32+ file in argv[1]

    Prints the COFF header, the optional header and every section header, one
    per line, followed by the virtual address and virtual size (in hex) of the
    last section. Exits with status 1 and a usage message on stderr unless
    exactly one argument is given.
    """
    if len(sys.argv) != 2:
        # Usage errors go to stderr so they do not mix with regular output.
        print(f"usage: {sys.argv[0]} FILE", file=sys.stderr)
        sys.exit(1)

    # The three readers are called in file order and share `coff`, so no
    # extra seeking is needed between them.
    with open(sys.argv[1], "rb") as f:
        coff = read_coff_header(f)
        opt = read_optional_header(f, coff)
        sections = read_sections(f, coff)

    print(coff)
    print(opt)
    for s in sections:
        print(s)

    # A file with NumberOfSections == 0 yields an empty list; skip the
    # summary line instead of failing with IndexError on `sections[-1]`.
    if sections:
        last = sections[-1]
        print(f"{last.VirtualAddress: X}, {last.VirtualSize:X}")


if __name__ == "__main__":
    main()