From ecb24a8eb77a10c276ba668ffb7055df8da48064 Mon Sep 17 00:00:00 2001
From: Christian Kellner
Date: Wed, 12 Oct 2022 18:55:26 +0200
Subject: [PATCH] util: add module to parse PE32+ files

Add a new module with utility functions to inspect PE32+ files,
mainly listing the sections and their addresses and sizes.
Include a simple test to check that we can successfully parse the
EFI stub contained in systemd (systemd-udev package).
---
 osbuild/util/pe32p.py       | 206 ++++++++++++++++++++++++++++++++++++
 test/mod/test_util_pe32p.py |  55 ++++++++++
 2 files changed, 261 insertions(+)
 create mode 100644 osbuild/util/pe32p.py
 create mode 100644 test/mod/test_util_pe32p.py

diff --git a/osbuild/util/pe32p.py b/osbuild/util/pe32p.py
new file mode 100644
index 00000000..0560d3ba
--- /dev/null
+++ b/osbuild/util/pe32p.py
@@ -0,0 +1,206 @@
+#!/usr/bin/python3
+"""
+Utility functions to inspect PE32+ (Portable Executable) files
+
+To read all the section headers of a PE32+ file[1], while also
+inspecting the individual headers, the `coff` header can be passed
+to the individual functions, which avoids having to re-read it:
+
+```
+with open("file.pe", "rb") as f:
+    coff = pe32p.read_coff_header(f)
+    opt = pe32p.read_optional_header(f, coff)
+    sections = pe32p.read_sections(f, coff)
+```
+
+Passing `coff` to the functions eliminates extra i/o to seek to the correct
+file positions, but it requires that the functions are called in the given
+order, i.e. `read_coff_header`, `read_optional_header` then `read_sections`.
+
+[1] https://learn.microsoft.com/en-us/windows/win32/debug/pe-format
+"""
+
+import enum
+import io
+import os
+import struct
+import sys
+from collections import namedtuple
+from typing import BinaryIO, Iterator, List, Optional, Union
+
+PathLike = Union[str, bytes, os.PathLike]
+
+CoffFormat = "4sHHIIIHH"
+CoffHeader = namedtuple(
+    "CoffHeader",
+    [
+        "Signature",
+        "Machine",
+        "NumberOfSections",
+        "TimeDateStamp",
+        "PointerToSymbolTable",
+        "NumberOfSymbols",
+        "SizeOfOptionalHeader",
+        "Characteristics",
+    ]
+)
+
+
+SectionFormat = "8sIIIIIIHHI"
+SectionHeader = namedtuple(
+    "SectionHeader",
+    [
+        "Name",
+        "VirtualSize",
+        "VirtualAddress",
+        "SizeOfRawData",
+        "PointerToRawData",
+        "PointerToRelocations",
+        "PointerToLinenumbers",
+        "NumberOfRelocations",
+        "NumberOfLinenumbers",
+        "Characteristics",
+    ]
+)
+
+
+class SectionFlags(enum.Flag):
+    ALIGN_1BYTES = 0x00100000
+    ALIGN_2BYTES = 0x00200000
+    ALIGN_4BYTES = 0x00300000
+    ALIGN_8BYTES = 0x00400000
+    ALIGN_16BYTES = 0x00500000
+    ALIGN_32BYTES = 0x00600000
+    ALIGN_64BYTES = 0x00700000
+    ALIGN_128BYTES = 0x00800000
+    ALIGN_256BYTES = 0x00900000
+    ALIGN_512BYTES = 0x00A00000
+    ALIGN_1024BYTES = 0x00B00000
+    ALIGN_2048BYTES = 0x00C00000
+    ALIGN_4096BYTES = 0x00D00000
+    ALIGN_8192BYTES = 0x00E00000
+    ALIGN_MASK = 0x00F00000
+    ALIGN_DEFAULT = ALIGN_16BYTES
+
+
+OptionalFormat = "HBBIIIIIQIIHHHHHHIIIIHHQQQQII"
+OptionalHeader = namedtuple(
+    "OptionalHeader",
+    [
+        # Standard fields
+        "Magic",
+        "MajorLinkerVersion",
+        "MinorLinkerVersion",
+        "SizeOfCode",
+        "SizeOfInitializedData",
+        "SizeOfUninitializedData",
+        "AddressOfEntryPoint",
+        "BaseOfCode",
+        # Windows-Specific fields (PE32+)
+        "ImageBase",
+        "SectionAlignment",
+        "FileAlignment",
+        "MajorOperatingSystemVersion",
+        "MinorOperatingSystemVersion",
+        "MajorImageVersion",
+        "MinorImageVersion",
+        "MajorSubsystemVersion",
+        "MinorSubsystemVersion",
+        "Reserved1",
+        "SizeOfImage",
+        "SizeOfHeaders",
+        "CheckSum",
+        "Subsystem",
+        "DllCharacteristics",
+        "SizeOfStackReserve",
+        "SizeOfStackCommit",
+        "SizeOfHeapReserve",
+        "SizeOfHeapCommit",
+        "LoaderFlags",
+        "NumberOfRvaAndSizes",
+    ]
+)
+
+
+def read_coff_header(f: BinaryIO) -> CoffHeader:
+    """Read the Common Object File Format (COFF) Header of the open file at `f`"""
+
+    # Quote from the "PE Format" article (see [1] in this module's docstring):
+    # "[...] at the file offset specified at offset 0x3c, is a 4-byte signature
+    # that identifies the file as a PE format image file. This signature is
+    # 'PE\0\0' (the letters "P" and "E" followed by two null bytes). [...]
+    # immediately after the signature of an image file, is a standard COFF
+    # file header in the following format."
+    # Our `CoffHeader` embeds the signature inside the CoffHeader.
+
+    f.seek(0x3c, io.SEEK_SET)
+    buf = f.read(struct.calcsize("I"))
+    (s, ) = struct.unpack_from("I", buf)
+    f.seek(int(s), io.SEEK_SET)
+
+    buf = f.read(struct.calcsize(CoffFormat))
+    coff = CoffHeader._make(struct.unpack_from(CoffFormat, buf))
+    assert coff.Signature == b"PE\0\0", "Not a PE32+ file (missing PE header)"
+    return coff
+
+
+def read_optional_header(f: BinaryIO, coff: Optional[CoffHeader] = None) -> OptionalHeader:
+    """Read the optional header of the open file at `f`
+
+    If `coff` is passed in, the file position must point to directly after the
+    COFF header, i.e. as if `read_coff_header` was just called.
+    """
+    if coff is None:
+        coff = read_coff_header(f)
+
+    buf = f.read(coff.SizeOfOptionalHeader)
+    sz = struct.calcsize(OptionalFormat)
+    assert len(buf) >= sz, "Optional header too small"
+    opt = OptionalHeader._make(struct.unpack_from(OptionalFormat, buf))
+    assert opt.Magic == 0x20B, f"Not a PE32+ file (magic: {opt.Magic:X})"
+    return opt
+
+
+def iter_sections(f: BinaryIO, coff: Optional[CoffHeader] = None) -> Iterator[SectionHeader]:
+    """Iterate over all the sections in the open file at `f`
+
+    If `coff` is passed in, the file position must point directly after the Optional
+    Header, i.e. as if `read_optional_header` was just called."""
+    if coff is None:
+        coff = read_coff_header(f)
+        f.seek(coff.SizeOfOptionalHeader, io.SEEK_CUR)
+
+    for _ in range(coff.NumberOfSections):
+        buf = f.read(struct.calcsize(SectionFormat))
+        yield SectionHeader._make(struct.unpack_from(SectionFormat, buf))
+
+
+def read_sections(f: BinaryIO, coff: Optional[CoffHeader] = None) -> List[SectionHeader]:
+    """Read all sections of the open file at `f`
+
+    Like `iter_sections` but returns a list of `SectionHeader` objects."""
+    return list(iter_sections(f, coff))
+
+
+def main():
+
+    if len(sys.argv) != 2:
+        print(f"usage: {sys.argv[0]} FILE")
+        sys.exit(1)
+
+    with open(sys.argv[1], "rb") as f:
+        coff = read_coff_header(f)
+        opt = read_optional_header(f, coff)
+        sections = read_sections(f, coff)
+
+    print(coff)
+    print(opt)
+    for s in sections:
+        print(s)
+
+    last = sections[-1]
+    print(f"{last.VirtualAddress: X}, {last.VirtualSize:X}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/mod/test_util_pe32p.py b/test/mod/test_util_pe32p.py
new file mode 100644
index 00000000..07ee476a
--- /dev/null
+++ b/test/mod/test_util_pe32p.py
@@ -0,0 +1,55 @@
+#
+# Test for the util.pe32p module
+#
+
+import io
+import os
+
+import pytest
+
+from osbuild.util import pe32p
+
+EFI_STUB = "/usr/lib/systemd/boot/efi/linuxx64.efi.stub"
+
+
+def have_efi_stub() -> bool:
+    return os.path.exists(EFI_STUB)
+
+
+@pytest.mark.skipif(not have_efi_stub(), reason="require systemd efi stub")
+def test_basic():
+    with open(EFI_STUB, "rb") as f:
+        coff = pe32p.read_coff_header(f)
+        assert coff
+        opt = pe32p.read_optional_header(f, coff)
+        assert opt
+        sections = pe32p.read_sections(f, coff)
+
+    assert sections, "No sections found in stub"
+
+
+@pytest.mark.skipif(not have_efi_stub(), reason="require systemd efi stub")
+def test_basic_no_coff():
+    # check the API variants that re-read the CoffHeader
+    with open(EFI_STUB, "rb") as f:
+        coff = pe32p.read_coff_header(f)
+        assert coff
+        f.seek(0, io.SEEK_SET)
+        opt = pe32p.read_optional_header(f)
+        f.seek(0, io.SEEK_SET)
+        assert opt
+        f.seek(0, io.SEEK_SET)
+        sections = pe32p.read_sections(f)
+
+    assert sections, "No sections found in stub"
+
+    with open(EFI_STUB, "rb") as f:
+        coff_check = pe32p.read_coff_header(f)
+        assert coff_check
+        assert coff == coff_check
+        opt_check = pe32p.read_optional_header(f, coff)
+        assert opt_check
+        assert opt == opt_check
+        sections_check = pe32p.read_sections(f, coff)
+        assert sections_check
+        assert sections == sections_check