debian-forge-composer/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
Ondřej Budai 29f66a251f go.mod: update github.com/containers/image/v5
Version 5.22 introduced a new option to /etc/containers/policy.json called
keyPaths, see

https://github.com/containers/image/pull/1609

EL9 immediately took advantage of this new feature and started using it, see
04645c4a84

This quickly became an issue in our code: The go library (containers/image)
parses the configuration file very strictly and refuses to create a client
when policy.json with an unknown key is present on the filesystem. As we
used 5.21.1 that doesn't know the new key, our unit tests started to
failing when containers-common was present.

Reproducer:
podman run --pull=always --rm -it centos:stream9
dnf install -y dnf-plugins-core
dnf config-manager --set-enabled crb
dnf install -y gpgme-devel libassuan-devel krb5-devel golang git-core
git clone https://github.com/osbuild/osbuild-composer
cd osbuild-composer

# install the new containers-common and run the test
dnf install -y https://kojihub.stream.centos.org/kojifiles/packages/containers-common/1/44.el9/x86_64/containers-common-1-44.el9.x86_64.rpm
go test -count 1 ./...

# this returns:
--- FAIL: TestClientResolve (0.00s)
    client_test.go:31:
        	Error Trace:	client_test.go:31
        	Error:      	Received unexpected error:
        	            	Unknown key "keyPaths"
        	            	invalid policy in "/etc/containers/policy.json"
        	            	github.com/containers/image/v5/signature.NewPolicyFromFile
        	            		/osbuild-composer/vendor/github.com/containers/image/v5/signature/policy_config.go:88
        	            	github.com/osbuild/osbuild-composer/internal/container.NewClient
        	            		/osbuild-composer/internal/container/client.go:123
        	            	github.com/osbuild/osbuild-composer/internal/container_test.TestClientResolve
        	            		/osbuild-composer/internal/container/client_test.go:29
        	            	testing.tRunner
        	            		/usr/lib/golang/src/testing/testing.go:1439
        	            	runtime.goexit
        	            		/usr/lib/golang/src/runtime/asm_amd64.s:1571
        	Test:       	TestClientResolve
    client_test.go:32:
        	Error Trace:	client_test.go:32
        	Error:      	Expected value not to be nil.
        	Test:       	TestClientResolve

 When run with an older containers-common, it succeeds:
 dnf install -y https://kojihub.stream.centos.org/kojifiles/packages/containers-common/1/40.el9/x86_64/containers-common-1-40.el9.x86_64.rpm
 go test -count 1 ./...
 PASS

To sum it up, I had to upgrade github.com/containers/image/v5 to v5.22.0.
Unfortunately, this wasn't so simple, see

go get github.com/containers/image/v5@latest
go: github.com/containers/image/v5@v5.22.0 requires
	github.com/letsencrypt/boulder@v0.0.0-20220331220046-b23ab962616e requires
	github.com/honeycombio/beeline-go@v1.1.1 requires
	github.com/gobuffalo/pop/v5@v5.3.1 requires
	github.com/mattn/go-sqlite3@v2.0.3+incompatible: reading github.com/mattn/go-sqlite3/go.mod at revision v2.0.3: unknown revision v2.0.3

It turns out that github.com/mattn/go-sqlite3@v2.0.3+incompatible has been
recently retracted https://github.com/mattn/go-sqlite3/pull/998 and this
broke a ton of packages depending on it. I was able to fix it by adding

exclude github.com/mattn/go-sqlite3 v2.0.3+incompatible

to our go.mod, see
https://github.com/mattn/go-sqlite3/issues/975#issuecomment-955661657

After adding it,
go get github.com/containers/image/v5@latest
succeeded and tools/prepare-source.sh took care of the rest.

Signed-off-by: Ondřej Budai <ondrej@budai.cz>
2022-08-29 10:25:38 +02:00

847 lines
15 KiB
ArmAsm

// Code generated by command: go run gen.go -out ../decompress_amd64.s -pkg=huff0. DO NOT EDIT.
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc
// func decompress4x_main_loop_amd64(ctx *decompress4xContext)
TEXT ·decompress4x_main_loop_amd64(SB), $0-8
XORQ DX, DX
// Preload values
MOVQ ctx+0(FP), AX
MOVBQZX 8(AX), DI
MOVQ 16(AX), SI
MOVQ 48(AX), BX
MOVQ 24(AX), R9
MOVQ 32(AX), R10
MOVQ (AX), R11
// Main loop
main_loop:
MOVQ SI, R8
CMPQ R8, BX
SETGE DL
// br0.fillFast32()
MOVQ 32(R11), R12
MOVBQZX 40(R11), R13
CMPQ R13, $0x20
JBE skip_fill0
MOVQ 24(R11), AX
SUBQ $0x20, R13
SUBQ $0x04, AX
MOVQ (R11), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (AX)(R14*1), R14
MOVQ R13, CX
SHLQ CL, R14
MOVQ AX, 24(R11)
ORQ R14, R12
// exhausted = exhausted || (br0.off < 4)
CMPQ AX, $0x04
SETLT AL
ORB AL, DL
skip_fill0:
// val0 := br0.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br0.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
// val1 := br0.peekTopBits(peekBits)
MOVQ DI, CX
MOVQ R12, R14
SHRQ CL, R14
// v1 := table[val1&mask]
MOVW (R10)(R14*2), CX
// br0.advance(uint8(v1.entry))
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
MOVW AX, (R8)
// update the bitreader structure
MOVQ R12, 32(R11)
MOVB R13, 40(R11)
ADDQ R9, R8
// br1.fillFast32()
MOVQ 80(R11), R12
MOVBQZX 88(R11), R13
CMPQ R13, $0x20
JBE skip_fill1
MOVQ 72(R11), AX
SUBQ $0x20, R13
SUBQ $0x04, AX
MOVQ 48(R11), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (AX)(R14*1), R14
MOVQ R13, CX
SHLQ CL, R14
MOVQ AX, 72(R11)
ORQ R14, R12
// exhausted = exhausted || (br1.off < 4)
CMPQ AX, $0x04
SETLT AL
ORB AL, DL
skip_fill1:
// val0 := br1.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br1.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
// val1 := br1.peekTopBits(peekBits)
MOVQ DI, CX
MOVQ R12, R14
SHRQ CL, R14
// v1 := table[val1&mask]
MOVW (R10)(R14*2), CX
// br1.advance(uint8(v1.entry))
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
MOVW AX, (R8)
// update the bitreader structure
MOVQ R12, 80(R11)
MOVB R13, 88(R11)
ADDQ R9, R8
// br2.fillFast32()
MOVQ 128(R11), R12
MOVBQZX 136(R11), R13
CMPQ R13, $0x20
JBE skip_fill2
MOVQ 120(R11), AX
SUBQ $0x20, R13
SUBQ $0x04, AX
MOVQ 96(R11), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (AX)(R14*1), R14
MOVQ R13, CX
SHLQ CL, R14
MOVQ AX, 120(R11)
ORQ R14, R12
// exhausted = exhausted || (br2.off < 4)
CMPQ AX, $0x04
SETLT AL
ORB AL, DL
skip_fill2:
// val0 := br2.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br2.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
// val1 := br2.peekTopBits(peekBits)
MOVQ DI, CX
MOVQ R12, R14
SHRQ CL, R14
// v1 := table[val1&mask]
MOVW (R10)(R14*2), CX
// br2.advance(uint8(v1.entry))
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
MOVW AX, (R8)
// update the bitreader structure
MOVQ R12, 128(R11)
MOVB R13, 136(R11)
ADDQ R9, R8
// br3.fillFast32()
MOVQ 176(R11), R12
MOVBQZX 184(R11), R13
CMPQ R13, $0x20
JBE skip_fill3
MOVQ 168(R11), AX
SUBQ $0x20, R13
SUBQ $0x04, AX
MOVQ 144(R11), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (AX)(R14*1), R14
MOVQ R13, CX
SHLQ CL, R14
MOVQ AX, 168(R11)
ORQ R14, R12
// exhausted = exhausted || (br3.off < 4)
CMPQ AX, $0x04
SETLT AL
ORB AL, DL
skip_fill3:
// val0 := br3.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br3.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
// val1 := br3.peekTopBits(peekBits)
MOVQ DI, CX
MOVQ R12, R14
SHRQ CL, R14
// v1 := table[val1&mask]
MOVW (R10)(R14*2), CX
// br3.advance(uint8(v1.entry))
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
MOVW AX, (R8)
// update the bitreader structure
MOVQ R12, 176(R11)
MOVB R13, 184(R11)
ADDQ $0x02, SI
TESTB DL, DL
JZ main_loop
MOVQ ctx+0(FP), AX
SUBQ 16(AX), SI
SHLQ $0x02, SI
MOVQ SI, 40(AX)
RET
// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8
XORQ DX, DX
// Preload values
MOVQ ctx+0(FP), CX
MOVBQZX 8(CX), DI
MOVQ 16(CX), BX
MOVQ 48(CX), SI
MOVQ 24(CX), R9
MOVQ 32(CX), R10
MOVQ (CX), R11
// Main loop
main_loop:
MOVQ BX, R8
CMPQ R8, SI
SETGE DL
// br0.fillFast32()
MOVQ 32(R11), R12
MOVBQZX 40(R11), R13
CMPQ R13, $0x20
JBE skip_fill0
MOVQ 24(R11), R14
SUBQ $0x20, R13
SUBQ $0x04, R14
MOVQ (R11), R15
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (R14)(R15*1), R15
MOVQ R13, CX
SHLQ CL, R15
MOVQ R14, 24(R11)
ORQ R15, R12
// exhausted = exhausted || (br0.off < 4)
CMPQ R14, $0x04
SETLT AL
ORB AL, DL
skip_fill0:
// val0 := br0.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br0.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
// val1 := br0.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v1 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br0.advance(uint8(v1.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// val2 := br0.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v2 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br0.advance(uint8(v2.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
// val3 := br0.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v3 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br0.advance(uint8(v3.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// these four writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
MOVL AX, (R8)
// update the bitreader structure
MOVQ R12, 32(R11)
MOVB R13, 40(R11)
ADDQ R9, R8
// br1.fillFast32()
MOVQ 80(R11), R12
MOVBQZX 88(R11), R13
CMPQ R13, $0x20
JBE skip_fill1
MOVQ 72(R11), R14
SUBQ $0x20, R13
SUBQ $0x04, R14
MOVQ 48(R11), R15
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (R14)(R15*1), R15
MOVQ R13, CX
SHLQ CL, R15
MOVQ R14, 72(R11)
ORQ R15, R12
// exhausted = exhausted || (br1.off < 4)
CMPQ R14, $0x04
SETLT AL
ORB AL, DL
skip_fill1:
// val0 := br1.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br1.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
// val1 := br1.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v1 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br1.advance(uint8(v1.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// val2 := br1.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v2 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br1.advance(uint8(v2.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
// val3 := br1.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v3 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br1.advance(uint8(v3.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// these four writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
MOVL AX, (R8)
// update the bitreader structure
MOVQ R12, 80(R11)
MOVB R13, 88(R11)
ADDQ R9, R8
// br2.fillFast32()
MOVQ 128(R11), R12
MOVBQZX 136(R11), R13
CMPQ R13, $0x20
JBE skip_fill2
MOVQ 120(R11), R14
SUBQ $0x20, R13
SUBQ $0x04, R14
MOVQ 96(R11), R15
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (R14)(R15*1), R15
MOVQ R13, CX
SHLQ CL, R15
MOVQ R14, 120(R11)
ORQ R15, R12
// exhausted = exhausted || (br2.off < 4)
CMPQ R14, $0x04
SETLT AL
ORB AL, DL
skip_fill2:
// val0 := br2.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br2.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
// val1 := br2.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v1 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br2.advance(uint8(v1.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// val2 := br2.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v2 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br2.advance(uint8(v2.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
// val3 := br2.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v3 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br2.advance(uint8(v3.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// these four writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
MOVL AX, (R8)
// update the bitreader structure
MOVQ R12, 128(R11)
MOVB R13, 136(R11)
ADDQ R9, R8
// br3.fillFast32()
MOVQ 176(R11), R12
MOVBQZX 184(R11), R13
CMPQ R13, $0x20
JBE skip_fill3
MOVQ 168(R11), R14
SUBQ $0x20, R13
SUBQ $0x04, R14
MOVQ 144(R11), R15
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (R14)(R15*1), R15
MOVQ R13, CX
SHLQ CL, R15
MOVQ R14, 168(R11)
ORQ R15, R12
// exhausted = exhausted || (br3.off < 4)
CMPQ R14, $0x04
SETLT AL
ORB AL, DL
skip_fill3:
// val0 := br3.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br3.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
// val1 := br3.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v1 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br3.advance(uint8(v1.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// val2 := br3.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v2 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br3.advance(uint8(v2.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
// val3 := br3.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v3 := table[val0&mask]
MOVW (R10)(R14*2), CX
// br3.advance(uint8(v3.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// these four writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
MOVL AX, (R8)
// update the bitreader structure
MOVQ R12, 176(R11)
MOVB R13, 184(R11)
ADDQ $0x04, BX
TESTB DL, DL
JZ main_loop
MOVQ ctx+0(FP), AX
SUBQ 16(AX), BX
SHLQ $0x02, BX
MOVQ BX, 40(AX)
RET
// func decompress1x_main_loop_amd64(ctx *decompress1xContext)
TEXT ·decompress1x_main_loop_amd64(SB), $0-8
MOVQ ctx+0(FP), CX
MOVQ 16(CX), DX
MOVQ 24(CX), BX
CMPQ BX, $0x04
JB error_max_decoded_size_exeeded
LEAQ (DX)(BX*1), BX
MOVQ (CX), SI
MOVQ (SI), R8
MOVQ 24(SI), R9
MOVQ 32(SI), R10
MOVBQZX 40(SI), R11
MOVQ 32(CX), SI
MOVBQZX 8(CX), DI
JMP loop_condition
main_loop:
// Check if we have room for 4 bytes in the output buffer
LEAQ 4(DX), CX
CMPQ CX, BX
JGE error_max_decoded_size_exeeded
// Decode 4 values
CMPQ R11, $0x20
JL bitReader_fillFast_1_end
SUBQ $0x20, R11
SUBQ $0x04, R9
MOVL (R8)(R9*1), R12
MOVQ R11, CX
SHLQ CL, R12
ORQ R12, R10
bitReader_fillFast_1_end:
MOVQ DI, CX
MOVQ R10, R12
SHRQ CL, R12
MOVW (SI)(R12*2), CX
MOVB CH, AL
MOVBQZX CL, CX
ADDQ CX, R11
SHLQ CL, R10
MOVQ DI, CX
MOVQ R10, R12
SHRQ CL, R12
MOVW (SI)(R12*2), CX
MOVB CH, AH
MOVBQZX CL, CX
ADDQ CX, R11
SHLQ CL, R10
BSWAPL AX
CMPQ R11, $0x20
JL bitReader_fillFast_2_end
SUBQ $0x20, R11
SUBQ $0x04, R9
MOVL (R8)(R9*1), R12
MOVQ R11, CX
SHLQ CL, R12
ORQ R12, R10
bitReader_fillFast_2_end:
MOVQ DI, CX
MOVQ R10, R12
SHRQ CL, R12
MOVW (SI)(R12*2), CX
MOVB CH, AH
MOVBQZX CL, CX
ADDQ CX, R11
SHLQ CL, R10
MOVQ DI, CX
MOVQ R10, R12
SHRQ CL, R12
MOVW (SI)(R12*2), CX
MOVB CH, AL
MOVBQZX CL, CX
ADDQ CX, R11
SHLQ CL, R10
BSWAPL AX
// Store the decoded values
MOVL AX, (DX)
ADDQ $0x04, DX
loop_condition:
CMPQ R9, $0x08
JGE main_loop
// Update ctx structure
MOVQ ctx+0(FP), AX
SUBQ 16(AX), DX
MOVQ DX, 40(AX)
MOVQ (AX), AX
MOVQ R9, 24(AX)
MOVQ R10, 32(AX)
MOVB R11, 40(AX)
RET
// Report error
error_max_decoded_size_exeeded:
MOVQ ctx+0(FP), AX
MOVQ $-1, CX
MOVQ CX, 40(AX)
RET
// func decompress1x_main_loop_bmi2(ctx *decompress1xContext)
// Requires: BMI2
TEXT ·decompress1x_main_loop_bmi2(SB), $0-8
MOVQ ctx+0(FP), CX
MOVQ 16(CX), DX
MOVQ 24(CX), BX
CMPQ BX, $0x04
JB error_max_decoded_size_exeeded
LEAQ (DX)(BX*1), BX
MOVQ (CX), SI
MOVQ (SI), R8
MOVQ 24(SI), R9
MOVQ 32(SI), R10
MOVBQZX 40(SI), R11
MOVQ 32(CX), SI
MOVBQZX 8(CX), DI
JMP loop_condition
main_loop:
// Check if we have room for 4 bytes in the output buffer
LEAQ 4(DX), CX
CMPQ CX, BX
JGE error_max_decoded_size_exeeded
// Decode 4 values
CMPQ R11, $0x20
JL bitReader_fillFast_1_end
SUBQ $0x20, R11
SUBQ $0x04, R9
MOVL (R8)(R9*1), CX
SHLXQ R11, CX, CX
ORQ CX, R10
bitReader_fillFast_1_end:
SHRXQ DI, R10, CX
MOVW (SI)(CX*2), CX
MOVB CH, AL
MOVBQZX CL, CX
ADDQ CX, R11
SHLXQ CX, R10, R10
SHRXQ DI, R10, CX
MOVW (SI)(CX*2), CX
MOVB CH, AH
MOVBQZX CL, CX
ADDQ CX, R11
SHLXQ CX, R10, R10
BSWAPL AX
CMPQ R11, $0x20
JL bitReader_fillFast_2_end
SUBQ $0x20, R11
SUBQ $0x04, R9
MOVL (R8)(R9*1), CX
SHLXQ R11, CX, CX
ORQ CX, R10
bitReader_fillFast_2_end:
SHRXQ DI, R10, CX
MOVW (SI)(CX*2), CX
MOVB CH, AH
MOVBQZX CL, CX
ADDQ CX, R11
SHLXQ CX, R10, R10
SHRXQ DI, R10, CX
MOVW (SI)(CX*2), CX
MOVB CH, AL
MOVBQZX CL, CX
ADDQ CX, R11
SHLXQ CX, R10, R10
BSWAPL AX
// Store the decoded values
MOVL AX, (DX)
ADDQ $0x04, DX
loop_condition:
CMPQ R9, $0x08
JGE main_loop
// Update ctx structure
MOVQ ctx+0(FP), AX
SUBQ 16(AX), DX
MOVQ DX, 40(AX)
MOVQ (AX), AX
MOVQ R9, 24(AX)
MOVQ R10, 32(AX)
MOVB R11, 40(AX)
RET
// Report error
error_max_decoded_size_exeeded:
MOVQ ctx+0(FP), AX
MOVQ $-1, CX
MOVQ CX, 40(AX)
RET